Diffstat (limited to 'src')
-rw-r--r--  src/gallium/drivers/nouveau/nouveau_class.h     | 1171
-rw-r--r--  src/gallium/drivers/nv50/Makefile               |   12
-rw-r--r--  src/gallium/drivers/nv50/SConscript             |    1
-rw-r--r--  src/gallium/drivers/nv50/nv50_formats.c         |  452
-rw-r--r--  src/gallium/drivers/nv50/nv50_miptree.c         |    3
-rw-r--r--  src/gallium/drivers/nv50/nv50_pc.c              |  618
-rw-r--r--  src/gallium/drivers/nv50/nv50_pc.h              |  461
-rw-r--r--  src/gallium/drivers/nv50/nv50_pc_emit.c         | 1180
-rw-r--r--  src/gallium/drivers/nv50/nv50_pc_optimize.c     | 1112
-rw-r--r--  src/gallium/drivers/nv50/nv50_pc_print.c        |  314
-rw-r--r--  src/gallium/drivers/nv50/nv50_pc_regalloc.c     |  943
-rw-r--r--  src/gallium/drivers/nv50/nv50_program.c         | 5111
-rw-r--r--  src/gallium/drivers/nv50/nv50_program.h         |  176
-rw-r--r--  src/gallium/drivers/nv50/nv50_push.c            |    2
-rw-r--r--  src/gallium/drivers/nv50/nv50_screen.c          |  108
-rw-r--r--  src/gallium/drivers/nv50/nv50_screen.h          |   13
-rw-r--r--  src/gallium/drivers/nv50/nv50_shader_state.c    |  626
-rw-r--r--  src/gallium/drivers/nv50/nv50_state.c           |   58
-rw-r--r--  src/gallium/drivers/nv50/nv50_state_validate.c  |   66
-rw-r--r--  src/gallium/drivers/nv50/nv50_tex.c             |   52
-rw-r--r--  src/gallium/drivers/nv50/nv50_texture.h         |    9
-rw-r--r--  src/gallium/drivers/nv50/nv50_tgsi_to_nc.c      | 1661
-rw-r--r--  src/gallium/drivers/nv50/nv50_vbo.c             |  101
23 files changed, 9227 insertions, 5023 deletions
diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h
index adfdd37b1b..f44979e562 100644
--- a/src/gallium/drivers/nouveau/nouveau_class.h
+++ b/src/gallium/drivers/nouveau/nouveau_class.h
@@ -735,6 +735,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c
+#define NVC0_MEMORY_TO_MEMORY_FORMAT 0x00009039
+
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_SERIALIZE 0x00000110
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_IN 0x00000204
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_IN 0x00000208
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_IN 0x0000020c
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_IN 0x00000210
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Z 0x00000214
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_MODE_OUT 0x00000220
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_PITCH_OUT 0x00000224
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_HEIGHT_OUT 0x00000228
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_DEPTH_OUT 0x0000022c
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Z 0x00000230
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x00000238
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_LOW 0x0000023c
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC 0x00000300
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_PUSH (1 << 0)
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_IN (1 << 4)
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_OUT (1 << 8)
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_NOTIFY (1 << 13)
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_INC_SHIFT 20
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_INC_MASK 0x00f00000
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_DATA 0x00000304
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x0000030c
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_LOW 0x00000310
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY_ADDRESS_HIGH 0x0000032c
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY_ADDRESS_LOW 0x00000330
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000334
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_X 0x00000344
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN_Y 0x00000348
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_X 0x0000034c
+#define NVC0_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT_Y 0x00000350
+
+
#define NV01_MEMORY_LOCAL_BANKED 0x0000003d
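
For orientation, a minimal sketch of how the new NVC0 M2MF methods could be strung together for a linear-to-linear copy. The push-buffer helpers (BEGIN_RING/OUT_RING) and the chan/m2mf objects are placeholders for whatever submission interface the driver uses, not part of this patch; only the method offsets and EXEC bits come from the defines above.

/* Hypothetical linear buffer copy via the NVC0 M2MF class (sketch only). */
static void
nvc0_m2mf_copy_linear(struct nouveau_channel *chan, struct nouveau_grobj *m2mf,
                      uint64_t dst, uint64_t src, unsigned size)
{
   BEGIN_RING(chan, m2mf, NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
   OUT_RING  (chan, (uint32_t)(src >> 32));
   OUT_RING  (chan, (uint32_t)src);
   BEGIN_RING(chan, m2mf, NVC0_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH, 2);
   OUT_RING  (chan, (uint32_t)(dst >> 32));
   OUT_RING  (chan, (uint32_t)dst);
   BEGIN_RING(chan, m2mf, NVC0_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 2);
   OUT_RING  (chan, size);
   OUT_RING  (chan, 1); /* LINE_COUNT: a single line of `size' bytes */
   BEGIN_RING(chan, m2mf, NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC, 1);
   OUT_RING  (chan, NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_IN |
                    NVC0_MEMORY_TO_MEMORY_FORMAT_EXEC_LINEAR_OUT);
}
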
@@ -4507,6 +4546,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV20TCL_VTXFMT_TYPE_SHIFT 0
#define NV20TCL_VTXFMT_TYPE_MASK 0x0000000f
#define NV20TCL_VTXFMT_TYPE_FLOAT 0x00000002
+#define NV20TCL_VTXFMT_TYPE_HALF 0x00000003
#define NV20TCL_VTXFMT_TYPE_UBYTE 0x00000004
#define NV20TCL_VTXFMT_TYPE_USHORT 0x00000005
#define NV20TCL_VTXFMT_SIZE_SHIFT 4
@@ -6990,6 +7030,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV40TCL_VTXFMT_TYPE_SHIFT 0
#define NV40TCL_VTXFMT_TYPE_MASK 0x0000000f
#define NV40TCL_VTXFMT_TYPE_FLOAT 0x00000002
+#define NV40TCL_VTXFMT_TYPE_HALF 0x00000003
#define NV40TCL_VTXFMT_TYPE_UBYTE 0x00000004
#define NV40TCL_VTXFMT_TYPE_USHORT 0x00000005
#define NV40TCL_VTXFMT_SIZE_SHIFT 4
@@ -7699,7 +7740,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_DMA_TIC 0x000001a0
#define NV50TCL_DMA_TEXTURE 0x000001a4
#define NV50TCL_DMA_STRMOUT 0x000001a8
-#define NV50TCL_DMA_UNK01AC 0x000001ac
+#define NV50TCL_DMA_CLIPID 0x000001ac
#define NV50TCL_DMA_COLOR(x) (0x000001c0+((x)*4))
#define NV50TCL_DMA_COLOR__SIZE 0x00000008
#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32))
@@ -7916,8 +7957,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_DEPTH_RANGE_FAR__SIZE 0x00000010
#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8))
#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT 0
+#define NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_MASK 0x0000ffff
+#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT 16
+#define NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_MASK 0xffff0000
#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8))
#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_SHIFT 0
+#define NV50TCL_VIEWPORT_CLIP_VERT_MIN_MASK 0x0000ffff
+#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_SHIFT 16
+#define NV50TCL_VIEWPORT_CLIP_VERT_MAX_MASK 0xffff0000
+#define NV50TCL_CLIPID_REGION_HORIZ(x) (0x00000d40+((x)*8))
+#define NV50TCL_CLIPID_REGION_HORIZ__SIZE 0x00000004
+#define NV50TCL_CLIPID_REGION_VERT(x) (0x00000d44+((x)*8))
+#define NV50TCL_CLIPID_REGION_VERT__SIZE 0x00000004
#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74
#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78
#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4))
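
The new MIN/MAX shift and mask pairs pack both bounds of a clip rectangle edge into one word. A tiny illustrative helper (the function name is made up; the VERT methods use the same layout):

/* Pack a [min, max] range as expected by VIEWPORT_CLIP_HORIZ/VERT. */
static inline uint32_t
nv50_clip_range(uint16_t min, uint16_t max)
{
   return ((uint32_t)min << NV50TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT) |
          ((uint32_t)max << NV50TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT);
}
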
@@ -7975,14 +8028,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_GP_ADDRESS_LOW 0x00000f74
#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c
#define NV50TCL_VP_ADDRESS_LOW 0x00000f80
-#define NV50TCL_UNK0F84_ADDRESS_HIGH 0x00000f84
-#define NV50TCL_UNK0F84_ADDRESS_LOW 0x00000f88
+#define NV50TCL_VERTEX_RUNOUT_HIGH 0x00000f84
+#define NV50TCL_VERTEX_RUNOUT_LOW 0x00000f88
#define NV50TCL_DEPTH_BOUNDS(x) (0x00000f9c+((x)*4))
#define NV50TCL_DEPTH_BOUNDS__SIZE 0x00000002
#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4
#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8
#define NV50TCL_MSAA_MASK(x) (0x00000fbc+((x)*4))
#define NV50TCL_MSAA_MASK__SIZE 0x00000004
+#define NV50TCL_CLIPID_ADDRESS_HIGH 0x00000fcc
+#define NV50TCL_CLIPID_ADDRESS_LOW 0x00000fd0
#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0
#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4
#define NV50TCL_ZETA_FORMAT 0x00000fe8
@@ -8112,37 +8167,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001
-#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301
-#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
-#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305
-#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307
-#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308
-#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
-#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003
-#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00004000
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00004001
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00004300
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00004302
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00004304
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00004306
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00004308
+#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x0000c001
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x0000c003
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR 0x0000c900
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA 0x0000c902
+#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348
-#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001
-#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301
-#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303
-#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305
-#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307
-#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308
-#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002
-#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003
-#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00004000
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00004001
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00004300
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00004302
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00004304
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00004306
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00004308
+#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x0000c001
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x0000c003
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_COLOR 0x0000c900
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50TCL_BLEND_FUNC_DST_RGB_SRC1_ALPHA 0x0000c902
+#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007
@@ -8150,37 +8213,45 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00000001
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00000300
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00000302
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00000304
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00000306
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00000308
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x00008001
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x00008003
-#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00004000
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00004001
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00004300
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00004302
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00004304
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00004306
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00004308
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x0000c001
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x0000c003
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_COLOR 0x0000c900
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC1_ALPHA 0x0000c902
+#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00000001
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00000300
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00000302
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00000304
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00000306
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00000307
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00000308
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x00008001
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x00008003
-#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00004000
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00004001
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00004300
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00004302
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00004304
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00004306
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00004308
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x0000c001
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x0000c003
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_COLOR 0x0000c900
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA 0x0000c902
+#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4))
#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008
#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001380
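
The blend factor values change from raw GL-enum-style codes to the hardware's own encoding: for the classic factors the new code is the old value with bit 14 (0x4000) set, while the new dual-source SRC1 factors get their own codes in the 0xc900 range. A hypothetical conversion helper, valid for the classic factors only:

/* Map a classic GL blend factor enum to the hardware code used by the
 * NV50TCL_BLEND_FUNC_* methods above.  Not valid for the SRC1 factors,
 * which have no GL-enum-shaped counterpart in this table. */
static inline uint32_t
nv50_blend_factor(unsigned gl_factor)
{
   return 0x4000 | gl_factor;
}
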
@@ -8239,6 +8310,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_FP_START_ID 0x00001414
#define NV50TCL_GP_VERTEX_OUTPUT_COUNT 0x00001420
#define NV50TCL_VB_ELEMENT_BASE 0x00001434
+#define NV50TCL_INSTANCE_BASE 0x00001438
#define NV50TCL_CODE_CB_FLUSH 0x00001440
#define NV50TCL_BIND_TSC(x) (0x00001444+((x)*8))
#define NV50TCL_BIND_TSC__SIZE 0x00000003
@@ -8256,6 +8328,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_BIND_TIC_TIC_MASK 0x7ffffe00
#define NV50TCL_STRMOUT_MAP(x) (0x00001480+((x)*4))
#define NV50TCL_STRMOUT_MAP__SIZE 0x00000020
+#define NV50TCL_CLIPID_HEIGHT 0x00001504
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE 0x00001510
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_0 (1 << 0)
#define NV50TCL_VP_CLIP_DISTANCE_ENABLE_1 (1 << 1)
@@ -8340,7 +8413,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_GP_BUILTIN_RESULT_EN 0x000015cc
#define NV50TCL_GP_BUILTIN_RESULT_EN_VPORT_IDX (1 << 0)
#define NV50TCL_GP_BUILTIN_RESULT_EN_LAYER_IDX (1 << 16)
-#define NV50TCL_MULTISAMPLE_SAMPLES_LOG2 0x000015d0
+#define NV50TCL_MULTISAMPLE_MODE 0x000015d0
+#define NV50TCL_MULTISAMPLE_MODE_1X 0x00000000
+#define NV50TCL_MULTISAMPLE_MODE_2XMS 0x00000001
+#define NV50TCL_MULTISAMPLE_MODE_4XMS 0x00000002
+#define NV50TCL_MULTISAMPLE_MODE_8XMS 0x00000004
+#define NV50TCL_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
+#define NV50TCL_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
+#define NV50TCL_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
#define NV50TCL_VERTEX_BEGIN 0x000015dc
#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000
#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001
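
A small sketch of picking one of the new MULTISAMPLE_MODE values from a plain MSAA sample count; the coverage-sample modes are ignored and the helper name is illustrative:

static uint32_t
nv50_msaa_mode(unsigned nr_samples)
{
   switch (nr_samples) {
   case 2:  return NV50TCL_MULTISAMPLE_MODE_2XMS;
   case 4:  return NV50TCL_MULTISAMPLE_MODE_4XMS;
   case 8:  return NV50TCL_MULTISAMPLE_MODE_8XMS;
   default: return NV50TCL_MULTISAMPLE_MODE_1X;
   }
}
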
@@ -8356,6 +8436,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY 0x0000000b
#define NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY 0x0000000c
#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NV50TCL_VERTEX_BEGIN_PATCHES 0x0000000e
#define NV50TCL_VERTEX_END 0x000015e0
#define NV50TCL_EDGEFLAG_ENABLE 0x000015e4
#define NV50TCL_VB_ELEMENT_U32 0x000015e8
@@ -8369,6 +8450,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
#define NV50TCL_VB_ELEMENT_U16_I1_SHIFT 16
#define NV50TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
+#define NV50TCL_VERTEX_BASE_HIGH 0x000015f4
+#define NV50TCL_VERTEX_BASE_LOW 0x000015f8
#define NV50TCL_VERTEX_DATA 0x00001640
#define NV50TCL_PRIM_RESTART_ENABLE 0x00001644
#define NV50TCL_PRIM_RESTART_INDEX 0x00001648
@@ -8754,7 +8837,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_VIEWPORT_TRANSFORM_EN 0x0000192c
#define NV50TCL_VIEW_VOLUME_CLIP_CTRL 0x0000193c
#define NV50TCL_VIEWPORT_CLIP_RECTS_EN 0x0000194c
+#define NV50TCL_VIEWPORT_CLIP_MODE 0x00001950
+#define NV50TCL_VIEWPORT_CLIP_MODE_INCLUDE 0x00000000
+#define NV50TCL_VIEWPORT_CLIP_MODE_EXCLUDE 0x00000001
+#define NV50TCL_VIEWPORT_CLIP_MODE_UNKNOWN 0x00000002
#define NV50TCL_FP_CTRL_UNK196C 0x0000196c
+#define NV50TCL_CLIPID_ENABLE 0x0000197c
+#define NV50TCL_CLIPID_WIDTH 0x00001980
+#define NV50TCL_CLIPID_ID 0x00001984
#define NV50TCL_FP_INTERPOLANT_CTRL 0x00001988
#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_SHIFT 24
#define NV50TCL_FP_INTERPOLANT_CTRL_UMASK_MASK 0xff000000
@@ -8855,19 +8945,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8 0x00c00000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16 0x00d80000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8 0x00e80000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 0x01800000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SHIFT 25
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK 0x7e000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x7e000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x24000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x12000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x5a000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x6c000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x48000000
-#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x36000000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_MASK 0x0e000000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT 0x0e000000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM 0x02000000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM 0x04000000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED 0x0a000000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED 0x0c000000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT 0x08000000
+#define NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT 0x06000000
#define NV50TCL_VERTEX_ARRAY_ATTRIB_BGRA (1 << 31)
#define NV50TCL_QUERY_ADDRESS_HIGH 0x00001b00
#define NV50TCL_QUERY_ADDRESS_LOW 0x00001b04
-#define NV50TCL_QUERY_COUNTER 0x00001b08
+#define NV50TCL_QUERY_SEQUENCE 0x00001b08
#define NV50TCL_QUERY_GET 0x00001b0c
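
With the TYPE field renumbered to the 3-bit encoding at shift 25, a vertex attribute word combines one FORMAT value with one TYPE value. For example, the new 2_10_10_10 format as unsigned normalized (the other bits of the real method are omitted):

/* Illustrative value only. */
uint32_t attrib_2_10_10_10_unorm =
   NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_2_10_10_10 |
   NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
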
@@ -9022,4 +9113,938 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_COMPUTE_USER_PARAM__SIZE 0x00000040
+#define NVC0TCL 0x00009097
+
+#define NVC0TCL_SEMAPHORE_ADDRESS_HIGH 0x00000010
+#define NVC0TCL_SEMAPHORE_ADDRESS_LOW 0x00000014
+#define NVC0TCL_NOP 0x00000100
+#define NVC0TCL_NOTIFY_ADDRESS_HIGH 0x00000104
+#define NVC0TCL_NOTIFY_ADDRESS_LOW 0x00000108
+#define NVC0TCL_NOTIFY 0x0000010c
+#define NVC0TCL_SERIALIZE 0x00000110
+#define NVC0TCL_EARLY_FRAGMENT_TESTS 0x00000210
+#define NVC0TCL_TESS_MODE 0x00000320
+#define NVC0TCL_TESS_MODE_PRIM_SHIFT 0
+#define NVC0TCL_TESS_MODE_PRIM_MASK 0x0000000f
+#define NVC0TCL_TESS_MODE_PRIM_ISOLINES 0x00000000
+#define NVC0TCL_TESS_MODE_PRIM_TRIANGLES 0x00000001
+#define NVC0TCL_TESS_MODE_PRIM_QUADS 0x00000002
+#define NVC0TCL_TESS_MODE_SPACING_SHIFT 4
+#define NVC0TCL_TESS_MODE_SPACING_MASK 0x000000f0
+#define NVC0TCL_TESS_MODE_SPACING_EQUAL 0x00000000
+#define NVC0TCL_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010
+#define NVC0TCL_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020
+#define NVC0TCL_TESS_MODE_CW (1 << 8)
+#define NVC0TCL_TESS_MODE_CONNECTED (1 << 9)
+#define NVC0TCL_TESS_LEVEL_OUTER(x) (0x00000324+((x)*4))
+#define NVC0TCL_TESS_LEVEL_OUTER__SIZE 0x00000004
+#define NVC0TCL_TESS_LEVEL_INNER(x) (0x00000334+((x)*4))
+#define NVC0TCL_TESS_LEVEL_INNER__SIZE 0x00000002
+#define NVC0TCL_RASTERIZE_ENABLE 0x0000037c
+#define NVC0TCL_TFB_BUFFER_ENABLE(x) (0x00000380+((x)*32))
+#define NVC0TCL_TFB_BUFFER_ENABLE__SIZE 0x00000004
+#define NVC0TCL_TFB_ADDRESS_HIGH(x) (0x00000384+((x)*32))
+#define NVC0TCL_TFB_ADDRESS_HIGH__SIZE 0x00000004
+#define NVC0TCL_TFB_ADDRESS_LOW(x) (0x00000388+((x)*32))
+#define NVC0TCL_TFB_ADDRESS_LOW__SIZE 0x00000004
+#define NVC0TCL_TFB_BUFFER_SIZE(x) (0x0000038c+((x)*32))
+#define NVC0TCL_TFB_BUFFER_SIZE__SIZE 0x00000004
+#define NVC0TCL_TFB_PRIMITIVE_ID(x) (0x00000390+((x)*32))
+#define NVC0TCL_TFB_PRIMITIVE_ID__SIZE 0x00000004
+#define NVC0TCL_TFB_UNK0700(x) (0x00000700+((x)*16))
+#define NVC0TCL_TFB_UNK0700__SIZE 0x00000004
+#define NVC0TCL_TFB_VARYING_COUNT(x) (0x00000704+((x)*16))
+#define NVC0TCL_TFB_VARYING_COUNT__SIZE 0x00000004
+#define NVC0TCL_TFB_BUFFER_STRIDE(x) (0x00000708+((x)*16))
+#define NVC0TCL_TFB_BUFFER_STRIDE__SIZE 0x00000004
+#define NVC0TCL_TFB_ENABLE 0x00000744
+#define NVC0TCL_LOCAL_BASE 0x0000077c
+#define NVC0TCL_UNK0790_ADDRESS_HIGH 0x00000790
+#define NVC0TCL_UNK0790_ADDRESS_LOW 0x00000794
+#define NVC0TCL_RT_ADDRESS_HIGH(x) (0x00000800+((x)*32))
+#define NVC0TCL_RT_ADDRESS_HIGH__SIZE 0x00000008
+#define NVC0TCL_RT_ADDRESS_LOW(x) (0x00000804+((x)*32))
+#define NVC0TCL_RT_ADDRESS_LOW__SIZE 0x00000008
+#define NVC0TCL_RT_HORIZ(x) (0x00000808+((x)*32))
+#define NVC0TCL_RT_HORIZ__SIZE 0x00000008
+#define NVC0TCL_RT_VERT(x) (0x0000080c+((x)*32))
+#define NVC0TCL_RT_VERT__SIZE 0x00000008
+#define NVC0TCL_RT_FORMAT(x) (0x00000810+((x)*32))
+#define NVC0TCL_RT_FORMAT__SIZE 0x00000008
+#define NVC0TCL_RT_FORMAT_R32G32B32A32_FLOAT 0x000000c0
+#define NVC0TCL_RT_FORMAT_R32G32B32A32_SINT 0x000000c1
+#define NVC0TCL_RT_FORMAT_R32G32B32A32_UINT 0x000000c2
+#define NVC0TCL_RT_FORMAT_R32G32B32X32_FLOAT 0x000000c3
+#define NVC0TCL_RT_FORMAT_R16G16B16A16_UNORM 0x000000c6
+#define NVC0TCL_RT_FORMAT_R16G16B16A16_SNORM 0x000000c7
+#define NVC0TCL_RT_FORMAT_R16G16B16A16_SINT 0x000000c8
+#define NVC0TCL_RT_FORMAT_R16G16B16A16_UINT 0x000000c9
+#define NVC0TCL_RT_FORMAT_R16G16B16A16_FLOAT 0x000000ca
+#define NVC0TCL_RT_FORMAT_R32G32_FLOAT 0x000000cb
+#define NVC0TCL_RT_FORMAT_R32G32_SINT 0x000000cc
+#define NVC0TCL_RT_FORMAT_R32G32_UINT 0x000000cd
+#define NVC0TCL_RT_FORMAT_R16G16B16X16_FLOAT 0x000000ce
+#define NVC0TCL_RT_FORMAT_A8R8G8B8_UNORM 0x000000cf
+#define NVC0TCL_RT_FORMAT_A8R8G8B8_SRGB 0x000000d0
+#define NVC0TCL_RT_FORMAT_A2B10G10R10_UNORM 0x000000d1
+#define NVC0TCL_RT_FORMAT_A2B10G10R10_UINT 0x000000d2
+#define NVC0TCL_RT_FORMAT_A8B8G8R8_UNORM 0x000000d5
+#define NVC0TCL_RT_FORMAT_A8B8G8R8_SRGB 0x000000d6
+#define NVC0TCL_RT_FORMAT_A8B8G8R8_SNORM 0x000000d7
+#define NVC0TCL_RT_FORMAT_A8B8G8R8_SINT 0x000000d8
+#define NVC0TCL_RT_FORMAT_A8B8G8R8_UINT 0x000000d9
+#define NVC0TCL_RT_FORMAT_R16G16_UNORM 0x000000da
+#define NVC0TCL_RT_FORMAT_R16G16_SNORM 0x000000db
+#define NVC0TCL_RT_FORMAT_R16G16_SINT 0x000000dc
+#define NVC0TCL_RT_FORMAT_R16G16_UINT 0x000000dd
+#define NVC0TCL_RT_FORMAT_R16G16_FLOAT 0x000000de
+#define NVC0TCL_RT_FORMAT_A2R10G10B10_UNORM 0x000000df
+#define NVC0TCL_RT_FORMAT_B10G11R11_FLOAT 0x000000e0
+#define NVC0TCL_RT_FORMAT_R32_FLOAT 0x000000e5
+#define NVC0TCL_RT_FORMAT_X8R8G8B8_UNORM 0x000000e6
+#define NVC0TCL_RT_FORMAT_X8R8G8B8_SRGB 0x000000e7
+#define NVC0TCL_RT_FORMAT_R5G6B5_UNORM 0x000000e8
+#define NVC0TCL_RT_FORMAT_A1R5G5B5_UNORM 0x000000e9
+#define NVC0TCL_RT_FORMAT_R8G8_UNORM 0x000000ea
+#define NVC0TCL_RT_FORMAT_R8G8_SNORM 0x000000eb
+#define NVC0TCL_RT_FORMAT_R8G8_SINT 0x000000ec
+#define NVC0TCL_RT_FORMAT_R8G8_UINT 0x000000ed
+#define NVC0TCL_RT_FORMAT_R16_UNORM 0x000000ee
+#define NVC0TCL_RT_FORMAT_R16_SNORM 0x000000ef
+#define NVC0TCL_RT_FORMAT_R16_SINT 0x000000f0
+#define NVC0TCL_RT_FORMAT_R16_UINT 0x000000f1
+#define NVC0TCL_RT_FORMAT_R16_FLOAT 0x000000f2
+#define NVC0TCL_RT_FORMAT_R8_UNORM 0x000000f3
+#define NVC0TCL_RT_FORMAT_R8_SNORM 0x000000f4
+#define NVC0TCL_RT_FORMAT_R8_SINT 0x000000f5
+#define NVC0TCL_RT_FORMAT_R8_UINT 0x000000f6
+#define NVC0TCL_RT_FORMAT_A8_UNORM 0x000000f7
+#define NVC0TCL_RT_FORMAT_X1R5G5B5_UNORM 0x000000f8
+#define NVC0TCL_RT_FORMAT_X8B8G8R8_UNORM 0x000000f9
+#define NVC0TCL_RT_FORMAT_X8B8G8R8_SRGB 0x000000fa
+#define NVC0TCL_RT_TILE_MODE(x) (0x00000814+((x)*32))
+#define NVC0TCL_RT_TILE_MODE__SIZE 0x00000008
+#define NVC0TCL_RT_ARRAY_MODE(x) (0x00000818+((x)*32))
+#define NVC0TCL_RT_ARRAY_MODE__SIZE 0x00000008
+#define NVC0TCL_RT_ARRAY_MODE_LAYERS_SHIFT 0
+#define NVC0TCL_RT_ARRAY_MODE_LAYERS_MASK 0x0000ffff
+#define NVC0TCL_RT_ARRAY_MODE_VOLUME (1 << 16)
+#define NVC0TCL_RT_LAYER_STRIDE(x) (0x0000081c+((x)*32))
+#define NVC0TCL_RT_LAYER_STRIDE__SIZE 0x00000008
+#define NVC0TCL_VIEWPORT_SCALE_X(x) (0x00000a00+((x)*32))
+#define NVC0TCL_VIEWPORT_SCALE_X__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_SCALE_Y(x) (0x00000a04+((x)*32))
+#define NVC0TCL_VIEWPORT_SCALE_Y__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_SCALE_Z(x) (0x00000a08+((x)*32))
+#define NVC0TCL_VIEWPORT_SCALE_Z__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_TRANSLATE_X(x) (0x00000a0c+((x)*32))
+#define NVC0TCL_VIEWPORT_TRANSLATE_X__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_TRANSLATE_Y(x) (0x00000a10+((x)*32))
+#define NVC0TCL_VIEWPORT_TRANSLATE_Y__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_TRANSLATE_Z(x) (0x00000a14+((x)*32))
+#define NVC0TCL_VIEWPORT_TRANSLATE_Z__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_HORIZ(x) (0x00000c00+((x)*16))
+#define NVC0TCL_VIEWPORT_HORIZ__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_HORIZ_X_SHIFT 0
+#define NVC0TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff
+#define NVC0TCL_VIEWPORT_HORIZ_W_SHIFT 16
+#define NVC0TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000
+#define NVC0TCL_VIEWPORT_VERT(x) (0x00000c04+((x)*16))
+#define NVC0TCL_VIEWPORT_VERT__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_VERT_Y_SHIFT 0
+#define NVC0TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff
+#define NVC0TCL_VIEWPORT_VERT_H_SHIFT 16
+#define NVC0TCL_VIEWPORT_VERT_H_MASK 0xffff0000
+#define NVC0TCL_DEPTH_RANGE_NEAR(x) (0x00000c08+((x)*16))
+#define NVC0TCL_DEPTH_RANGE_NEAR__SIZE 0x00000010
+#define NVC0TCL_DEPTH_RANGE_FAR(x) (0x00000c0c+((x)*16))
+#define NVC0TCL_DEPTH_RANGE_FAR__SIZE 0x00000010
+#define NVC0TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8))
+#define NVC0TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008
+#define NVC0TCL_VIEWPORT_CLIP_HORIZ_MIN_SHIFT 0
+#define NVC0TCL_VIEWPORT_CLIP_HORIZ_MIN_MASK 0x0000ffff
+#define NVC0TCL_VIEWPORT_CLIP_HORIZ_MAX_SHIFT 16
+#define NVC0TCL_VIEWPORT_CLIP_HORIZ_MAX_MASK 0xffff0000
+#define NVC0TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8))
+#define NVC0TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008
+#define NVC0TCL_VIEWPORT_CLIP_VERT_MIN_SHIFT 0
+#define NVC0TCL_VIEWPORT_CLIP_VERT_MIN_MASK 0x0000ffff
+#define NVC0TCL_VIEWPORT_CLIP_VERT_MAX_SHIFT 16
+#define NVC0TCL_VIEWPORT_CLIP_VERT_MAX_MASK 0xffff0000
+#define NVC0TCL_CLIPID_REGION_HORIZ(x) (0x00000d40+((x)*8))
+#define NVC0TCL_CLIPID_REGION_HORIZ__SIZE 0x00000004
+#define NVC0TCL_CLIPID_REGION_VERT(x) (0x00000d44+((x)*8))
+#define NVC0TCL_CLIPID_REGION_VERT__SIZE 0x00000004
+#define NVC0TCL_VERTEX_BUFFER_FIRST 0x00000d74
+#define NVC0TCL_VERTEX_BUFFER_COUNT 0x00000d78
+#define NVC0TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4))
+#define NVC0TCL_CLEAR_COLOR__SIZE 0x00000004
+#define NVC0TCL_CLEAR_DEPTH 0x00000d90
+#define NVC0TCL_STACK_ADDRESS_HIGH 0x00000d94
+#define NVC0TCL_STACK_ADDRESS_LOW 0x00000d98
+#define NVC0TCL_STACK_SIZE_LOG 0x00000d9c
+#define NVC0TCL_CLEAR_STENCIL 0x00000da0
+#define NVC0TCL_POLYGON_SMOOTH_ENABLE 0x00000db4
+#define NVC0TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+#define NVC0TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+#define NVC0TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+#define NVC0TCL_PATCH_VERTICES 0x00000dcc
+#define NVC0TCL_WATCHDOG_TIMER 0x00000de4
+#define NVC0TCL_WINDOW_OFFSET_X 0x00000df8
+#define NVC0TCL_WINDOW_OFFSET_Y 0x00000dfc
+#define NVC0TCL_SCISSOR_ENABLE(x) (0x00000e00+((x)*16))
+#define NVC0TCL_SCISSOR_ENABLE__SIZE 0x00000010
+#define NVC0TCL_SCISSOR_HORIZ(x) (0x00000e04+((x)*16))
+#define NVC0TCL_SCISSOR_HORIZ__SIZE 0x00000010
+#define NVC0TCL_SCISSOR_HORIZ_MIN_SHIFT 0
+#define NVC0TCL_SCISSOR_HORIZ_MIN_MASK 0x0000ffff
+#define NVC0TCL_SCISSOR_HORIZ_MAX_SHIFT 16
+#define NVC0TCL_SCISSOR_HORIZ_MAX_MASK 0xffff0000
+#define NVC0TCL_SCISSOR_VERT(x) (0x00000e08+((x)*16))
+#define NVC0TCL_SCISSOR_VERT__SIZE 0x00000010
+#define NVC0TCL_SCISSOR_VERT_MIN_SHIFT 0
+#define NVC0TCL_SCISSOR_VERT_MIN_MASK 0x0000ffff
+#define NVC0TCL_SCISSOR_VERT_MAX_SHIFT 16
+#define NVC0TCL_SCISSOR_VERT_MAX_MASK 0xffff0000
+#define NVC0TCL_LOCAL_WARPS_LOG_ALLOC 0x00000f44
+#define NVC0TCL_LOCAL_WARPS_NO_CLAMP 0x00000f48
+#define NVC0TCL_STACK_WARPS_LOG_ALLOC 0x00000f4c
+#define NVC0TCL_STACK_WARPS_NO_CLAMP 0x00000f50
+#define NVC0TCL_STENCIL_BACK_FUNC_REF 0x00000f54
+#define NVC0TCL_STENCIL_BACK_MASK 0x00000f58
+#define NVC0TCL_STENCIL_BACK_FUNC_MASK 0x00000f5c
+#define NVC0TCL_VERTEX_RUNOUT_HIGH 0x00000f84
+#define NVC0TCL_VERTEX_RUNOUT_LOW 0x00000f88
+#define NVC0TCL_DEPTH_BOUNDS(x) (0x00000f9c+((x)*4))
+#define NVC0TCL_DEPTH_BOUNDS__SIZE 0x00000002
+#define NVC0TCL_MSAA_MASK(x) (0x00000fbc+((x)*4))
+#define NVC0TCL_MSAA_MASK__SIZE 0x00000004
+#define NVC0TCL_CLIPID_ADDRESS_HIGH 0x00000fcc
+#define NVC0TCL_CLIPID_ADDRESS_LOW 0x00000fd0
+#define NVC0TCL_ZETA_ADDRESS_HIGH 0x00000fe0
+#define NVC0TCL_ZETA_ADDRESS_LOW 0x00000fe4
+#define NVC0TCL_ZETA_FORMAT 0x00000fe8
+#define NVC0TCL_ZETA_FORMAT_Z32_FLOAT 0x0000000a
+#define NVC0TCL_ZETA_FORMAT_Z16_UNORM 0x00000013
+#define NVC0TCL_ZETA_FORMAT_Z24S8_UNORM 0x00000014
+#define NVC0TCL_ZETA_FORMAT_X8Z24_UNORM 0x00000015
+#define NVC0TCL_ZETA_FORMAT_S8Z24_UNORM 0x00000016
+#define NVC0TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM 0x00000019
+#define NVC0TCL_ZETA_TILE_MODE 0x00000fec
+#define NVC0TCL_ZETA_LAYER_STRIDE 0x00000ff0
+#define NVC0TCL_SCREEN_SCISSOR_HORIZ 0x00000ff4
+#define NVC0TCL_SCREEN_SCISSOR_HORIZ_W_SHIFT 16
+#define NVC0TCL_SCREEN_SCISSOR_HORIZ_W_MASK 0xffff0000
+#define NVC0TCL_SCREEN_SCISSOR_HORIZ_X_SHIFT 0
+#define NVC0TCL_SCREEN_SCISSOR_HORIZ_X_MASK 0x0000ffff
+#define NVC0TCL_SCREEN_SCISSOR_VERT 0x00000ff8
+#define NVC0TCL_SCREEN_SCISSOR_VERT_H_SHIFT 16
+#define NVC0TCL_SCREEN_SCISSOR_VERT_H_MASK 0xffff0000
+#define NVC0TCL_SCREEN_SCISSOR_VERT_Y_SHIFT 0
+#define NVC0TCL_SCREEN_SCISSOR_VERT_Y_MASK 0x0000ffff
+#define NVC0TCL_VTX_ATTR_DEFINE 0x0000114c
+#define NVC0TCL_VTX_ATTR_DEFINE_ATTR_SHIFT 0
+#define NVC0TCL_VTX_ATTR_DEFINE_ATTR_MASK 0x0000003f
+#define NVC0TCL_VTX_ATTR_DEFINE_COMP_SHIFT 8
+#define NVC0TCL_VTX_ATTR_DEFINE_COMP_MASK 0x00000f00
+#define NVC0TCL_VTX_ATTR_DEFINE_SIZE_SHIFT 12
+#define NVC0TCL_VTX_ATTR_DEFINE_SIZE_MASK 0x0000f000
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_SHIFT 16
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_MASK 0x000f0000
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
+#define NVC0TCL_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
+#define NVC0TCL_VTX_ATTR_DATA(x) (0x00001150+((x)*4))
+#define NVC0TCL_VTX_ATTR_DATA__SIZE 0x00000004
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT(x) (0x00001160+((x)*4))
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT__SIZE 0x00000020
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_BUFFER_SHIFT 0
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_BUFFER_MASK 0x0000003f
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_CONST (1 << 6)
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_OFFSET_SHIFT 7
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_OFFSET_MASK 0x001fff80
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_SHIFT 21
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_MASK 0x07e00000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_32_32_32_32 0x00200000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_32_32_32 0x00400000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_16_16_16_16 0x00600000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_32_32 0x00800000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_16_16_16 0x00a00000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_8_8_8_8 0x01400000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_16_16 0x01e00000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_32 0x02400000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_8_8_8 0x02600000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_8_8 0x03000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_16 0x03600000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_8 0x03a00000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_FORMAT_2_10_10_10 0x06000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_SHIFT 27
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_MASK 0x78000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000
+#define NVC0TCL_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000
+#define NVC0TCL_RT_CONTROL 0x0000121c
+#define NVC0TCL_RT_CONTROL_COUNT_SHIFT 0
+#define NVC0TCL_RT_CONTROL_COUNT_MASK 0x0000000f
+#define NVC0TCL_RT_CONTROL_MAP0_SHIFT 4
+#define NVC0TCL_RT_CONTROL_MAP0_MASK 0x00000070
+#define NVC0TCL_RT_CONTROL_MAP1_SHIFT 7
+#define NVC0TCL_RT_CONTROL_MAP1_MASK 0x00000380
+#define NVC0TCL_RT_CONTROL_MAP2_SHIFT 10
+#define NVC0TCL_RT_CONTROL_MAP2_MASK 0x00001c00
+#define NVC0TCL_RT_CONTROL_MAP3_SHIFT 13
+#define NVC0TCL_RT_CONTROL_MAP3_MASK 0x0000e000
+#define NVC0TCL_RT_CONTROL_MAP4_SHIFT 16
+#define NVC0TCL_RT_CONTROL_MAP4_MASK 0x00070000
+#define NVC0TCL_RT_CONTROL_MAP5_SHIFT 19
+#define NVC0TCL_RT_CONTROL_MAP5_MASK 0x00380000
+#define NVC0TCL_RT_CONTROL_MAP6_SHIFT 22
+#define NVC0TCL_RT_CONTROL_MAP6_MASK 0x01c00000
+#define NVC0TCL_RT_CONTROL_MAP7_SHIFT 25
+#define NVC0TCL_RT_CONTROL_MAP7_MASK 0x0e000000
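
A sketch of how the RT_CONTROL fields above might be packed for `count' render targets with an identity RT map (target i drawn to attachment i); the helper name is made up:

static uint32_t
nvc0_rt_control_identity(unsigned count)
{
   return (count & NVC0TCL_RT_CONTROL_COUNT_MASK) |
          (0 << NVC0TCL_RT_CONTROL_MAP0_SHIFT) |
          (1 << NVC0TCL_RT_CONTROL_MAP1_SHIFT) |
          (2 << NVC0TCL_RT_CONTROL_MAP2_SHIFT) |
          (3 << NVC0TCL_RT_CONTROL_MAP3_SHIFT) |
          (4 << NVC0TCL_RT_CONTROL_MAP4_SHIFT) |
          (5 << NVC0TCL_RT_CONTROL_MAP5_SHIFT) |
          (6 << NVC0TCL_RT_CONTROL_MAP6_SHIFT) |
          (7 << NVC0TCL_RT_CONTROL_MAP7_SHIFT);
}
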
+#define NVC0TCL_ZETA_HORIZ 0x00001228
+#define NVC0TCL_ZETA_VERT 0x0000122c
+#define NVC0TCL_ZETA_ARRAY_MODE 0x00001230
+#define NVC0TCL_ZETA_ARRAY_MODE_LAYERS_SHIFT 0
+#define NVC0TCL_ZETA_ARRAY_MODE_LAYERS_MASK 0x0000ffff
+#define NVC0TCL_ZETA_ARRAY_MODE_UNK (1 << 16)
+#define NVC0TCL_LINKED_TSC 0x00001234
+#define NVC0TCL_FP_RESULT_COUNT 0x00001298
+#define NVC0TCL_DEPTH_TEST_ENABLE 0x000012cc
+#define NVC0TCL_SHADE_MODEL 0x000012d4
+#define NVC0TCL_SHADE_MODEL_FLAT 0x00001d00
+#define NVC0TCL_SHADE_MODEL_SMOOTH 0x00001d01
+#define NVC0TCL_BLEND_INDEPENDENT 0x000012e4
+#define NVC0TCL_DEPTH_WRITE_ENABLE 0x000012e8
+#define NVC0TCL_ALPHA_TEST_ENABLE 0x000012ec
+#define NVC0TCL_PM_SET(x) (0x000012f0+((x)*4))
+#define NVC0TCL_PM_SET__SIZE 0x00000004
+#define NVC0TCL_VB_ELEMENT_U8_SETUP 0x00001300
+#define NVC0TCL_VB_ELEMENT_U8_SETUP_OFFSET_SHIFT 30
+#define NVC0TCL_VB_ELEMENT_U8_SETUP_OFFSET_MASK 0xc0000000
+#define NVC0TCL_VB_ELEMENT_U8_SETUP_COUNT_SHIFT 0
+#define NVC0TCL_VB_ELEMENT_U8_SETUP_COUNT_MASK 0x3fffffff
+#define NVC0TCL_VB_ELEMENT_U8 0x00001304
+#define NVC0TCL_VB_ELEMENT_U8_I0_SHIFT 0
+#define NVC0TCL_VB_ELEMENT_U8_I0_MASK 0x000000ff
+#define NVC0TCL_VB_ELEMENT_U8_I1_SHIFT 8
+#define NVC0TCL_VB_ELEMENT_U8_I1_MASK 0x0000ff00
+#define NVC0TCL_VB_ELEMENT_U8_I2_SHIFT 16
+#define NVC0TCL_VB_ELEMENT_U8_I2_MASK 0x00ff0000
+#define NVC0TCL_VB_ELEMENT_U8_I3_SHIFT 24
+#define NVC0TCL_VB_ELEMENT_U8_I3_MASK 0xff000000
+#define NVC0TCL_DEPTH_TEST_FUNC 0x0000130c
+#define NVC0TCL_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NVC0TCL_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NVC0TCL_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NVC0TCL_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0TCL_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NVC0TCL_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0TCL_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0TCL_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+#define NVC0TCL_ALPHA_TEST_REF 0x00001310
+#define NVC0TCL_ALPHA_TEST_FUNC 0x00001314
+#define NVC0TCL_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NVC0TCL_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NVC0TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NVC0TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0TCL_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NVC0TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+#define NVC0TCL_BLEND_COLOR(x) (0x0000131c+((x)*4))
+#define NVC0TCL_BLEND_COLOR__SIZE 0x00000004
+#define NVC0TCL_TIC_FLUSH 0x00001330
+#define NVC0TCL_TSC_FLUSH 0x00001334
+#define NVC0TCL_TEX_CACHE_CTL 0x00001338
+#define NVC0TCL_BLEND_EQUATION_RGB 0x00001340
+#define NVC0TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0TCL_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0TCL_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NVC0TCL_BLEND_FUNC_SRC_RGB 0x00001344
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00004000
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE 0x00004001
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00004300
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00004301
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00004302
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00004304
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00004305
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00004306
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00004307
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00004308
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x0000c001
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x0000c003
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR 0x0000c900
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA 0x0000c902
+#define NVC0TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_BLEND_FUNC_DST_RGB 0x00001348
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ZERO 0x00004000
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE 0x00004001
+#define NVC0TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00004300
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00004301
+#define NVC0TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00004302
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NVC0TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00004304
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00004305
+#define NVC0TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00004306
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00004307
+#define NVC0TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00004308
+#define NVC0TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x0000c001
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NVC0TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x0000c003
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NVC0TCL_BLEND_FUNC_DST_RGB_SRC1_COLOR 0x0000c900
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NVC0TCL_BLEND_FUNC_DST_RGB_SRC1_ALPHA 0x0000c902
+#define NVC0TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_BLEND_EQUATION_ALPHA 0x0000134c
+#define NVC0TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0TCL_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA 0x00001350
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00004000
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00004001
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00004300
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00004302
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00004304
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00004306
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00004308
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x0000c001
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x0000c003
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC1_COLOR 0x0000c900
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_SRC1_ALPHA 0x0000c902
+#define NVC0TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA 0x00001358
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00004000
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00004001
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00004300
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00004302
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00004304
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00004306
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00004308
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x0000c001
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x0000c003
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC1_COLOR 0x0000c900
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_SRC1_ALPHA 0x0000c902
+#define NVC0TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_STENCIL_ENABLE 0x00001380
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL 0x00001384
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL 0x00001388
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS 0x0000138c
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC 0x00001390
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+#define NVC0TCL_STENCIL_FRONT_FUNC_REF 0x00001394
+#define NVC0TCL_STENCIL_FRONT_MASK 0x00001398
+#define NVC0TCL_STENCIL_FRONT_FUNC_MASK 0x0000139c
+#define NVC0TCL_FRAG_COLOR_CLAMP_EN 0x000013a8
+#define NVC0TCL_Y_ORIGIN_BOTTOM 0x000013ac
+#define NVC0TCL_LINE_WIDTH(x) (0x000013b0+((x)*4))
+#define NVC0TCL_LINE_WIDTH__SIZE 0x00000002
+#define NVC0TCL_POINT_COORD_REPLACE_MAP(x) (0x000013c0+((x)*4))
+#define NVC0TCL_POINT_COORD_REPLACE_MAP__SIZE 0x00000008
+#define NVC0TCL_GP_VERTEX_OUTPUT_COUNT 0x00001420
+#define NVC0TCL_FENCE 0x0000142c
+#define NVC0TCL_VB_ELEMENT_BASE 0x00001434
+#define NVC0TCL_INSTANCE_BASE 0x00001438
+#define NVC0TCL_CODE_CB_FLUSH 0x00001440
+#define NVC0TCL_CLIPID_HEIGHT 0x00001504
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE 0x00001510
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE_0 (1 << 0)
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE_1 (1 << 1)
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE_2 (1 << 2)
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE_3 (1 << 3)
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE_4 (1 << 4)
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE_5 (1 << 5)
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE_6 (1 << 6)
+#define NVC0TCL_VP_CLIP_DISTANCE_ENABLE_7 (1 << 7)
+#define NVC0TCL_SAMPLECNT_ENABLE 0x00001514
+#define NVC0TCL_POINT_SIZE 0x00001518
+#define NVC0TCL_POINT_SPRITE_ENABLE 0x00001520
+#define NVC0TCL_SAMPLECNT_RESET 0x00001530
+#define NVC0TCL_MULTISAMPLE_ZETA_ENABLE 0x00001534
+#define NVC0TCL_ZETA_ENABLE 0x00001538
+#define NVC0TCL_MULTISAMPLE_CTRL 0x0000153c
+#define NVC0TCL_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE (1 << 0)
+#define NVC0TCL_MULTISAMPLE_CTRL_ALPHA_TO_ONE (1 << 4)
+#define NVC0TCL_NOPERSPECTIVE_BITMAP(x) (0x00001540+((x)*4))
+#define NVC0TCL_NOPERSPECTIVE_BITMAP__SIZE 0x00000004
+#define NVC0TCL_COND_ADDRESS_HIGH 0x00001550
+#define NVC0TCL_COND_ADDRESS_LOW 0x00001554
+#define NVC0TCL_COND_MODE 0x00001558
+#define NVC0TCL_COND_MODE_NEVER 0x00000000
+#define NVC0TCL_COND_MODE_ALWAYS 0x00000001
+#define NVC0TCL_COND_MODE_RES 0x00000002
+#define NVC0TCL_COND_MODE_NOT_RES_AND_NOT_ID 0x00000003
+#define NVC0TCL_COND_MODE_RES_OR_ID 0x00000004
+#define NVC0TCL_TSC_ADDRESS_HIGH 0x0000155c
+#define NVC0TCL_TSC_ADDRESS_LOW 0x00001560
+#define NVC0TCL_TSC_LIMIT 0x00001564
+#define NVC0TCL_POLYGON_OFFSET_FACTOR 0x0000156c
+#define NVC0TCL_LINE_SMOOTH_ENABLE 0x00001570
+#define NVC0TCL_TIC_ADDRESS_HIGH 0x00001574
+#define NVC0TCL_TIC_ADDRESS_LOW 0x00001578
+#define NVC0TCL_TIC_LIMIT 0x0000157c
+#define NVC0TCL_PM_CONTROL(x) (0x00001580+((x)*4))
+#define NVC0TCL_PM_CONTROL__SIZE 0x00000004
+#define NVC0TCL_PM_CONTROL_UNK0 (1 << 0)
+#define NVC0TCL_PM_CONTROL_UNK1_SHIFT 4
+#define NVC0TCL_PM_CONTROL_UNK1_MASK 0x00000070
+#define NVC0TCL_PM_CONTROL_UNK2_SHIFT 8
+#define NVC0TCL_PM_CONTROL_UNK2_MASK 0xffffff00
+#define NVC0TCL_STENCIL_TWO_SIDE_ENABLE 0x00001594
+#define NVC0TCL_STENCIL_BACK_OP_FAIL 0x00001598
+#define NVC0TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NVC0TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NVC0TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NVC0TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NVC0TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NVC0TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NVC0TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL 0x0000159c
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS 0x000015a0
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC 0x000015a4
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+#define NVC0TCL_MULTISAMPLE_COLOR_ENABLE 0x000015b4
+#define NVC0TCL_FRAMEBUFFER_SRGB 0x000015b8
+#define NVC0TCL_POLYGON_OFFSET_UNITS 0x000015bc
+#define NVC0TCL_GP_BUILTIN_RESULT_EN 0x000015cc
+#define NVC0TCL_GP_BUILTIN_RESULT_EN_VPORT (1 << 0)
+#define NVC0TCL_GP_BUILTIN_RESULT_EN_LAYER (1 << 16)
+#define NVC0TCL_MULTISAMPLE_MODE 0x000015d0
+#define NVC0TCL_MULTISAMPLE_MODE_1X 0x00000000
+#define NVC0TCL_MULTISAMPLE_MODE_2XMS 0x00000001
+#define NVC0TCL_MULTISAMPLE_MODE_4XMS 0x00000002
+#define NVC0TCL_MULTISAMPLE_MODE_8XMS 0x00000004
+#define NVC0TCL_MULTISAMPLE_MODE_4XMS_4XCS 0x00000008
+#define NVC0TCL_MULTISAMPLE_MODE_4XMS_12XCS 0x00000009
+#define NVC0TCL_MULTISAMPLE_MODE_8XMS_8XCS 0x0000000a
+#define NVC0TCL_EDGEFLAG_ENABLE 0x000015e4
+#define NVC0TCL_VB_ELEMENT_U32 0x000015e8
+#define NVC0TCL_VB_ELEMENT_U16_SETUP 0x000015ec
+#define NVC0TCL_VB_ELEMENT_U16_SETUP_OFFSET_SHIFT 30
+#define NVC0TCL_VB_ELEMENT_U16_SETUP_OFFSET_MASK 0xc0000000
+#define NVC0TCL_VB_ELEMENT_U16_SETUP_COUNT_SHIFT 0
+#define NVC0TCL_VB_ELEMENT_U16_SETUP_COUNT_MASK 0x3fffffff
+#define NVC0TCL_VB_ELEMENT_U16 0x000015f0
+#define NVC0TCL_VB_ELEMENT_U16_I0_SHIFT 0
+#define NVC0TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff
+#define NVC0TCL_VB_ELEMENT_U16_I1_SHIFT 16
+#define NVC0TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000
+#define NVC0TCL_VERTEX_BASE_HIGH 0x000015f4
+#define NVC0TCL_VERTEX_BASE_LOW 0x000015f8
+#define NVC0TCL_CODE_ADDRESS_HIGH 0x00001608
+#define NVC0TCL_CODE_ADDRESS_LOW 0x0000160c
+#define NVC0TCL_VERTEX_BEGIN 0x00001618
+#define NVC0TCL_VERTEX_BEGIN_MODE_SHIFT 0
+#define NVC0TCL_VERTEX_BEGIN_MODE_MASK 0x0000000f
+#define NVC0TCL_VERTEX_BEGIN_MODE_POINTS 0x00000000
+#define NVC0TCL_VERTEX_BEGIN_MODE_LINES 0x00000001
+#define NVC0TCL_VERTEX_BEGIN_MODE_LINE_LOOP 0x00000002
+#define NVC0TCL_VERTEX_BEGIN_MODE_LINE_STRIP 0x00000003
+#define NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLES 0x00000004
+#define NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLE_STRIP 0x00000005
+#define NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLE_FAN 0x00000006
+#define NVC0TCL_VERTEX_BEGIN_MODE_QUADS 0x00000007
+#define NVC0TCL_VERTEX_BEGIN_MODE_QUAD_STRIP 0x00000008
+#define NVC0TCL_VERTEX_BEGIN_MODE_POLYGON 0x00000009
+#define NVC0TCL_VERTEX_BEGIN_MODE_LINES_ADJACENCY 0x0000000a
+#define NVC0TCL_VERTEX_BEGIN_MODE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0TCL_VERTEX_BEGIN_MODE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0TCL_VERTEX_BEGIN_MODE_PATCHES 0x0000000e
+#define NVC0TCL_VERTEX_BEGIN_INSTANCE (1 << 26)
+#define NVC0TCL_VERTEX_END 0x00001614
+#define NVC0TCL_VERTEX_DATA 0x00001640
+#define NVC0TCL_PRIM_RESTART_ENABLE 0x00001644
+#define NVC0TCL_PRIM_RESTART_INDEX 0x00001648
+#define NVC0TCL_POINT_SMOOTH_ENABLE 0x00001658
+#define NVC0TCL_POINT_SPRITE_CTRL 0x00001660
+#define NVC0TCL_LINE_STIPPLE_ENABLE 0x0000166c
+#define NVC0TCL_LINE_STIPPLE_PATTERN 0x00001680
+#define NVC0TCL_PROVOKING_VERTEX_LAST 0x00001684
+#define NVC0TCL_VERTEX_TWO_SIDE_ENABLE 0x00001688
+#define NVC0TCL_POLYGON_STIPPLE_ENABLE 0x0000168c
+#define NVC0TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001700+((x)*4))
+#define NVC0TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020
+#define NVC0TCL_UNK17BC_ADDRESS_HIGH 0x000017bc
+#define NVC0TCL_UNK17BC_ADDRESS_LOW 0x000017c0
+#define NVC0TCL_UNK17BC_LIMIT 0x000017c4
+#define NVC0TCL_VP_POINT_SIZE_EN 0x00001910
+#define NVC0TCL_CULL_FACE_ENABLE 0x00001918
+#define NVC0TCL_FRONT_FACE 0x0000191c
+#define NVC0TCL_FRONT_FACE_CW 0x00000900
+#define NVC0TCL_FRONT_FACE_CCW 0x00000901
+#define NVC0TCL_CULL_FACE 0x00001920
+#define NVC0TCL_CULL_FACE_FRONT 0x00000404
+#define NVC0TCL_CULL_FACE_BACK 0x00000405
+#define NVC0TCL_CULL_FACE_FRONT_AND_BACK 0x00000408
+#define NVC0TCL_VIEWPORT_TRANSFORM_EN 0x0000192c
+#define NVC0TCL_VIEW_VOLUME_CLIP_CTRL 0x0000193c
+#define NVC0TCL_VIEWPORT_CLIP_RECTS_EN 0x0000194c
+#define NVC0TCL_VIEWPORT_CLIP_MODE 0x00001950
+#define NVC0TCL_VIEWPORT_CLIP_MODE_INCLUDE 0x00000000
+#define NVC0TCL_VIEWPORT_CLIP_MODE_EXCLUDE 0x00000001
+#define NVC0TCL_VIEWPORT_CLIP_MODE_UNKNOWN 0x00000002
+#define NVC0TCL_FP_ZORDER_CTRL 0x0000196c
+#define NVC0TCL_CLIPID_ENABLE 0x0000197c
+#define NVC0TCL_CLIPID_WIDTH 0x00001980
+#define NVC0TCL_CLIPID_ID 0x00001984
+#define NVC0TCL_REG_MODE 0x000019a0
+#define NVC0TCL_REG_MODE_PACKED 0x00000001
+#define NVC0TCL_REG_MODE_STRIPED 0x00000002
+#define NVC0TCL_FP_CONTROL 0x000019a8
+#define NVC0TCL_FP_CONTROL_MULTIPLE_RESULTS (1 << 0)
+#define NVC0TCL_FP_CONTROL_EXPORTS_Z (1 << 8)
+#define NVC0TCL_FP_CONTROL_USES_KIL (1 << 20)
+#define NVC0TCL_DEPTH_BOUNDS_EN 0x000019bc
+#define NVC0TCL_LOGIC_OP_ENABLE 0x000019c4
+#define NVC0TCL_LOGIC_OP 0x000019c8
+#define NVC0TCL_LOGIC_OP_CLEAR 0x00001500
+#define NVC0TCL_LOGIC_OP_AND 0x00001501
+#define NVC0TCL_LOGIC_OP_AND_REVERSE 0x00001502
+#define NVC0TCL_LOGIC_OP_COPY 0x00001503
+#define NVC0TCL_LOGIC_OP_AND_INVERTED 0x00001504
+#define NVC0TCL_LOGIC_OP_NOOP 0x00001505
+#define NVC0TCL_LOGIC_OP_XOR 0x00001506
+#define NVC0TCL_LOGIC_OP_OR 0x00001507
+#define NVC0TCL_LOGIC_OP_NOR 0x00001508
+#define NVC0TCL_LOGIC_OP_EQUIV 0x00001509
+#define NVC0TCL_LOGIC_OP_INVERT 0x0000150a
+#define NVC0TCL_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NVC0TCL_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NVC0TCL_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NVC0TCL_LOGIC_OP_NAND 0x0000150e
+#define NVC0TCL_LOGIC_OP_SET 0x0000150f
+#define NVC0TCL_CLEAR_BUFFERS 0x000019d0
+#define NVC0TCL_CLEAR_BUFFERS_Z (1 << 0)
+#define NVC0TCL_CLEAR_BUFFERS_S (1 << 1)
+#define NVC0TCL_CLEAR_BUFFERS_R (1 << 2)
+#define NVC0TCL_CLEAR_BUFFERS_G (1 << 3)
+#define NVC0TCL_CLEAR_BUFFERS_B (1 << 4)
+#define NVC0TCL_CLEAR_BUFFERS_A (1 << 5)
+#define NVC0TCL_CLEAR_BUFFERS_RT_SHIFT 6
+#define NVC0TCL_CLEAR_BUFFERS_RT_MASK 0x000003c0
+#define NVC0TCL_CLEAR_BUFFERS_LAYER_SHIFT 10
+#define NVC0TCL_CLEAR_BUFFERS_LAYER_MASK 0x0007fc00
+#define NVC0TCL_COLOR_MASK(x) (0x00001a00+((x)*4))
+#define NVC0TCL_COLOR_MASK__SIZE 0x00000008
+#define NVC0TCL_COLOR_MASK_R_SHIFT 0
+#define NVC0TCL_COLOR_MASK_R_MASK 0x0000000f
+#define NVC0TCL_COLOR_MASK_G_SHIFT 4
+#define NVC0TCL_COLOR_MASK_G_MASK 0x000000f0
+#define NVC0TCL_COLOR_MASK_B_SHIFT 8
+#define NVC0TCL_COLOR_MASK_B_MASK 0x00000f00
+#define NVC0TCL_COLOR_MASK_A_SHIFT 12
+#define NVC0TCL_COLOR_MASK_A_MASK 0x0000f000
+#define NVC0TCL_QUERY_ADDRESS_HIGH 0x00001b00
+#define NVC0TCL_QUERY_ADDRESS_LOW 0x00001b04
+#define NVC0TCL_QUERY_SEQUENCE 0x00001b08
+#define NVC0TCL_QUERY_GET 0x00001b0c
+#define NVC0TCL_VERTEX_ARRAY_FETCH(x) (0x00001c00+((x)*16))
+#define NVC0TCL_VERTEX_ARRAY_FETCH__SIZE 0x00000020
+#define NVC0TCL_VERTEX_ARRAY_FETCH_STRIDE_SHIFT 0
+#define NVC0TCL_VERTEX_ARRAY_FETCH_STRIDE_MASK 0x00000fff
+#define NVC0TCL_VERTEX_ARRAY_FETCH_ENABLE (1 << 12)
+#define NVC0TCL_BLEND_EQUATIONI_RGB(x) (0x00001e04+((x)*32))
+#define NVC0TCL_BLEND_EQUATIONI_RGB__SIZE 0x00000008
+#define NVC0TCL_BLEND_EQUATIONI_RGB_FUNC_ADD 0x00008006
+#define NVC0TCL_BLEND_EQUATIONI_RGB_MIN 0x00008007
+#define NVC0TCL_BLEND_EQUATIONI_RGB_MAX 0x00008008
+#define NVC0TCL_BLEND_EQUATIONI_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0TCL_BLEND_EQUATIONI_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB(x) (0x00001e08+((x)*32))
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB__SIZE 0x00000008
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ZERO 0x00004000
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE 0x00004001
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC_COLOR 0x00004300
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00004301
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC_ALPHA 0x00004302
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_DST_ALPHA 0x00004304
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00004305
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_DST_COLOR 0x00004306
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_DST_COLOR 0x00004307
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC_ALPHA_SATURATE 0x00004308
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_CONSTANT_COLOR 0x0000c001
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_CONSTANT_ALPHA 0x0000c003
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC1_COLOR 0x0000c900
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_SRC1_ALPHA 0x0000c902
+#define NVC0TCL_BLEND_FUNCI_SRC_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_BLEND_FUNCI_DST_RGB(x) (0x00001e0c+((x)*32))
+#define NVC0TCL_BLEND_FUNCI_DST_RGB__SIZE 0x00000008
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ZERO 0x00004000
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE 0x00004001
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_SRC_COLOR 0x00004300
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_SRC_COLOR 0x00004301
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_SRC_ALPHA 0x00004302
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_DST_ALPHA 0x00004304
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_DST_ALPHA 0x00004305
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_DST_COLOR 0x00004306
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_DST_COLOR 0x00004307
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_SRC_ALPHA_SATURATE 0x00004308
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_CONSTANT_COLOR 0x0000c001
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_CONSTANT_ALPHA 0x0000c003
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_SRC1_COLOR 0x0000c900
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_SRC1_ALPHA 0x0000c902
+#define NVC0TCL_BLEND_FUNCI_DST_RGB_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_BLEND_EQUATIONI_ALPHA(x) (0x00001e10+((x)*32))
+#define NVC0TCL_BLEND_EQUATIONI_ALPHA__SIZE 0x00000008
+#define NVC0TCL_BLEND_EQUATIONI_ALPHA_FUNC_ADD 0x00008006
+#define NVC0TCL_BLEND_EQUATIONI_ALPHA_MIN 0x00008007
+#define NVC0TCL_BLEND_EQUATIONI_ALPHA_MAX 0x00008008
+#define NVC0TCL_BLEND_EQUATIONI_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0TCL_BLEND_EQUATIONI_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA(x) (0x00001e14+((x)*32))
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA__SIZE 0x00000008
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ZERO 0x00004000
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE 0x00004001
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC_COLOR 0x00004300
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC_ALPHA 0x00004302
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_DST_ALPHA 0x00004304
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_DST_COLOR 0x00004306
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00004308
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_CONSTANT_COLOR 0x0000c001
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_CONSTANT_ALPHA 0x0000c003
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC1_COLOR 0x0000c900
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_SRC1_ALPHA 0x0000c902
+#define NVC0TCL_BLEND_FUNCI_SRC_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA(x) (0x00001e18+((x)*32))
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA__SIZE 0x00000008
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ZERO 0x00004000
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE 0x00004001
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC_COLOR 0x00004300
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00004301
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC_ALPHA 0x00004302
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_DST_ALPHA 0x00004304
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00004305
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_DST_COLOR 0x00004306
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00004307
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC_ALPHA_SATURATE 0x00004308
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_CONSTANT_COLOR 0x0000c001
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_CONSTANT_ALPHA 0x0000c003
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC1_COLOR 0x0000c900
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_SRC1_ALPHA 0x0000c902
+#define NVC0TCL_BLEND_FUNCI_DST_ALPHA_ONE_MINUS_SRC1_ALPHA 0x0000c903
+#define NVC0TCL_SP_SELECT(x) (0x00002000+((x)*64))
+#define NVC0TCL_SP_SELECT__SIZE 0x00000006
+#define NVC0TCL_SP_SELECT_ENABLE (1 << 0)
+#define NVC0TCL_SP_SELECT_PROGRAM_SHIFT 4
+#define NVC0TCL_SP_SELECT_PROGRAM_MASK 0x000000f0
+#define NVC0TCL_SP_START_ID(x) (0x00002004+((x)*64))
+#define NVC0TCL_SP_START_ID__SIZE 0x00000006
+#define NVC0TCL_SP_GPR_ALLOC(x) (0x0000200c+((x)*64))
+#define NVC0TCL_SP_GPR_ALLOC__SIZE 0x00000006
+#define NVC0TCL_CB_SIZE 0x00002380
+#define NVC0TCL_CB_BIND(x) (0x00002410+((x)*32))
+#define NVC0TCL_CB_BIND__SIZE 0x00000005
+#define NVC0TCL_CB_BIND_VALID (1 << 0)
+#define NVC0TCL_CB_BIND_INDEX_SHIFT 4
+#define NVC0TCL_CB_BIND_INDEX_MASK 0x000000f0
+#define NVC0TCL_BIND_TIC(x) (0x00002404+((x)*32))
+#define NVC0TCL_BIND_TIC__SIZE 0x00000005
+#define NVC0TCL_BIND_TIC_ACTIVE (1 << 0)
+#define NVC0TCL_BIND_TIC_TEXTURE_SHIFT 1
+#define NVC0TCL_BIND_TIC_TEXTURE_MASK 0x000001fe
+#define NVC0TCL_BIND_TIC_TIC_SHIFT 9
+#define NVC0TCL_BIND_TIC_TIC_MASK 0x7ffffe00
+#define NVC0TCL_TEX_LIMITS(x) (0x00002200+((x)*16))
+#define NVC0TCL_TEX_LIMITS__SIZE 0x00000005
+#define NVC0TCL_TEX_LIMITS_SAMPLERS_LOG2_SHIFT 0
+#define NVC0TCL_TEX_LIMITS_SAMPLERS_LOG2_MASK 0x0000000f
+#define NVC0TCL_TEX_LIMITS_TEXTURES_LOG2_SHIFT 4
+#define NVC0TCL_TEX_LIMITS_TEXTURES_LOG2_MASK 0x000000f0
+#define NVC0TCL_CB_ADDR_HIGH 0x00002384
+#define NVC0TCL_CB_ADDR_LOW 0x00002388
+#define NVC0TCL_CB_POS 0x0000238c
+#define NVC0TCL_CB_DATA(x) (0x00002390+((x)*4))
+#define NVC0TCL_CB_DATA__SIZE 0x00000010
+#define NVC0TCL_TFB_VARYING_LOCS(x) (0x00002800+((x)*4))
+#define NVC0TCL_TFB_VARYING_LOCS__SIZE 0x00000080
+#define NVC0TCL_UNK_UPLOAD_POS 0x00003800
+#define NVC0TCL_UNK_UPLOAD_DATA 0x00003804
+#define NVC0TCL_VERTEX_ARRAY_SELECT 0x00003820
+#define NVC0TCL_VERTEX_ARRAY_ADDRESS 0x00003824
+#define NVC0TCL_BLEND_ENABLEI 0x00003858
+#define NVC0TCL_POLYGON_MODE_FRONT 0x00003868
+#define NVC0TCL_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NVC0TCL_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NVC0TCL_POLYGON_MODE_FRONT_FILL 0x00001b02
+#define NVC0TCL_POLYGON_MODE_BACK 0x00003870
+#define NVC0TCL_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NVC0TCL_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NVC0TCL_POLYGON_MODE_BACK_FILL 0x00001b02
+#define NVC0TCL_GP_SELECT 0x00003878
+#define NVC0TCL_GP_SELECT_ENABLE (1 << 0)
+#define NVC0TCL_GP_SELECT_PROGRAM_SHIFT 4
+#define NVC0TCL_GP_SELECT_PROGRAM_MASK 0x000000f0
+#define NVC0TCL_TEP_SELECT 0x00003880
+#define NVC0TCL_TEP_SELECT_ENABLE (1 << 0)
+#define NVC0TCL_TEP_SELECT_PROGRAM_SHIFT 4
+#define NVC0TCL_TEP_SELECT_PROGRAM_MASK 0x000000f0
+
+
+#define NVC0_COMPUTE 0x000090c0
+
+#define NVC0_COMPUTE_NOP 0x00000100
+#define NVC0_COMPUTE_NOTIFY 0x00000104
+#define NVC0_COMPUTE_SERIALIZE 0x00000110
+#define NVC0_COMPUTE_LOCAL_SIZE 0x00000204
+#define NVC0_COMPUTE_SHARED_BASE 0x00000214
+#define NVC0_COMPUTE_GRIDDIM_YX 0x00000238
+#define NVC0_COMPUTE_GRIDDIM_YX_X_SHIFT 0
+#define NVC0_COMPUTE_GRIDDIM_YX_X_MASK 0x0000ffff
+#define NVC0_COMPUTE_GRIDDIM_YX_Y_SHIFT 16
+#define NVC0_COMPUTE_GRIDDIM_YX_Y_MASK 0xffff0000
+#define NVC0_COMPUTE_GRIDDIM_Z 0x0000023c
+#define NVC0_COMPUTE_SHARED_SIZE 0x0000024c
+#define NVC0_COMPUTE_BLOCK_ALLOC 0x00000250
+#define NVC0_COMPUTE_BLOCK_ALLOC_THREADS_SHIFT 0
+#define NVC0_COMPUTE_BLOCK_ALLOC_THREADS_MASK 0x0000ffff
+#define NVC0_COMPUTE_BLOCK_ALLOC_BARRIERS_SHIFT 16
+#define NVC0_COMPUTE_BLOCK_ALLOC_BARRIERS_MASK 0xffff0000
+#define NVC0_COMPUTE_CP_GPR_ALLOC 0x000002c0
+#define NVC0_COMPUTE_GLOBAL_BASE 0x000002c8
+#define NVC0_COMPUTE_GLOBAL_BASE_HIGH_SHIFT 0
+#define NVC0_COMPUTE_GLOBAL_BASE_HIGH_MASK 0x000000ff
+#define NVC0_COMPUTE_GLOBAL_BASE_INDEX_SHIFT 16
+#define NVC0_COMPUTE_GLOBAL_BASE_INDEX_MASK 0x00ff0000
+#define NVC0_COMPUTE_GLOBAL_BASE_FLAGS_SHIFT 28
+#define NVC0_COMPUTE_GLOBAL_BASE_FLAGS_MASK 0xf0000000
+#define NVC0_COMPUTE_LAUNCH 0x00000368
+#define NVC0_COMPUTE_BLOCKDIM_YX 0x000003ac
+#define NVC0_COMPUTE_BLOCKDIM_YX_X_SHIFT 0
+#define NVC0_COMPUTE_BLOCKDIM_YX_X_MASK 0x0000ffff
+#define NVC0_COMPUTE_BLOCKDIM_YX_Y_SHIFT 16
+#define NVC0_COMPUTE_BLOCKDIM_YX_Y_MASK 0xffff0000
+#define NVC0_COMPUTE_BLOCKDIM_Z 0x000003b0
+#define NVC0_COMPUTE_CP_START_ID 0x000003b4
+#define NVC0_COMPUTE_LOCAL_BASE 0x0000077c
+#define NVC0_COMPUTE_UNK0790_ADDRESS_HIGH 0x00000790
+#define NVC0_COMPUTE_UNK0790_ADDRESS_LOW 0x00000794
+#define NVC0_COMPUTE_LINKED_TSC 0x00001234
+#define NVC0_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
+#define NVC0_COMPUTE_TSC_ADDRESS_LOW 0x00001560
+#define NVC0_COMPUTE_TSC_LIMIT 0x00001564
+#define NVC0_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
+#define NVC0_COMPUTE_TIC_ADDRESS_LOW 0x00001578
+#define NVC0_COMPUTE_TIC_LIMIT 0x0000157c
+#define NVC0_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
+#define NVC0_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
+#define NVC0_COMPUTE_CB_BIND 0x00001694
+#define NVC0_COMPUTE_CB_BIND_INDEX_SHIFT 1
+#define NVC0_COMPUTE_CB_BIND_INDEX_MASK 0xfffffffe
+#define NVC0_COMPUTE_CB_BIND_VALID (1 << 0)
+#define NVC0_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
+#define NVC0_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
+#define NVC0_COMPUTE_QUERY_SEQUENCE 0x00001b08
+#define NVC0_COMPUTE_QUERY_GET 0x00001b0c
+#define NVC0_COMPUTE_CB_ADDRESS_HIGH 0x00002384
+#define NVC0_COMPUTE_CB_ADDRESS_LOW 0x00002388
+#define NVC0_COMPUTE_CB_POS 0x0000238c
+#define NVC0_COMPUTE_CB_DATA 0x00002390
+
+
#endif /* NOUVEAU_REG_H */
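The NVC0 method defines added above follow a consistent convention: a plain offset names the method, value defines name fixed enumerants, and _SHIFT/_MASK pairs describe bit fields that get packed into the 32-bit method data word. A minimal sketch of that packing, assuming only this header; the push-buffer helpers that would actually submit the word vary per driver and are left out:

/* hypothetical helper, for illustration only: build the data word for
 * NVC0TCL_SP_SELECT from an enable flag and a program-type field */
#include <stdint.h>
#include "nouveau/nouveau_class.h"

static uint32_t
nvc0_sp_select_word(int enable, unsigned program_type)
{
   uint32_t w = 0;

   if (enable)
      w |= NVC0TCL_SP_SELECT_ENABLE;
   /* shift the type into bits 7:4 and clamp it to the field width */
   w |= (program_type << NVC0TCL_SP_SELECT_PROGRAM_SHIFT) &
        NVC0TCL_SP_SELECT_PROGRAM_MASK;
   return w;
}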
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
index e31e6f8662..bf1e8201a0 100644
--- a/src/gallium/drivers/nv50/Makefile
+++ b/src/gallium/drivers/nv50/Makefile
@@ -8,9 +8,9 @@ C_SOURCES = \
nv50_clear.c \
nv50_context.c \
nv50_draw.c \
+ nv50_formats.c \
nv50_miptree.c \
nv50_query.c \
- nv50_program.c \
nv50_resource.c \
nv50_screen.c \
nv50_state.c \
@@ -19,6 +19,14 @@ C_SOURCES = \
nv50_tex.c \
nv50_transfer.c \
nv50_vbo.c \
- nv50_push.c
+ nv50_push.c \
+ nv50_program.c \
+ nv50_shader_state.c \
+ nv50_pc.c \
+ nv50_pc_print.c \
+ nv50_pc_emit.c \
+ nv50_tgsi_to_nc.c \
+ nv50_pc_optimize.c \
+ nv50_pc_regalloc.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/nv50/SConscript b/src/gallium/drivers/nv50/SConscript
index 8625f92622..e4a93c15ce 100644
--- a/src/gallium/drivers/nv50/SConscript
+++ b/src/gallium/drivers/nv50/SConscript
@@ -9,6 +9,7 @@ nv50 = env.ConvenienceLibrary(
'nv50_clear.c',
'nv50_context.c',
'nv50_draw.c',
+ 'nv50_formats.c',
'nv50_miptree.c',
'nv50_query.c',
'nv50_program.c',
diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c
new file mode 100644
index 0000000000..e1c7dae306
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_formats.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_screen.h"
+#include "nv50_texture.h"
+#include "nouveau/nouveau_class.h"
+#include "pipe/p_defines.h"
+
+#define A_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
+ NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
+ NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
+ NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
+ NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
+ NV50TIC_0_0_FMT_##sz, \
+ NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_##sz | \
+ NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 | \
+ (NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_##t0 << 3) | (r << 31)
+
+#define B_(cr, cg, cb, ca, t0, t1, t2, t3, sz, r) \
+ NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
+ NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
+ NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
+ NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
+ NV50TIC_0_0_FMT_##sz, 0
+
+#define VERTEX_BUFFER PIPE_BIND_VERTEX_BUFFER
+#define SAMPLER_VIEW PIPE_BIND_SAMPLER_VIEW
+#define RENDER_TARGET PIPE_BIND_RENDER_TARGET
+#define DEPTH_STENCIL PIPE_BIND_DEPTH_STENCIL
+#define SCANOUT PIPE_BIND_SCANOUT
+
+/* for vertex buffers: */
+#define NV50TIC_0_0_FMT_8_8_8 NV50TIC_0_0_FMT_8_8_8_8
+#define NV50TIC_0_0_FMT_16_16_16 NV50TIC_0_0_FMT_16_16_16_16
+#define NV50TIC_0_0_FMT_32_32_32 NV50TIC_0_0_FMT_32_32_32_32
+
+const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
+{
+ /* COMMON FORMATS */
+
+ [PIPE_FORMAT_B8G8R8A8_UNORM] = { NV50TCL_RT_FORMAT_A8R8G8B8_UNORM,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B8G8R8X8_UNORM] = { NV50TCL_RT_FORMAT_X8R8G8B8_UNORM,
+ A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B8G8R8A8_SRGB] = { NV50TCL_RT_FORMAT_A8R8G8B8_SRGB,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_B8G8R8X8_SRGB] = { NV50TCL_RT_FORMAT_X8R8G8B8_SRGB,
+ A_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 1),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_B5G6R5_UNORM] = { NV50TCL_RT_FORMAT_R5G6B5_UNORM,
+ B_(C2, C1, C0, ONE, UNORM, UNORM, UNORM, UNORM, 5_6_5, 1),
+ SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B5G5R5A1_UNORM] = { NV50TCL_RT_FORMAT_A1R5G5B5_UNORM,
+ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 1_5_5_5, 1),
+ SAMPLER_VIEW | RENDER_TARGET | SCANOUT },
+
+ [PIPE_FORMAT_B4G4R4A4_UNORM] = { 0,
+ B_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 4_4_4_4, 1),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = { NV50TCL_RT_FORMAT_A2B10G10R10_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 0),
+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER | SCANOUT },
+
+ [PIPE_FORMAT_B10G10R10A2_UNORM] = { NV50TCL_RT_FORMAT_A2R10G10B10_UNORM,
+ A_(C2, C1, C0, C3, UNORM, UNORM, UNORM, UNORM, 2_10_10_10, 1),
+ SAMPLER_VIEW | RENDER_TARGET | VERTEX_BUFFER },
+
+ /* DEPTH/STENCIL FORMATS */
+
+ [PIPE_FORMAT_Z16_UNORM] = { NV50TCL_ZETA_FORMAT_Z16_UNORM,
+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 16_DEPTH, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z24X8_UNORM] = { NV50TCL_ZETA_FORMAT_X8Z24_UNORM,
+ B_(C0, C0, C0, ONE, UNORM, UINT, UINT, UINT, 8_24, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50TCL_ZETA_FORMAT_S8Z24_UNORM,
+ B_(C1, C1, C1, ONE, UINT, UNORM, UINT, UINT, 24_8, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z32_FLOAT] = { NV50TCL_ZETA_FORMAT_Z32_FLOAT,
+ B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_DEPTH, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ [PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED] = {
+ NV50TCL_ZETA_FORMAT_Z32_FLOAT_X24S8_UNORM,
+ B_(C0, C0, C0, ONE, FLOAT, UINT, UINT, UINT, 32_8, 0),
+ SAMPLER_VIEW | DEPTH_STENCIL },
+
+ /* LUMINANCE, ALPHA, INTENSITY */
+
+ [PIPE_FORMAT_L8_UNORM] = { 0,
+ A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L8_SRGB] = { 0,
+ A_(C0, C0, C0, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_I8_UNORM] = { 0,
+ A_(C0, C0, C0, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_A8_UNORM] = { NV50TCL_RT_FORMAT_A8_UNORM,
+ A_(ZERO, ZERO, ZERO, C0, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_L8A8_UNORM] = { 0,
+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_L8A8_SRGB] = { 0,
+ A_(C0, C0, C0, C1, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ SAMPLER_VIEW },
+
+ /* DXT, RGTC */
+
+ [PIPE_FORMAT_DXT1_RGB] = { 0,
+ B_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT1_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT3_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT3, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_DXT5_RGBA] = { 0,
+ B_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, DXT5, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC1_UNORM] = { 0,
+ B_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC1_SNORM] = { 0,
+ B_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC1, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC2_UNORM] = { 0,
+ B_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, RGTC2, 0),
+ SAMPLER_VIEW },
+
+ [PIPE_FORMAT_RGTC2_SNORM] = { 0,
+ B_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, RGTC2, 0),
+ SAMPLER_VIEW },
+
+ /* FLOAT 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16A16_FLOAT,
+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16B16X16_FLOAT,
+ A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16_FLOAT] = { NV50TCL_RT_FORMAT_R16G16_FLOAT,
+ A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_FLOAT] = { NV50TCL_RT_FORMAT_R16_FLOAT,
+ A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* FLOAT 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT,
+ A_(C0, C1, C2, C3, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32G32B32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32B32X32_FLOAT,
+ A_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32G32_FLOAT] = { NV50TCL_RT_FORMAT_R32G32_FLOAT,
+ A_(C0, C1, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R32_FLOAT] = { NV50TCL_RT_FORMAT_R32_FLOAT,
+ A_(C0, ZERO, ZERO, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* ODD FORMATS */
+
+ [PIPE_FORMAT_R11G11B10_FLOAT] = { NV50TCL_RT_FORMAT_B10G11R11_FLOAT,
+ B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 10_11_11, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R9G9B9E5_FLOAT] = { 0,
+ B_(C0, C1, C2, ONE, FLOAT, FLOAT, FLOAT, FLOAT, 5_9_9_9, 0),
+ SAMPLER_VIEW },
+
+ /* SNORM 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_SNORM] = { 0,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_SNORM] = { 0,
+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_SNORM] = { 0,
+ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_SNORM] = { 0,
+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* UNORM 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_UNORM] = { 0,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_UNORM] = { 0,
+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_UNORM] = { 0,
+ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_UNORM] = { 0,
+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* SNORM 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_SNORM,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_SNORM] = { 0,
+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_SNORM] = { NV50TCL_RT_FORMAT_R16G16_SNORM,
+ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_SNORM] = { 0,
+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* UNORM 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = { NV50TCL_RT_FORMAT_R16G16B16A16_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16G16B16_UNORM] = { 0,
+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_UNORM] = { NV50TCL_RT_FORMAT_R16G16_UNORM,
+ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R16_UNORM] = { 0,
+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* SNORM 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_SNORM,
+ A_(C0, C1, C2, C3, SNORM, SNORM, SNORM, SNORM, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_SNORM] = { 0,
+ A_(C0, C1, C2, ONE, SNORM, SNORM, SNORM, SNORM, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8_SNORM] = { NV50TCL_RT_FORMAT_R8G8_SNORM,
+ A_(C0, C1, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8_SNORM] = { NV50TCL_RT_FORMAT_R8_SNORM,
+ A_(C0, ZERO, ZERO, ONE, SNORM, SNORM, SNORM, SNORM, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* UNORM 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = { NV50TCL_RT_FORMAT_A8B8G8R8_UNORM,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8A8_SRGB] = { NV50TCL_RT_FORMAT_A8B8G8R8_SRGB,
+ A_(C0, C1, C2, C3, UNORM, UNORM, UNORM, UNORM, 8_8_8_8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_UNORM] = { NV50TCL_RT_FORMAT_X8B8G8R8_UNORM,
+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8B8_SRGB] = { NV50TCL_RT_FORMAT_X8B8G8R8_SRGB,
+ A_(C0, C1, C2, ONE, UNORM, UNORM, UNORM, UNORM, 8_8_8, 0),
+ SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8G8_UNORM] = { NV50TCL_RT_FORMAT_R8G8_UNORM,
+ A_(C0, C1, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ [PIPE_FORMAT_R8_UNORM] = { NV50TCL_RT_FORMAT_R8_UNORM,
+ A_(C0, ZERO, ZERO, ONE, UNORM, UNORM, UNORM, UNORM, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW | RENDER_TARGET },
+
+ /* SSCALED 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_SSCALED] = { 0,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_SSCALED] = { 0,
+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_SSCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* USCALED 32 */
+
+ [PIPE_FORMAT_R32G32B32A32_USCALED] = { 0,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 32_32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32B32_USCALED] = { 0,
+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 32_32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32G32_USCALED] = { 0,
+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32_32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R32_USCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 32, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* SSCALED 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_SSCALED] = { 0,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16B16_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_SSCALED] = { 0,
+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16_SSCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* USCALED 16 */
+
+ [PIPE_FORMAT_R16G16B16A16_USCALED] = { 0,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 16_16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16B16_USCALED] = { 0,
+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 16_16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16G16_USCALED] = { 0,
+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16_16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R16_USCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 16, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* SSCALED 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_SSCALED] = { 0,
+ A_(C0, C1, C2, C3, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8B8_SSCALED] = { 0,
+ A_(C0, C1, C2, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8_SSCALED] = { 0,
+ A_(C0, C1, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8_SSCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE, SSCALED, SSCALED, SSCALED, SSCALED, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ /* USCALED 8 */
+
+ [PIPE_FORMAT_R8G8B8A8_USCALED] = { 0,
+ A_(C0, C1, C2, C3, USCALED, USCALED, USCALED, USCALED, 8_8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8B8_USCALED] = { 0,
+ A_(C0, C1, C2, ONE, USCALED, USCALED, USCALED, USCALED, 8_8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8G8_USCALED] = { 0,
+ A_(C0, C1, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8_8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+
+ [PIPE_FORMAT_R8_USCALED] = { 0,
+ A_(C0, ZERO, ZERO, ONE, USCALED, USCALED, USCALED, USCALED, 8, 0),
+ VERTEX_BUFFER | SAMPLER_VIEW },
+};
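Each nv50_format_table entry pairs the render-target format, the texture (TIC) word built by A_()/B_(), the vertex-fetch word (A_ only; B_ leaves it zero), and a mask of the PIPE_BIND usages the format supports, so a format query becomes a single table lookup. A minimal sketch of such a query; the struct is presumably declared in nv50_screen.h (changed elsewhere in this series), so the field name used here is an assumption:

/* hypothetical query, assuming a 'usage' member holding the PIPE_BIND mask */
static boolean
nv50_format_usable(enum pipe_format format, unsigned bindings)
{
   /* every requested binding must be present in the entry's usage mask */
   return (nv50_format_table[format].usage & bindings) == bindings;
}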
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index b7cd92158f..12b5ad106c 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -159,6 +159,9 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *tmp
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
tile_flags = 0x2800;
break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+ tile_flags = 0xe000;
+ break;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
case PIPE_FORMAT_R32G32B32_FLOAT:
tile_flags = 0x7400;
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
new file mode 100644
index 0000000000..5041fc7505
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_pc.h"
+#include "nv50_program.h"
+
+#include <stdio.h>
+
+/* returns TRUE if operands 0 and 1 can be swapped */
+boolean
+nv_op_commutative(uint opcode)
+{
+ switch (opcode) {
+ case NV_OP_ADD:
+ case NV_OP_MUL:
+ case NV_OP_MAD:
+ case NV_OP_AND:
+ case NV_OP_OR:
+ case NV_OP_XOR:
+ case NV_OP_MIN:
+ case NV_OP_MAX:
+ case NV_OP_SAD:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+/* return operand to which the address register applies */
+int
+nv50_indirect_opnd(struct nv_instruction *i)
+{
+ if (!i->src[4])
+ return -1;
+
+ switch (i->opcode) {
+ case NV_OP_MOV:
+ case NV_OP_LDA:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+boolean
+nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s)
+{
+ if (nvi->flags_src || nvi->flags_def)
+ return FALSE;
+
+ switch (nvi->opcode) {
+ case NV_OP_ADD:
+ case NV_OP_MUL:
+ case NV_OP_AND:
+ case NV_OP_OR:
+ case NV_OP_XOR:
+ case NV_OP_SHL:
+ case NV_OP_SHR:
+ return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) &&
+ (nvi->def[0]->reg.file == NV_FILE_GPR);
+ case NV_OP_MOV:
+ assert(s == 0);
+ return (nvi->def[0]->reg.file == NV_FILE_GPR);
+ default:
+ return FALSE;
+ }
+}
+
+boolean
+nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
+{
+ int i;
+
+ for (i = 0; i < 3 && nvi->src[i]; ++i)
+ if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
+ return FALSE;
+
+ switch (nvi->opcode) {
+ case NV_OP_ABS:
+ case NV_OP_ADD:
+ case NV_OP_CEIL:
+ case NV_OP_FLOOR:
+ case NV_OP_TRUNC:
+ case NV_OP_CVT:
+ case NV_OP_MAD:
+ case NV_OP_MUL:
+ case NV_OP_SAT:
+ case NV_OP_SUB:
+ case NV_OP_MAX:
+ case NV_OP_MIN:
+ if (s == 0 && (value->reg.file == NV_FILE_MEM_S ||
+ value->reg.file == NV_FILE_MEM_P))
+ return TRUE;
+ if (s == 1 &&
+ value->reg.file >= NV_FILE_MEM_C(0) &&
+ value->reg.file <= NV_FILE_MEM_C(15))
+ return TRUE;
+ if (s == 2 && nvi->src[1]->value->reg.file == NV_FILE_GPR)
+ return TRUE;
+ return FALSE;
+ case NV_OP_MOV:
+ assert(s == 0);
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+/* Return whether this instruction can be executed conditionally. */
+boolean
+nv50_nvi_can_predicate(struct nv_instruction *nvi)
+{
+ int i;
+
+ if (nvi->flags_src)
+ return FALSE;
+ for (i = 0; i < 4 && nvi->src[i]; ++i)
+ if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
+ return FALSE;
+ return TRUE;
+}
+
+/* source modifiers (NV_MOD_NEG / NV_MOD_ABS) that can be encoded for source s */
+ubyte
+nv50_supported_src_mods(uint opcode, int s)
+{
+ switch (opcode) {
+ case NV_OP_ABS:
+ return NV_MOD_NEG | NV_MOD_ABS; /* obviously */
+ case NV_OP_ADD:
+ case NV_OP_MUL:
+ case NV_OP_MAD:
+ return NV_MOD_NEG;
+ case NV_OP_DFDX:
+ case NV_OP_DFDY:
+ assert(s == 0);
+ return NV_MOD_NEG;
+ case NV_OP_MAX:
+ case NV_OP_MIN:
+ return NV_MOD_ABS;
+ case NV_OP_CVT:
+ case NV_OP_LG2:
+ case NV_OP_NEG:
+ case NV_OP_PREEX2:
+ case NV_OP_PRESIN:
+ case NV_OP_RCP:
+ case NV_OP_RSQ:
+ return NV_MOD_ABS | NV_MOD_NEG;
+ default:
+ return 0;
+ }
+}
+
+int
+nv_nvi_refcount(struct nv_instruction *nvi)
+{
+ int i, rc;
+
+ rc = nvi->flags_def ? nvi->flags_def->refc : 0;
+
+ for (i = 0; i < 4; ++i) {
+ if (!nvi->def[i])
+ return rc;
+ rc += nvi->def[i]->refc;
+ }
+ return rc;
+}
+
+int
+nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+ struct nv_value *new_val)
+{
+ int i, n;
+
+ if (old_val == new_val)
+ return old_val->refc;
+
+ for (i = 0, n = 0; i < pc->num_refs; ++i) {
+ if (pc->refs[i]->value == old_val) {
+ ++n;
+ nv_reference(pc, &pc->refs[i], new_val);
+ }
+ }
+ return n;
+}
+
+static void
+nv_pc_free_refs(struct nv_pc *pc)
+{
+ int i;
+ for (i = 0; i < pc->num_refs; i += 64)
+ FREE(pc->refs[i]);
+}
+
+static const char *
+edge_name(ubyte type)
+{
+ switch (type) {
+ case CFG_EDGE_FORWARD: return "forward";
+ case CFG_EDGE_BACK: return "back";
+ case CFG_EDGE_LOOP_ENTER: return "loop";
+ case CFG_EDGE_LOOP_LEAVE: return "break";
+ default:
+ return "?";
+ }
+}
+
+/* Call f on each reachable block: back edges are ignored, loop entries are
+ * followed at once, forward successors wait until all their predecessors have
+ * been visited, and break targets are deferred until the work stack is empty. */
+void
+nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
+{
+ struct nv_basic_block *bb[64], *bbb[16], *b;
+ int j, p, pp;
+
+ bb[0] = root;
+ p = 1;
+ pp = 0;
+
+ while (p > 0) {
+ b = bb[--p];
+ b->priv = 0;
+
+ for (j = 1; j >= 0; --j) {
+ if (!b->out[j])
+ continue;
+
+ switch (b->out_kind[j]) {
+ case CFG_EDGE_BACK:
+ continue;
+ case CFG_EDGE_FORWARD:
+ if (++b->out[j]->priv == b->out[j]->num_in)
+ bb[p++] = b->out[j];
+ break;
+ case CFG_EDGE_LOOP_ENTER:
+ bb[p++] = b->out[j];
+ break;
+ case CFG_EDGE_LOOP_LEAVE:
+ bbb[pp++] = b->out[j];
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ f(priv, b);
+
+ if (!p)
+ while (pp > 0)
+ bb[p++] = bbb[--pp];
+ }
+}
+
+static void
+nv_do_print_program(void *priv, struct nv_basic_block *b)
+{
+ struct nv_instruction *i = b->phi;
+
+ debug_printf("=== BB %i ", b->id);
+ if (b->out[0])
+ debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
+ if (b->out[1])
+ debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
+ debug_printf("===\n");
+
+ i = b->phi;
+ if (!i)
+ i = b->entry;
+ for (; i; i = i->next)
+ nv_print_instruction(i);
+}
+
+void
+nv_print_program(struct nv_basic_block *root)
+{
+ nv_pc_pass_in_order(root, nv_do_print_program, root);
+
+ debug_printf("END\n\n");
+}
+
+static INLINE void
+nvcg_show_bincode(struct nv_pc *pc)
+{
+ int i;
+
+ for (i = 0; i < pc->bin_size / 4; ++i)
+ debug_printf("0x%08x ", pc->emit[i]);
+ debug_printf("\n");
+}
+
+static int
+nv50_emit_program(struct nv_pc *pc)
+{
+ uint32_t *code = pc->emit;
+ int n;
+
+ debug_printf("emitting program: size = %u\n", pc->bin_size);
+
+ for (n = 0; n < pc->num_blocks; ++n) {
+ struct nv_instruction *i;
+ struct nv_basic_block *b = pc->bb_list[n];
+
+ for (i = b->entry; i; i = i->next) {
+ nv50_emit_instruction(pc, i);
+
+ pc->bin_pos += 1 + (pc->emit[0] & 1);
+ pc->emit += 1 + (pc->emit[0] & 1);
+ }
+ }
+ assert(pc->emit == &code[pc->bin_size / 4]);
+
+ /* XXX: we can do better than this ... */
+ if (!(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3) == 3) {
+ pc->emit[0] = 0xf0000001;
+ pc->emit[1] = 0xe0000000;
+ pc->bin_size += 8;
+ }
+
+ pc->emit = code;
+ code[pc->bin_size / 4 - 1] |= 1;
+
+ nvcg_show_bincode(pc);
+
+ return 0;
+}
+
+int
+nv50_generate_code(struct nv50_translation_info *ti)
+{
+ struct nv_pc *pc;
+ int ret;
+
+ pc = CALLOC_STRUCT(nv_pc);
+ if (!pc)
+ return 1;
+
+ ret = nv50_tgsi_to_nc(pc, ti);
+ if (ret)
+ goto out;
+ nv_print_program(pc->root);
+
+ /* optimization */
+ ret = nv_pc_exec_pass0(pc);
+ if (ret)
+ goto out;
+
+ /* register allocation */
+ ret = nv_pc_exec_pass1(pc);
+ if (ret)
+ goto out;
+
+ /* prepare for emission */
+ ret = nv_pc_exec_pass2(pc);
+ if (ret)
+ goto out;
+
+ pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
+ if (!pc->emit) {
+ ret = 3;
+ goto out;
+ }
+ ret = nv50_emit_program(pc);
+ if (ret)
+ goto out;
+
+ ti->p->code_size = pc->bin_size;
+ ti->p->code = pc->emit;
+
+ ti->p->immd_size = pc->immd_count * 4;
+ ti->p->immd = pc->immd_buf;
+
+ /* convert the highest used half (16 bit) register index into the number of full 32 bit GPRs */
+ ti->p->max_gpr = (pc->max_reg[NV_FILE_GPR] >> 1) + 1;
+
+ ti->p->fixups = pc->fixups;
+ ti->p->num_fixups = pc->num_fixups;
+
+ debug_printf("SHADER TRANSLATION - %s\n", ret ? "failure" : "success");
+
+out:
+ nv_pc_free_refs(pc);
+ if (ret) {
+ if (pc->emit)
+ free(pc->emit);
+ if (pc->immd_buf)
+ free(pc->immd_buf);
+ if (pc->fixups)
+ free(pc->fixups);
+ }
+ free(pc);
+
+ return ret;
+}
+
+static void
+nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
+{
+ if (!b->phi) {
+ i->prev = NULL;
+ b->phi = i;
+ i->next = b->entry;
+ if (b->entry) {
+ assert(!b->entry->prev && b->exit);
+ b->entry->prev = i;
+ } else {
+ b->entry = i;
+ b->exit = i;
+ }
+ } else {
+ assert(b->entry);
+ if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
+ assert(b->entry == b->exit);
+ b->entry->next = i;
+ i->prev = b->entry;
+ b->entry = i;
+ b->exit = i;
+ } else { /* insert before entry */
+ assert(b->entry->prev && b->exit);
+ i->next = b->entry;
+ i->prev = b->entry->prev;
+ b->entry->prev = i;
+ i->prev->next = i;
+ }
+ }
+}
+
+void
+nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
+{
+ if (i->opcode == NV_OP_PHI) {
+ nvbb_insert_phi(b, i);
+ } else {
+ i->prev = b->exit;
+ if (b->exit)
+ b->exit->next = i;
+ b->exit = i;
+ if (!b->entry)
+ b->entry = i;
+ else
+ if (i->prev && i->prev->opcode == NV_OP_PHI)
+ b->entry = i;
+ }
+
+ i->bb = b;
+ b->num_instructions++;
+}
+
+void
+nv_nvi_delete(struct nv_instruction *nvi)
+{
+ struct nv_basic_block *b = nvi->bb;
+ int j;
+
+ /* debug_printf("REM: "); nv_print_instruction(nvi); */
+
+ for (j = 0; j < 5; ++j)
+ nv_reference(NULL, &nvi->src[j], NULL);
+ nv_reference(NULL, &nvi->flags_src, NULL);
+
+ if (nvi->next)
+ nvi->next->prev = nvi->prev;
+ else {
+ assert(nvi == b->exit);
+ b->exit = nvi->prev;
+ }
+
+ if (nvi->prev)
+ nvi->prev->next = nvi->next;
+
+ if (nvi == b->entry) {
+ /* PHIs don't get hooked to b->entry */
+ b->entry = nvi->next;
+ assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
+ }
+
+ if (nvi == b->phi) {
+ if (nvi->opcode != NV_OP_PHI)
+ debug_printf("NOTE: b->phi points to non-PHI instruction\n");
+
+ assert(!nvi->prev);
+ if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
+ b->phi = NULL;
+ else
+ b->phi = nvi->next;
+ }
+}
+
+void
+nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2)
+{
+ struct nv_basic_block *b = i1->bb;
+
+ assert(i1->opcode != NV_OP_PHI &&
+ i2->opcode != NV_OP_PHI);
+ assert(i1->next == i2);
+
+ if (b->exit == i2)
+ b->exit = i1;
+
+ if (b->entry == i1)
+ b->entry = i2;
+
+ i2->prev = i1->prev;
+ i1->next = i2->next;
+ i2->next = i1;
+ i1->prev = i2;
+
+ if (i2->prev)
+ i2->prev->next = i2;
+ if (i1->next)
+ i1->next->prev = i1;
+}
+
+void
+nvbb_attach_block(struct nv_basic_block *parent,
+ struct nv_basic_block *b, ubyte edge_kind)
+{
+ assert(b->num_in < 8);
+
+ if (parent->out[0]) {
+ assert(!parent->out[1]);
+ parent->out[1] = b;
+ parent->out_kind[1] = edge_kind;
+ } else {
+ parent->out[0] = b;
+ parent->out_kind[0] = edge_kind;
+ }
+
+ b->in[b->num_in] = parent;
+ b->in_kind[b->num_in++] = edge_kind;
+}
+
+/* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
+
+boolean
+nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
+{
+ int j;
+
+ if (b == d)
+ return TRUE;
+
+ for (j = 0; j < b->num_in; ++j)
+ if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d))
+ return FALSE;
+
+ return j ? TRUE : FALSE;
+}
+
+/* check if bf (future) can be reached from bp (past) without passing through bt */
+boolean
+nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
+ struct nv_basic_block *bt)
+{
+ if (bf == bp)
+ return TRUE;
+ if (bp == bt)
+ return FALSE;
+
+ if (bp->out[0] && bp->out_kind[0] != CFG_EDGE_BACK &&
+ nvbb_reachable_by(bf, bp->out[0], bt))
+ return TRUE;
+ if (bp->out[1] && bp->out_kind[1] != CFG_EDGE_BACK &&
+ nvbb_reachable_by(bf, bp->out[1], bt))
+ return TRUE;
+ return FALSE;
+}
+
+static struct nv_basic_block *
+nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
+{
+ int i;
+
+ if (!nvbb_dominated_by(df, b)) {
+ for (i = 0; i < df->num_in; ++i) {
+ if (df->in_kind[i] == CFG_EDGE_BACK)
+ continue;
+ if (nvbb_dominated_by(df->in[i], b))
+ return df;
+ }
+ }
+ for (i = 0; i < 2 && b->out[i]; ++i) {
+ if (b->out_kind[i] == CFG_EDGE_BACK)
+ continue;
+ if ((df = nvbb_find_dom_frontier(b, b->out[i])))
+ return df;
+ }
+ return NULL;
+}
+
+struct nv_basic_block *
+nvbb_dom_frontier(struct nv_basic_block *b)
+{
+ struct nv_basic_block *df;
+ int i;
+
+ for (i = 0; i < 2 && b->out[i]; ++i)
+ if ((df = nvbb_find_dom_frontier(b, b->out[i])))
+ return df;
+ return NULL;
+}
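nvbb_attach_block() above is the only place CFG edges are created, so higher-level control flow reduces to attaching blocks with the right edge kind: a conditional is two forward edges out of the header plus one forward edge from each branch into the join block. A small sketch under that assumption, using new_basic_block() from nv50_pc.h and leaving the branch instructions themselves out:

/* hypothetical helper, for illustration only: wire up an if/else diamond */
static void
build_if_else_cfg(struct nv_pc *pc, struct nv_basic_block *header)
{
   struct nv_basic_block *then_bb = new_basic_block(pc);
   struct nv_basic_block *else_bb = new_basic_block(pc);
   struct nv_basic_block *join_bb = new_basic_block(pc);

   nvbb_attach_block(header, then_bb, CFG_EDGE_FORWARD);
   nvbb_attach_block(header, else_bb, CFG_EDGE_FORWARD);
   nvbb_attach_block(then_bb, join_bb, CFG_EDGE_FORWARD);
   nvbb_attach_block(else_bb, join_bb, CFG_EDGE_FORWARD);
}

With this shape, nv_pc_pass_in_order() visits the header first, then the two branch blocks, and the join block last, since the join only becomes ready once both of its forward predecessors have been seen.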
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
new file mode 100644
index 0000000000..d24375100d
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -0,0 +1,461 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NV50_COMPILER_H__
+#define __NV50_COMPILER_H__
+
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#define NV_OP_PHI 0
+#define NV_OP_EXTRACT 1
+#define NV_OP_COMBINE 2
+#define NV_OP_LDA 3
+#define NV_OP_STA 4
+#define NV_OP_MOV 5
+#define NV_OP_ADD 6
+#define NV_OP_SUB 7
+#define NV_OP_NEG 8
+#define NV_OP_MUL 9
+#define NV_OP_MAD 10
+#define NV_OP_CVT 11
+#define NV_OP_SAT 12
+#define NV_OP_NOT 13
+#define NV_OP_AND 14
+#define NV_OP_OR 15
+#define NV_OP_XOR 16
+#define NV_OP_SHL 17
+#define NV_OP_SHR 18
+#define NV_OP_RCP 19
+#define NV_OP_UNDEF 20
+#define NV_OP_RSQ 21
+#define NV_OP_LG2 22
+#define NV_OP_SIN 23
+#define NV_OP_COS 24
+#define NV_OP_EX2 25
+#define NV_OP_PRESIN 26
+#define NV_OP_PREEX2 27
+#define NV_OP_MIN 28
+#define NV_OP_MAX 29
+#define NV_OP_SET 30
+#define NV_OP_SAD 31
+#define NV_OP_KIL 32
+#define NV_OP_BRA 33
+#define NV_OP_CALL 34
+#define NV_OP_RET 35
+#define NV_OP_BREAK 36
+#define NV_OP_BREAKADDR 37
+#define NV_OP_JOINAT 38
+#define NV_OP_TEX 39
+#define NV_OP_TXB 40
+#define NV_OP_TXL 41
+#define NV_OP_TXF 42
+#define NV_OP_TXQ 43
+#define NV_OP_DFDX 44
+#define NV_OP_DFDY 45
+#define NV_OP_QUADOP 46
+#define NV_OP_LINTERP 47
+#define NV_OP_PINTERP 48
+#define NV_OP_ABS 49
+#define NV_OP_CEIL 50
+#define NV_OP_FLOOR 51
+#define NV_OP_TRUNC 52
+#define NV_OP_NOP 53
+#define NV_OP_SELECT 54
+#define NV_OP_EXPORT 55
+#define NV_OP_JOIN 56
+#define NV_OP_COUNT 57
+
+#define NV_FILE_GPR 0
+#define NV_FILE_OUT 1
+#define NV_FILE_ADDR 2
+#define NV_FILE_FLAGS 3
+#define NV_FILE_IMM 16
+#define NV_FILE_MEM_S 32
+#define NV_FILE_MEM_P 33
+#define NV_FILE_MEM_V 34
+#define NV_FILE_MEM_L 48
+#define NV_FILE_MEM_G(i) (64 + i)
+#define NV_FILE_MEM_C(i) (80 + i)
+
+#define NV_MOD_NEG 1
+#define NV_MOD_ABS 2
+#define NV_MOD_NOT 4
+#define NV_MOD_SAT 8
+
+#define NV_TYPE_U8 0x00
+#define NV_TYPE_S8 0x01
+#define NV_TYPE_U16 0x02
+#define NV_TYPE_S16 0x03
+#define NV_TYPE_U32 0x04
+#define NV_TYPE_S32 0x05
+#define NV_TYPE_P32 0x07
+#define NV_TYPE_F32 0x09
+#define NV_TYPE_F64 0x0b
+#define NV_TYPE_VEC(x, n) (NV_TYPE_##x | (n << 4))
+#define NV_TYPE_LO 0x00
+#define NV_TYPE_HI 0x80
+#define NV_TYPE_ANY 0xff
+
+#define NV_TYPE_ISINT(t) ((t) <= 5)
+#define NV_TYPE_ISFLT(t) ((t) & 0x08)
+
+#define NV_CC_FL 0x0
+#define NV_CC_LT 0x1
+#define NV_CC_EQ 0x2
+#define NV_CC_LE 0x3
+#define NV_CC_GT 0x4
+#define NV_CC_NE 0x5
+#define NV_CC_GE 0x6
+#define NV_CC_U 0x8
+#define NV_CC_TR 0xf
+
+#define NV_PC_MAX_INSTRUCTIONS 2048
+#define NV_PC_MAX_VALUES (NV_PC_MAX_INSTRUCTIONS * 4)
+
+static INLINE boolean
+nv_is_vector_op(uint opcode)
+{
+ return (opcode >= NV_OP_TEX) && (opcode <= NV_OP_TXQ);
+}
+
+static INLINE uint
+nv_type_order(ubyte type)
+{
+ switch (type & 0xf) {
+ case NV_TYPE_U8:
+ case NV_TYPE_S8:
+ return 0;
+ case NV_TYPE_U16:
+ case NV_TYPE_S16:
+ return 1;
+ case NV_TYPE_U32:
+ case NV_TYPE_F32:
+ case NV_TYPE_S32:
+ case NV_TYPE_P32:
+ return 2;
+ case NV_TYPE_F64:
+ return 3;
+ }
+ assert(0);
+ return 0;
+}
+
+static INLINE uint
+nv_type_sizeof(ubyte type)
+{
+ if (type & 0xf0)
+ return (1 << nv_type_order(type)) * (type >> 4);
+ return 1 << nv_type_order(type);
+}
+
+static INLINE uint
+nv_type_sizeof_base(ubyte type)
+{
+ return 1 << nv_type_order(type);
+}
+
+struct nv_reg {
+ int id;
+ ubyte file;
+ ubyte type; /* type of generating instruction's result */
+ union {
+ float f32;
+ double f64;
+ int32_t s32;
+ uint32_t u32;
+ } imm;
+};
+
+struct nv_range {
+ struct nv_range *next;
+ int bgn;
+ int end;
+};
+
+struct nv_value {
+ struct nv_reg reg;
+ struct nv_instruction *insn;
+ struct nv_value *join;
+ int n;
+ struct nv_range *livei;
+ int refc;
+
+ struct nv_value *next;
+ struct nv_value *prev;
+};
+
+struct nv_ref {
+ struct nv_value *value;
+ struct nv_instruction *insn;
+ ubyte mod;
+ ubyte typecast;
+ ubyte flags; /* not used yet */
+};
+
+struct nv_basic_block;
+
+struct nv_instruction {
+ struct nv_instruction *next;
+ struct nv_instruction *prev;
+ uint opcode;
+ int serial;
+ struct nv_value *def[4];
+ struct nv_value *flags_def;
+ struct nv_ref *src[5];
+ struct nv_ref *flags_src;
+ struct nv_basic_block *bb;
+ struct nv_basic_block *target; /* target block of control flow insn */
+ ubyte cc;
+ ubyte set_cond : 4;
+ ubyte fixed : 1; /* don't optimize away */
+ ubyte is_terminator : 1;
+ ubyte is_join : 1;
+ ubyte is_long : 1; /* for emission */
+ /* */
+ ubyte saturate : 1;
+ ubyte centroid : 1;
+ ubyte flat : 1;
+ ubyte padding : 4;
+ ubyte tex_live : 1;
+ /* */
+ ubyte tex_t; /* TIC binding */
+ ubyte tex_s; /* TSC binding */
+ ubyte tex_argc : 3;
+ ubyte tex_cube : 1;
+ ubyte tex_mask : 4;
+ /* */
+ ubyte quadop;
+};
+
+#define CFG_EDGE_FORWARD 0
+#define CFG_EDGE_BACK 1
+#define CFG_EDGE_LOOP_ENTER 2
+#define CFG_EDGE_LOOP_LEAVE 4
+
+struct nv_basic_block {
+ struct nv_instruction *entry; /* first non-phi instruction */
+ struct nv_instruction *exit;
+ struct nv_instruction *phi; /* very first instruction */
+ int num_instructions;
+
+ struct nv_basic_block *out[2]; /* no indirect branches -> 2 */
+ struct nv_basic_block *in[8]; /* hope that suffices */
+ uint num_in;
+ ubyte out_kind[2];
+ ubyte in_kind[8];
+
+ int id;
+ struct nv_basic_block *last_visitor;
+ uint priv;
+ uint pass_seq;
+
+ uint32_t bin_pos; /* position, size in emitted code */
+ uint32_t bin_size;
+
+ uint32_t live_set[NV_PC_MAX_VALUES / 32];
+};
+
+#define NV_FIXUP_CFLOW_RELOC 0
+#define NV_FIXUP_PARAM_RELOC 1
+
+struct nv_fixup {
+ ubyte type;
+ ubyte shift;
+ uint32_t mask;
+ uint32_t data;
+ uint32_t offset;
+};
+
+static INLINE void
+nv_fixup_apply(uint32_t *bin, struct nv_fixup *fixup, uint32_t data)
+{
+ uint32_t val;
+
+ val = bin[fixup->offset / 4] & ~fixup->mask;
+ data = (fixup->shift < 0) ? (data >> fixup->shift) : (data << fixup->shift);
+ val |= (fixup->data + data) & fixup->mask;
+ bin[fixup->offset / 4] = val;
+}
+
+struct nv_pc {
+ struct nv50_translation_info *ti;
+
+ struct nv_basic_block *root;
+ struct nv_basic_block *current_block;
+ struct nv_basic_block *parent_block;
+
+ int loop_nesting_bound;
+ uint pass_seq;
+
+ struct nv_value values[NV_PC_MAX_VALUES];
+ struct nv_instruction instructions[NV_PC_MAX_INSTRUCTIONS];
+ struct nv_ref **refs;
+ struct nv_basic_block **bb_list;
+ int num_values;
+ int num_instructions;
+ int num_refs;
+ int num_blocks;
+
+ int max_reg[4];
+
+ uint32_t *immd_buf; /* populated on emit */
+ unsigned immd_count;
+
+ uint32_t *emit;
+ unsigned bin_size;
+ unsigned bin_pos;
+
+ struct nv_fixup *fixups;
+ int num_fixups;
+};
+
+void nvbb_insert_tail(struct nv_basic_block *, struct nv_instruction *);
+
+static INLINE struct nv_instruction *
+new_instruction(struct nv_pc *pc, uint opcode)
+{
+ struct nv_instruction *insn;
+
+ insn = &pc->instructions[pc->num_instructions++];
+ assert(pc->num_instructions < NV_PC_MAX_INSTRUCTIONS);
+
+ insn->cc = NV_CC_TR;
+ insn->opcode = opcode;
+
+ nvbb_insert_tail(pc->current_block, insn);
+ return insn;
+}
+
+static INLINE struct nv_value *
+new_value(struct nv_pc *pc, ubyte file, ubyte type)
+{
+ struct nv_value *value = &pc->values[pc->num_values];
+
+ assert(pc->num_values < NV_PC_MAX_VALUES - 1);
+
+ value->n = pc->num_values++;
+ value->join = value;
+ value->reg.id = -1;
+ value->reg.file = file;
+ value->reg.type = type;
+ return value;
+}
+
+static INLINE struct nv_value *
+new_value_like(struct nv_pc *pc, struct nv_value *like)
+{
+ return new_value(pc, like->reg.file, like->reg.type);
+}
+
+static INLINE struct nv_ref *
+new_ref(struct nv_pc *pc, struct nv_value *val)
+{
+ int i;
+ struct nv_ref *ref;
+
+ if ((pc->num_refs % 64) == 0) {
+ const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
+ const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
+
+ pc->refs = REALLOC(pc->refs, old_size, new_size);
+
+ ref = CALLOC(64, sizeof(struct nv_ref));
+ for (i = 0; i < 64; ++i)
+ pc->refs[pc->num_refs + i] = &ref[i];
+ }
+
+ ref = pc->refs[pc->num_refs++];
+ ref->value = val;
+ ref->typecast = val->reg.type;
+
+ ++val->refc;
+ return ref;
+}
+
+static INLINE struct nv_basic_block *
+new_basic_block(struct nv_pc *pc)
+{
+ struct nv_basic_block *bb = CALLOC_STRUCT(nv_basic_block);
+
+ bb->id = pc->num_blocks++;
+ return bb;
+}
+
+static INLINE void
+nv_reference(struct nv_pc *pc, struct nv_ref **d, struct nv_value *s)
+{
+ if (*d)
+ --(*d)->value->refc;
+
+ if (s) {
+ if (!*d)
+ *d = new_ref(pc, s);
+ else {
+ (*d)->value = s;
+ ++(s->refc);
+ }
+ } else {
+ *d = NULL;
+ }
+}
+
+/* nv50_pc_emit.c */
+void nv50_emit_instruction(struct nv_pc *, struct nv_instruction *);
+
+/* nv50_pc_print.c */
+const char *nv_opcode_name(uint opcode);
+void nv_print_instruction(struct nv_instruction *);
+
+/* nv50_pc.c */
+
+void nv_print_program(struct nv_basic_block *b);
+
+boolean nv_op_commutative(uint opcode);
+int nv50_indirect_opnd(struct nv_instruction *);
+boolean nv50_nvi_can_use_imm(struct nv_instruction *, int s);
+boolean nv50_nvi_can_predicate(struct nv_instruction *);
+boolean nv50_nvi_can_load(struct nv_instruction *, int s, struct nv_value *);
+ubyte nv50_supported_src_mods(uint opcode, int s);
+int nv_nvi_refcount(struct nv_instruction *);
+void nv_nvi_delete(struct nv_instruction *);
+void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
+void nvbb_attach_block(struct nv_basic_block *parent,
+ struct nv_basic_block *, ubyte edge_kind);
+boolean nvbb_dominated_by(struct nv_basic_block *, struct nv_basic_block *);
+boolean nvbb_reachable_by(struct nv_basic_block *, struct nv_basic_block *,
+ struct nv_basic_block *);
+struct nv_basic_block *nvbb_dom_frontier(struct nv_basic_block *);
+int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+ struct nv_value *new_val);
+
+typedef void (*nv_pc_pass_func)(void *priv, struct nv_basic_block *b);
+
+void nv_pc_pass_in_order(struct nv_basic_block *, nv_pc_pass_func, void *);
+
+int nv_pc_exec_pass0(struct nv_pc *pc);
+int nv_pc_exec_pass1(struct nv_pc *pc);
+int nv_pc_exec_pass2(struct nv_pc *pc);
+
+int nv50_tgsi_to_nc(struct nv_pc *, struct nv50_translation_info *);
+
+#endif /* __NV50_COMPILER_H__ */
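The inline helpers in this header are enough to append an instruction to the current block: new_instruction() links it in via nvbb_insert_tail(), new_value() allocates an SSA-style result, and nv_reference() points a source slot at an existing value while keeping reference counts in sync. A minimal sketch along those lines; the def[0]->insn back-pointer assignment mirrors what the TGSI translator is expected to do and is an assumption here:

/* hypothetical helper, for illustration only: emit "dst = a * b" for f32 GPRs */
static struct nv_value *
emit_mul_f32(struct nv_pc *pc, struct nv_value *a, struct nv_value *b)
{
   struct nv_instruction *insn = new_instruction(pc, NV_OP_MUL);

   insn->def[0] = new_value(pc, NV_FILE_GPR, NV_TYPE_F32);
   insn->def[0]->insn = insn;
   nv_reference(pc, &insn->src[0], a);
   nv_reference(pc, &insn->src[1], b);
   return insn->def[0];
}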
diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c
new file mode 100644
index 0000000000..bc151c3a80
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_pc_emit.c
@@ -0,0 +1,1180 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nv50_pc.h"
+
+// Definitions
+
+#define FLAGS_CC_SHIFT 7
+#define FLAGS_ID_SHIFT 12
+#define FLAGS_WR_ID_SHIFT 4
+#define FLAGS_CC_MASK (0x1f << FLAGS_CC_SHIFT)
+#define FLAGS_ID_MASK (0x03 << FLAGS_ID_SHIFT)
+#define FLAGS_WR_EN (1 << 6)
+#define FLAGS_WR_ID_MASK (0x3 << FLAGS_WR_ID_SHIFT)
+
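+/* Minimum encoding size in bytes per opcode: 8 = always needs the long
+ * (64 bit) form, 4 = may be encodable as a short (32 bit) instruction,
+ * 0 = never emitted directly (phi, extract, combine, undef, ...).
+ */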
+const ubyte nv50_inst_min_size_tab[NV_OP_COUNT] =
+{
+ 0, 0, 0, 8, 8, 4, 4, 4, 8, 4, 4, 8, 8, 8, 8, 8, /* 15 */
+ 8, 8, 8, 4, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 31 */
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 47 */
+ 4, 8, 8, 8, 8, 8, 0, 0, 8
+};
+
+/* XXX: silence, you ! */
+unsigned
+nv50_inst_min_size(struct nv_instruction *i);
+
+unsigned
+nv50_inst_min_size(struct nv_instruction *i)
+{
+ int n;
+
+ if (nv50_inst_min_size_tab[i->opcode] > 4)
+ return 8;
+
+   if (i->def[0]) {
+      if (i->def[0]->reg.file != NV_FILE_GPR)
+         return 8;
+      if (i->def[0]->join->reg.id > 63)
+         return 8;
+   }
+
+ for (n = 0; n < 3; ++n) {
+ if (!i->src[n])
+ break;
+ if (i->src[n]->value->reg.file != NV_FILE_GPR &&
+ i->src[n]->value->reg.file != NV_FILE_MEM_V)
+ return 8;
+ if (i->src[n]->value->reg.id > 63)
+ return 8;
+ }
+
+ if (i->flags_def || i->flags_src || i->src[4])
+ return 8;
+
+ if (i->is_join)
+ return 8;
+
+ if (i->src[2]) {
+ if (i->saturate || i->src[2]->mod)
+ return 8;
+ if (i->src[0]->mod ^ i->src[1]->mod)
+ return 8;
+ if ((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS)
+ return 8;
+ if (i->def[0]->join->reg.id < 0 ||
+ i->def[0]->join->reg.id != i->src[2]->value->join->reg.id)
+ return 8;
+ }
+
+ return nv50_inst_min_size_tab[i->opcode];
+}
+
+static INLINE ubyte
+STYPE(struct nv_instruction *nvi, int s)
+{
+ return nvi->src[s]->typecast;
+}
+
+static INLINE ubyte
+DTYPE(struct nv_instruction *nvi, int d)
+{
+ return nvi->def[d]->reg.type;
+}
+
+static INLINE struct nv_reg *
+SREG(struct nv_ref *ref)
+{
+ return &ref->value->join->reg;
+}
+
+static INLINE struct nv_reg *
+DREG(struct nv_value *val)
+{
+ return &val->join->reg;
+}
+
+static INLINE ubyte
+SFILE(struct nv_instruction *nvi, int s)
+{
+ return nvi->src[s]->value->reg.file;
+}
+
+static INLINE ubyte
+DFILE(struct nv_instruction *nvi, int d)
+{
+   return nvi->def[d]->reg.file;
+}
+
+static INLINE void
+SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
+{
+ pc->emit[pos / 32] |= SREG(ref)->id << (pos % 32);
+}
+
+static INLINE void
+DID(struct nv_pc *pc, struct nv_value *val, int pos)
+{
+ pc->emit[pos / 32] |= DREG(val)->id << (pos % 32);
+}
+
+static INLINE uint32_t
+get_immd_u32(struct nv_ref *ref)
+{
+ assert(ref->value->reg.file == NV_FILE_IMM);
+ return ref->value->reg.imm.u32;
+}
+
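+/* Immediates are split across both instruction words: the low 6 bits go into
+ * bits 21:16 of word 0, the upper 26 bits into word 1 starting at bit 2, and
+ * the low two bits of word 1 are set to 3 to mark the immediate form.
+ * E.g. 0x3f800000 (1.0f) yields emit[0] |= 0x0 << 16, emit[1] |= 0x03f80003.
+ */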
+static INLINE void
+set_immd_u32(struct nv_pc *pc, uint32_t u32)
+{
+ pc->emit[1] |= 3;
+ pc->emit[0] |= (u32 & 0x3f) << 16;
+ pc->emit[1] |= (u32 >> 6) << 2;
+}
+
+static INLINE void
+set_immd(struct nv_pc *pc, struct nv_ref *ref)
+{
+ assert(ref->value->reg.file == NV_FILE_IMM);
+ set_immd_u32(pc, get_immd_u32(ref));
+}
+
+static void
+new_fixup(struct nv_pc *pc, unsigned type, uint32_t data, uint32_t m, int s)
+{
+ const unsigned size = sizeof(struct nv_fixup);
+ const unsigned n = pc->num_fixups;
+
+ if (!(n % 8))
+ pc->fixups = REALLOC(pc->fixups, n * size, (n + 8) * size);
+
+ pc->fixups[n].offset = pc->bin_pos + (s / 32);
+ pc->fixups[n].type = type;
+ pc->fixups[n].data = data;
+ pc->fixups[n].mask = m << (s % 32);
+ pc->fixups[n].shift = s % 32;
+
+ ++pc->num_fixups;
+
+ assert(((data << (s % 32)) & pc->fixups[n].mask) == (data << (s % 32)));
+}
+
+static void
+nv_pc_alloc_immd(struct nv_pc *pc, struct nv_ref *ref)
+{
+ uint32_t i, val = get_immd_u32(ref);
+
+ for (i = 0; i < pc->immd_count; ++i)
+ if (pc->immd_buf[i] == val)
+ break;
+
+ if (i == pc->immd_count) {
+ if (!(pc->immd_count % 8))
+ pc->immd_buf = REALLOC(pc->immd_buf,
+ pc->immd_count * 4, (pc->immd_count + 8) * 4);
+ pc->immd_buf[pc->immd_count++] = val;
+ }
+
+ SREG(ref)->id = i;
+}
+
+static INLINE void
+set_pred(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(!(pc->emit[1] & 0x00003f80));
+
+ pc->emit[1] |= i->cc << 7;
+ if (i->flags_src)
+ pc->emit[1] |= SREG(i->flags_src)->id << 12;
+}
+
+static INLINE void
+set_pred_wr(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(!(pc->emit[1] & 0x00000070));
+
+ if (i->flags_def)
+ pc->emit[1] |= (DREG(i->flags_def)->id << 4) | 0x40;
+}
+
+static INLINE void
+set_a16_bits(struct nv_pc *pc, uint id)
+{
+ ++id; /* $a0 is always 0 */
+ pc->emit[0] |= (id & 3) << 26;
+ pc->emit[1] |= id & 4;
+}
+
+static INLINE void
+set_addr(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (i->src[4])
+ set_a16_bits(pc, SREG(i->src[4])->id);
+}
+
+static void
+set_dst(struct nv_pc *pc, struct nv_value *value)
+{
+ struct nv_reg *reg = &value->join->reg;
+
+ if (reg->id < 0) {
+ debug_printf("WARNING: unused dst, hope we can bucket it !\n");
+ pc->emit[0] |= 127 << 2;
+ pc->emit[1] |= 0x8;
+ return;
+ }
+
+ if (reg->file == NV_FILE_OUT)
+ pc->emit[1] |= 0x8;
+ else
+ if (reg->file == NV_FILE_ADDR)
+ assert(0);
+
+ pc->emit[0] |= reg->id << 2;
+}
+
+static void
+set_src_0(struct nv_pc *pc, struct nv_ref *ref)
+{
+ struct nv_reg *reg = SREG(ref);
+
+ if (reg->file == NV_FILE_MEM_S)
+ pc->emit[1] |= 0x00200000;
+ else
+ if (reg->file == NV_FILE_MEM_P)
+ pc->emit[0] |= 0x01800000;
+ else
+ if (reg->file != NV_FILE_GPR)
+ NOUVEAU_ERR("invalid src0 register file: %d\n", reg->file);
+
+ assert(reg->id < 128);
+ pc->emit[0] |= reg->id << 9;
+}
+
+static void
+set_src_1(struct nv_pc *pc, struct nv_ref *ref)
+{
+ struct nv_reg *reg = SREG(ref);
+
+ if (reg->file >= NV_FILE_MEM_C(0) &&
+ reg->file <= NV_FILE_MEM_C(15)) {
+ assert(!(pc->emit[1] & 0x01800000));
+
+ pc->emit[0] |= 0x00800000;
+ pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22;
+ } else
+ if (reg->file != NV_FILE_GPR)
+ NOUVEAU_ERR("invalid src1 register file: %d\n", reg->file);
+
+ assert(reg->id < 128);
+ pc->emit[0] |= reg->id << 16;
+}
+
+static void
+set_src_2(struct nv_pc *pc, struct nv_ref *ref)
+{
+ struct nv_reg *reg = SREG(ref);
+
+ if (reg->file >= NV_FILE_MEM_C(0) &&
+ reg->file <= NV_FILE_MEM_C(15)) {
+ assert(!(pc->emit[1] & 0x01800000));
+
+ pc->emit[0] |= 0x01000000;
+ pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22;
+ } else
+ if (reg->file != NV_FILE_GPR)
+ NOUVEAU_ERR("invalid src2 register file: %d\n", reg->file);
+
+ assert(reg->id < 128);
+ pc->emit[1] |= reg->id << 14;
+}
+
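+/* Overview of the encoding helpers below: instructions are built in
+ * pc->emit[0..1]; bit 0 of word 0 set marks the long (64 bit) form. In the
+ * long forms, the destination register goes to word 0 bits 8:2, src0 to bits
+ * 15:9, src1 to bits 22:16 and src2 to word 1 bits 20:14; the predicate (cc
+ * and $c register) and the flags-write field live in word 1 (see set_pred
+ * and set_pred_wr).
+ */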
+/* the default form:
+ * - long instruction
+ * - 1 to 3 sources in slots 0, 1, 2
+ * - address & flags
+ */
+static void
+emit_form_MAD(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] |= 1;
+
+ set_pred(pc, i);
+ set_pred_wr(pc, i);
+
+ if (i->def[0])
+ set_dst(pc, i->def[0]);
+ else {
+ pc->emit[0] |= 0x01fc;
+ pc->emit[1] |= 0x0008;
+ }
+
+ if (i->src[0])
+ set_src_0(pc, i->src[0]);
+
+ if (i->src[1])
+ set_src_1(pc, i->src[1]);
+
+ if (i->src[2])
+ set_src_2(pc, i->src[2]);
+
+ set_addr(pc, i);
+}
+
+/* like default form, but 2nd source in slot 2, no 3rd source */
+static void
+emit_form_ADD(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] |= 1;
+
+ if (i->def[0])
+ set_dst(pc, i->def[0]);
+ else {
+ pc->emit[0] |= 0x01fc;
+ pc->emit[1] |= 0x0008;
+ }
+
+ set_pred(pc, i);
+ set_pred_wr(pc, i);
+
+ if (i->src[0])
+ set_src_0(pc, i->src[0]);
+
+ if (i->src[1])
+ set_src_2(pc, i->src[1]);
+
+ set_addr(pc, i);
+}
+
+/* short mul */
+static void
+emit_form_MUL(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(!i->is_long && !(pc->emit[0] & 1));
+
+ assert(i->def[0]);
+ set_dst(pc, i->def[0]);
+
+ if (i->src[0])
+ set_src_0(pc, i->src[0]);
+
+ if (i->src[1])
+ set_src_1(pc, i->src[1]);
+}
+
+/* default immediate form
+ * - 1 to 3 sources where last is immediate
+ * - no address or predicate possible
+ */
+static void
+emit_form_IMM(struct nv_pc *pc, struct nv_instruction *i, ubyte mod_mask)
+{
+ pc->emit[0] |= 1;
+
+ assert(i->def[0]);
+ assert(i->src[0]);
+ set_dst(pc, i->def[0]);
+
+ assert(!i->src[4] && !i->flags_src && !i->flags_def);
+
+ if (i->src[2]) {
+ set_immd(pc, i->src[2]);
+ set_src_0(pc, i->src[1]);
+ set_src_1(pc, i->src[0]);
+ } else
+ if (i->src[1]) {
+ set_immd(pc, i->src[1]);
+ set_src_0(pc, i->src[0]);
+ } else
+ set_immd(pc, i->src[0]);
+
+ assert(!mod_mask);
+}
+
+static void
+set_ld_st_size(struct nv_pc *pc, ubyte type)
+{
+ switch (type) {
+ case NV_TYPE_F64:
+ pc->emit[1] |= 0x8000;
+ break;
+ case NV_TYPE_F32:
+ case NV_TYPE_S32:
+ case NV_TYPE_U32:
+ pc->emit[1] |= 0xc000;
+ break;
+ case NV_TYPE_S16:
+ pc->emit[1] |= 0x6000;
+ break;
+ case NV_TYPE_U16:
+ pc->emit[1] |= 0x4000;
+ break;
+ case NV_TYPE_S8:
+ pc->emit[1] |= 0x2000;
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+emit_ld(struct nv_pc *pc, struct nv_instruction *i)
+{
+ ubyte sf = SFILE(i, 0);
+
+ if (sf == NV_FILE_IMM) {
+ sf = NV_FILE_MEM_C(0);
+ nv_pc_alloc_immd(pc, i->src[0]);
+
+ new_fixup(pc, NV_FIXUP_PARAM_RELOC, SREG(i->src[0])->id, 0xffff, 9);
+ }
+
+ if (sf == NV_FILE_MEM_S ||
+ sf == NV_FILE_MEM_P) {
+ pc->emit[0] = 0x10000001;
+ pc->emit[1] = 0x04200000 | (0x3c << 12);
+ if (sf == NV_FILE_MEM_P)
+ pc->emit[0] |= 0x01800000;
+ } else
+ if (sf >= NV_FILE_MEM_C(0) &&
+ sf <= NV_FILE_MEM_C(15)) {
+ pc->emit[0] = 0x10000001;
+ pc->emit[1] = 0x24000000;
+ pc->emit[1] |= (sf - NV_FILE_MEM_C(0)) << 22;
+ } else
+ if (sf >= NV_FILE_MEM_G(0) &&
+ sf <= NV_FILE_MEM_G(15)) {
+ pc->emit[0] = 0xd0000001 | ((sf - NV_FILE_MEM_G(0)) << 16);
+ pc->emit[1] = 0xa0000000;
+
+ assert(i->src[4] && SREG(i->src[4])->file == NV_FILE_GPR);
+ SID(pc, i->src[4], 9);
+ } else
+ if (sf == NV_FILE_MEM_L) {
+ pc->emit[0] = 0xd0000001;
+ pc->emit[1] = 0x40000000;
+ } else {
+ NOUVEAU_ERR("invalid ld source file\n");
+ abort();
+ }
+
+ set_ld_st_size(pc, STYPE(i, 0));
+
+ set_dst(pc, i->def[0]);
+ set_pred_wr(pc, i);
+
+ set_pred(pc, i);
+
+ if (sf < NV_FILE_MEM_G(0) ||
+ sf > NV_FILE_MEM_G(15)) {
+ SID(pc, i->src[0], 9);
+ set_addr(pc, i);
+ }
+}
+
+static void
+emit_st(struct nv_pc *pc, struct nv_instruction *i)
+{
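+   /* XXX: stores are not implemented yet; NV_OP_STA emits nothing here */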
+
+}
+
+static int
+verify_mov(struct nv_instruction *i)
+{
+ ubyte sf = SFILE(i, 0);
+ ubyte df = DFILE(i, 0);
+
+ if (df == NV_FILE_GPR)
+ return 0;
+
+ if (df != NV_FILE_OUT &&
+ df != NV_FILE_FLAGS &&
+ df != NV_FILE_ADDR)
+ return 1;
+
+ if (sf == NV_FILE_FLAGS)
+ return 2;
+ if (sf == NV_FILE_ADDR)
+ return 3;
+ if (sf == NV_FILE_IMM && df != NV_FILE_OUT)
+ return 4;
+
+ return 0;
+}
+
+static void
+emit_mov(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(!verify_mov(i));
+
+ if (SFILE(i, 0) >= NV_FILE_MEM_S)
+ emit_ld(pc, i);
+ else
+ if (SFILE(i, 0) == NV_FILE_FLAGS) {
+ pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2);
+ pc->emit[1] = 0x20000780 | (SREG(i->src[0])->id << 12);
+ } else
+ if (SFILE(i, 0) == NV_FILE_ADDR) {
+ pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2);
+ pc->emit[1] = 0x40000780;
+ set_a16_bits(pc, SREG(i->src[0])->id);
+ } else
+ if (DFILE(i, 0) == NV_FILE_FLAGS) {
+ pc->emit[0] = 0x000001fd;
+ pc->emit[1] = 0xa0000788 | (1 << 6);
+ pc->emit[0] |= SREG(i->src[0])->id << 9;
+ pc->emit[1] |= DREG(i->def[0])->id << 4;
+ } else
+ if (SFILE(i, 0) == NV_FILE_IMM) {
+ if (i->opcode == NV_OP_LDA) {
+ emit_ld(pc, i);
+ } else {
+ pc->emit[0] = 0x10008001;
+ pc->emit[1] = 0x00000003;
+
+ emit_form_IMM(pc, i, 0);
+ }
+ } else {
+ pc->emit[0] = 0x10000000;
+ pc->emit[0] |= DREG(i->def[0])->id << 2;
+ pc->emit[0] |= SREG(i->src[0])->id << 9;
+
+ if (!i->is_long) {
+ pc->emit[0] |= 0x8000;
+ } else {
+ pc->emit[0] |= 0x00000001;
+ pc->emit[1] = 0x0403c000;
+
+ set_pred(pc, i);
+ }
+ }
+
+ if (DFILE(i, 0) == NV_FILE_OUT)
+ pc->emit[1] |= 0x8;
+}
+
+static void
+emit_interp(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x80000000;
+
+ assert(DFILE(i, 0) == NV_FILE_GPR);
+ assert(SFILE(i, 0) == NV_FILE_MEM_V);
+
+ DID(pc, i->def[0], 2);
+ SID(pc, i->src[0], 16);
+
+ if (i->flat)
+ pc->emit[0] |= 1 << 8;
+ else
+ if (i->opcode == NV_OP_PINTERP) {
+ pc->emit[0] |= 1 << 25;
+ pc->emit[0] |= SREG(i->src[1])->id << 9;
+ }
+
+ if (i->centroid)
+ pc->emit[0] |= 1 << 24;
+
+ assert(i->is_long || !i->flags_src);
+
+ if (i->is_long) {
+ set_pred(pc, i);
+
+ pc->emit[1] |=
+ (pc->emit[0] & (3 << 24)) >> (24 - 16) |
+ (pc->emit[0] & (1 << 8)) >> (18 - 8);
+
+ pc->emit[0] |= 1;
+ pc->emit[0] &= ~0x03000100;
+ }
+}
+
+static void
+emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x30000000;
+ pc->emit[1] = (i->opcode == NV_OP_MIN) ? (2 << 28) : 0;
+
+ switch (DTYPE(i, 0)) {
+ case NV_TYPE_F32:
+ pc->emit[0] |= 0x80000000;
+ pc->emit[1] |= 0x80000000;
+ break;
+ case NV_TYPE_S32:
+ pc->emit[1] |= 0x8c000000;
+ break;
+ case NV_TYPE_U32:
+ pc->emit[1] |= 0x84000000;
+ break;
+ }
+
+ emit_form_MAD(pc, i);
+
+ if (i->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000;
+ if (i->src[1]->mod & NV_MOD_ABS) pc->emit[1] |= 0x00080000;
+}
+
+static void
+emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0xb0000000;
+
+ if (SFILE(i, 1) == NV_FILE_IMM) {
+ emit_form_IMM(pc, i, 0);
+
+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 0x8000;
+ if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22;
+ } else
+ if (i->is_long) {
+ emit_form_ADD(pc, i);
+
+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 26;
+ if (i->src[1]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 27;
+ } else {
+ emit_form_MUL(pc, i);
+
+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 0x8000;
+ if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22;
+ }
+}
+
+static void
+emit_add_b32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0x20008000;
+
+ if (SFILE(i, 1) == NV_FILE_IMM) {
+ emit_form_IMM(pc, i, 0);
+ } else
+ if (i->is_long) {
+ pc->emit[0] = 0x20000000;
+ pc->emit[1] = 0x04000000;
+ emit_form_ADD(pc, i);
+ } else {
+ emit_form_MUL(pc, i);
+ }
+
+ if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 28;
+ if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22;
+}
+
+static void
+emit_add_a16(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0xd0000001 | (get_immd_u32(i->src[0]) << 9);
+ pc->emit[1] = 0x20000000;
+
+ pc->emit[0] |= (DREG(i->def[0])->id + 1) << 2;
+
+ set_pred(pc, i);
+
+ if (i->src[1])
+ set_a16_bits(pc, SREG(i->src[1])->id);
+}
+
+static void
+emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op)
+{
+ pc->emit[0] = 0x00000003 | (flow_op << 28);
+ pc->emit[1] = 0x00000000;
+
+ set_pred(pc, i);
+
+ if (i->target && (i->opcode != NV_OP_BREAK)) {
+ new_fixup(pc, NV_FIXUP_CFLOW_RELOC, i->target->bin_pos, 0x7ff800, 11);
+ pc->emit[0] |= (i->target->bin_pos / 4) << 11;
+ }
+}
+
+static INLINE void
+emit_add(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (DFILE(i, 0) == NV_FILE_ADDR)
+ emit_add_a16(pc, i);
+ else {
+ switch (DTYPE(i, 0)) {
+ case NV_TYPE_F32:
+ emit_add_f32(pc, i);
+ break;
+ case NV_TYPE_U32:
+ case NV_TYPE_S32:
+ emit_add_b32(pc, i);
+ break;
+ }
+ }
+}
+
+static void
+emit_bitop2(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0xd0000000;
+
+ if (SFILE(i, 0) == NV_FILE_IMM) {
+ emit_form_IMM(pc, i, 0);
+
+ if (i->opcode == NV_OP_OR)
+ pc->emit[0] |= 0x0100;
+ else
+ if (i->opcode == NV_OP_XOR)
+ pc->emit[0] |= 0x8000;
+ } else {
+ emit_form_MAD(pc, i);
+
+ pc->emit[1] |= 0x04000000;
+
+ if (i->opcode == NV_OP_OR)
+ pc->emit[1] |= 0x4000;
+ else
+ if (i->opcode == NV_OP_XOR)
+ pc->emit[1] |= 0x8000;
+ }
+}
+
+static void
+emit_arl(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(SFILE(i, 0) == NV_FILE_GPR);
+ assert(SFILE(i, 1) == NV_FILE_IMM);
+
+ assert(!i->flags_def);
+
+ pc->emit[0] = 0x00000001;
+ pc->emit[1] = 0xc0000000;
+
+ set_dst(pc, i->def[0]);
+ set_pred(pc, i);
+ set_src_0(pc, i->src[0]);
+ pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x3f) << 16;
+}
+
+static void
+emit_shift(struct nv_pc *pc, struct nv_instruction *i)
+{
+ if (DFILE(i, 0) == NV_FILE_ADDR) {
+ emit_arl(pc, i);
+ return;
+ }
+
+ pc->emit[0] = 0x30000001;
+ pc->emit[1] = 0xc4000000;
+
+ if (i->opcode == NV_OP_SHR)
+ pc->emit[1] |= 1 << 29;
+
+ if (SFILE(i, 1) == NV_FILE_IMM) {
+ pc->emit[1] |= 1 << 20;
+ pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x7f) << 16;
+
+ set_pred(pc, i);
+ } else
+ emit_form_MAD(pc, i);
+
+ if (STYPE(i, 0) == NV_TYPE_S32)
+ pc->emit[1] |= 1 << 27;
+}
+
+static void
+emit_flop(struct nv_pc *pc, struct nv_instruction *i)
+{
+ struct nv_ref *src0 = i->src[0];
+
+ pc->emit[0] = 0x90000000;
+
+ assert(SREG(src0)->type == NV_TYPE_F32);
+ assert(SREG(src0)->file == NV_FILE_GPR);
+
+ if (!i->is_long) {
+ emit_form_MUL(pc, i);
+ assert(i->opcode == NV_OP_RCP && !src0->mod);
+ return;
+ }
+
+ pc->emit[1] = (i->opcode - NV_OP_RCP) << 29;
+
+ emit_form_MAD(pc, i);
+
+ if (src0->mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000;
+ if (src0->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000;
+}
+
+static void
+emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ const boolean neg_mul = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG;
+ const boolean neg_add = (i->src[2]->mod & NV_MOD_NEG);
+
+ pc->emit[0] = 0xe0000000;
+
+ if (!i->is_long) {
+ emit_form_MUL(pc, i);
+ assert(!neg_mul && !neg_add);
+ return;
+ }
+
+ emit_form_MAD(pc, i);
+
+ if (neg_mul) pc->emit[1] |= 0x04000000;
+ if (neg_add) pc->emit[1] |= 0x08000000;
+
+ if (i->saturate)
+ pc->emit[1] |= 0x20000000;
+}
+
+static INLINE void
+emit_mad(struct nv_pc *pc, struct nv_instruction *i)
+{
+ emit_mad_f32(pc, i);
+}
+
+static void
+emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
+{
+ boolean neg = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG;
+
+ pc->emit[0] = 0xc0000000;
+
+ if (SFILE(i, 1) == NV_FILE_IMM) {
+ emit_form_IMM(pc, i, 0);
+
+ if (neg)
+ pc->emit[0] |= 0x8000;
+ } else
+ if (i->is_long) {
+ emit_form_MAD(pc, i);
+
+ if (neg)
+ pc->emit[1] |= 0x08 << 24;
+ } else {
+ emit_form_MUL(pc, i);
+
+ if (neg)
+ pc->emit[0] |= 0x8000;
+ }
+}
+
+static void
+emit_set(struct nv_pc *pc, struct nv_instruction *nvi)
+{
+ assert(nvi->is_long);
+
+ pc->emit[0] = 0x30000000;
+ pc->emit[1] = 0x60000000;
+
+ pc->emit[1] |= nvi->set_cond << 14;
+
+ switch (STYPE(nvi, 0)) {
+ case NV_TYPE_U32: pc->emit[1] |= 0x04000000; break;
+ case NV_TYPE_S32: pc->emit[1] |= 0x0c000000; break;
+ case NV_TYPE_F32: pc->emit[0] |= 0x80000000; break;
+ default:
+ assert(0);
+ break;
+ }
+
+ emit_form_MAD(pc, nvi);
+}
+
+#define CVT_RN (0x00 << 16)
+#define CVT_FLOOR (0x02 << 16)
+#define CVT_CEIL (0x04 << 16)
+#define CVT_TRUNC (0x06 << 16)
+#define CVT_SAT (0x08 << 16)
+#define CVT_ABS (0x10 << 16)
+
+#define CVT_X32_X32 0x04004000
+#define CVT_X32_S32 0x04014000
+#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32)
+#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32)
+#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32)
+#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32)
+#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32)
+#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32)
+#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32)
+#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32)
+#define CVT_U32_U32 ((0x00 << 24) | CVT_X32_X32)
+
+#define CVT_NEG 0x20000000
+#define CVT_RI 0x08000000
+
+static void
+emit_cvt(struct nv_pc *pc, struct nv_instruction *nvi)
+{
+ ubyte dst_type = nvi->def[0] ? DTYPE(nvi, 0) : STYPE(nvi, 0);
+
+ pc->emit[0] = 0xa0000000;
+
+ switch (dst_type) {
+ case NV_TYPE_F32:
+ switch (STYPE(nvi, 0)) {
+ case NV_TYPE_F32: pc->emit[1] = CVT_F32_F32; break;
+ case NV_TYPE_S32: pc->emit[1] = CVT_F32_S32; break;
+ case NV_TYPE_U32: pc->emit[1] = CVT_F32_U32; break;
+ }
+ break;
+ case NV_TYPE_S32:
+ switch (STYPE(nvi, 0)) {
+ case NV_TYPE_F32: pc->emit[1] = CVT_S32_F32; break;
+ case NV_TYPE_S32: pc->emit[1] = CVT_S32_S32; break;
+ case NV_TYPE_U32: pc->emit[1] = CVT_S32_U32; break;
+ }
+ break;
+ case NV_TYPE_U32:
+ switch (STYPE(nvi, 0)) {
+ case NV_TYPE_F32: pc->emit[1] = CVT_U32_F32; break;
+ case NV_TYPE_S32: pc->emit[1] = CVT_U32_S32; break;
+ case NV_TYPE_U32: pc->emit[1] = CVT_U32_U32; break;
+ }
+ break;
+ }
+ if (pc->emit[1] == CVT_F32_F32 &&
+ (nvi->opcode == NV_OP_CEIL || nvi->opcode == NV_OP_FLOOR ||
+ nvi->opcode == NV_OP_TRUNC))
+ pc->emit[1] |= CVT_RI;
+
+ switch (nvi->opcode) {
+ case NV_OP_CEIL: pc->emit[1] |= CVT_CEIL; break;
+ case NV_OP_FLOOR: pc->emit[1] |= CVT_FLOOR; break;
+ case NV_OP_TRUNC: pc->emit[1] |= CVT_TRUNC; break;
+
+ case NV_OP_ABS: pc->emit[1] |= CVT_ABS; break;
+ case NV_OP_SAT: pc->emit[1] |= CVT_SAT; break;
+ case NV_OP_NEG: pc->emit[1] |= CVT_NEG; break;
+ default:
+ assert(nvi->opcode == NV_OP_CVT);
+ break;
+ }
+ assert(nvi->opcode != NV_OP_ABS || !(nvi->src[0]->mod & NV_MOD_NEG));
+
+ if (nvi->src[0]->mod & NV_MOD_NEG) pc->emit[1] ^= CVT_NEG;
+ if (nvi->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= CVT_ABS;
+
+ emit_form_MAD(pc, nvi);
+}
+
+static void
+emit_tex(struct nv_pc *pc, struct nv_instruction *i)
+{
+ pc->emit[0] = 0xf0000001;
+ pc->emit[1] = 0x00000000;
+
+ DID(pc, i->def[0], 2);
+
+ set_pred(pc, i);
+
+ pc->emit[0] |= i->tex_t << 9;
+ pc->emit[0] |= i->tex_s << 17;
+
+ pc->emit[0] |= i->tex_argc << 22;
+
+ pc->emit[0] |= (i->tex_mask & 0x3) << 25;
+ pc->emit[1] |= (i->tex_mask & 0xc) << 12;
+
+ if (i->tex_live)
+ pc->emit[1] |= 4;
+
+ if (i->tex_cube)
+ pc->emit[0] |= 0x08000000;
+
+ if (i->opcode == NV_OP_TXB)
+ pc->emit[1] |= 0x20000000;
+ else
+ if (i->opcode == NV_OP_TXL)
+ pc->emit[1] |= 0x40000000;
+ else
+ pc->emit[0] -= 1 << 22;
+}
+
+static void
+emit_cvt2fixed(struct nv_pc *pc, struct nv_instruction *i)
+{
+ ubyte mod = i->src[0]->mod;
+
+ pc->emit[0] = 0xb0000000;
+ pc->emit[1] = 0xc0000000;
+
+ if (i->opcode == NV_OP_PREEX2)
+ pc->emit[1] |= 0x4000;
+
+ emit_form_MAD(pc, i);
+
+ if (mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000;
+ if (mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000;
+}
+
+static void
+emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR);
+
+ pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0240001 : 0xc0140001;
+ pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 0x86400000 : 0x89800000;
+
+ DID(pc, i->def[0], 2);
+ SID(pc, i->src[0], 9);
+ SID(pc, i->src[0], 32 + 14);
+
+ set_pred(pc, i);
+ set_pred_wr(pc, i);
+}
+
+static void
+emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
+{
+ assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR);
+
+ pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0250001 : 0xc0150001;
+ pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 0x85800000 : 0x8a400000;
+
+ DID(pc, i->def[0], 2);
+ SID(pc, i->src[0], 9);
+ SID(pc, i->src[0], 32 + 14);
+
+ set_pred(pc, i);
+ set_pred_wr(pc, i);
+}
+
+void
+nv50_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
+{
+ // nv_print_instruction(i);
+
+ switch (i->opcode) {
+ case NV_OP_MOV:
+ if (DFILE(i, 0) == NV_FILE_ADDR)
+ emit_add_a16(pc, i);
+ else
+ emit_mov(pc, i);
+ break;
+ case NV_OP_LDA:
+ emit_mov(pc, i);
+ break;
+ case NV_OP_STA:
+ emit_st(pc, i);
+ break;
+ case NV_OP_LINTERP:
+ case NV_OP_PINTERP:
+ emit_interp(pc, i);
+ break;
+ case NV_OP_ADD:
+ emit_add(pc, i);
+ break;
+ case NV_OP_AND:
+ case NV_OP_OR:
+ case NV_OP_XOR:
+ emit_bitop2(pc, i);
+ break;
+ case NV_OP_CVT:
+ case NV_OP_ABS:
+ case NV_OP_NEG:
+ case NV_OP_SAT:
+ case NV_OP_CEIL:
+ case NV_OP_FLOOR:
+ case NV_OP_TRUNC:
+ emit_cvt(pc, i);
+ break;
+ case NV_OP_DFDX:
+ emit_ddx(pc, i);
+ break;
+ case NV_OP_DFDY:
+ emit_ddy(pc, i);
+ break;
+ case NV_OP_RCP:
+ case NV_OP_RSQ:
+ case NV_OP_LG2:
+ case NV_OP_SIN:
+ case NV_OP_COS:
+ case NV_OP_EX2:
+ emit_flop(pc, i);
+ break;
+ case NV_OP_PRESIN:
+ case NV_OP_PREEX2:
+ emit_cvt2fixed(pc, i);
+ break;
+ case NV_OP_MAD:
+ emit_mad(pc, i);
+ break;
+ case NV_OP_MAX:
+ case NV_OP_MIN:
+ emit_minmax(pc, i);
+ break;
+ case NV_OP_MUL:
+ emit_mul_f32(pc, i);
+ break;
+ case NV_OP_SET:
+ emit_set(pc, i);
+ break;
+ case NV_OP_SHL:
+ case NV_OP_SHR:
+ emit_shift(pc, i);
+ break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ emit_tex(pc, i);
+ break;
+ case NV_OP_KIL:
+ emit_flow(pc, i, 0x0);
+ break;
+ case NV_OP_BRA:
+ emit_flow(pc, i, 0x1);
+ break;
+ case NV_OP_CALL:
+ emit_flow(pc, i, 0x2);
+ break;
+ case NV_OP_RET:
+ emit_flow(pc, i, 0x3);
+ break;
+ case NV_OP_BREAKADDR:
+ emit_flow(pc, i, 0x4);
+ break;
+ case NV_OP_BREAK:
+ emit_flow(pc, i, 0x5);
+ break;
+ case NV_OP_JOINAT:
+ emit_flow(pc, i, 0xa);
+ break;
+ case NV_OP_NOP:
+ case NV_OP_JOIN:
+ pc->emit[0] = 0xf0000001;
+ pc->emit[1] = 0xe0000000;
+ break;
+ case NV_OP_PHI:
+ case NV_OP_UNDEF:
+ case NV_OP_SUB:
+ NOUVEAU_ERR("operation \"%s\" should have been eliminated\n",
+ nv_opcode_name(i->opcode));
+ break;
+ default:
+ NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
+ abort();
+ break;
+ }
+
+ if (i->is_join) {
+ assert(i->is_long && !(pc->emit[1] & 1));
+ pc->emit[1] |= 2;
+ }
+
+ assert((pc->emit[0] & 1) == i->is_long);
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
new file mode 100644
index 0000000000..80f3bb34b0
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -0,0 +1,1112 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_pc.h"
+
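+/* Recurse from basic block @b into its CFG successors in arbitrary order,
+ * calling pass function @f on each block that has not been visited in the
+ * current pass yet (pass_seq guards against processing a block twice).
+ * Expects @b and @ctx to be in scope at the point of use.
+ */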
+#define DESCEND_ARBITRARY(j, f) \
+do { \
+ b->pass_seq = ctx->pc->pass_seq; \
+ \
+ for (j = 0; j < 2; ++j) \
+ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
+ f(ctx, b->out[j]); \
+} while (0)
+
+extern unsigned nv50_inst_min_size(struct nv_instruction *);
+
+struct nv_pc_pass {
+ struct nv_pc *pc;
+};
+
+static INLINE boolean
+values_equal(struct nv_value *a, struct nv_value *b)
+{
+ /* XXX: sizes */
+ return (a->reg.file == b->reg.file && a->join->reg.id == b->join->reg.id);
+}
+
+static INLINE boolean
+inst_commutation_check(struct nv_instruction *a,
+ struct nv_instruction *b)
+{
+ int si, di;
+
+ for (di = 0; di < 4; ++di) {
+ if (!a->def[di])
+ break;
+ for (si = 0; si < 5; ++si) {
+ if (!b->src[si])
+ continue;
+ if (values_equal(a->def[di], b->src[si]->value))
+ return FALSE;
+ }
+ }
+
+ if (b->flags_src && b->flags_src->value == a->flags_def)
+ return FALSE;
+
+ return TRUE;
+}
+
+/* Check whether we can swap the order of the instructions,
+ * where a & b may be either the earlier or the later one.
+ */
+static boolean
+inst_commutation_legal(struct nv_instruction *a,
+ struct nv_instruction *b)
+{
+ return inst_commutation_check(a, b) && inst_commutation_check(b, a);
+}
+
+static INLINE boolean
+inst_cullable(struct nv_instruction *nvi)
+{
+ return (!(nvi->is_terminator || nvi->is_join ||
+ nvi->target ||
+ nvi->fixed ||
+ nv_nvi_refcount(nvi)));
+}
+
+static INLINE boolean
+nvi_isnop(struct nv_instruction *nvi)
+{
+ if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
+ return TRUE;
+
+ if (nvi->fixed ||
+ nvi->is_terminator ||
+ nvi->flags_src ||
+ nvi->flags_def ||
+ nvi->is_join)
+ return FALSE;
+
+ if (nvi->def[0]->join->reg.id < 0)
+ return TRUE;
+
+ if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
+ return FALSE;
+
+ if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
+ return FALSE;
+
+ if (nvi->src[0]->value->join->reg.id < 0) {
+ debug_printf("nvi_isnop: orphaned value detected\n");
+ return TRUE;
+ }
+
+ if (nvi->opcode == NV_OP_SELECT)
+ if (!values_equal(nvi->def[0], nvi->src[1]->value))
+ return FALSE;
+
+ return values_equal(nvi->def[0], nvi->src[0]->value);
+}
+
+struct nv_pass {
+ struct nv_pc *pc;
+ int n;
+ void *priv;
+};
+
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
+
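+/* Last preparation of a block before encoding: drop no-op instructions and
+ * pick short (32 bit) vs. long (64 bit) encodings. Short instructions
+ * apparently have to come in pairs filling a 64 bit slot, so an unpaired
+ * short op is either permuted with a following short one or promoted to the
+ * long form (together with its odd predecessor). Sizes are counted in 32 bit
+ * units and converted to bytes at the end (bin_size *= 4).
+ */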
+static void
+nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
+{
+ struct nv_pc *pc = (struct nv_pc *)priv;
+ struct nv_basic_block *in;
+ struct nv_instruction *nvi, *next;
+ int j;
+ uint size, n32 = 0;
+
+ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
+ if (j >= 0) {
+ in = pc->bb_list[j];
+
+ /* check for no-op branches (BRA $PC+8) */
+ if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
+ in->bin_size -= 8;
+ pc->bin_size -= 8;
+
+ for (++j; j < pc->num_blocks; ++j)
+ pc->bb_list[j]->bin_pos -= 8;
+
+ nv_nvi_delete(in->exit);
+ }
+ b->bin_pos = in->bin_pos + in->bin_size;
+ }
+
+ pc->bb_list[pc->num_blocks++] = b;
+
+ /* visit node */
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ next = nvi->next;
+ if (nvi_isnop(nvi))
+ nv_nvi_delete(nvi);
+ }
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ next = nvi->next;
+
+ size = nv50_inst_min_size(nvi);
+ if (nvi->next && size < 8)
+ ++n32;
+ else
+ if ((n32 & 1) && nvi->next &&
+ nv50_inst_min_size(nvi->next) == 4 &&
+ inst_commutation_legal(nvi, nvi->next)) {
+ ++n32;
+ debug_printf("permuting: ");
+ nv_print_instruction(nvi);
+ nv_print_instruction(nvi->next);
+ nv_nvi_permute(nvi, nvi->next);
+ next = nvi;
+ } else {
+ nvi->is_long = 1;
+
+ b->bin_size += n32 & 1;
+ if (n32 & 1)
+ nvi->prev->is_long = 1;
+ n32 = 0;
+ }
+ b->bin_size += 1 + nvi->is_long;
+ }
+
+ if (!b->entry) {
+ debug_printf("block %p is now empty\n", b);
+ } else
+ if (!b->exit->is_long) {
+ assert(n32);
+ b->exit->is_long = 1;
+ b->bin_size += 1;
+
+ /* might have del'd a hole tail of instructions */
+ if (!b->exit->prev->is_long && !(n32 & 1)) {
+ b->bin_size += 1;
+ b->exit->prev->is_long = 1;
+ }
+ }
+ assert(!b->entry || (b->exit && b->exit->is_long));
+
+ pc->bin_size += b->bin_size *= 4;
+}
+
+int
+nv_pc_exec_pass2(struct nv_pc *pc)
+{
+ struct nv_pass pass;
+
+ pass.pc = pc;
+
+ pc->pass_seq++;
+ nv_pass_flatten(&pass, pc->root);
+
+ debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
+
+ pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
+ pc->num_blocks = 0;
+
+ nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
+
+ return 0;
+}
+
+static INLINE boolean
+is_cmem_load(struct nv_instruction *nvi)
+{
+ return (nvi->opcode == NV_OP_LDA &&
+ nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
+ nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
+}
+
+static INLINE boolean
+is_smem_load(struct nv_instruction *nvi)
+{
+ return (nvi->opcode == NV_OP_LDA &&
+ (nvi->src[0]->value->reg.file == NV_FILE_MEM_S ||
+           nvi->src[0]->value->reg.file == NV_FILE_MEM_P));
+}
+
+static INLINE boolean
+is_immd_move(struct nv_instruction *nvi)
+{
+ return (nvi->opcode == NV_OP_MOV &&
+ nvi->src[0]->value->reg.file == NV_FILE_IMM);
+}
+
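+/* For commutative ops, prefer c[] loads in the second and s[] loads in the
+ * first source slot (only src1/src2 can take a c[] operand directly, cf.
+ * set_src_1/set_src_2). When the sources of a SET are swapped, the condition
+ * is mirrored via cc_swapped: lt (1) <-> gt (4), le (3) <-> ge (6), while
+ * eq/ne stay the same.
+ */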
+static INLINE void
+check_swap_src_0_1(struct nv_instruction *nvi)
+{
+ static const ubyte cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
+
+ struct nv_ref *src0 = nvi->src[0], *src1 = nvi->src[1];
+
+ if (!nv_op_commutative(nvi->opcode))
+ return;
+ assert(src0 && src1);
+
+ if (src1->value->reg.file == NV_FILE_IMM) {
+ /* should only be present from folding a constant MUL part of a MAD */
+ assert(nvi->opcode == NV_OP_ADD);
+ return;
+ }
+
+ if (is_cmem_load(src0->value->insn)) {
+ if (!is_cmem_load(src1->value->insn)) {
+ nvi->src[0] = src1;
+ nvi->src[1] = src0;
+ /* debug_printf("swapping cmem load to 1\n"); */
+ }
+ } else
+ if (is_smem_load(src1->value->insn)) {
+ if (!is_smem_load(src0->value->insn)) {
+ nvi->src[0] = src1;
+ nvi->src[1] = src0;
+ /* debug_printf("swapping smem load to 0\n"); */
+ }
+ }
+
+ if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
+ nvi->set_cond = cc_swapped[nvi->set_cond];
+}
+
+static int
+nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *sti, *next;
+ int j;
+
+ for (sti = b->entry; sti; sti = next) {
+ next = sti->next;
+
+ /* only handling MOV to $oX here */
+ if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
+ continue;
+ if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
+ continue;
+
+ nvi = sti->src[0]->value->insn;
+ if (!nvi || nvi->opcode == NV_OP_PHI)
+ continue;
+ assert(nvi->def[0] == sti->src[0]->value);
+
+ if (nvi->def[0]->refc > 1)
+ continue;
+
+ /* cannot write to $oX when using immediate */
+ for (j = 0; j < 4 && nvi->src[j]; ++j)
+ if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
+ break;
+ if (j < 4 && nvi->src[j])
+ continue;
+
+ nvi->def[0] = sti->def[0];
+ nvi->fixed = sti->fixed;
+
+ nv_nvi_delete(sti);
+ }
+ DESCEND_ARBITRARY(j, nv_pass_fold_stores);
+
+ return 0;
+}
+
+static int
+nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *ld;
+ int j;
+
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ check_swap_src_0_1(nvi);
+
+ for (j = 0; j < 3; ++j) {
+ if (!nvi->src[j])
+ break;
+ ld = nvi->src[j]->value->insn;
+ if (!ld)
+ continue;
+
+ if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
+ nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
+ continue;
+ }
+
+ if (ld->opcode != NV_OP_LDA)
+ continue;
+ if (!nv50_nvi_can_load(nvi, j, ld->src[0]->value))
+ continue;
+
+ if (j == 0 && ld->src[4]) /* can't load shared mem */
+ continue;
+
+ /* fold it ! */ /* XXX: ref->insn */
+ nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
+ if (ld->src[4])
+ nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
+ }
+ }
+ DESCEND_ARBITRARY(j, nv_pass_fold_loads);
+
+ return 0;
+}
+
+static int
+nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ int j;
+ struct nv_instruction *nvi, *mi, *next;
+ ubyte mod;
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ next = nvi->next;
+ if (nvi->opcode == NV_OP_SUB) {
+ nvi->opcode = NV_OP_ADD;
+ nvi->src[1]->mod ^= NV_MOD_NEG;
+ }
+
+ /* should not put any modifiers on NEG and ABS */
+      assert(nvi->opcode != NV_OP_NEG || !nvi->src[0]->mod);
+      assert(nvi->opcode != NV_OP_ABS || !nvi->src[0]->mod);
+
+ for (j = 0; j < 4; ++j) {
+ if (!nvi->src[j])
+ break;
+
+ mi = nvi->src[j]->value->insn;
+ if (!mi)
+ continue;
+ if (mi->def[0]->refc > 1)
+ continue;
+
+ if (mi->opcode == NV_OP_NEG) mod = NV_MOD_NEG;
+ else
+ if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
+ else
+ continue;
+
+ if (nvi->opcode == NV_OP_ABS)
+ mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
+ else
+ if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
+ nvi->opcode = NV_OP_MOV;
+ mod = 0;
+ }
+
+ if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
+ continue;
+
+ nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
+
+ nvi->src[j]->mod ^= mod;
+ }
+
+ if (nvi->opcode == NV_OP_SAT) {
+ mi = nvi->src[0]->value->insn;
+
+         if (mi && (mi->opcode == NV_OP_MAD) && !mi->flags_def) {
+ mi->saturate = 1;
+ mi->def[0] = nvi->def[0];
+ nv_nvi_delete(nvi);
+ }
+ }
+ }
+ DESCEND_ARBITRARY(j, nv_pass_lower_mods);
+
+ return 0;
+}
+
+#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
+
+static struct nv_value *
+find_immediate(struct nv_ref *ref)
+{
+ struct nv_value *src;
+
+ if (!ref)
+ return NULL;
+
+ src = ref->value;
+ while (src->insn && src->insn->opcode == NV_OP_MOV) {
+ assert(!src->insn->src[0]->mod);
+ src = src->insn->src[0]->value;
+ }
+ return (src->reg.file == NV_FILE_IMM) ? src : NULL;
+}
+
+static void
+modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
+{
+ if (mod & NV_MOD_ABS) {
+ if (type == NV_TYPE_F32)
+ *val &= 0x7fffffff;
+ else
+ if ((*val) & (1 << 31))
+ *val = ~(*val) + 1;
+ }
+ if (mod & NV_MOD_NEG) {
+ if (type == NV_TYPE_F32)
+ *val ^= 0x80000000;
+ else
+ *val = ~(*val) + 1;
+ }
+}
+
+static INLINE uint
+modifiers_opcode(ubyte mod)
+{
+ switch (mod) {
+ case NV_MOD_NEG: return NV_OP_NEG;
+ case NV_MOD_ABS: return NV_OP_ABS;
+ case 0:
+ return NV_OP_MOV;
+ default:
+ return NV_OP_NOP;
+ }
+}
+
+static void
+constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
+ struct nv_value *src0, struct nv_value *src1)
+{
+ struct nv_value *val;
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u0, u1, u;
+ ubyte type;
+
+ if (!nvi->def[0])
+ return;
+ type = nvi->def[0]->reg.type;
+
+ u.u32 = 0;
+ u0.u32 = src0->reg.imm.u32;
+ u1.u32 = src1->reg.imm.u32;
+
+ modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
+   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);
+
+ switch (nvi->opcode) {
+ case NV_OP_MAD:
+ if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
+ return;
+ /* fall through */
+ case NV_OP_MUL:
+ switch (type) {
+ case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
+ case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
+ case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case NV_OP_ADD:
+ switch (type) {
+ case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
+ case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
+ case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case NV_OP_SUB:
+ switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
+      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
+      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ default:
+ return;
+ }
+
+ nvi->opcode = NV_OP_MOV;
+
+ val = new_value(pc, NV_FILE_IMM, type);
+
+ val->reg.imm.u32 = u.u32;
+
+ nv_reference(pc, &nvi->src[1], NULL);
+ nv_reference(pc, &nvi->src[0], val);
+
+ if (nvi->src[2]) { /* from MAD */
+ nvi->src[1] = nvi->src[0];
+ nvi->src[0] = nvi->src[2];
+ nvi->src[2] = NULL;
+ nvi->opcode = NV_OP_ADD;
+ }
+}
+
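+/* Strength-reduce ops with one constant operand, e.g.:
+ *   mul r, a, 1.0  -> mov r, a
+ *   mul r, a, 2.0  -> add r, a, a
+ *   mul r, a, -1.0 -> neg r, a
+ *   mul r, a, 0    -> mov r, 0
+ *   add r, a, 0    -> mov r, a
+ * and rcp/rsq of an immediate are folded to the computed constant.
+ */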
+static void
+constant_operand(struct nv_pc *pc,
+ struct nv_instruction *nvi, struct nv_value *val, int s)
+{
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u;
+ int t = s ? 0 : 1;
+ uint op;
+ ubyte type;
+
+ if (!nvi->def[0])
+ return;
+ type = nvi->def[0]->reg.type;
+
+ u.u32 = val->reg.imm.u32;
+ modifiers_apply(&u.u32, type, nvi->src[s]->mod);
+
+ switch (nvi->opcode) {
+ case NV_OP_MUL:
+ if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
+ (NV_TYPE_ISINT(type) && u.u32 == 1)) {
+ if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+ break;
+ nvi->opcode = op;
+ nv_reference(pc, &nvi->src[s], NULL);
+ nvi->src[0] = nvi->src[t];
+ nvi->src[1] = NULL;
+ } else
+ if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
+ (NV_TYPE_ISINT(type) && u.u32 == 2)) {
+ nvi->opcode = NV_OP_ADD;
+ nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
+ nvi->src[s]->mod = nvi->src[t]->mod;
+ } else
+ if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
+ if (nvi->src[t]->mod & NV_MOD_NEG)
+ nvi->opcode = NV_OP_MOV;
+ else
+ nvi->opcode = NV_OP_NEG;
+ nv_reference(pc, &nvi->src[s], NULL);
+ nvi->src[0] = nvi->src[t];
+ nvi->src[1] = NULL;
+ } else
+ if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
+ nvi->opcode = NV_OP_ADD;
+ nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
+ nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
+ } else
+ if (u.u32 == 0) {
+ nvi->opcode = NV_OP_MOV;
+ nv_reference(pc, &nvi->src[t], NULL);
+ if (s) {
+ nvi->src[0] = nvi->src[1];
+ nvi->src[1] = NULL;
+ }
+ }
+ break;
+ case NV_OP_ADD:
+ if (u.u32 == 0) {
+ if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+ break;
+ nvi->opcode = op;
+ nv_reference(pc, &nvi->src[s], NULL);
+ nvi->src[0] = nvi->src[t];
+ nvi->src[1] = NULL;
+ }
+ break;
+ case NV_OP_RCP:
+ u.f32 = 1.0f / u.f32;
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, &nvi->src[0], val);
+ break;
+ case NV_OP_RSQ:
+ u.f32 = 1.0f / sqrtf(u.f32);
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, &nvi->src[0], val);
+ break;
+ default:
+ break;
+ }
+}
+
+static int
+nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi, *next;
+ int j;
+
+ for (nvi = b->entry; nvi; nvi = next) {
+ struct nv_value *src0, *src1, *src;
+ int mod;
+
+ next = nvi->next;
+
+ src0 = find_immediate(nvi->src[0]);
+ src1 = find_immediate(nvi->src[1]);
+
+ if (src0 && src1)
+ constant_expression(ctx->pc, nvi, src0, src1);
+ else {
+ if (src0)
+ constant_operand(ctx->pc, nvi, src0, 0);
+ else
+ if (src1)
+ constant_operand(ctx->pc, nvi, src1, 1);
+ }
+
+ /* try to combine MUL, ADD into MAD */
+ if (nvi->opcode != NV_OP_ADD)
+ continue;
+
+ src0 = nvi->src[0]->value;
+ src1 = nvi->src[1]->value;
+
+ if (SRC_IS_MUL(src0) && src0->refc == 1)
+ src = src0;
+ else
+ if (SRC_IS_MUL(src1) && src1->refc == 1)
+ src = src1;
+ else
+ continue;
+
+ nvi->opcode = NV_OP_MAD;
+ mod = nvi->src[(src == src0) ? 0 : 1]->mod;
+ nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
+ nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];
+
+ assert(!(mod & ~NV_MOD_NEG));
+ nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
+ nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
+ nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
+ nvi->src[1]->mod = src->insn->src[1]->mod;
+ }
+ DESCEND_ARBITRARY(j, nv_pass_lower_arith);
+
+ return 0;
+}
+
+/* Example hardware code for branching on the result of a comparison:
+set $r2 g f32 $r2 $r3
+cvt abs rn f32 $r2 s32 $r2
+cvt f32 $c0 # f32 $r2
+e $c0 bra 0x80
+*/
+#if 0
+static int
+nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ /* XXX: easier in IR builder for now */
+ return 0;
+}
+#endif
+
+/* TODO: redundant store elimination */
+
+struct load_record {
+ struct load_record *next;
+ uint64_t data;
+ struct nv_value *value;
+};
+
+#define LOAD_RECORD_POOL_SIZE 1024
+
+struct nv_pass_reld_elim {
+ struct nv_pc *pc;
+
+ struct load_record *imm;
+ struct load_record *mem_s;
+ struct load_record *mem_v;
+ struct load_record *mem_c[16];
+ struct load_record *mem_l;
+
+ struct load_record pool[LOAD_RECORD_POOL_SIZE];
+ int alloc;
+};
+
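+/* Per basic block, remember the last instruction that loaded a given
+ * immediate, attribute (v[]), s[], c[] or l[] location; later loads of the
+ * same location are replaced by the recorded value, unless the reload
+ * already has a register assigned, in which case it becomes the new record.
+ */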
+static int
+nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
+{
+ struct load_record **rec, *it;
+ struct nv_instruction *ld, *next;
+ uint64_t data;
+ struct nv_value *val;
+ int j;
+
+ for (ld = b->entry; ld; ld = next) {
+ next = ld->next;
+ if (!ld->src[0])
+ continue;
+ val = ld->src[0]->value;
+ rec = NULL;
+
+ if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
+ data = val->reg.id;
+ rec = &ctx->mem_v;
+ } else
+ if (ld->opcode == NV_OP_LDA) {
+ data = val->reg.id;
+ if (val->reg.file >= NV_FILE_MEM_C(0) &&
+ val->reg.file <= NV_FILE_MEM_C(15))
+ rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
+ else
+ if (val->reg.file == NV_FILE_MEM_S)
+ rec = &ctx->mem_s;
+ else
+ if (val->reg.file == NV_FILE_MEM_L)
+ rec = &ctx->mem_l;
+ } else
+ if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
+ data = val->reg.imm.u32;
+ rec = &ctx->imm;
+ }
+
+ if (!rec || !ld->def[0]->refc)
+ continue;
+
+ for (it = *rec; it; it = it->next)
+ if (it->data == data)
+ break;
+
+ if (it) {
+ if (ld->def[0]->reg.id >= 0)
+ it->value = ld->def[0];
+ else
+ nvcg_replace_value(ctx->pc, ld->def[0], it->value);
+ } else {
+ if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
+ continue;
+ it = &ctx->pool[ctx->alloc++];
+ it->next = *rec;
+ it->data = data;
+ it->value = ld->def[0];
+ *rec = it;
+ }
+ }
+
+ ctx->imm = NULL;
+ ctx->mem_s = NULL;
+ ctx->mem_v = NULL;
+ for (j = 0; j < 16; ++j)
+ ctx->mem_c[j] = NULL;
+ ctx->mem_l = NULL;
+ ctx->alloc = 0;
+
+ DESCEND_ARBITRARY(j, nv_pass_reload_elim);
+
+ return 0;
+}
+
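+/* Derive the TEX write mask from which of the four defs are actually used,
+ * and move the live defs to the front; presumably the hardware writes the
+ * enabled components to consecutive registers.
+ */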
+static int
+nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ int i, c, j;
+
+ for (i = 0; i < ctx->pc->num_instructions; ++i) {
+ struct nv_instruction *nvi = &ctx->pc->instructions[i];
+ struct nv_value *def[4];
+
+ if (!nv_is_vector_op(nvi->opcode))
+ continue;
+ nvi->tex_mask = 0;
+
+ for (c = 0; c < 4; ++c) {
+ if (nvi->def[c]->refc)
+ nvi->tex_mask |= 1 << c;
+ def[c] = nvi->def[c];
+ }
+
+ j = 0;
+ for (c = 0; c < 4; ++c)
+ if (nvi->tex_mask & (1 << c))
+ nvi->def[j++] = def[c];
+ for (c = 0; c < 4; ++c)
+ if (!(nvi->tex_mask & (1 << c)))
+ nvi->def[j++] = def[c];
+ assert(j == 4);
+ }
+ return 0;
+}
+
+struct nv_pass_dce {
+ struct nv_pc *pc;
+ uint removed;
+};
+
+static int
+nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
+{
+ int j;
+ struct nv_instruction *nvi, *next;
+
+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
+ next = nvi->next;
+
+ if (inst_cullable(nvi)) {
+ nv_nvi_delete(nvi);
+
+ ++ctx->removed;
+ }
+ }
+ DESCEND_ARBITRARY(j, nv_pass_dce);
+
+ return 0;
+}
+
+/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
+ * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
+ * BREAK and dummy ELSE block.
+ */
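+/*
+ * Recognized shape (with bb->out[1] possibly being a dummy ELSE block):
+ *
+ *        bb
+ *       /  \
+ *   out[0] out[1]
+ *       \  /
+ *      merge
+ *
+ * In the BREAK case, out[0] leaves the loop and reaches the merge block
+ * through its second outgoing edge instead.
+ */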
+static INLINE boolean
+bb_is_if_else_endif(struct nv_basic_block *bb)
+{
+ if (!bb->out[0] || !bb->out[1])
+ return FALSE;
+
+ if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
+ return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
+ !bb->out[1]->out[1]);
+ } else {
+ return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
+ !bb->out[0]->out[1] &&
+ !bb->out[1]->out[1]);
+ }
+}
+
+/* predicate instructions and remove branch at the end */
+static void
+predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
+ struct nv_value *p, ubyte cc)
+{
+ struct nv_instruction *nvi;
+
+ if (!b->entry)
+ return;
+ for (nvi = b->entry; nvi->next; nvi = nvi->next) {
+ if (!nvi_isnop(nvi)) {
+ nvi->cc = cc;
+ nv_reference(pc, &nvi->flags_src, p);
+ }
+ }
+
+ if (nvi->opcode == NV_OP_BRA)
+ nv_nvi_delete(nvi);
+ else
+ if (!nvi_isnop(nvi)) {
+ nvi->cc = cc;
+ nv_reference(pc, &nvi->flags_src, p);
+ }
+}
+
+/* NOTE: Run this after register allocation: we can then simply cut out the
+ * cflow instructions and hook the predicates to the conditional OPs, provided
+ * they don't use immediates; this is better than inserting SELECTs to join
+ * definitions.
+ *
+ * NOTE: Should adapt prior optimizations to make this possible more often.
+ */
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *nvi;
+ struct nv_value *pred;
+ int i;
+ int n0 = 0, n1 = 0;
+
+ if (bb_is_if_else_endif(b)) {
+
+ debug_printf("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);
+
+ for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
+ if (!nv50_nvi_can_predicate(nvi))
+ break;
+ if (!nvi) {
+ for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
+ if (!nv50_nvi_can_predicate(nvi))
+ break;
+ if (nvi) {
+ debug_printf("cannot predicate: "); nv_print_instruction(nvi);
+ }
+ } else {
+ debug_printf("cannot predicate: "); nv_print_instruction(nvi);
+ }
+
+ if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
+ assert(b->exit && b->exit->flags_src);
+ pred = b->exit->flags_src->value;
+
+ predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
+ predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);
+
+ assert(b->exit && b->exit->opcode == NV_OP_BRA);
+ nv_nvi_delete(b->exit);
+
+ if (b->exit && b->exit->opcode == NV_OP_JOINAT)
+ nv_nvi_delete(b->exit);
+
+ if ((nvi = b->out[0]->out[0]->entry)) {
+ nvi->is_join = 0;
+ if (nvi->opcode == NV_OP_JOIN)
+ nv_nvi_delete(nvi);
+ }
+ }
+ }
+ DESCEND_ARBITRARY(i, nv_pass_flatten);
+
+ return 0;
+}
+
+/* local common subexpression elimination, stupid O(n^2) implementation */
+static int
+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *ir, *ik, *next;
+ struct nv_instruction *entry = b->phi ? b->phi : b->entry;
+ int s;
+ unsigned int reps;
+
+ do {
+ reps = 0;
+ for (ir = entry; ir; ir = next) {
+ next = ir->next;
+ for (ik = entry; ik != ir; ik = ik->next) {
+ if (ir->opcode != ik->opcode)
+ continue;
+
+ if (ik->opcode == NV_OP_LDA ||
+ ik->opcode == NV_OP_STA ||
+ ik->opcode == NV_OP_MOV ||
+ nv_is_vector_op(ik->opcode))
+ continue; /* ignore loads, stores & moves */
+
+ if (ik->src[4] || ir->src[4])
+ continue; /* don't mess with address registers */
+
+ if (ik->flags_src || ir->flags_src ||
+ ik->flags_def || ir->flags_def)
+ continue; /* and also not with flags, for now */
+
+ assert(ik->def[0] && ir->def[0]);
+
+ if (ik->def[0]->reg.file == NV_FILE_OUT ||
+ ir->def[0]->reg.file == NV_FILE_OUT ||
+ !values_equal(ik->def[0], ir->def[0]))
+ continue;
+
+ for (s = 0; s < 3; ++s) {
+ struct nv_value *a, *b;
+
+ if (!ik->src[s]) {
+ if (ir->src[s])
+ break;
+ continue;
+ }
+ if (ik->src[s]->mod != ir->src[s]->mod)
+ break;
+ a = ik->src[s]->value;
+ b = ir->src[s]->value;
+ if (a == b)
+ continue;
+ if (a->reg.file != b->reg.file ||
+ a->reg.id < 0 ||
+ a->reg.id != b->reg.id)
+ break;
+ }
+ if (s == 3) {
+ nv_nvi_delete(ir);
+ ++reps;
+ nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
+ break;
+ }
+ }
+ }
+ } while(reps);
+
+ DESCEND_ARBITRARY(s, nv_pass_cse);
+
+ return 0;
+}
+
+int
+nv_pc_exec_pass0(struct nv_pc *pc)
+{
+ struct nv_pass_reld_elim *reldelim;
+ struct nv_pass pass;
+ struct nv_pass_dce dce;
+ int ret;
+
+ pass.n = 0;
+ pass.pc = pc;
+
+ /* Do this first, so we don't have to pay attention
+ * to whether sources are supported memory loads.
+ */
+ pc->pass_seq++;
+ ret = nv_pass_lower_arith(&pass, pc->root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_fold_loads(&pass, pc->root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_fold_stores(&pass, pc->root);
+ if (ret)
+ return ret;
+
+ reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
+ reldelim->pc = pc;
+ pc->pass_seq++;
+ ret = nv_pass_reload_elim(reldelim, pc->root);
+ FREE(reldelim);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_cse(&pass, pc->root);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_lower_mods(&pass, pc->root);
+ if (ret)
+ return ret;
+
+ dce.pc = pc;
+ do {
+ dce.removed = 0;
+ pc->pass_seq++;
+ ret = nv_pass_dce(&dce, pc->root);
+ if (ret)
+ return ret;
+ } while (dce.removed);
+
+ ret = nv_pass_tex_mask(&pass, pc->root);
+ if (ret)
+ return ret;
+
+ return ret;
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc_print.c b/src/gallium/drivers/nv50/nv50_pc_print.c
new file mode 100644
index 0000000000..7bdeb1c78d
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_pc_print.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nv50_pc.h"
+
+#define NVXX_DEBUG 0
+
+#define PRINT(args...) debug_printf(args)
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#endif
+
+static const char *norm = "\x1b[00m";
+static const char *gree = "\x1b[32m";
+static const char *blue = "\x1b[34m";
+static const char *cyan = "\x1b[36m";
+static const char *orng = "\x1b[33m";
+static const char *mgta = "\x1b[35m";
+
+static const char *nv_opcode_names[NV_OP_COUNT + 1] = {
+ "phi",
+ "extract",
+ "combine",
+ "lda",
+ "sta",
+ "mov",
+ "add",
+ "sub",
+ "neg",
+ "mul",
+ "mad",
+ "cvt",
+ "sat",
+ "not",
+ "and",
+ "or",
+ "xor",
+ "shl",
+ "shr",
+ "rcp",
+ "undef",
+ "rsqrt",
+ "lg2",
+ "sin",
+ "cos",
+ "ex2",
+ "presin",
+ "preex2",
+ "min",
+ "max",
+ "set",
+ "sad",
+ "kil",
+ "bra",
+ "call",
+ "ret",
+ "break",
+ "breakaddr",
+ "joinat",
+ "tex",
+ "texbias",
+ "texlod",
+ "texfetch",
+ "texsize",
+ "dfdx",
+ "dfdy",
+ "quadop",
+ "linterp",
+ "pinterp",
+ "abs",
+ "ceil",
+ "floor",
+ "trunc",
+ "nop",
+ "select",
+ "export",
+ "join",
+ "BAD_OP"
+};
+
+static const char *nv_cond_names[] =
+{
+ "never", "lt" , "eq" , "le" , "gt" , "ne" , "ge" , "",
+ "never", "ltu", "equ", "leu", "gtu", "neu", "geu", ""
+};
+
+static const char *nv_modifier_strings[] =
+{
+ "",
+ "neg",
+ "abs",
+ "neg abs",
+ "not",
+   "not neg",
+ "not abs",
+ "not neg abs",
+ "sat",
+ "BAD_MOD"
+};
+
+const char *
+nv_opcode_name(uint opcode)
+{
+ return nv_opcode_names[MIN2(opcode, ARRAY_SIZE(nv_opcode_names) - 1)];
+}
+
+static INLINE const char *
+nv_type_name(ubyte type)
+{
+ switch (type) {
+ case NV_TYPE_U16: return "u16";
+ case NV_TYPE_S16: return "s16";
+ case NV_TYPE_F32: return "f32";
+ case NV_TYPE_U32: return "u32";
+ case NV_TYPE_S32: return "s32";
+ case NV_TYPE_P32: return "p32";
+ case NV_TYPE_F64: return "f64";
+ default:
+ return "BAD_TYPE";
+ }
+}
+
+static INLINE const char *
+nv_cond_name(ubyte cc)
+{
+ return nv_cond_names[MIN2(cc, 15)];
+}
+
+static INLINE const char *
+nv_modifier_string(ubyte mod)
+{
+ return nv_modifier_strings[MIN2(mod, 9)];
+}
+
+static INLINE int
+nv_value_id(struct nv_value *value)
+{
+ if (value->join->reg.id >= 0)
+ return value->join->reg.id;
+ return value->n;
+}
+
+static INLINE boolean
+nv_value_allocated(struct nv_value *value)
+{
+ return (value->reg.id >= 0) ? TRUE : FALSE;
+}
+
+static INLINE void
+nv_print_address(const char c, int buf, struct nv_value *a, int offset)
+{
+ if (buf >= 0)
+ PRINT(" %s%c%i[", cyan, c, buf);
+ else
+ PRINT(" %s%c[", cyan, c);
+ if (a)
+ PRINT("%s$a%i%s+", mgta, nv_value_id(a), cyan);
+ PRINT("%s0x%x%s]", orng, offset, cyan);
+}
+
+static INLINE void
+nv_print_cond(struct nv_instruction *nvi)
+{
+ char pfx = nv_value_allocated(nvi->flags_src->value->join) ? '$' : '%';
+
+ PRINT("%s%s %s%cc%i ",
+ gree, nv_cond_name(nvi->cc),
+ mgta, pfx, nv_value_id(nvi->flags_src->value));
+}
+
+static INLINE void
+nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type)
+{
+ char reg_pfx = '$';
+
+ if (type == NV_TYPE_ANY)
+ type = value->reg.type;
+
+ if (value->reg.file != NV_FILE_FLAGS)
+ PRINT(" %s%s", gree, nv_type_name(type));
+
+ if (!nv_value_allocated(value->join))
+ reg_pfx = '%';
+
+ switch (value->reg.file) {
+ case NV_FILE_GPR:
+ PRINT(" %s%cr%i", blue, reg_pfx, nv_value_id(value));
+ break;
+ case NV_FILE_OUT:
+ PRINT(" %s%co%i", mgta, reg_pfx, nv_value_id(value));
+ break;
+ case NV_FILE_ADDR:
+ PRINT(" %s%ca%i", mgta, reg_pfx, nv_value_id(value));
+ break;
+ case NV_FILE_FLAGS:
+ PRINT(" %s%cc%i", mgta, reg_pfx, nv_value_id(value));
+ break;
+ case NV_FILE_MEM_S:
+ nv_print_address('s', -1, ind, 4 * nv_value_id(value));
+ break;
+ case NV_FILE_MEM_P:
+ nv_print_address('p', -1, ind, 4 * nv_value_id(value));
+ break;
+ case NV_FILE_MEM_V:
+ nv_print_address('v', -1, ind, 4 * nv_value_id(value));
+ break;
+ case NV_FILE_IMM:
+ switch (type) {
+ case NV_TYPE_U16:
+ case NV_TYPE_S16:
+ PRINT(" %s0x%04x", orng, value->reg.imm.u32);
+ break;
+ case NV_TYPE_F32:
+ PRINT(" %s%f", orng, value->reg.imm.f32);
+ break;
+ case NV_TYPE_F64:
+ PRINT(" %s%f", orng, value->reg.imm.f64);
+ break;
+ case NV_TYPE_U32:
+ case NV_TYPE_S32:
+ case NV_TYPE_P32:
+ PRINT(" %s0x%08x", orng, value->reg.imm.u32);
+ break;
+ }
+ break;
+ default:
+ if (value->reg.file >= NV_FILE_MEM_G(0) &&
+ value->reg.file <= NV_FILE_MEM_G(15))
+ nv_print_address('g', value->reg.file - NV_FILE_MEM_G(0), ind,
+ nv_value_id(value) * 4);
+ else
+ if (value->reg.file >= NV_FILE_MEM_C(0) &&
+ value->reg.file <= NV_FILE_MEM_C(15))
+ nv_print_address('c', value->reg.file - NV_FILE_MEM_C(0), ind,
+ nv_value_id(value) * 4);
+ else
+ NOUVEAU_ERR(" BAD_FILE[%i]", nv_value_id(value));
+ break;
+ }
+}
+
+static INLINE void
+nv_print_ref(struct nv_ref *ref, struct nv_value *ind)
+{
+ nv_print_value(ref->value, ind, ref->typecast);
+}
+
+void
+nv_print_instruction(struct nv_instruction *i)
+{
+ int j;
+
+ PRINT("%i: ", i->serial);
+
+ if (i->flags_src)
+ nv_print_cond(i);
+
+ PRINT("%s", gree);
+ if (i->opcode == NV_OP_SET)
+ PRINT("set %s", nv_cond_name(i->set_cond));
+ else
+ if (i->saturate)
+ PRINT("sat %s", nv_opcode_name(i->opcode));
+ else
+ PRINT("%s", nv_opcode_name(i->opcode));
+
+ if (i->flags_def)
+ nv_print_value(i->flags_def, NULL, NV_TYPE_ANY);
+
+ /* Only STORE & STA can write to MEM, and they do not def
+    * anything, so the address is part of the source.
+ */
+ if (i->def[0])
+ nv_print_value(i->def[0], NULL, NV_TYPE_ANY);
+ else
+ if (i->target)
+ PRINT(" %s(BB:%i)", orng, i->target->id);
+ else
+ PRINT(" #");
+
+ for (j = 0; j < 4; ++j) {
+ if (!i->src[j])
+ continue;
+
+ if (i->src[j]->mod)
+ PRINT(" %s%s", gree, nv_modifier_string(i->src[j]->mod));
+
+ nv_print_ref(i->src[j],
+ (j == nv50_indirect_opnd(i)) ?
+ i->src[4]->value : NULL);
+ }
+ PRINT(" %s%c\n", norm, i->is_long ? 'l' : 's');
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
new file mode 100644
index 0000000000..d45dd7f95f
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
@@ -0,0 +1,943 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nv50_context.h"
+#include "nv50_pc.h"
+
+#include "util/u_simple_list.h"
+
+#define NUM_REGISTER_FILES 4
+
+struct register_set {
+ struct nv_pc *pc;
+
+ uint32_t last[NUM_REGISTER_FILES];
+ uint32_t bits[NUM_REGISTER_FILES][8];
+};
+
+struct nv_pc_pass {
+ struct nv_pc *pc;
+
+ struct nv_instruction **insns;
+ int num_insns;
+
+ uint pass_seq;
+};
+
+static void
+ranges_coalesce(struct nv_range *range)
+{
+ while (range->next && range->end >= range->next->bgn) {
+ struct nv_range *rnn = range->next->next;
+ assert(range->bgn <= range->next->bgn);
+ range->end = MAX2(range->end, range->next->end);
+ FREE(range->next);
+ range->next = rnn;
+ }
+}
+
+static boolean
+add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
+{
+ struct nv_range *range, **nextp = &val->livei;
+
+ for (range = val->livei; range; range = range->next) {
+ if (end < range->bgn)
+ break; /* insert before */
+
+ if (bgn > range->end) {
+ nextp = &range->next;
+ continue; /* insert after */
+ }
+
+ /* overlap */
+ if (bgn < range->bgn) {
+ range->bgn = bgn;
+ if (end > range->end)
+ range->end = end;
+ ranges_coalesce(range);
+ return TRUE;
+ }
+ if (end > range->end) {
+ range->end = end;
+ ranges_coalesce(range);
+ return TRUE;
+ }
+ assert(bgn >= range->bgn);
+ assert(end <= range->end);
+ return TRUE;
+ }
+
+ if (!new_range)
+ new_range = CALLOC_STRUCT(nv_range);
+
+ new_range->bgn = bgn;
+ new_range->end = end;
+ new_range->next = range;
+ *(nextp) = new_range;
+ return FALSE;
+}
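+
+/* Illustrative example (sketch): if val->livei is [2,5) -> [8,10), then
+ * add_range_ex(val, 4, 9, NULL) extends the first range to [2,9) and
+ * ranges_coalesce() merges it with [8,10) into a single [2,10) range.
+ */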
+
+static void
+add_range(struct nv_value *val, struct nv_basic_block *b, int end)
+{
+ int bgn;
+
+ if (!val->insn) /* ignore non-def values */
+ return;
+ assert(b->entry->serial <= b->exit->serial);
+ assert(b->phi->serial <= end);
+ assert(b->exit->serial + 1 >= end);
+
+ bgn = val->insn->serial;
+ if (bgn < b->entry->serial || bgn > b->exit->serial)
+ bgn = b->entry->serial;
+
+ if (bgn > end) {
+ debug_printf("Aieee! BLOCK [%i, %i], RANGE [%i, %i)\n",
+ b->entry->serial, b->exit->serial, bgn, end);
+ }
+ assert(bgn <= end);
+
+ if (bgn < val->insn->serial)
+ debug_printf("WARNING: leaking value %i ?\n", val->n);
+
+ add_range_ex(val, bgn, end, NULL);
+}
+
+#ifdef NV50_RA_DEBUG_JOIN
+static void
+livei_print(struct nv_value *a)
+{
+ struct nv_range *r = a->livei;
+
+ debug_printf("livei %i: ", a->n);
+ while (r) {
+ debug_printf("[%i, %i) ", r->bgn, r->end);
+ r = r->next;
+ }
+ debug_printf("\n");
+}
+#endif
+
+static void
+livei_unify(struct nv_value *dst, struct nv_value *src)
+{
+ struct nv_range *range, *next;
+
+ for (range = src->livei; range; range = next) {
+ next = range->next;
+ if (add_range_ex(dst, range->bgn, range->end, range))
+ FREE(range);
+ }
+ src->livei = NULL;
+}
+
+static void
+livei_release(struct nv_value *val)
+{
+ struct nv_range *range, *next;
+
+ for (range = val->livei; range; range = next) {
+ next = range->next;
+ FREE(range);
+ }
+}
+
+static boolean
+livei_have_overlap(struct nv_value *a, struct nv_value *b)
+{
+ struct nv_range *r_a, *r_b;
+
+ for (r_a = a->livei; r_a; r_a = r_a->next) {
+ for (r_b = b->livei; r_b; r_b = r_b->next) {
+ if (r_b->bgn < r_a->end &&
+ r_b->end > r_a->bgn)
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static int
+livei_end(struct nv_value *a)
+{
+ struct nv_range *r = a->livei;
+
+ assert(r);
+ while (r->next)
+ r = r->next;
+ return r->end;
+}
+
+static boolean
+livei_contains(struct nv_value *a, int pos)
+{
+ struct nv_range *r;
+
+ for (r = a->livei; r && r->bgn <= pos; r = r->next)
+ if (r->end > pos)
+ return TRUE;
+ return FALSE;
+}
+
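+/* Assign n contiguous registers (e.g. n == 4 for the defs of a vector op)
+ * from the register file of def[0]. Judging from reg_occupy/reg_release,
+ * the bits[] occupancy bitmap is kept at sub-register granularity: a value
+ * of type order o takes 1 << (o - 1) bits, so the search below steps in
+ * s-bit aligned units and converts the winning bit index back into a
+ * register id at the end.
+ */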
+static boolean
+reg_assign(struct register_set *set, struct nv_value **def, int n)
+{
+ int i, id, s;
+ uint m;
+ int f = def[0]->reg.file;
+
+ s = n << (nv_type_order(def[0]->reg.type) - 1);
+ m = (1 << s) - 1;
+
+ id = set->last[f];
+
+ for (i = 0; i * 32 < set->last[f]; ++i) {
+ if (set->bits[f][i] == 0xffffffff)
+ continue;
+
+ for (id = 0; id < 32; id += s)
+ if (!(set->bits[f][i] & (m << id)))
+ break;
+ if (id < 32)
+ break;
+ }
+ if (i * 32 + id > set->last[f])
+ return FALSE;
+
+ set->bits[f][i] |= m << id;
+
+ id += i * 32;
+
+ set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
+
+ id >>= nv_type_order(def[0]->reg.type) - 1;
+
+ for (i = 0; i < n; ++i)
+ if (def[i]->livei)
+ def[i]->reg.id = id++;
+
+ return TRUE;
+}
+
+static INLINE void
+reg_occupy(struct register_set *set, struct nv_value *val)
+{
+ int s, id = val->reg.id, f = val->reg.file;
+ uint m;
+
+ if (id < 0)
+ return;
+ s = nv_type_order(val->reg.type) - 1;
+ id <<= s;
+ m = (1 << (1 << s)) - 1;
+
+ set->bits[f][id / 32] |= m << (id % 32);
+
+ if (set->pc->max_reg[f] < id)
+ set->pc->max_reg[f] = id;
+}
+
+static INLINE void
+reg_release(struct register_set *set, struct nv_value *val)
+{
+ int s, id = val->reg.id, f = val->reg.file;
+ uint m;
+
+ if (id < 0)
+ return;
+
+ s = nv_type_order(val->reg.type) - 1;
+ id <<= s;
+ m = (1 << (1 << s)) - 1;
+
+ set->bits[f][id / 32] &= ~(m << (id % 32));
+}
+
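+/* Coalescing precondition (rough sketch): two values may share a register
+ * only if they live in the same file with the same type size, and, if one
+ * of them already has a register assigned, no other value mapped to that
+ * register has a live range overlapping the still-unassigned one.
+ */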
+static INLINE boolean
+join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ int i;
+ struct nv_value *val;
+
+ if (a->reg.file != b->reg.file ||
+ nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
+ return FALSE;
+
+ if (a->join->reg.id == b->join->reg.id)
+ return TRUE;
+
+#if 1
+ /* either a or b or both have been assigned */
+
+ if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
+ return FALSE;
+ else
+ if (b->join->reg.id >= 0) {
+ if (a->join->reg.id >= 0)
+ return FALSE;
+ val = a;
+ a = b;
+ b = val;
+ }
+
+ for (i = 0; i < ctx->pc->num_values; ++i) {
+ val = &ctx->pc->values[i];
+
+ if (val->join->reg.id != a->join->reg.id)
+ continue;
+ if (val->join != a->join && livei_have_overlap(val->join, b->join))
+ return FALSE;
+ }
+ return TRUE;
+#endif
+ return FALSE;
+}
+
+static INLINE void
+do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ int j;
+ struct nv_value *bjoin = b->join;
+
+ if (b->join->reg.id >= 0)
+ a->join->reg.id = b->join->reg.id;
+
+ livei_unify(a->join, b->join);
+
+#ifdef NV50_RA_DEBUG_JOIN
+ debug_printf("joining %i to %i\n", b->n, a->n);
+#endif
+
+ /* make a->join the new representative */
+ for (j = 0; j < ctx->pc->num_values; ++j)
+ if (ctx->pc->values[j].join == bjoin)
+ ctx->pc->values[j].join = a->join;
+
+ assert(b->join == a->join);
+}
+
+static INLINE void
+try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
+{
+ if (!join_allowed(ctx, a, b)) {
+#ifdef NV50_RA_DEBUG_JOIN
+ debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
+#endif
+ return;
+ }
+ if (livei_have_overlap(a->join, b->join)) {
+#ifdef NV50_RA_DEBUG_JOIN
+ debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
+ livei_print(a);
+ livei_print(b);
+#endif
+ return;
+ }
+
+ do_join_values(ctx, a, b);
+}
+
+static INLINE boolean
+need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
+{
+ int i = 0, n = 0;
+
+ for (; i < 2; ++i)
+ if (p->out[i] && p->out_kind[i] != CFG_EDGE_LOOP_LEAVE)
+ ++n;
+
+ return (b->num_in > 1) && (n == 2);
+}
+
+/* For each operand of each PHI in b, generate a new value by inserting a MOV
+ * at the end of the block it is coming from and replace the operand with its
+ * result. This eliminates liveness conflicts and enables us to let values be
+ * copied to the right register if such a conflict exists nonetheless.
+ */
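+/* Illustrative sketch (not actual IR syntax):
+ *
+ *   BB1: ...                   BB2: ...
+ *   BB3: p = phi a(BB1), b(BB2)
+ *
+ * becomes
+ *
+ *   BB1: ... ; t1 = mov a      BB2: ... ; t2 = mov b
+ *   BB3: p = phi t1(BB1), t2(BB2)
+ *
+ * t1/t2 are live only on the edges into BB3, so they can be coalesced with
+ * (or copied into the register of) p without clashing with other uses of
+ * a and b.
+ */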
+static int
+pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i, *ni;
+ struct nv_value *val;
+ struct nv_basic_block *p, *pn;
+ int n, j;
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ for (n = 0; n < b->num_in; ++n) {
+ p = pn = b->in[n];
+ assert(p);
+
+ if (need_new_else_block(b, p)) {
+ pn = new_basic_block(ctx->pc);
+
+ if (p->out[0] == b)
+ p->out[0] = pn;
+ else
+ p->out[1] = pn;
+
+ if (p->exit->target == b) /* target to new else-block */
+ p->exit->target = pn;
+
+ b->in[n] = pn;
+
+ pn->out[0] = b;
+ pn->in[0] = p;
+ pn->num_in = 1;
+ }
+ ctx->pc->current_block = pn;
+
+ for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+ for (j = 0; j < 4 && i->src[j]; ++j) {
+ if (nvbb_reachable_by(p, i->src[j]->value->insn->bb, b))
+ break;
+ }
+ if (j >= 4 || !i->src[j])
+ continue;
+ val = i->src[j]->value;
+
+ ni = new_instruction(ctx->pc, NV_OP_MOV);
+
+ /* TODO: insert instruction at correct position in the first place */
+ if (ni->prev && ni->prev->target)
+ nv_nvi_permute(ni->prev, ni);
+
+ ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
+ ni->def[0]->insn = ni;
+ ni->src[0] = new_ref(ctx->pc, val);
+
+ nv_reference(ctx->pc, &i->src[j], ni->def[0]);
+ }
+
+ if (pn != p && pn->exit) {
+ ctx->pc->current_block = b->in[n ? 0 : 1];
+ ni = new_instruction(ctx->pc, NV_OP_BRA);
+ ni->target = b;
+ ni->is_terminator = 1;
+ }
+ }
+
+ for (j = 0; j < 2; ++j)
+ if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
+ pass_generate_phi_movs(ctx, b->out[j]);
+
+ return 0;
+}
+
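+/* Coalescing sketch: this pass is run twice, interleaved with linear scan.
+ * On the first iteration (iter == 0) only the defs of TEX/TXB/TXL/TXQ are
+ * force-joined with their sources, since those must occupy a contiguous
+ * block of registers. On the second iteration PHI and MOV copies are
+ * coalesced where files, sizes and live ranges permit it, and SELECT
+ * sources are unconditionally joined with their def.
+ */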
+static int
+pass_join_values(struct nv_pc_pass *ctx, int iter)
+{
+ int c, n;
+
+ for (n = 0; n < ctx->num_insns; ++n) {
+ struct nv_instruction *i = ctx->insns[n];
+
+ switch (i->opcode) {
+ case NV_OP_PHI:
+ if (!iter)
+ continue;
+ try_join_values(ctx, i->src[0]->value, i->src[1]->value);
+ try_join_values(ctx, i->def[0], i->src[0]->value);
+ break;
+ case NV_OP_MOV:
+ if (iter && i->src[0]->value->insn &&
+ !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
+ try_join_values(ctx, i->def[0], i->src[0]->value);
+ break;
+ case NV_OP_SELECT:
+ if (!iter)
+ break;
+ assert(join_allowed(ctx, i->def[0], i->src[0]->value));
+ assert(join_allowed(ctx, i->def[0], i->src[1]->value));
+ do_join_values(ctx, i->def[0], i->src[0]->value);
+ do_join_values(ctx, i->def[0], i->src[1]->value);
+ break;
+ case NV_OP_TEX:
+ case NV_OP_TXB:
+ case NV_OP_TXL:
+ case NV_OP_TXQ:
+ if (iter)
+ break;
+ for (c = 0; c < 4; ++c) {
+ if (!i->src[c])
+ break;
+ do_join_values(ctx, i->def[c], i->src[c]->value);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+/* Order the instructions so that live intervals can be expressed in numbers. */
+static void
+pass_order_instructions(void *priv, struct nv_basic_block *b)
+{
+ struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
+ struct nv_instruction *i;
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ assert(!b->exit || !b->exit->next);
+ for (i = b->phi; i; i = i->next) {
+ i->serial = ctx->num_insns;
+ ctx->insns[ctx->num_insns++] = i;
+ }
+}
+
+static void
+bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
+{
+#ifdef NV50_RA_DEBUG_LIVE_SETS
+ int j;
+ struct nv_value *val;
+
+ debug_printf("LIVE-INs of BB:%i: ", b->id);
+
+ for (j = 0; j < pc->num_values; ++j) {
+ if (!(b->live_set[j / 32] & (1 << (j % 32))))
+ continue;
+ val = &pc->values[j];
+ if (!val->insn)
+ continue;
+ debug_printf("%i ", val->n);
+ }
+ debug_printf("\n");
+#endif
+}
+
+static INLINE void
+live_set_add(struct nv_basic_block *b, struct nv_value *val)
+{
+ if (!val->insn) /* don't add non-def values */
+ return;
+ b->live_set[val->n / 32] |= 1 << (val->n % 32);
+}
+
+static INLINE void
+live_set_rem(struct nv_basic_block *b, struct nv_value *val)
+{
+ b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
+}
+
+static INLINE boolean
+live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
+{
+ int n = ref->value->n;
+ return b->live_set[n / 32] & (1 << (n % 32));
+}
+
+/* The live set of a block contains those values that are live immediately
+ * before the beginning of the block, so do a backwards scan.
+ */
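+/* In standard dataflow terms this is (sketch):
+ *
+ *   live_out(b) = union of live_in(s) over all successors s
+ *   live_in(b)  = (live_out(b) \ defs(b)) | uses(b)
+ *
+ * computed by recursing into the successors first and then scanning b's
+ * instructions backwards from b->exit, removing defs and adding sources,
+ * with special treatment of PHI operands on the edges.
+ */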
+static int
+pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i;
+ int j, n, ret = 0;
+
+ debug_printf("pass_build_live_sets BB:%i\n", b->id);
+
+ if (b->pass_seq >= ctx->pc->pass_seq) {
+ debug_printf("already visited\n");
+ return 0;
+ }
+ b->pass_seq = ctx->pc->pass_seq;
+
+ /* slight hack for undecidedness: set phi = entry if it's undefined */
+ if (!b->phi)
+ b->phi = b->entry;
+
+ for (n = 0; n < 2; ++n) {
+ if (!b->out[n] || b->out[n] == b)
+ continue;
+ ret = pass_build_live_sets(ctx, b->out[n]);
+ if (ret)
+ return ret;
+
+ if (n == 0) {
+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+ b->live_set[j] = b->out[n]->live_set[j];
+ } else {
+ for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
+ b->live_set[j] |= b->out[n]->live_set[j];
+ }
+
+      /* Kick those values out of our live set that enter this successor's
+       * PHIs through incoming blocks other than us.
+       */
+ for (i = b->out[n]->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+ for (j = 0; j < 4; ++j) {
+ if (!i->src[j])
+ break;
+ assert(i->src[j]->value->insn);
+
+ if (nvbb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
+ live_set_add(b, i->src[j]->value);
+ debug_printf("BB:%i liveset + %i\n", b->id, i->src[j]->value->n);
+ } else {
+ live_set_rem(b, i->src[j]->value);
+ debug_printf("BB:%i liveset - %i\n", b->id, i->src[j]->value->n);
+ }
+ }
+ }
+ }
+
+ if (!b->entry)
+ return 0;
+
+ bb_live_set_print(ctx->pc, b);
+
+ for (i = b->exit; i; i = i->prev) {
+ for (j = 0; j < 4; j++) {
+ if (!i->def[j])
+ break;
+ live_set_rem(b, i->def[j]);
+ }
+ for (j = 0; j < 4; j++) {
+ if (!i->src[j])
+ break;
+ live_set_add(b, i->src[j]->value);
+ }
+ if (i->src[4])
+ live_set_add(b, i->src[4]->value);
+ if (i->flags_def)
+ live_set_rem(b, i->flags_def);
+ if (i->flags_src)
+ live_set_add(b, i->flags_src->value);
+ }
+ bb_live_set_print(ctx->pc, b);
+
+ return 0;
+}
+
+static void collect_live_values(struct nv_basic_block *b, const int n)
+{
+ int i;
+
+ if (b->out[0]) {
+ if (b->out[1]) { /* what to do about back-edges ? */
+ for (i = 0; i < n; ++i)
+ b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
+ } else {
+ memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
+ }
+ } else
+ if (b->out[1]) {
+ memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
+ } else {
+ memset(b->live_set, 0, n * sizeof(uint32_t));
+ }
+}
+
+/* NOTE: the live intervals of phi functions start at the first non-phi instruction */
+static int
+pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
+{
+ struct nv_instruction *i, *i_stop;
+ int j, s;
+ const int n = (ctx->pc->num_values + 31) / 32;
+
+ debug_printf("building intervals for BB %i\n", b->id);
+
+ /* verify that first block does not have live-in values */
+ if (b->num_in == 0)
+ for (j = 0; j < n; ++j)
+ assert(b->live_set[j] == 0);
+
+ collect_live_values(b, n);
+
+ /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
+ for (j = 0; j < 2; ++j) {
+ if (!b->out[j] || !b->out[j]->phi)
+ continue;
+      for (i = b->out[j]->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
+ live_set_rem(b, i->def[0]);
+
+ for (s = 0; s < 4; ++s) {
+ if (!i->src[s])
+ break;
+ assert(i->src[s]->value->insn);
+ if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
+ live_set_add(b, i->src[s]->value);
+ else
+ live_set_rem(b, i->src[s]->value);
+ }
+ }
+ }
+
+ /* remaining live-outs are live until the end */
+ if (b->exit) {
+ for (j = 0; j < ctx->pc->num_values; ++j) {
+ if (!(b->live_set[j / 32] & (1 << (j % 32))))
+ continue;
+#ifdef NV50_RA_DEBUG_LIVEI
+ debug_printf("adding range for live value %i\n", j);
+#endif
+ add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
+ }
+ }
+ debug_printf("%s: looping through instructions now\n", __func__);
+
+ i_stop = b->entry ? b->entry->prev : NULL;
+
+ /* don't have to include phi functions here (will have 0 live range) */
+ for (i = b->exit; i != i_stop; i = i->prev) {
+ assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
+ for (j = 0; j < 4; ++j) {
+ if (i->def[j])
+ live_set_rem(b, i->def[j]);
+ }
+ if (i->flags_def)
+ live_set_rem(b, i->flags_def);
+
+ for (j = 0; j < 5; ++j) {
+ if (i->src[j] && !live_set_test(b, i->src[j])) {
+ live_set_add(b, i->src[j]->value);
+#ifdef NV50_RA_DEBUG_LIVEI
+ debug_printf("adding range for source that ends living: %i\n",
+ i->src[j]->value->n);
+#endif
+ add_range(i->src[j]->value, b, i->serial);
+ }
+ }
+ if (i->flags_src && !live_set_test(b, i->flags_src)) {
+ live_set_add(b, i->flags_src->value);
+#ifdef NV50_RA_DEBUG_LIVEI
+ debug_printf("adding range for source that ends living: %i\n",
+ i->flags_src->value->n);
+#endif
+ add_range(i->flags_src->value, b, i->serial);
+ }
+ }
+
+ b->pass_seq = ctx->pc->pass_seq;
+
+ if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
+ pass_build_intervals(ctx, b->out[0]);
+
+ if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
+ pass_build_intervals(ctx, b->out[1]);
+
+ return 0;
+}
+
+static INLINE void
+nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
+{
+ memset(set, 0, sizeof(*set));
+
+ set->last[NV_FILE_GPR] = 255;
+ set->last[NV_FILE_OUT] = 127;
+ set->last[NV_FILE_FLAGS] = 4;
+ set->last[NV_FILE_ADDR] = 4;
+
+ set->pc = pc;
+}
+
+static void
+insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
+{
+   struct nv_value *elem;
+
+   /* debug_printf("inserting value %i\n", nval->n); */
+
+ for (elem = list->prev;
+ elem != list && elem->livei->bgn > nval->livei->bgn;
+ elem = elem->prev);
+ /* now elem begins before or at the same time as val */
+
+ nval->prev = elem;
+ nval->next = elem->next;
+ elem->next->prev = nval;
+ elem->next = nval;
+}
+
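+/* Linear scan over the values ordered by interval start (sketch of how the
+ * lists below are used): "active" holds values whose live interval covers
+ * the current position, "inactive" holds values that have started but have
+ * a lifetime hole at the current position, "handled" holds expired ones.
+ * Before assigning the current value, the registers of inactive values and
+ * of unhandled values that were already assigned by coalescing are marked
+ * occupied in a scratch copy of the free set if their intervals overlap;
+ * vector ops get all four defs assigned at once.
+ */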
+static int
+pass_linear_scan(struct nv_pc_pass *ctx, int iter)
+{
+ struct nv_instruction *i;
+ struct register_set f, free;
+ int k, n;
+ struct nv_value *cur, *val, *tmp[2];
+ struct nv_value active, inactive, handled, unhandled;
+
+ make_empty_list(&active);
+ make_empty_list(&inactive);
+ make_empty_list(&handled);
+ make_empty_list(&unhandled);
+
+ nv50_ctor_register_set(ctx->pc, &free);
+
+ /* joined values should have range = NULL and thus not be added;
+ * also, fixed memory values won't be added because they're not
+ * def'd, just used
+ */
+ for (n = 0; n < ctx->num_insns; ++n) {
+ i = ctx->insns[n];
+
+ for (k = 0; k < 4; ++k) {
+ if (i->def[k] && i->def[k]->livei)
+ insert_ordered_tail(&unhandled, i->def[k]);
+ else
+ if (0 && i->def[k])
+ debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
+ }
+ if (i->flags_def && i->flags_def->livei)
+ insert_ordered_tail(&unhandled, i->flags_def);
+ }
+
+ for (val = unhandled.next; val != unhandled.prev; val = val->next) {
+ assert(val->join == val);
+ assert(val->livei->bgn <= val->next->livei->bgn);
+ }
+
+ foreach_s(cur, tmp[0], &unhandled) {
+ remove_from_list(cur);
+
+ /* debug_printf("handling value %i\n", cur->n); */
+
+ foreach_s(val, tmp[1], &active) {
+ if (livei_end(val) <= cur->livei->bgn) {
+ reg_release(&free, val);
+ move_to_head(&handled, val);
+ } else
+ if (!livei_contains(val, cur->livei->bgn)) {
+ reg_release(&free, val);
+ move_to_head(&inactive, val);
+ }
+ }
+
+ foreach_s(val, tmp[1], &inactive) {
+ if (livei_end(val) <= cur->livei->bgn)
+ move_to_head(&handled, val);
+ else
+ if (livei_contains(val, cur->livei->bgn)) {
+ reg_occupy(&free, val);
+ move_to_head(&active, val);
+ }
+ }
+
+ f = free;
+
+ foreach(val, &inactive)
+ if (livei_have_overlap(val, cur))
+ reg_occupy(&f, val);
+
+ foreach(val, &unhandled)
+ if (val->reg.id >= 0 && livei_have_overlap(val, cur))
+ reg_occupy(&f, val);
+
+ if (cur->reg.id < 0) {
+ boolean mem = FALSE;
+
+ if (nv_is_vector_op(cur->insn->opcode))
+ mem = !reg_assign(&f, &cur->insn->def[0], 4);
+ else
+ if (iter)
+ mem = !reg_assign(&f, &cur, 1);
+
+ if (mem) {
+ NOUVEAU_ERR("out of registers\n");
+ abort();
+ }
+ }
+ insert_at_head(&active, cur);
+ reg_occupy(&free, cur);
+ }
+
+ return 0;
+}
+
+int
+nv_pc_exec_pass1(struct nv_pc *pc)
+{
+ struct nv_pc_pass *ctx;
+ int i, ret;
+
+ debug_printf("REGISTER ALLOCATION - entering\n");
+
+ ctx = CALLOC_STRUCT(nv_pc_pass);
+ if (!ctx)
+ return -1;
+ ctx->pc = pc;
+
+ nv_print_program(ctx->pc->root);
+
+ ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
+
+ pc->pass_seq++;
+ ret = pass_generate_phi_movs(ctx, pc->root);
+ assert(!ret);
+
+ nv_print_program(ctx->pc->root);
+
+ for (i = 0; i < pc->loop_nesting_bound; ++i) {
+ pc->pass_seq++;
+ ret = pass_build_live_sets(ctx, pc->root);
+ assert(!ret && "live sets");
+ if (ret) {
+ NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
+ goto out;
+ }
+ }
+
+ pc->pass_seq++;
+ nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx);
+
+ pc->pass_seq++;
+ ret = pass_build_intervals(ctx, pc->root);
+ assert(!ret && "build intervals");
+ if (ret) {
+ NOUVEAU_ERR("failed to build live intervals\n");
+ goto out;
+ }
+
+#ifdef NV50_RA_DEBUG_LIVEI
+ for (i = 0; i < pc->num_values; ++i)
+ livei_print(&pc->values[i]);
+#endif
+
+ for (i = 0; i < 2; ++i) {
+ ret = pass_join_values(ctx, i);
+ if (ret)
+ goto out;
+ ret = pass_linear_scan(ctx, i);
+ if (ret)
+ goto out;
+ }
+ assert(!ret && "joining");
+
+ for (i = 0; i < pc->num_values; ++i)
+ livei_release(&pc->values[i]);
+
+ debug_printf("REGISTER ALLOCATION - leaving\n");
+ nv_print_program(ctx->pc->root);
+
+out:
+ FREE(ctx);
+ return ret;
+}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 8cb1639013..d47941d3b1 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -20,4674 +20,557 @@
* SOFTWARE.
*/
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-#include "util/u_inlines.h"
+#include "nv50_program.h"
+#include "nv50_pc.h"
+#include "nv50_context.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
-
-#include "nv50_context.h"
-#include "nv50_transfer.h"
-
-#define NV50_SU_MAX_TEMP 127
-#define NV50_SU_MAX_ADDR 4
-//#define NV50_PROGRAM_DUMP
-
-/* $a5 and $a6 always seem to be 0, and using $a7 gives you noise */
-
-/* ARL - gallium craps itself on progs/vp/arl.txt
- *
- * MSB - Like MAD, but MUL+SUB
- * - Fuck it off, introduce a way to negate args for ops that
- * support it.
- *
- * Look into inlining IMMD for ops other than MOV (make it general?)
- * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD,
- * but can emit to P_TEMP first - then MOV later. NVIDIA does this
- *
- * In ops such as ADD it's possible to construct a bad opcode in the !is_long()
- * case, if the emit_src() causes the inst to suddenly become long.
- *
- * Verify half-insns work where expected - and force disable them where they
- * don't work - MUL has it forcibly disabled atm as it fixes POW..
- *
- * FUCK! watch dst==src vectors, can overwrite components that are needed.
- * ie. SUB R0, R0.yzxw, R0
- *
- * Things to check with renouveau:
- * FP attr/result assignment - how?
- * attrib
- * - 0x16bc maps vp output onto fp hpos
- * - 0x16c0 maps vp output onto fp col0
- * result
- * - colr always 0-3
- * - depr always 4
- * 0x16bc->0x16e8 --> some binding between vp/fp regs
- * 0x16b8 --> VP output count
- *
- * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005
- * "MOV rcol.x, fcol.y" = 0x00000004
- * 0x19a8 --> as above but 0x00000100 and 0x00000000
- * - 0x00100000 used when KIL used
- * 0x196c --> as above but 0x00000011 and 0x00000000
- *
- * 0x1988 --> 0xXXNNNNNN
- * - XX == FP high something
- */
-struct nv50_reg {
- enum {
- P_TEMP,
- P_ATTR,
- P_RESULT,
- P_CONST,
- P_IMMD,
- P_ADDR
- } type;
- int index;
-
- int hw;
- int mod;
-
- int rhw; /* result hw for FP outputs, or interpolant index */
- int acc; /* instruction where this reg is last read (first insn == 1) */
-
- int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */
- int indirect[2]; /* index into pc->addr, or -1 */
-
- ubyte buf_index; /* c{0 .. 15}[] or g{0 .. 15}[] */
-};
-
-#define NV50_MOD_NEG 1
-#define NV50_MOD_ABS 2
-#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS)
-#define NV50_MOD_SAT 4
-#define NV50_MOD_I32 8
-
-/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */
-
-/* STACK: Conditionals and loops have to use the (per warp) stack.
- * Stack entries consist of an entry type (divergent path, join at),
- * a mask indicating the active threads of the warp, and an address.
- * MPs can store 12 stack entries internally, if we need more (and
- * we probably do), we have to create a stack buffer in VRAM.
- */
-/* impose low limits for now */
-#define NV50_MAX_COND_NESTING 4
-#define NV50_MAX_LOOP_NESTING 3
-
-#define JOIN_ON(e) e; pc->p->exec_tail->inst[1] |= 2
-
-struct nv50_pc {
- struct nv50_program *p;
-
- /* hw resources */
- struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
- struct nv50_reg r_addr[NV50_SU_MAX_ADDR];
-
- /* tgsi resources */
- struct nv50_reg *temp;
- int temp_nr;
- struct nv50_reg *attr;
- int attr_nr;
- struct nv50_reg *result;
- int result_nr;
- struct nv50_reg *param;
- int param_nr;
- struct nv50_reg *immd;
- uint32_t *immd_buf;
- int immd_nr;
- struct nv50_reg **addr;
- int addr_nr;
- struct nv50_reg *sysval;
- int sysval_nr;
-
- struct nv50_reg *temp_temp[16];
- struct nv50_program_exec *temp_temp_exec[16];
- unsigned temp_temp_nr;
-
- /* broadcast and destination replacement regs */
- struct nv50_reg *r_brdc;
- struct nv50_reg *r_dst[4];
-
- struct nv50_reg reg_instances[16];
- unsigned reg_instance_nr;
-
- unsigned interp_mode[32];
- /* perspective interpolation registers */
- struct nv50_reg *iv_p;
- struct nv50_reg *iv_c;
-
- struct nv50_program_exec *if_insn[NV50_MAX_COND_NESTING];
- struct nv50_program_exec *if_join[NV50_MAX_COND_NESTING];
- struct nv50_program_exec *loop_brka[NV50_MAX_LOOP_NESTING];
- int if_lvl, loop_lvl;
- unsigned loop_pos[NV50_MAX_LOOP_NESTING];
-
- unsigned *insn_pos; /* actual program offset of each TGSI insn */
- boolean in_subroutine;
-
- /* current instruction and total number of insns */
- unsigned insn_cur;
- unsigned insn_nr;
-
- boolean allow32;
-
- uint8_t edgeflag_out;
-};
-
-static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *);
-
-static INLINE void
-ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
-{
- reg->type = type;
- reg->index = index;
- reg->hw = hw;
- reg->mod = 0;
- reg->rhw = -1;
- reg->vtx = -1;
- reg->acc = 0;
- reg->indirect[0] = reg->indirect[1] = -1;
- reg->buf_index = (type == P_CONST) ? 1 : 0;
-}
+#include "tgsi/tgsi_dump.h"
static INLINE unsigned
-popcnt4(uint32_t val)
-{
- static const unsigned cnt[16]
- = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
- return cnt[val & 0xf];
-}
-
-static void
-terminate_mbb(struct nv50_pc *pc)
-{
- int i;
-
- /* remove records of temporary address register values */
- for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
- if (pc->r_addr[i].index < 0)
- pc->r_addr[i].acc = 0;
-}
-
-static void
-alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
-{
- int i = 0;
-
- if (reg->type == P_RESULT) {
- if (pc->p->cfg.high_result < (reg->hw + 1))
- pc->p->cfg.high_result = reg->hw + 1;
- }
-
- if (reg->type != P_TEMP)
- return;
-
- if (reg->hw >= 0) {
- /*XXX: do this here too to catch FP temp-as-attr usage..
- * not clean, but works */
- if (pc->p->cfg.high_temp < (reg->hw + 1))
- pc->p->cfg.high_temp = reg->hw + 1;
- return;
- }
-
- if (reg->rhw != -1) {
- /* try to allocate temporary with index rhw first */
- if (!(pc->r_temp[reg->rhw])) {
- pc->r_temp[reg->rhw] = reg;
- reg->hw = reg->rhw;
- if (pc->p->cfg.high_temp < (reg->rhw + 1))
- pc->p->cfg.high_temp = reg->rhw + 1;
- return;
- }
- /* make sure we don't get things like $r0 needs to go
- * in $r1 and $r1 in $r0
- */
- i = pc->result_nr * 4;
- }
-
- for (; i < NV50_SU_MAX_TEMP; i++) {
- if (!(pc->r_temp[i])) {
- pc->r_temp[i] = reg;
- reg->hw = i;
- if (pc->p->cfg.high_temp < (i + 1))
- pc->p->cfg.high_temp = i + 1;
- return;
- }
- }
-
- NOUVEAU_ERR("out of registers\n");
- abort();
-}
-
-static INLINE struct nv50_reg *
-reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
-{
- struct nv50_reg *ri;
-
- assert(pc->reg_instance_nr < 16);
- ri = &pc->reg_instances[pc->reg_instance_nr++];
- if (reg) {
- alloc_reg(pc, reg);
- *ri = *reg;
- reg->indirect[0] = reg->indirect[1] = -1;
- reg->mod = 0;
- }
- return ri;
-}
-
-/* XXX: For shaders that aren't executed linearly (e.g. shaders that
- * contain loops), we need to assign all hw regs to TGSI TEMPs early,
- * lest we risk temp_temps overwriting regs alloc'd "later".
- */
-static struct nv50_reg *
-alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
-{
- struct nv50_reg *r;
- int i;
-
- if (dst && dst->type == P_TEMP && dst->hw == -1)
- return dst;
-
- for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
- if (!pc->r_temp[i]) {
- r = MALLOC_STRUCT(nv50_reg);
- ctor_reg(r, P_TEMP, -1, i);
- pc->r_temp[i] = r;
- return r;
- }
- }
-
- NOUVEAU_ERR("out of registers\n");
- abort();
- return NULL;
-}
-
-/* release the hardware resource held by r */
-static void
-release_hw(struct nv50_pc *pc, struct nv50_reg *r)
+bitcount4(const uint32_t val)
{
- assert(r->type == P_TEMP);
- if (r->hw == -1)
- return;
-
- assert(pc->r_temp[r->hw] == r);
- pc->r_temp[r->hw] = NULL;
-
- r->acc = 0;
- if (r->index == -1)
- FREE(r);
-}
-
-static void
-free_temp(struct nv50_pc *pc, struct nv50_reg *r)
-{
- if (r->index == -1) {
- unsigned hw = r->hw;
-
- FREE(pc->r_temp[hw]);
- pc->r_temp[hw] = NULL;
- }
+ static const unsigned cnt[16]
+ = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
+ return cnt[val & 0xf];
}
-static int
-alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
-{
- int i;
-
- if ((idx + 4) >= NV50_SU_MAX_TEMP)
- return 1;
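+/* Determine which components of source operand c an instruction actually
+ * reads, given the destination WriteMask. For example, DP3 always reads
+ * xyz (0x7) of its sources regardless of the WriteMask, while SIN with a
+ * WriteMask of xyz only needs the x component of its source (0x1).
+ */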
+static unsigned
+nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
+{
+ unsigned mask = inst->Dst[0].Register.WriteMask;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
+ case TGSI_OPCODE_DP3:
+ return 0x7;
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_KIL: /* WriteMask ignored */
+ return 0xf;
+ case TGSI_OPCODE_DST:
+ return mask & (c ? 0xa : 0x6);
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_LOG:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SCS:
+ return 0x1;
+ case TGSI_OPCODE_IF:
+ return 0x1;
+ case TGSI_OPCODE_LIT:
+ return 0xb;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ {
+ const struct tgsi_instruction_texture *tex;
+
+ assert(inst->Instruction.Texture);
+ tex = &inst->Texture;
+
+ mask = 0x7;
+ if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXD)
+ mask |= 0x8; /* bias, lod or proj */
+
+ switch (tex->Texture) {
+ case TGSI_TEXTURE_1D:
+ mask &= 0x9;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ mask &= 0x5;
+ break;
+ case TGSI_TEXTURE_2D:
+ mask &= 0xb;
+ break;
+ default:
+ break;
+ }
+ }
+ return mask;
+ case TGSI_OPCODE_XPD:
+ {
+ unsigned x = 0;
+ if (mask & 1) x |= 0x6;
+ if (mask & 2) x |= 0x5;
+ if (mask & 4) x |= 0x3;
+ return x;
+ }
+ default:
+ break;
+ }
+
+ return mask;
+}
+
+static void
+nv50_indirect_inputs(struct nv50_translation_info *ti, int id)
+{
+ int i, c;
+
+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
+ for (c = 0; c < 4; ++c)
+ ti->input_access[i][c] = id;
+
+ ti->indirect_inputs = TRUE;
+}
+
+static void
+nv50_indirect_outputs(struct nv50_translation_info *ti, int id)
+{
+ int i, c;
+
+ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
+ for (c = 0; c < 4; ++c)
+ ti->output_access[i][c] = id;
+
+ ti->indirect_outputs = TRUE;
+}
+
+static void
+prog_inst(struct nv50_translation_info *ti,
+ const struct tgsi_full_instruction *inst, int id)
+{
+ const struct tgsi_dst_register *dst;
+ const struct tgsi_src_register *src;
+ int s, c, k;
+ unsigned mask;
+
+ if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+ for (c = 0; c < 4; ++c) {
+ dst = &inst->Dst[0].Register;
+ if (inst->Dst[0].Register.Indirect)
+ nv50_indirect_outputs(ti, id);
+ if (!(dst->WriteMask & (1 << c)))
+ continue;
+ ti->output_access[dst->Index][c] = id;
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
+ inst->Src[0].Register.File == TGSI_FILE_INPUT &&
+ dst->Index == ti->edgeflag_out)
+ ti->p->vp.edgeflag = inst->Src[0].Register.Index;
+ }
- if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
- pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
- return alloc_temp4(pc, dst, idx + 4);
+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+ src = &inst->Src[s].Register;
+ if (src->File != TGSI_FILE_INPUT)
+ continue;
+ mask = nv50_tgsi_src_mask(inst, s);
- for (i = 0; i < 4; i++) {
- dst[i] = MALLOC_STRUCT(nv50_reg);
- ctor_reg(dst[i], P_TEMP, -1, idx + i);
- pc->r_temp[idx + i] = dst[i];
- }
+ if (inst->Src[s].Register.Indirect)
+ nv50_indirect_inputs(ti, id);
- return 0;
+ for (c = 0; c < 4; ++c) {
+ if (!(mask & (1 << c)))
+ continue;
+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+ if (k <= TGSI_SWIZZLE_W)
+ ti->input_access[src->Index][k] = id;
+ }
+ }
}
static void
-free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
+prog_immediate(struct nv50_translation_info *ti,
+ const struct tgsi_full_immediate *imm)
{
- int i;
+ int c;
+ unsigned n = ++ti->immd32_nr;
- for (i = 0; i < 4; i++)
- free_temp(pc, reg[i]);
-}
+ tgsi_dump_immediate(imm);
-static struct nv50_reg *
-temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
- if (pc->temp_temp_nr >= 16)
- assert(0);
+ if (n == (1 << (ffs(n) - 1)))
+ ti->immd32 = REALLOC(ti->immd32, (n / 2) * 16, (n * 2) * 16);
- pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
- pc->temp_temp_exec[pc->temp_temp_nr] = e;
- return pc->temp_temp[pc->temp_temp_nr++];
+ for (c = 0; c < 4; ++c)
+ ti->immd32[(n - 1) * 4 + c] = imm->u[c].Uint;
}
-/* This *must* be called for all nv50_program_exec that have been
- * given as argument to temp_temp, or the temps will be leaked !
- */
-static void
-kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
- int i;
-
- for (i = 0; i < pc->temp_temp_nr; i++)
- if (pc->temp_temp_exec[i] == e)
- free_temp(pc, pc->temp_temp[i]);
- if (!e)
- pc->temp_temp_nr = 0;
+static INLINE unsigned
+translate_interpolate(const struct tgsi_full_declaration *decl)
+{
+ unsigned mode;
+
+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
+ mode = NV50_INTERP_FLAT;
+ else
+ if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+ mode = 0;
+ else
+ mode = NV50_INTERP_LINEAR;
+
+ if (decl->Declaration.Centroid)
+ mode |= NV50_INTERP_CENTROID;
+
+ return mode;
+}
+
+static void
+prog_decl(struct nv50_translation_info *ti,
+ const struct tgsi_full_declaration *decl)
+{
+ unsigned i, first, last, sn = 0, si = 0;
+
+ first = decl->Range.First;
+ last = decl->Range.Last;
+
+ if (decl->Declaration.Semantic) {
+ sn = decl->Semantic.Name;
+ si = decl->Semantic.Index;
+ }
+ tgsi_dump_declaration(decl);
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ for (i = first; i <= last; ++i)
+ ti->interp_mode[i] = translate_interpolate(decl);
+
+ if (!decl->Declaration.Semantic)
+ break;
+
+ for (i = first; i <= last; ++i) {
+ ti->p->in[i].sn = sn;
+ ti->p->in[i].si = si;
+ }
+
+ switch (sn) {
+ case TGSI_SEMANTIC_FACE:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ if (ti->p->type == PIPE_SHADER_FRAGMENT)
+ ti->p->vp.bfc[si] = first;
+ break;
+ }
+ break;
+ case TGSI_FILE_OUTPUT:
+ if (!decl->Declaration.Semantic)
+ break;
+
+ for (i = first; i <= last; ++i) {
+ ti->p->out[i].sn = sn;
+ ti->p->out[i].si = si;
+ }
+
+ switch (sn) {
+ case TGSI_SEMANTIC_BCOLOR:
+ ti->p->vp.bfc[si] = first;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ ti->p->vp.psiz = first;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ ti->edgeflag_out = first;
+ break;
+ default:
+ break;
+ }
+ break;
+ case TGSI_FILE_SYSTEM_VALUE:
+ switch (decl->Semantic.Name) {
+ case TGSI_SEMANTIC_FACE:
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ break;
+ case TGSI_SEMANTIC_PRIMID:
+ break;
+ /*
+ case TGSI_SEMANTIC_PRIMIDIN:
+ break;
+ case TGSI_SEMANTIC_VERTEXID:
+ break;
+ */
+ default:
+ break;
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16);
+ break;
+ case TGSI_FILE_ADDRESS:
+ case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_TEMPORARY:
+ break;
+ default:
+ assert(0);
+ break;
+ }
}
static int
-ctor_immd_4u32(struct nv50_pc *pc,
- uint32_t x, uint32_t y, uint32_t z, uint32_t w)
-{
- unsigned size = pc->immd_nr * 4 * sizeof(uint32_t);
-
- pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t));
-
- pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
- pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
- pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
- pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-
- return pc->immd_nr++;
-}
-
-static INLINE int
-ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w)
-{
- return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w));
-}
-
-static struct nv50_reg *
-alloc_immd(struct nv50_pc *pc, float f)
-{
- struct nv50_reg *r = MALLOC_STRUCT(nv50_reg);
- unsigned hw;
-
- for (hw = 0; hw < pc->immd_nr * 4; hw++)
- if (pc->immd_buf[hw] == fui(f))
- break;
-
- if (hw == pc->immd_nr * 4)
- hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4;
-
- ctor_reg(r, P_IMMD, -1, hw);
- return r;
-}
-
-static struct nv50_program_exec *
-exec(struct nv50_pc *pc)
-{
- struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
-
- e->param.index = -1;
- return e;
-}
-
-static void
-emit(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
- struct nv50_program *p = pc->p;
-
- if (p->exec_tail)
- p->exec_tail->next = e;
- if (!p->exec_head)
- p->exec_head = e;
- p->exec_tail = e;
- p->exec_size += (e->inst[0] & 1) ? 2 : 1;
-
- kill_temp_temp(pc, e);
-}
-
-static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
-
-static boolean
-is_long(struct nv50_program_exec *e)
-{
- if (e->inst[0] & 1)
- return TRUE;
- return FALSE;
-}
-
-static boolean
-is_immd(struct nv50_program_exec *e)
-{
- if (is_long(e) && (e->inst[1] & 3) == 3)
- return TRUE;
- return FALSE;
-}
-
-static boolean
-is_join(struct nv50_program_exec *e)
-{
- if (is_long(e) && (e->inst[1] & 3) == 2)
- return TRUE;
- return FALSE;
-}
-
-static INLINE boolean
-is_control_flow(struct nv50_program_exec *e)
-{
- return (e->inst[0] & 2);
-}
-
-static INLINE void
-set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
- struct nv50_program_exec *e)
-{
- assert(!is_immd(e));
- set_long(pc, e);
- e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
- e->inst[1] |= (pred << 7) | (idx << 12);
-}
-
-static INLINE void
-set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
- struct nv50_program_exec *e)
-{
- set_long(pc, e);
- e->inst[1] &= ~((0x3 << 4) | (1 << 6));
- e->inst[1] |= (idx << 4) | (on << 6);
-}
-
-static INLINE void
-set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
- if (is_long(e))
- return;
-
- e->inst[0] |= 1;
- set_pred(pc, 0xf, 0, e);
- set_pred_wr(pc, 0, 0, e);
-}
-
-static INLINE void
-set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
-{
- if (dst->type == P_RESULT) {
- set_long(pc, e);
- e->inst[1] |= 0x00000008;
- }
-
- alloc_reg(pc, dst);
- if (dst->hw > 63)
- set_long(pc, e);
- e->inst[0] |= (dst->hw << 2);
-}
-
-static INLINE void
-set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
-{
- set_long(pc, e);
- /* XXX: can't be predicated - bits overlap; cases where both
- * are required should be avoided by using pc->allow32 */
- set_pred(pc, 0, 0, e);
- set_pred_wr(pc, 0, 0, e);
-
- e->inst[1] |= 0x00000002 | 0x00000001;
- e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16;
- e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2;
-}
-
-static INLINE void
-set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
-{
- assert(a->type == P_ADDR);
-
- assert(!(e->inst[0] & 0x0c000000));
- assert(!(e->inst[1] & 0x00000004));
-
- e->inst[0] |= (a->hw & 3) << 26;
- e->inst[1] |= a->hw & 4;
-}
-
-static void
-emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t);
-
-static void
-emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int);
-
-static void
-emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[1] = 0x40000000;
- set_long(pc, e);
- set_dst(pc, dst, e);
- set_addr(e, src);
-
- emit(pc, e);
-}
-
-static void
-emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src0, uint16_t src1_val)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xd0000000 | (src1_val << 9);
- e->inst[1] = 0x20000000;
- set_long(pc, e);
- e->inst[0] |= dst->hw << 2;
- if (src0) /* otherwise will add to $a0, which is always 0 */
- set_addr(e, src0);
-
- emit(pc, e);
-}
-
-#define INTERP_LINEAR 0
-#define INTERP_FLAT 1
-#define INTERP_PERSPECTIVE 2
-#define INTERP_CENTROID 4
-
-/* interpolant index has been stored in dst->rhw */
-static void
-emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
- unsigned mode)
-{
- struct nv50_program_exec *e = exec(pc);
- assert(dst->rhw != -1);
-
- e->inst[0] |= 0x80000000;
- set_dst(pc, dst, e);
- e->inst[0] |= (dst->rhw << 16);
-
- if (mode & INTERP_FLAT) {
- e->inst[0] |= (1 << 8);
- } else {
- if (mode & INTERP_PERSPECTIVE) {
- e->inst[0] |= (1 << 25);
- alloc_reg(pc, iv);
- e->inst[0] |= (iv->hw << 9);
- }
-
- if (mode & INTERP_CENTROID)
- e->inst[0] |= (1 << 24);
- }
-
- emit(pc, e);
-}
-
-static void
-set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
- struct nv50_program_exec *e)
-{
- set_long(pc, e);
-
- e->param.index = src->hw & 127;
- e->param.shift = s;
- e->param.mask = m << (s % 32);
-
- if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */
- set_addr(e, get_address_reg(pc, src));
- else
- if (src->acc < 0) {
- assert(src->type == P_CONST);
- set_addr(e, pc->addr[src->indirect[0]]);
- }
-
- e->inst[1] |= (src->buf_index << 22);
-}
-
-/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */
-static void
-emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0x10000000;
- if (!pc->allow32)
- set_long(pc, e);
-
- set_dst(pc, dst, e);
-
- if (!is_long(e) && src->type == P_IMMD) {
- set_immd(pc, src, e);
- /*XXX: 32-bit, but steals part of "half" reg space - need to
- * catch and handle this case if/when we do half-regs
- */
- } else
- if (src->type == P_IMMD || src->type == P_CONST) {
- set_long(pc, e);
- set_data(pc, src, 0x7f, 9, e);
- e->inst[1] |= 0x20000000; /* mov from c[] */
- } else {
- if (src->type == P_ATTR) {
- set_long(pc, e);
- e->inst[1] |= 0x00200000;
-
- if (src->vtx >= 0) {
- /* indirect (vertex base + c) load from p[] */
- e->inst[0] |= 0x01800000;
- set_addr(e, get_address_reg(pc, src));
- }
- }
-
- alloc_reg(pc, src);
- if (src->hw > 63)
- set_long(pc, e);
- e->inst[0] |= (src->hw << 9);
- }
-
- if (is_long(e) && !is_immd(e)) {
- e->inst[1] |= 0x04000000; /* 32-bit */
- e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */
- if (!(e->inst[1] & 0x20000000))
- e->inst[1] |= 0x00030000; /* lane mask 2:3 */
- } else
- e->inst[0] |= 0x00008000;
-
- emit(pc, e);
-}
-
-static INLINE void
-emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
-{
- struct nv50_reg *imm = alloc_immd(pc, f);
- emit_mov(pc, dst, imm);
- FREE(imm);
-}
-
-/* Assign the hw of the discarded temporary register src
- * to the tgsi register dst and free src.
- */
-static void
-assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- assert(src->index == -1 && src->hw != -1);
-
- if (pc->if_lvl || pc->loop_lvl ||
- (dst->type != P_TEMP) ||
- (src->hw < pc->result_nr * 4 &&
- pc->p->type == PIPE_SHADER_FRAGMENT) ||
- pc->p->info.opcode_count[TGSI_OPCODE_CAL] ||
- pc->p->info.opcode_count[TGSI_OPCODE_BRA]) {
-
- emit_mov(pc, dst, src);
- free_temp(pc, src);
- return;
- }
-
- if (dst->hw != -1)
- pc->r_temp[dst->hw] = NULL;
- pc->r_temp[src->hw] = dst;
- dst->hw = src->hw;
-
- FREE(src);
-}
-
-static void
-emit_nop(struct nv50_pc *pc)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xf0000000;
- set_long(pc, e);
- e->inst[1] = 0xe0000000;
- emit(pc, e);
-}
-
-static boolean
-check_swap_src_0_1(struct nv50_pc *pc,
- struct nv50_reg **s0, struct nv50_reg **s1)
-{
- struct nv50_reg *src0 = *s0, *src1 = *s1;
-
- if (src0->type == P_CONST) {
- if (src1->type != P_CONST) {
- *s0 = src1;
- *s1 = src0;
- return TRUE;
- }
- } else
- if (src1->type == P_ATTR) {
- if (src0->type != P_ATTR) {
- *s0 = src1;
- *s1 = src0;
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-static void
-set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src,
- struct nv50_program_exec *e)
-{
- struct nv50_reg *temp;
-
- if (src->type != P_TEMP) {
- temp = temp_temp(pc, e);
- emit_mov(pc, temp, src);
- src = temp;
- }
-
- alloc_reg(pc, src);
- if (src->hw > 63)
- set_long(pc, e);
- e->inst[0] |= (src->hw << 9);
-}
-
-static void
-set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
-{
- if (src->type == P_ATTR) {
- set_long(pc, e);
- e->inst[1] |= 0x00200000;
-
- if (src->vtx >= 0) {
- e->inst[0] |= 0x01800000; /* src from p[] */
- set_addr(e, get_address_reg(pc, src));
- }
- } else
- if (src->type == P_CONST || src->type == P_IMMD) {
- struct nv50_reg *temp = temp_temp(pc, e);
-
- emit_mov(pc, temp, src);
- src = temp;
- }
-
- alloc_reg(pc, src);
- if (src->hw > 63)
- set_long(pc, e);
- e->inst[0] |= (src->hw << 9);
-}
-
-static void
-set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
-{
- if (src->type == P_ATTR) {
- struct nv50_reg *temp = temp_temp(pc, e);
-
- emit_mov(pc, temp, src);
- src = temp;
- } else
- if (src->type == P_CONST || src->type == P_IMMD) {
- if (e->inst[0] & 0x01800000) {
- struct nv50_reg *temp = temp_temp(pc, e);
-
- emit_mov(pc, temp, src);
- src = temp;
- } else {
- assert(!(e->inst[0] & 0x00800000));
- set_data(pc, src, 0x7f, 16, e);
- e->inst[0] |= 0x00800000;
- }
- }
-
- alloc_reg(pc, src);
- if (src->hw > 63)
- set_long(pc, e);
- e->inst[0] |= ((src->hw & 127) << 16);
-}
-
-static void
-set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
-{
- set_long(pc, e);
-
- if (src->type == P_ATTR) {
- struct nv50_reg *temp = temp_temp(pc, e);
-
- emit_mov(pc, temp, src);
- src = temp;
- } else
- if (src->type == P_CONST || src->type == P_IMMD) {
- if (e->inst[0] & 0x01800000) {
- struct nv50_reg *temp = temp_temp(pc, e);
-
- emit_mov(pc, temp, src);
- src = temp;
- } else {
- assert(!(e->inst[0] & 0x01000000));
- set_data(pc, src, 0x7f, 32+14, e);
- e->inst[0] |= 0x01000000;
- }
- }
-
- alloc_reg(pc, src);
- e->inst[1] |= ((src->hw & 127) << 14);
-}
-
-static void
-set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh,
- struct nv50_program_exec *e, int pos)
-{
- struct nv50_reg *r = src;
-
- alloc_reg(pc, r);
- if (r->type != P_TEMP) {
- r = temp_temp(pc, e);
- emit_mov(pc, r, src);
- }
-
- if (r->hw > (NV50_SU_MAX_TEMP / 2)) {
- NOUVEAU_ERR("out of low GPRs\n");
- abort();
- }
-
- e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32);
-}
-
-static void
-emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
-{
- struct nv50_program_exec *e = exec(pc);
-
- assert(dst->type == P_TEMP);
- e->inst[1] = 0x20000000 | (pred << 12);
- set_long(pc, e);
- set_dst(pc, dst, e);
-
- emit(pc, e);
-}
-
-static void
-emit_mov_to_pred(struct nv50_pc *pc, int pred, struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0x000001fc;
- e->inst[1] = 0xa0000008;
- set_long(pc, e);
- set_pred_wr(pc, 1, pred, e);
- set_src_0_restricted(pc, src, e);
-
- emit(pc, e);
-}
-
-static void
-emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
- struct nv50_reg *src1)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xc0000000;
-
- if (!pc->allow32)
- set_long(pc, e);
-
- check_swap_src_0_1(pc, &src0, &src1);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- if (src1->type == P_IMMD && !is_long(e)) {
- if (src0->mod ^ src1->mod)
- e->inst[0] |= 0x00008000;
- set_immd(pc, src1, e);
- } else {
- set_src_1(pc, src1, e);
- if ((src0->mod ^ src1->mod) & NV50_MOD_NEG) {
- if (is_long(e))
- e->inst[1] |= 0x08000000;
- else
- e->inst[0] |= 0x00008000;
- }
- }
-
- emit(pc, e);
-}
-
-static void
-emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src0, struct nv50_reg *src1)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xb0000000;
-
- alloc_reg(pc, src1);
- check_swap_src_0_1(pc, &src0, &src1);
-
- if (!pc->allow32 || (src0->mod | src1->mod) || src1->hw > 63) {
- set_long(pc, e);
- e->inst[1] |= ((src0->mod & NV50_MOD_NEG) << 26) |
- ((src1->mod & NV50_MOD_NEG) << 27);
- }
-
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- if (src1->type == P_CONST || src1->type == P_ATTR || is_long(e))
- set_src_2(pc, src1, e);
- else
- if (src1->type == P_IMMD)
- set_immd(pc, src1, e);
- else
- set_src_1(pc, src1, e);
-
- emit(pc, e);
-}
-
-static void
-emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
- uint8_t s)
-{
- struct nv50_program_exec *e = exec(pc);
-
- set_long(pc, e);
- e->inst[1] |= 0xc0000000;
-
- e->inst[0] |= dst->hw << 2;
- e->inst[0] |= s << 16; /* shift left */
- set_src_0(pc, src, e);
-
- emit(pc, e);
-}
-
-static boolean
-address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r)
-{
- if (!r)
- return FALSE;
-
- if (r->vtx != a->vtx)
- return FALSE;
- if (r->vtx >= 0)
- return (r->indirect[1] == a->indirect[1]);
-
- if (r->hw < a->rhw || (r->hw - a->rhw) >= 128)
- return FALSE;
-
- if (a->index >= 0)
- return (a->index == r->indirect[0]);
- return (a->indirect[0] == r->indirect[0]);
-}
-
-static void
-load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *a, int shift)
-{
- struct nv50_reg mem, *temp;
-
- ctor_reg(&mem, P_ATTR, -1, dst->vtx);
-
- assert(dst->type == P_ADDR);
- if (!a) {
- emit_arl(pc, dst, &mem, 0);
- return;
- }
- temp = alloc_temp(pc, NULL);
-
- if (shift) {
- emit_mov_from_addr(pc, temp, a);
- if (shift < 0)
- emit_shl_imm(pc, temp, temp, shift);
- emit_arl(pc, dst, temp, MAX2(shift, 0));
- }
- emit_mov(pc, temp, &mem);
- set_addr(pc->p->exec_tail, dst);
-
- emit_arl(pc, dst, temp, 0);
- free_temp(pc, temp);
-}
-
-/* case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS
- * case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX
- * case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX
- * case (vtx < 0, acc >= 0): memory address too high to encode
- * case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS
- */
-static struct nv50_reg *
-get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref)
-{
- int i;
- struct nv50_reg *a_ref, *a = NULL;
-
- for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
- if (pc->r_addr[i].acc == 0)
- a = &pc->r_addr[i]; /* an unused address reg */
- else
- if (address_reg_suitable(&pc->r_addr[i], ref)) {
- pc->r_addr[i].acc = pc->insn_cur;
- return &pc->r_addr[i];
- } else
- if (!a && pc->r_addr[i].index < 0 &&
- pc->r_addr[i].acc < pc->insn_cur)
- a = &pc->r_addr[i];
- }
- if (!a) {
- /* We'll be able to spill address regs when this
- * mess is replaced with a proper compiler ...
- */
- NOUVEAU_ERR("out of address regs\n");
- abort();
- return NULL;
- }
-
- /* initialize and reserve for this TGSI instruction */
- a->rhw = 0;
- a->index = a->indirect[0] = a->indirect[1] = -1;
- a->acc = pc->insn_cur;
-
- if (!ref) {
- a->vtx = -1;
- return a;
- }
- a->vtx = ref->vtx;
-
- /* now put in the correct value ... */
-
- if (ref->vtx >= 0) {
- a->indirect[1] = ref->indirect[1];
-
- /* For an indirect vertex index, we need to shift address right
- * by 2, the address register will contain vtx * 16, we need to
- * load from a[vtx * 4].
- */
- load_vertex_base(pc, a, (ref->acc < 0) ?
- pc->addr[ref->indirect[1]] : NULL, -2);
- } else {
- assert(ref->acc < 0 || ref->indirect[0] < 0);
-
- a->rhw = ref->hw & ~0x7f;
- a->indirect[0] = ref->indirect[0];
- a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL;
-
- emit_add_addr_imm(pc, a, a_ref, a->rhw * 4);
- }
- return a;
-}
-
-#define NV50_MAX_F32 0x880
-#define NV50_MAX_S32 0x08c
-#define NV50_MAX_U32 0x084
-#define NV50_MIN_F32 0x8a0
-#define NV50_MIN_S32 0x0ac
-#define NV50_MIN_U32 0x0a4
-
-static void
-emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
- struct nv50_reg *src0, struct nv50_reg *src1)
-{
- struct nv50_program_exec *e = exec(pc);
-
- set_long(pc, e);
- e->inst[0] |= 0x30000000 | ((sub & 0x800) << 20);
- e->inst[1] |= (sub << 24);
-
- check_swap_src_0_1(pc, &src0, &src1);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_1(pc, src1, e);
-
- if (src0->mod & NV50_MOD_ABS)
- e->inst[1] |= 0x00100000;
- if (src1->mod & NV50_MOD_ABS)
- e->inst[1] |= 0x00080000;
-
- emit(pc, e);
-}
-
-static INLINE void
-emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
- struct nv50_reg *src1)
-{
- src1->mod ^= NV50_MOD_NEG;
- emit_add(pc, dst, src0, src1);
- src1->mod ^= NV50_MOD_NEG;
-}
-
-static void
-emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
- struct nv50_reg *src1, unsigned op)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xd0000000;
- set_long(pc, e);
-
- check_swap_src_0_1(pc, &src0, &src1);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
-
- if (op != TGSI_OPCODE_AND && op != TGSI_OPCODE_OR &&
- op != TGSI_OPCODE_XOR)
- assert(!"invalid bit op");
-
- assert(!(src0->mod | src1->mod));
-
- if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) {
- set_immd(pc, src1, e);
- if (op == TGSI_OPCODE_OR)
- e->inst[0] |= 0x0100;
- else
- if (op == TGSI_OPCODE_XOR)
- e->inst[0] |= 0x8000;
- } else {
- set_src_1(pc, src1, e);
- e->inst[1] |= 0x04000000; /* 32 bit */
- if (op == TGSI_OPCODE_OR)
- e->inst[1] |= 0x4000;
- else
- if (op == TGSI_OPCODE_XOR)
- e->inst[1] |= 0x8000;
- }
-
- emit(pc, e);
-}
-
-static void
-emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xd0000000;
- e->inst[1] = 0x0402c000;
- set_long(pc, e);
- set_dst(pc, dst, e);
- set_src_1(pc, src, e);
-
- emit(pc, e);
-}
-
-static void
-emit_shift(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir)
+nv50_vertprog_prepare(struct nv50_translation_info *ti)
{
- struct nv50_program_exec *e = exec(pc);
+ struct nv50_program *p = ti->p;
+ int i, c;
+ unsigned num_inputs = 0;
- e->inst[0] = 0x30000000;
- e->inst[1] = 0xc4000000;
+ ti->input_file = NV_FILE_MEM_S;
+ ti->output_file = NV_FILE_OUT;
- set_long(pc, e);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
+ p->in[i].id = i;
+ p->in[i].hw = num_inputs;
- if (src1->type == P_IMMD) {
- e->inst[1] |= (1 << 20);
- e->inst[0] |= (pc->immd_buf[src1->hw] & 0x7f) << 16;
- } else
- set_src_1(pc, src1, e);
+ for (c = 0; c < 4; ++c) {
+ if (!ti->input_access[i][c])
+ continue;
+ ti->input_map[i][c] = num_inputs++;
+ p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32);
+ }
+ }
- if (dir != TGSI_OPCODE_SHL)
- e->inst[1] |= (1 << 29);
+ for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
+ p->out[i].id = i;
+ p->out[i].hw = p->max_out;
- if (dir == TGSI_OPCODE_ISHR)
- e->inst[1] |= (1 << 27);
+ for (c = 0; c < 4; ++c) {
+ if (!ti->output_access[i][c])
+ continue;
+ ti->output_map[i][c] = p->max_out++;
+ p->out[i].mask |= 1 << c;
+ }
+ }
- emit(pc, e);
-}
-
-static void
-emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src, int s)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0x30000000;
- e->inst[1] = 0xc4100000;
- if (s < 0) {
- e->inst[1] |= 1 << 29;
- s = -s;
- }
- e->inst[1] |= ((s & 0x7f) << 16);
-
- set_long(pc, e);
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
-
- emit(pc, e);
-}
+ if (p->vp.psiz < 0x40)
+ p->vp.psiz = p->out[p->vp.psiz].hw;
-static void
-emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
- struct nv50_reg *src1, struct nv50_reg *src2)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xe0000000;
-
- check_swap_src_0_1(pc, &src0, &src1);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_1(pc, src1, e);
- set_src_2(pc, src2, e);
-
- if ((src0->mod ^ src1->mod) & NV50_MOD_NEG)
- e->inst[1] |= 0x04000000;
- if (src2->mod & NV50_MOD_NEG)
- e->inst[1] |= 0x08000000;
-
- emit(pc, e);
+ return 0;
}
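
/* Editor's illustrative sketch (not part of this change): the new
 * nv50_vertprog_prepare() above records which input components are read by
 * setting one bit per (input, component) pair in a flat bit array, at bit
 * index 4 * i + c.  The helper below only demonstrates that indexing; the
 * 4-word array size is arbitrary for the example.
 */
#include <stdint.h>
#include <stdio.h>

static void set_attr_bit(uint32_t attrs[4], unsigned i, unsigned c)
{
        unsigned bit = i * 4 + c;            /* flat bit index */
        attrs[bit / 32] |= 1u << (bit % 32); /* same packing as p->vp.attrs[] */
}

int main(void)
{
        uint32_t attrs[4] = { 0 };
        set_attr_bit(attrs, 9, 2); /* input 9, component .z -> bit 38 */
        printf("attrs[1] = 0x%08x\n", attrs[1]); /* prints 0x00000040 */
        return 0;
}
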
-static INLINE void
-emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
- struct nv50_reg *src1, struct nv50_reg *src2)
-{
- src2->mod ^= NV50_MOD_NEG;
- emit_mad(pc, dst, src0, src1, src2);
- src2->mod ^= NV50_MOD_NEG;
-}
-
-#define NV50_FLOP_RCP 0
-#define NV50_FLOP_RSQ 2
-#define NV50_FLOP_LG2 3
-#define NV50_FLOP_SIN 4
-#define NV50_FLOP_COS 5
-#define NV50_FLOP_EX2 6
-
-/* rcp, rsqrt, lg2 support neg and abs */
-static void
-emit_flop(struct nv50_pc *pc, unsigned sub,
- struct nv50_reg *dst, struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0x90000000;
- if (sub || src->mod) {
- set_long(pc, e);
- e->inst[1] |= (sub << 29);
- }
-
- set_dst(pc, dst, e);
- set_src_0_restricted(pc, src, e);
-
- assert(!src->mod || sub < 4);
-
- if (src->mod & NV50_MOD_NEG)
- e->inst[1] |= 0x04000000;
- if (src->mod & NV50_MOD_ABS)
- e->inst[1] |= 0x00100000;
-
- emit(pc, e);
-}
-
-static void
-emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xb0000000;
-
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
- set_long(pc, e);
- e->inst[1] |= (6 << 29) | 0x00004000;
-
- if (src->mod & NV50_MOD_NEG)
- e->inst[1] |= 0x04000000;
- if (src->mod & NV50_MOD_ABS)
- e->inst[1] |= 0x00100000;
-
- emit(pc, e);
-}
-
-static void
-emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xb0000000;
-
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
- set_long(pc, e);
- e->inst[1] |= (6 << 29);
-
- if (src->mod & NV50_MOD_NEG)
- e->inst[1] |= 0x04000000;
- if (src->mod & NV50_MOD_ABS)
- e->inst[1] |= 0x00100000;
-
- emit(pc, e);
-}
-
-#define CVT_RN (0x00 << 16)
-#define CVT_FLOOR (0x02 << 16)
-#define CVT_CEIL (0x04 << 16)
-#define CVT_TRUNC (0x06 << 16)
-#define CVT_SAT (0x08 << 16)
-#define CVT_ABS (0x10 << 16)
-
-#define CVT_X32_X32 0x04004000
-#define CVT_X32_S32 0x04014000
-#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32)
-#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32)
-#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32)
-#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32)
-#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32)
-#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32)
-#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32)
-#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32)
-
-#define CVT_NEG 0x20000000
-#define CVT_RI 0x08000000
-
-static void
-emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
- int wp, uint32_t cvn)
-{
- struct nv50_program_exec *e;
-
- e = exec(pc);
-
- if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG;
- if (src->mod & NV50_MOD_ABS) cvn |= CVT_ABS;
-
- e->inst[0] = 0xa0000000;
- e->inst[1] = cvn;
- set_long(pc, e);
- set_src_0(pc, src, e);
-
- if (wp >= 0)
- set_pred_wr(pc, 1, wp, e);
-
- if (dst)
- set_dst(pc, dst, e);
- else {
- e->inst[0] |= 0x000001fc;
- e->inst[1] |= 0x00000008;
- }
-
- emit(pc, e);
-}
-
-/* nv50 Condition codes:
- * 0x1 = LT
- * 0x2 = EQ
- * 0x3 = LE
- * 0x4 = GT
- * 0x5 = NE
- * 0x6 = GE
- * 0x7 = set condition code ? (used before bra.lt/le/gt/ge)
- * 0x8 = unordered bit (allows NaN)
- *
- * mode = 0x04 (u32), 0x0c (s32), 0x80 (f32)
- */
-static void
-emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
- struct nv50_reg *src0, struct nv50_reg *src1, uint8_t mode)
-{
- static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
-
- struct nv50_program_exec *e = exec(pc);
- struct nv50_reg *rdst;
-
- assert(ccode < 16);
- if (check_swap_src_0_1(pc, &src0, &src1))
- ccode = cc_swapped[ccode & 7] | (ccode & 8);
-
- rdst = dst;
- if (dst && dst->type != P_TEMP)
- dst = alloc_temp(pc, NULL);
-
- set_long(pc, e);
- e->inst[0] |= 0x30000000 | (mode << 24);
- e->inst[1] |= 0x60000000 | (ccode << 14);
-
- if (wp >= 0)
- set_pred_wr(pc, 1, wp, e);
- if (dst)
- set_dst(pc, dst, e);
- else {
- e->inst[0] |= 0x000001fc;
- e->inst[1] |= 0x00000008;
- }
-
- set_src_0(pc, src0, e);
- set_src_1(pc, src1, e);
-
- emit(pc, e);
-
- if (rdst && mode == 0x80) /* convert to float ? */
- emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32);
- if (rdst && rdst != dst)
- free_temp(pc, dst);
-}
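
/* Editor's illustrative sketch (not part of this change): for the float
 * comparison opcodes (mode 0x80) the SET result is presumably an integer
 * all-ones/all-zeroes mask, and the trailing cvt with CVT_ABS | CVT_F32_S32
 * in emit_set() above turns that into 1.0f / 0.0f.  Plain C equivalent of
 * that last conversion step:
 */
#include <stdio.h>

static float set_mask_to_float(int mask) /* mask is 0 or -1 (0xffffffff) */
{
        return (float)(mask < 0 ? -mask : mask); /* abs, then s32 -> f32 */
}

int main(void)
{
        printf("%.1f %.1f\n", set_mask_to_float(-1), set_mask_to_float(0));
        return 0; /* prints "1.0 0.0" */
}
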
-
-static INLINE void
-map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty)
-{
- switch (op) {
- case TGSI_OPCODE_SLT: *cc = 0x1; *ty = 0x80; break;
- case TGSI_OPCODE_SGE: *cc = 0x6; *ty = 0x80; break;
- case TGSI_OPCODE_SEQ: *cc = 0x2; *ty = 0x80; break;
- case TGSI_OPCODE_SGT: *cc = 0x4; *ty = 0x80; break;
- case TGSI_OPCODE_SLE: *cc = 0x3; *ty = 0x80; break;
- case TGSI_OPCODE_SNE: *cc = 0xd; *ty = 0x80; break;
-
- case TGSI_OPCODE_ISLT: *cc = 0x1; *ty = 0x0c; break;
- case TGSI_OPCODE_ISGE: *cc = 0x6; *ty = 0x0c; break;
- case TGSI_OPCODE_USEQ: *cc = 0x2; *ty = 0x04; break;
- case TGSI_OPCODE_USGE: *cc = 0x6; *ty = 0x04; break;
- case TGSI_OPCODE_USLT: *cc = 0x1; *ty = 0x04; break;
- case TGSI_OPCODE_USNE: *cc = 0x5; *ty = 0x04; break;
- default:
- assert(0);
- return;
- }
-}
-
-static void
-emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src0, struct nv50_reg *rsrc1)
-{
- struct nv50_program_exec *e = exec(pc);
- struct nv50_reg *src1;
-
- e->inst[0] = 0x20000000;
-
- alloc_reg(pc, rsrc1);
- check_swap_src_0_1(pc, &src0, &rsrc1);
-
- src1 = rsrc1;
- if (src0->mod & rsrc1->mod & NV50_MOD_NEG) {
- src1 = temp_temp(pc, e);
- emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32);
- }
-
- if (!pc->allow32 || src1->hw > 63 ||
- (src1->type != P_TEMP && src1->type != P_IMMD))
- set_long(pc, e);
-
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
-
- if (is_long(e)) {
- e->inst[1] |= 1 << 26;
- set_src_2(pc, src1, e);
- } else {
- e->inst[0] |= 0x8000;
- if (src1->type == P_IMMD)
- set_immd(pc, src1, e);
- else
- set_src_1(pc, src1, e);
- }
-
- if (src0->mod & NV50_MOD_NEG)
- e->inst[0] |= 1 << 28;
- else
- if (src1->mod & NV50_MOD_NEG)
- e->inst[0] |= 1 << 22;
-
- emit(pc, e);
-}
-
-static void
-emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1,
- struct nv50_reg *src2)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0x60000000;
- if (!pc->allow32)
- set_long(pc, e);
- set_dst(pc, dst, e);
-
- set_half_src(pc, src0, lh_0, e, 9);
- set_half_src(pc, src1, lh_1, e, 16);
- alloc_reg(pc, src2);
- if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw))
- set_src_2(pc, src2, e);
-
- emit(pc, e);
-}
-
-static void
-emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0x40000000;
- set_long(pc, e);
- set_dst(pc, dst, e);
-
- set_half_src(pc, src0, lh_0, e, 9);
- set_half_src(pc, src1, lh_1, e, 16);
-
- emit(pc, e);
-}
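
/* Editor's illustrative sketch (not part of this change): the 16-bit
 * multiply/mad helpers above are later combined (see the TGSI_OPCODE_UMUL /
 * UMAD cases further down) into a full 32-bit multiply:
 *   a * b = ((alo*bhi + ahi*blo) << 16) + alo*blo   (mod 2^32)
 * The standalone function below checks that identity.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t umul32_from_u16(uint32_t a, uint32_t b)
{
        uint32_t alo = a & 0xffff, ahi = a >> 16;
        uint32_t blo = b & 0xffff, bhi = b >> 16;
        uint32_t t = alo * bhi + ahi * blo; /* mul_u16 + mad_u16 */
        return (t << 16) + alo * blo;       /* shl 16 + final mad_u16 */
}

int main(void)
{
        assert(umul32_from_u16(0x12345678u, 0x9abcdef0u) ==
               (uint32_t)(0x12345678u * 0x9abcdef0u));
        return 0;
}
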
-
-static void
-emit_sad(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0x50000000;
- if (!pc->allow32)
- set_long(pc, e);
- check_swap_src_0_1(pc, &src0, &src1);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_1(pc, src1, e);
- alloc_reg(pc, src2);
- if (is_long(e) || (src2->type != dst->type) || (src2->hw != dst->hw))
- set_src_2(pc, src2, e);
-
- if (is_long(e))
- e->inst[1] |= 0x0c << 24;
- else
- e->inst[0] |= 0x81 << 8;
-
- emit(pc, e);
-}
-
-static INLINE void
-emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- emit_cvt(pc, dst, src, -1, CVT_FLOOR | CVT_F32_F32 | CVT_RI);
-}
-
-static void
-emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
- struct nv50_reg *v, struct nv50_reg *e)
-{
- struct nv50_reg *temp = alloc_temp(pc, NULL);
-
- emit_flop(pc, NV50_FLOP_LG2, temp, v);
- emit_mul(pc, temp, temp, e);
- emit_preex2(pc, temp, temp);
- emit_flop(pc, NV50_FLOP_EX2, dst, temp);
-
- free_temp(pc, temp);
-}
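
/* Editor's illustrative sketch (not part of this change): emit_pow() above
 * lowers POW to the hardware's lg2/ex2 pair using pow(v, e) = 2^(e * log2(v))
 * for v > 0, which is what the lg2 -> mul -> pre-ex2 -> ex2 sequence computes.
 * Plain C check of the identity:
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
        float v = 2.5f, e = 3.0f;
        printf("%f %f\n", powf(v, e), exp2f(e * log2f(v)));
        return 0; /* both print (approximately) 15.625 */
}
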
-
-static INLINE void
-emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- emit_cvt(pc, dst, src, -1, CVT_SAT | CVT_F32_F32);
-}
-
-static void
-emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
- struct nv50_reg **src)
-{
- struct nv50_reg *one = alloc_immd(pc, 1.0);
- struct nv50_reg *zero = alloc_immd(pc, 0.0);
- struct nv50_reg *neg128 = alloc_immd(pc, -127.999999);
- struct nv50_reg *pos128 = alloc_immd(pc, 127.999999);
- struct nv50_reg *tmp[4] = { 0 };
- boolean allow32 = pc->allow32;
-
- pc->allow32 = FALSE;
-
- if (mask & (3 << 1)) {
- tmp[0] = alloc_temp(pc, NULL);
- emit_minmax(pc, NV50_MAX_F32, tmp[0], src[0], zero);
- }
-
- if (mask & (1 << 2)) {
- set_pred_wr(pc, 1, 0, pc->p->exec_tail);
-
- tmp[1] = temp_temp(pc, NULL);
- emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero);
-
- tmp[3] = temp_temp(pc, NULL);
- emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128);
- emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128);
-
- emit_pow(pc, dst[2], tmp[1], tmp[3]);
- emit_mov(pc, dst[2], zero);
- set_pred(pc, 3, 0, pc->p->exec_tail);
- }
-
- if (mask & (1 << 1))
- assimilate_temp(pc, dst[1], tmp[0]);
- else
- if (mask & (1 << 2))
- free_temp(pc, tmp[0]);
-
- pc->allow32 = allow32;
-
- /* do this last, in case src[i,j] == dst[0,3] */
- if (mask & (1 << 0))
- emit_mov(pc, dst[0], one);
-
- if (mask & (1 << 3))
- emit_mov(pc, dst[3], one);
-
- FREE(pos128);
- FREE(neg128);
- FREE(zero);
- FREE(one);
-}
-
-static void
-emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
-{
- struct nv50_program_exec *e;
- const int r_pred = 1;
-
- e = exec(pc);
- e->inst[0] = 0x00000002; /* discard */
- set_long(pc, e); /* sets cond code to ALWAYS */
-
- if (src) {
- set_pred(pc, 0x1 /* cc = LT */, r_pred, e);
- /* write to predicate reg */
- emit_cvt(pc, NULL, src, r_pred, CVT_F32_F32);
- }
-
- emit(pc, e);
-}
-
-static struct nv50_program_exec *
-emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = (op << 28) | 2;
- set_long(pc, e);
- if (pred >= 0)
- set_pred(pc, cc, pred, e);
-
- emit(pc, e);
- return e;
-}
-
-static INLINE struct nv50_program_exec *
-emit_breakaddr(struct nv50_pc *pc)
-{
- return emit_control_flow(pc, 0x4, -1, 0);
-}
-
-static INLINE void
-emit_break(struct nv50_pc *pc, int pred, unsigned cc)
-{
- emit_control_flow(pc, 0x5, pred, cc);
-}
-
-static INLINE struct nv50_program_exec *
-emit_joinat(struct nv50_pc *pc)
-{
- return emit_control_flow(pc, 0xa, -1, 0);
-}
-
-static INLINE struct nv50_program_exec *
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc)
-{
- return emit_control_flow(pc, 0x1, pred, cc);
-}
-
-static INLINE struct nv50_program_exec *
-emit_call(struct nv50_pc *pc, int pred, unsigned cc)
-{
- return emit_control_flow(pc, 0x2, pred, cc);
-}
-
-static INLINE void
-emit_ret(struct nv50_pc *pc, int pred, unsigned cc)
-{
- emit_control_flow(pc, 0x3, pred, cc);
-}
-
-static void
-emit_prim_cmd(struct nv50_pc *pc, unsigned cmd)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xf0000000 | (cmd << 9);
- e->inst[1] = 0xc0000000;
- set_long(pc, e);
-
- emit(pc, e);
-}
-
-#define QOP_ADD 0
-#define QOP_SUBR 1
-#define QOP_SUB 2
-#define QOP_MOV_SRC1 3
-
-/* For a quad of threads / top left, top right, bottom left, bottom right
- * pixels, do a different operation, and take src0 from a specific thread.
- */
-static void
-emit_quadop(struct nv50_pc *pc, struct nv50_reg *dst, int wp, int lane_src0,
- struct nv50_reg *src0, struct nv50_reg *src1, ubyte qop)
-{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xc0000000;
- e->inst[1] = 0x80000000;
- set_long(pc, e);
- e->inst[0] |= lane_src0 << 16;
- set_src_0(pc, src0, e);
- set_src_2(pc, src1, e);
-
- if (wp >= 0)
- set_pred_wr(pc, 1, wp, e);
-
- if (dst)
- set_dst(pc, dst, e);
- else {
- e->inst[0] |= 0x000001fc;
- e->inst[1] |= 0x00000008;
- }
-
- e->inst[0] |= (qop & 3) << 20;
- e->inst[1] |= (qop >> 2) << 22;
-
- emit(pc, e);
-}
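
/* Editor's illustrative sketch (not part of this change): qop is an 8-bit
 * field (2 bits land in inst[0], the rest in inst[1]) that presumably holds
 * one 2-bit opcode per lane of the quad.  The value 0x55 used by the
 * texbias/texlod sequences below is QOP_SUBR (1) replicated four times:
 */
#include <assert.h>

int main(void)
{
        const unsigned QOP_SUBR = 1;
        unsigned qop = 0, i;

        for (i = 0; i < 4; ++i)
                qop |= QOP_SUBR << (2 * i); /* one 2-bit slot per lane */
        assert(qop == 0x55);
        return 0;
}
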
-
-static void
-load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
- struct nv50_reg **src, unsigned arg, boolean proj)
-{
- int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod };
-
- src[0]->mod |= NV50_MOD_ABS;
- src[1]->mod |= NV50_MOD_ABS;
- src[2]->mod |= NV50_MOD_ABS;
-
- emit_minmax(pc, NV50_MAX_F32, t[2], src[0], src[1]);
- emit_minmax(pc, NV50_MAX_F32, t[2], src[2], t[2]);
-
- src[0]->mod = mod[0];
- src[1]->mod = mod[1];
- src[2]->mod = mod[2];
-
- if (proj && 0 /* looks more correct without this */)
- emit_mul(pc, t[2], t[2], src[3]);
- else
- if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
- emit_mov(pc, t[3], src[3]);
-
- emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]);
-
- emit_mul(pc, t[0], src[0], t[2]);
- emit_mul(pc, t[1], src[1], t[2]);
- emit_mul(pc, t[2], src[2], t[2]);
-}
-
-static void
-load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
- struct nv50_reg **src, unsigned dim, unsigned arg)
-{
- unsigned c, mode;
-
- if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
- mode = pc->interp_mode[src[0]->index] | INTERP_PERSPECTIVE;
-
- t[3]->rhw = src[3]->rhw;
- emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
- emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]);
-
- for (c = 0; c < dim; ++c) {
- t[c]->rhw = src[c]->rhw;
- emit_interp(pc, t[c], t[3], mode);
- }
- if (arg != dim) { /* depth reference value */
- t[dim]->rhw = src[2]->rhw;
- emit_interp(pc, t[dim], t[3], mode);
- }
- } else {
- /* XXX: for some reason the blob sometimes uses MAD
- * (mad f32 $rX $rY $rZ neg $r63)
- */
- emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]);
- for (c = 0; c < dim; ++c)
- emit_mul(pc, t[c], src[c], t[3]);
- if (arg != dim) /* depth reference value */
- emit_mul(pc, t[dim], src[2], t[3]);
- }
-}
-
-static INLINE void
-get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
-{
- switch (type) {
- case TGSI_TEXTURE_1D:
- *arg = *dim = 1;
- break;
- case TGSI_TEXTURE_SHADOW1D:
- *dim = 1;
- *arg = 2;
- break;
- case TGSI_TEXTURE_UNKNOWN:
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
- *arg = *dim = 2;
- break;
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
- *dim = 2;
- *arg = 3;
- break;
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
- *dim = *arg = 3;
- break;
- default:
- assert(0);
- break;
- }
-}
-
-/* We shouldn't execute TEXLOD if any of the pixels in a quad have
- * different LOD values, so branch off groups of equal LOD.
- */
-static void
-emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
- struct nv50_reg *src, struct nv50_program_exec *tex)
-{
- struct nv50_program_exec *join_at;
- unsigned i, target = pc->p->exec_size + 9 * 2;
-
- if (pc->p->type != PIPE_SHADER_FRAGMENT) {
- emit(pc, tex);
- return;
- }
- pc->allow32 = FALSE;
-
- /* Subtract lod of each pixel from lod of top left pixel, jump to
- * the texlod insn if the result is 0, then repeat for 2 other pixels.
- */
- join_at = emit_joinat(pc);
- emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
- emit_branch(pc, 0, 2)->param.index = target;
-
- for (i = 1; i < 4; ++i) {
- emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
- emit_branch(pc, 0, 2)->param.index = target;
- }
-
- emit_mov(pc, tlod, src); /* target */
- emit(pc, tex); /* texlod */
-
- join_at->param.index = target + 2 * 2;
- JOIN_ON(emit_nop(pc)); /* join _after_ tex */
-}
-
-static void
-emit_texbias_sequence(struct nv50_pc *pc, struct nv50_reg *t[4], unsigned arg,
- struct nv50_program_exec *tex)
-{
- struct nv50_program_exec *e;
- struct nv50_reg imm_1248, *t123[4][4], *r_bits = alloc_temp(pc, NULL);
- int r_pred = 0;
- unsigned n, c, i, cc[4] = { 0x0a, 0x13, 0x11, 0x10 };
-
- pc->allow32 = FALSE;
- ctor_reg(&imm_1248, P_IMMD, -1, ctor_immd_4u32(pc, 1, 2, 4, 8) * 4);
-
- /* Subtract bias value of thread i from bias values of each thread,
- * store result in r_pred, and set bit i in r_bits if result was 0.
- */
- assert(arg < 4);
- for (i = 0; i < 4; ++i, ++imm_1248.hw) {
- emit_quadop(pc, NULL, r_pred, i, t[arg], t[arg], 0x55);
- emit_mov(pc, r_bits, &imm_1248);
- set_pred(pc, 2, r_pred, pc->p->exec_tail);
- }
- emit_mov_to_pred(pc, r_pred, r_bits);
-
- /* The lanes of a quad are now grouped by the bit in r_pred they have
- * set. Put the input values for TEX into a new register set for each
- * group and execute TEX only for a specific group.
- * We cannot use the same register set for each group because we need
- * the derivatives, which are implicitly calculated, to be correct.
- */
- for (i = 1; i < 4; ++i) {
- alloc_temp4(pc, t123[i], 0);
-
- for (c = 0; c <= arg; ++c)
- emit_mov(pc, t123[i][c], t[c]);
-
- *(e = exec(pc)) = *(tex);
- e->inst[0] &= ~0x01fc;
- set_dst(pc, t123[i][0], e);
- set_pred(pc, cc[i], r_pred, e);
- emit(pc, e);
- }
- /* finally TEX on the original regs (where we kept the input) */
- set_pred(pc, cc[0], r_pred, tex);
- emit(pc, tex);
-
- /* put the 3 * n other results into regs for lane 0 */
- n = popcnt4(((e->inst[0] >> 25) & 0x3) | ((e->inst[1] >> 12) & 0xc));
- for (i = 1; i < 4; ++i) {
- for (c = 0; c < n; ++c) {
- emit_mov(pc, t[c], t123[i][c]);
- set_pred(pc, cc[i], r_pred, pc->p->exec_tail);
- }
- free_temp4(pc, t123[i]);
- }
-
- emit_nop(pc);
- free_temp(pc, r_bits);
-}
-
-static void
-emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
- struct nv50_reg **src, unsigned unit, unsigned type,
- boolean proj, int bias_lod)
-{
- struct nv50_reg *t[4];
- struct nv50_program_exec *e;
- unsigned c, dim, arg;
-
- /* t[i] must be within a single 128 bit super-reg */
- alloc_temp4(pc, t, 0);
-
- e = exec(pc);
- e->inst[0] = 0xf0000000;
- set_long(pc, e);
- set_dst(pc, t[0], e);
-
- /* TIC and TSC binding indices (TSC is ignored as TSC_LINKED = TRUE): */
- e->inst[0] |= (unit << 9) /* | (unit << 17) */;
-
- /* live flag (don't set if TEX results affect input to another TEX): */
- /* e->inst[0] |= 0x00000004; */
-
- get_tex_dim(type, &dim, &arg);
-
- if (type == TGSI_TEXTURE_CUBE) {
- e->inst[0] |= 0x08000000;
- load_cube_tex_coords(pc, t, src, arg, proj);
- } else
- if (proj)
- load_proj_tex_coords(pc, t, src, dim, arg);
- else {
- for (c = 0; c < dim; c++)
- emit_mov(pc, t[c], src[c]);
- if (arg != dim) /* depth reference value (always src.z here) */
- emit_mov(pc, t[dim], src[2]);
- }
-
- e->inst[0] |= (mask & 0x3) << 25;
- e->inst[1] |= (mask & 0xc) << 12;
-
- if (!bias_lod) {
- e->inst[0] |= (arg - 1) << 22;
- emit(pc, e);
- } else
- if (bias_lod < 0) {
- assert(pc->p->type == PIPE_SHADER_FRAGMENT);
- e->inst[0] |= arg << 22;
- e->inst[1] |= 0x20000000; /* texbias */
- emit_mov(pc, t[arg], src[3]);
- emit_texbias_sequence(pc, t, arg, e);
- } else {
- e->inst[0] |= arg << 22;
- e->inst[1] |= 0x40000000; /* texlod */
- emit_mov(pc, t[arg], src[3]);
- emit_texlod_sequence(pc, t[arg], src[3], e);
- }
-
-#if 1
- c = 0;
- if (mask & 1) emit_mov(pc, dst[0], t[c++]);
- if (mask & 2) emit_mov(pc, dst[1], t[c++]);
- if (mask & 4) emit_mov(pc, dst[2], t[c++]);
- if (mask & 8) emit_mov(pc, dst[3], t[c]);
-
- free_temp4(pc, t);
-#else
- /* XXX: if e.g. MUL is used directly after TEX, it would still use
- * the texture coordinates, not the fetched values: latency ? */
-
- for (c = 0; c < 4; c++) {
- if (mask & (1 << c))
- assimilate_temp(pc, dst[c], t[c]);
- else
- free_temp(pc, t[c]);
- }
-#endif
-}
-
-static void
-emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- assert(src->type == P_TEMP);
-
- e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0240000 : 0xc0140000;
- e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x86400000 : 0x89800000;
- set_long(pc, e);
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
- set_src_2(pc, src, e);
-
- emit(pc, e);
-}
-
-static void
-emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- struct nv50_program_exec *e = exec(pc);
-
- assert(src->type == P_TEMP);
-
- e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0250000 : 0xc0150000;
- e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x85800000 : 0x8a400000;
- set_long(pc, e);
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
- set_src_2(pc, src, e);
-
- emit(pc, e);
-}
-
-static void
-convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
-{
- unsigned q = 0, m = ~0;
-
- assert(!is_long(e));
-
- switch (e->inst[0] >> 28) {
- case 0x1:
- /* MOV */
- q = 0x0403c000;
- m = 0xffff7fff;
- break;
- case 0x2:
- case 0x3:
- /* ADD, SUB, SUBR b32 */
- m = ~(0x8000 | (127 << 16));
- q = ((e->inst[0] & (~m)) >> 2) | (1 << 26);
- break;
- case 0x5:
- /* SAD */
- m = ~(0x81 << 8);
- q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12);
- break;
- case 0x6:
- /* MAD u16 */
- q = (e->inst[0] & (0x7f << 2)) << 12;
- break;
- case 0x8:
- /* INTERP (move centroid, perspective and flat bits) */
- m = ~0x03000100;
- q = (e->inst[0] & (3 << 24)) >> (24 - 16);
- q |= (e->inst[0] & (1 << 8)) << (18 - 8);
- break;
- case 0x9:
- /* RCP */
- break;
- case 0xB:
- /* ADD */
- m = ~(127 << 16);
- q = ((e->inst[0] & (~m)) >> 2);
- break;
- case 0xC:
- /* MUL */
- m = ~0x00008000;
- q = ((e->inst[0] & (~m)) << 12);
- break;
- case 0xE:
- /* MAD (if src2 == dst) */
- q = ((e->inst[0] & 0x1fc) << 12);
- break;
- default:
- assert(0);
- break;
- }
-
- set_long(pc, e);
- pc->p->exec_size++;
-
- e->inst[0] &= m;
- e->inst[1] |= q;
-}
-
-/* Return the source modifiers (NEG, ABS, I32) supported by an operation. */
static int
-get_supported_mods(const struct tgsi_full_instruction *insn, int i)
-{
- switch (insn->Instruction.Opcode) {
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_DDX:
- case TGSI_OPCODE_DDY:
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_KIL:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_MAD:
- case TGSI_OPCODE_MUL:
- case TGSI_OPCODE_POW:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */
- case TGSI_OPCODE_SCS:
- case TGSI_OPCODE_SIN:
- case TGSI_OPCODE_SUB:
- return NV50_MOD_NEG;
- case TGSI_OPCODE_MAX:
- case TGSI_OPCODE_MIN:
- case TGSI_OPCODE_INEG: /* tgsi src sign toggle/set would be stupid */
- return NV50_MOD_ABS;
- case TGSI_OPCODE_CEIL:
- case TGSI_OPCODE_FLR:
- case TGSI_OPCODE_TRUNC:
- return NV50_MOD_NEG | NV50_MOD_ABS;
- case TGSI_OPCODE_F2I:
- case TGSI_OPCODE_F2U:
- case TGSI_OPCODE_I2F:
- case TGSI_OPCODE_U2F:
- return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32;
- case TGSI_OPCODE_UADD:
- return NV50_MOD_NEG | NV50_MOD_I32;
- case TGSI_OPCODE_SAD:
- case TGSI_OPCODE_SHL:
- case TGSI_OPCODE_IMAX:
- case TGSI_OPCODE_IMIN:
- case TGSI_OPCODE_ISHR:
- case TGSI_OPCODE_NOT:
- case TGSI_OPCODE_UMAD:
- case TGSI_OPCODE_UMAX:
- case TGSI_OPCODE_UMIN:
- case TGSI_OPCODE_UMUL:
- case TGSI_OPCODE_USHR:
- return NV50_MOD_I32;
- default:
- return 0;
- }
-}
-
-/* Return a read mask for source registers deduced from opcode & write mask. */
-static unsigned
-nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
-{
- unsigned x, mask = insn->Dst[0].Register.WriteMask;
-
- switch (insn->Instruction.Opcode) {
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_SIN:
- return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
- case TGSI_OPCODE_DP3:
- return 0x7;
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- case TGSI_OPCODE_KIL: /* WriteMask ignored */
- return 0xf;
- case TGSI_OPCODE_DST:
- return mask & (c ? 0xa : 0x6);
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_EXP:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_LOG:
- case TGSI_OPCODE_POW:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SCS:
- return 0x1;
- case TGSI_OPCODE_IF:
- return 0x1;
- case TGSI_OPCODE_LIT:
- return 0xb;
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXL:
- case TGSI_OPCODE_TXP:
- {
- const struct tgsi_instruction_texture *tex;
-
- assert(insn->Instruction.Texture);
- tex = &insn->Texture;
-
- mask = 0x7;
- if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
- insn->Instruction.Opcode != TGSI_OPCODE_TXD)
- mask |= 0x8; /* bias, lod or proj */
-
- switch (tex->Texture) {
- case TGSI_TEXTURE_1D:
- mask &= 0x9;
- break;
- case TGSI_TEXTURE_SHADOW1D:
- mask &= 0x5;
- break;
- case TGSI_TEXTURE_2D:
- mask &= 0xb;
- break;
- default:
- break;
- }
- }
- return mask;
- case TGSI_OPCODE_XPD:
- x = 0;
- if (mask & 1) x |= 0x6;
- if (mask & 2) x |= 0x5;
- if (mask & 4) x |= 0x3;
- return x;
- default:
- break;
- }
-
- return mask;
-}
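
/* Editor's illustrative sketch (not part of this change): for SIN/COS the
 * read mask above is derived from the write mask as
 *     (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0)
 * i.e. writing .w reads src.w, writing any of .xyz reads src.x (matching how
 * the COS/SIN cases below use src[0][3] and src[0][0]).  For example:
 */
#include <assert.h>

static unsigned sincos_src_mask(unsigned wrmask)
{
        return (wrmask & 0x8) | ((wrmask & 0x7) ? 0x1 : 0x0);
}

int main(void)
{
        assert(sincos_src_mask(0xe) == 0x9); /* write .yzw -> read .x and .w */
        assert(sincos_src_mask(0x8) == 0x8); /* write .w   -> read .w only   */
        return 0;
}
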
-
-static struct nv50_reg *
-tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
-{
- switch (dst->Register.File) {
- case TGSI_FILE_TEMPORARY:
- return &pc->temp[dst->Register.Index * 4 + c];
- case TGSI_FILE_OUTPUT:
- return &pc->result[dst->Register.Index * 4 + c];
- case TGSI_FILE_ADDRESS:
- {
- struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c];
- if (!r) {
- r = get_address_reg(pc, NULL);
- r->index = dst->Register.Index * 4 + c;
- pc->addr[r->index] = r;
- }
- assert(r);
- return r;
- }
- case TGSI_FILE_NULL:
- return NULL;
- case TGSI_FILE_SYSTEM_VALUE:
- assert(pc->sysval[dst->Register.Index].type == P_RESULT);
- assert(c == 0);
- return &pc->sysval[dst->Register.Index];
- default:
- break;
- }
-
- return NULL;
-}
-
-static struct nv50_reg *
-tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
- int mod)
-{
- struct nv50_reg *r = NULL;
- struct nv50_reg *temp = NULL;
- unsigned sgn, c, swz, cvn;
-
- if (src->Register.File != TGSI_FILE_CONSTANT)
- assert(!src->Register.Indirect);
-
- sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
-
- c = tgsi_util_get_full_src_register_swizzle(src, chan);
- switch (c) {
- case TGSI_SWIZZLE_X:
- case TGSI_SWIZZLE_Y:
- case TGSI_SWIZZLE_Z:
- case TGSI_SWIZZLE_W:
- switch (src->Register.File) {
- case TGSI_FILE_INPUT:
- r = &pc->attr[src->Register.Index * 4 + c];
-
- if (!src->Dimension.Dimension)
- break;
- r = reg_instance(pc, r);
- r->vtx = src->Dimension.Index;
-
- if (!src->Dimension.Indirect)
- break;
- swz = tgsi_util_get_src_register_swizzle(
- &src->DimIndirect, 0);
- r->acc = -1;
- r->indirect[1] = src->DimIndirect.Index * 4 + swz;
- break;
- case TGSI_FILE_TEMPORARY:
- r = &pc->temp[src->Register.Index * 4 + c];
- break;
- case TGSI_FILE_CONSTANT:
- if (!src->Register.Indirect) {
- r = &pc->param[src->Register.Index * 4 + c];
- break;
- }
- /* Indicate indirection by setting r->acc < 0 and
- * use the index field to select the address reg.
- */
- r = reg_instance(pc, NULL);
- ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c);
-
- swz = tgsi_util_get_src_register_swizzle(
- &src->Indirect, 0);
- r->acc = -1;
- r->indirect[0] = src->Indirect.Index * 4 + swz;
- break;
- case TGSI_FILE_IMMEDIATE:
- r = &pc->immd[src->Register.Index * 4 + c];
- break;
- case TGSI_FILE_SAMPLER:
- return NULL;
- case TGSI_FILE_ADDRESS:
- r = pc->addr[src->Register.Index * 4 + c];
- assert(r);
- break;
- case TGSI_FILE_SYSTEM_VALUE:
- assert(c == 0);
- r = &pc->sysval[src->Register.Index];
- break;
- default:
- assert(0);
- break;
- }
- break;
- default:
- assert(0);
- break;
- }
-
- cvn = (mod & NV50_MOD_I32) ? CVT_S32_S32 : CVT_F32_F32;
-
- switch (sgn) {
- case TGSI_UTIL_SIGN_CLEAR:
- r->mod = NV50_MOD_ABS;
- break;
- case TGSI_UTIL_SIGN_SET:
- r->mod = NV50_MOD_NEG_ABS;
- break;
- case TGSI_UTIL_SIGN_TOGGLE:
- r->mod = NV50_MOD_NEG;
- break;
- default:
- assert(!r->mod && sgn == TGSI_UTIL_SIGN_KEEP);
- break;
- }
-
- if ((r->mod & mod) != r->mod) {
- temp = temp_temp(pc, NULL);
- emit_cvt(pc, temp, r, -1, cvn);
- r->mod = 0;
- r = temp;
- } else
- r->mod |= mod & NV50_MOD_I32;
-
- assert(r);
- if (r->acc >= 0 && r->vtx < 0 && r != temp)
- return reg_instance(pc, r); /* will clear r->mod */
- return r;
-}
-
-/* return TRUE for ops that produce only a single result */
-static boolean
-is_scalar_op(unsigned op)
-{
- switch (op) {
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_DP2:
- case TGSI_OPCODE_DP3:
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_POW:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SIN:
- /*
- case TGSI_OPCODE_KIL:
- case TGSI_OPCODE_LIT:
- case TGSI_OPCODE_SCS:
- */
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-/* Returns a bitmask indicating which dst components depend
- * on source s, component c (reverse of nv50_tgsi_src_mask).
- */
-static unsigned
-nv50_tgsi_dst_revdep(unsigned op, int s, int c)
-{
- if (is_scalar_op(op))
- return 0x1;
-
- switch (op) {
- case TGSI_OPCODE_DST:
- return (1 << c) & (s ? 0xa : 0x6);
- case TGSI_OPCODE_XPD:
- switch (c) {
- case 0: return 0x6;
- case 1: return 0x5;
- case 2: return 0x3;
- case 3: return 0x0;
- default:
- assert(0);
- return 0x0;
- }
- case TGSI_OPCODE_EXP:
- case TGSI_OPCODE_LOG:
- case TGSI_OPCODE_LIT:
- case TGSI_OPCODE_SCS:
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXL:
- case TGSI_OPCODE_TXP:
- /* these take care of dangerous swizzles themselves */
- return 0x0;
- case TGSI_OPCODE_IF:
- case TGSI_OPCODE_KIL:
- /* don't call this function for these ops */
- assert(0);
- return 0;
- default:
- /* linear vector instruction */
- return (1 << c);
- }
-}
-
-static INLINE boolean
-has_pred(struct nv50_program_exec *e, unsigned cc)
-{
- if (!is_long(e) || is_immd(e))
- return FALSE;
- return ((e->inst[1] & 0x780) == (cc << 7));
-}
-
-/* on ENDIF see if we can do "@p0.neu single_op" instead of:
- * join_at ENDIF
- * @p0.eq bra ENDIF
- * single_op
- * ENDIF: nop.join
- */
-static boolean
-nv50_kill_branch(struct nv50_pc *pc)
-{
- int lvl = pc->if_lvl;
-
- if (pc->if_insn[lvl]->next != pc->p->exec_tail)
- return FALSE;
- if (is_immd(pc->p->exec_tail))
- return FALSE;
-
- /* if ccode == 'true', the BRA is from an ELSE and the predicate
- * reg may no longer be valid, since we currently always use $p0
- */
- if (has_pred(pc->if_insn[lvl], 0xf))
- return FALSE;
- assert(pc->if_insn[lvl] && pc->if_join[lvl]);
-
- /* We'll use the exec allocated for JOIN_AT (we can't easily
- * access nv50_program_exec's prev).
- */
- pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */
-
- *pc->if_join[lvl] = *pc->p->exec_tail;
-
- FREE(pc->if_insn[lvl]);
- FREE(pc->p->exec_tail);
-
- pc->p->exec_tail = pc->if_join[lvl];
- pc->p->exec_tail->next = NULL;
- set_pred(pc, 0xd, 0, pc->p->exec_tail);
-
- return TRUE;
-}
-
-static void
-nv50_fp_move_results(struct nv50_pc *pc)
-{
- struct nv50_reg reg;
- unsigned i;
-
- ctor_reg(&reg, P_TEMP, -1, -1);
-
- for (i = 0; i < pc->result_nr * 4; ++i) {
- if (pc->result[i].rhw < 0 || pc->result[i].hw < 0)
- continue;
- if (pc->result[i].rhw != pc->result[i].hw) {
- reg.hw = pc->result[i].rhw;
- emit_mov(pc, &reg, &pc->result[i]);
- }
- }
-}
-
-static boolean
-nv50_program_tx_insn(struct nv50_pc *pc,
- const struct tgsi_full_instruction *inst)
-{
- struct nv50_reg *rdst[4], *dst[4], *brdc, *src[3][4], *temp;
- unsigned mask, sat, unit = 0;
- int i, c;
-
- mask = inst->Dst[0].Register.WriteMask;
- sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
-
- memset(src, 0, sizeof(src));
-
- for (c = 0; c < 4; c++) {
- if ((mask & (1 << c)) && !pc->r_dst[c])
- dst[c] = tgsi_dst(pc, c, &inst->Dst[0]);
- else
- dst[c] = pc->r_dst[c];
- rdst[c] = dst[c];
- }
-
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *fs = &inst->Src[i];
- unsigned src_mask;
- int mod_supp;
-
- src_mask = nv50_tgsi_src_mask(inst, i);
- mod_supp = get_supported_mods(inst, i);
-
- if (fs->Register.File == TGSI_FILE_SAMPLER)
- unit = fs->Register.Index;
-
- for (c = 0; c < 4; c++)
- if (src_mask & (1 << c))
- src[i][c] = tgsi_src(pc, c, fs, mod_supp);
- }
-
- brdc = temp = pc->r_brdc;
- if (brdc && brdc->type != P_TEMP) {
- temp = temp_temp(pc, NULL);
- if (sat)
- brdc = temp;
- } else
- if (sat) {
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)) || dst[c]->type == P_TEMP)
- continue;
- /* rdst[c] = dst[c]; */ /* done above */
- dst[c] = temp_temp(pc, NULL);
- }
- }
-
- assert(brdc || !is_scalar_op(inst->Instruction.Opcode));
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ABS:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, dst[c], src[0][c], -1,
- CVT_ABS | CVT_F32_F32);
- }
- break;
- case TGSI_OPCODE_ADD:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_add(pc, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_AND:
- case TGSI_OPCODE_XOR:
- case TGSI_OPCODE_OR:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_bitop2(pc, dst[c], src[0][c], src[1][c],
- inst->Instruction.Opcode);
- }
- break;
- case TGSI_OPCODE_ARL:
- temp = temp_temp(pc, NULL);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, temp, src[0][c], -1,
- CVT_FLOOR | CVT_S32_F32);
- emit_arl(pc, dst[c], temp, 4);
- }
- break;
- case TGSI_OPCODE_BGNLOOP:
- pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc);
- pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
- terminate_mbb(pc);
- break;
- case TGSI_OPCODE_BGNSUB:
- assert(!pc->in_subroutine);
- pc->in_subroutine = TRUE;
- /* probably not necessary, but align to 8 byte boundary */
- if (!is_long(pc->p->exec_tail))
- convert_to_long(pc, pc->p->exec_tail);
- break;
- case TGSI_OPCODE_BRK:
- assert(pc->loop_lvl > 0);
- emit_break(pc, -1, 0);
- break;
- case TGSI_OPCODE_CAL:
- assert(inst->Label.Label < pc->insn_nr);
- emit_call(pc, -1, 0)->param.index = inst->Label.Label;
- /* replaced by actual offset in nv50_program_fixup_insns */
- break;
- case TGSI_OPCODE_CEIL:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, dst[c], src[0][c], -1,
- CVT_CEIL | CVT_F32_F32 | CVT_RI);
- }
- break;
- case TGSI_OPCODE_CMP:
- pc->allow32 = FALSE;
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, NULL, src[0][c], 1, CVT_F32_F32);
- emit_mov(pc, dst[c], src[1][c]);
- set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */
- emit_mov(pc, dst[c], src[2][c]);
- set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */
- }
- break;
- case TGSI_OPCODE_CONT:
- assert(pc->loop_lvl > 0);
- emit_branch(pc, -1, 0)->param.index =
- pc->loop_pos[pc->loop_lvl - 1];
- break;
- case TGSI_OPCODE_COS:
- if (mask & 8) {
- emit_precossin(pc, temp, src[0][3]);
- emit_flop(pc, NV50_FLOP_COS, dst[3], temp);
- if (!(mask &= 7))
- break;
- if (temp == dst[3])
- temp = brdc = temp_temp(pc, NULL);
- }
- emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, NV50_FLOP_COS, brdc, temp);
- break;
- case TGSI_OPCODE_DDX:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_ddx(pc, dst[c], src[0][c]);
- }
- break;
- case TGSI_OPCODE_DDY:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_ddy(pc, dst[c], src[0][c]);
- }
- break;
- case TGSI_OPCODE_DP3:
- emit_mul(pc, temp, src[0][0], src[1][0]);
- emit_mad(pc, temp, src[0][1], src[1][1], temp);
- emit_mad(pc, brdc, src[0][2], src[1][2], temp);
- break;
- case TGSI_OPCODE_DP4:
- emit_mul(pc, temp, src[0][0], src[1][0]);
- emit_mad(pc, temp, src[0][1], src[1][1], temp);
- emit_mad(pc, temp, src[0][2], src[1][2], temp);
- emit_mad(pc, brdc, src[0][3], src[1][3], temp);
- break;
- case TGSI_OPCODE_DPH:
- emit_mul(pc, temp, src[0][0], src[1][0]);
- emit_mad(pc, temp, src[0][1], src[1][1], temp);
- emit_mad(pc, temp, src[0][2], src[1][2], temp);
- emit_add(pc, brdc, src[1][3], temp);
- break;
- case TGSI_OPCODE_DST:
- if (mask & (1 << 1))
- emit_mul(pc, dst[1], src[0][1], src[1][1]);
- if (mask & (1 << 2))
- emit_mov(pc, dst[2], src[0][2]);
- if (mask & (1 << 3))
- emit_mov(pc, dst[3], src[1][3]);
- if (mask & (1 << 0))
- emit_mov_immdval(pc, dst[0], 1.0f);
- break;
- case TGSI_OPCODE_ELSE:
- emit_branch(pc, -1, 0);
- pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
- pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
- terminate_mbb(pc);
- break;
- case TGSI_OPCODE_EMIT:
- emit_prim_cmd(pc, 1);
- break;
- case TGSI_OPCODE_ENDIF:
- pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
-
- /* try to replace branch over 1 insn with a predicated insn */
- if (nv50_kill_branch(pc) == TRUE)
- break;
-
- if (pc->if_join[pc->if_lvl]) {
- pc->if_join[pc->if_lvl]->param.index = pc->p->exec_size;
- pc->if_join[pc->if_lvl] = NULL;
- }
- terminate_mbb(pc);
- /* emit a NOP as join point, we could set it on the next
- * one, but would have to make sure it is long and !immd
- */
- JOIN_ON(emit_nop(pc));
- break;
- case TGSI_OPCODE_ENDLOOP:
- emit_branch(pc, -1, 0)->param.index =
- pc->loop_pos[--pc->loop_lvl];
- pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size;
- terminate_mbb(pc);
- break;
- case TGSI_OPCODE_ENDPRIM:
- emit_prim_cmd(pc, 2);
- break;
- case TGSI_OPCODE_ENDSUB:
- assert(pc->in_subroutine);
- terminate_mbb(pc);
- pc->in_subroutine = FALSE;
- break;
- case TGSI_OPCODE_EX2:
- emit_preex2(pc, temp, src[0][0]);
- emit_flop(pc, NV50_FLOP_EX2, brdc, temp);
- break;
- case TGSI_OPCODE_EXP:
- {
- struct nv50_reg *t[2];
-
- assert(!temp);
- t[0] = temp_temp(pc, NULL);
- t[1] = temp_temp(pc, NULL);
-
- if (mask & 0x6)
- emit_mov(pc, t[0], src[0][0]);
- if (mask & 0x3)
- emit_flr(pc, t[1], src[0][0]);
-
- if (mask & (1 << 1))
- emit_sub(pc, dst[1], t[0], t[1]);
- if (mask & (1 << 0)) {
- emit_preex2(pc, t[1], t[1]);
- emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]);
- }
- if (mask & (1 << 2)) {
- emit_preex2(pc, t[0], t[0]);
- emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]);
- }
- if (mask & (1 << 3))
- emit_mov_immdval(pc, dst[3], 1.0f);
- }
- break;
- case TGSI_OPCODE_F2I:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, dst[c], src[0][c], -1,
- CVT_TRUNC | CVT_S32_F32);
- }
- break;
- case TGSI_OPCODE_F2U:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, dst[c], src[0][c], -1,
- CVT_TRUNC | CVT_U32_F32);
- }
- break;
- case TGSI_OPCODE_FLR:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_flr(pc, dst[c], src[0][c]);
- }
- break;
- case TGSI_OPCODE_FRC:
- temp = temp_temp(pc, NULL);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_flr(pc, temp, src[0][c]);
- emit_sub(pc, dst[c], src[0][c], temp);
- }
- break;
- case TGSI_OPCODE_I2F:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_S32);
- }
- break;
- case TGSI_OPCODE_IF:
- assert(pc->if_lvl < NV50_MAX_COND_NESTING);
- emit_cvt(pc, NULL, src[0][0], 0, CVT_ABS | CVT_F32_F32);
- pc->if_join[pc->if_lvl] = emit_joinat(pc);
- pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);
- terminate_mbb(pc);
- break;
- case TGSI_OPCODE_IMAX:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_minmax(pc, 0x08c, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_IMIN:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_minmax(pc, 0x0ac, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_INEG:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, dst[c], src[0][c], -1,
- CVT_S32_S32 | CVT_NEG);
- }
- break;
- case TGSI_OPCODE_KIL:
- assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]);
- emit_kil(pc, src[0][0]);
- emit_kil(pc, src[0][1]);
- emit_kil(pc, src[0][2]);
- emit_kil(pc, src[0][3]);
- break;
- case TGSI_OPCODE_KILP:
- emit_kil(pc, NULL);
- break;
- case TGSI_OPCODE_LIT:
- emit_lit(pc, &dst[0], mask, &src[0][0]);
- break;
- case TGSI_OPCODE_LG2:
- emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]);
- break;
- case TGSI_OPCODE_LOG:
- {
- struct nv50_reg *t[2];
-
- t[0] = temp_temp(pc, NULL);
- if (mask & (1 << 1))
- t[1] = temp_temp(pc, NULL);
- else
- t[1] = t[0];
-
- emit_cvt(pc, t[0], src[0][0], -1, CVT_ABS | CVT_F32_F32);
- emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]);
- if (mask & (1 << 2))
- emit_mov(pc, dst[2], t[1]);
- emit_flr(pc, t[1], t[1]);
- if (mask & (1 << 0))
- emit_mov(pc, dst[0], t[1]);
- if (mask & (1 << 1)) {
- t[1]->mod = NV50_MOD_NEG;
- emit_preex2(pc, t[1], t[1]);
- t[1]->mod = 0;
- emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]);
- emit_mul(pc, dst[1], t[0], t[1]);
- }
- if (mask & (1 << 3))
- emit_mov_immdval(pc, dst[3], 1.0f);
- }
- break;
- case TGSI_OPCODE_LRP:
- temp = temp_temp(pc, NULL);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_sub(pc, temp, src[1][c], src[2][c]);
- emit_mad(pc, dst[c], temp, src[0][c], src[2][c]);
- }
- break;
- case TGSI_OPCODE_MAD:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
- }
- break;
- case TGSI_OPCODE_MAX:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_minmax(pc, 0x880, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_MIN:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_minmax(pc, 0x8a0, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_MOV:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mov(pc, dst[c], src[0][c]);
- }
- break;
- case TGSI_OPCODE_MUL:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mul(pc, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_NOT:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_not(pc, dst[c], src[0][c]);
- }
- break;
- case TGSI_OPCODE_POW:
- emit_pow(pc, brdc, src[0][0], src[1][0]);
- break;
- case TGSI_OPCODE_RCP:
- if (!sat && popcnt4(mask) == 1)
- brdc = dst[ffs(mask) - 1];
- emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]);
- break;
- case TGSI_OPCODE_RET:
- if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine)
- nv50_fp_move_results(pc);
- emit_ret(pc, -1, 0);
- break;
- case TGSI_OPCODE_RSQ:
- if (!sat && popcnt4(mask) == 1)
- brdc = dst[ffs(mask) - 1];
- src[0][0]->mod |= NV50_MOD_ABS;
- emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]);
- break;
- case TGSI_OPCODE_SAD:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_sad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
- }
- break;
- case TGSI_OPCODE_SCS:
- temp = temp_temp(pc, NULL);
- if (mask & 3)
- emit_precossin(pc, temp, src[0][0]);
- if (mask & (1 << 0))
- emit_flop(pc, NV50_FLOP_COS, dst[0], temp);
- if (mask & (1 << 1))
- emit_flop(pc, NV50_FLOP_SIN, dst[1], temp);
- if (mask & (1 << 2))
- emit_mov_immdval(pc, dst[2], 0.0);
- if (mask & (1 << 3))
- emit_mov_immdval(pc, dst[3], 1.0);
- break;
- case TGSI_OPCODE_SHL:
- case TGSI_OPCODE_ISHR:
- case TGSI_OPCODE_USHR:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_shift(pc, dst[c], src[0][c], src[1][c],
- inst->Instruction.Opcode);
- }
- break;
- case TGSI_OPCODE_SIN:
- if (mask & 8) {
- emit_precossin(pc, temp, src[0][3]);
- emit_flop(pc, NV50_FLOP_SIN, dst[3], temp);
- if (!(mask &= 7))
- break;
- if (temp == dst[3])
- temp = brdc = temp_temp(pc, NULL);
- }
- emit_precossin(pc, temp, src[0][0]);
- emit_flop(pc, NV50_FLOP_SIN, brdc, temp);
- break;
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_SGT:
- case TGSI_OPCODE_SLE:
- case TGSI_OPCODE_SNE:
- case TGSI_OPCODE_ISLT:
- case TGSI_OPCODE_ISGE:
- case TGSI_OPCODE_USEQ:
- case TGSI_OPCODE_USGE:
- case TGSI_OPCODE_USLT:
- case TGSI_OPCODE_USNE:
- {
- uint8_t cc, ty;
-
- map_tgsi_setop_hw(inst->Instruction.Opcode, &cc, &ty);
-
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_set(pc, cc, dst[c], -1, src[0][c], src[1][c], ty);
- }
- }
- break;
- case TGSI_OPCODE_SUB:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_sub(pc, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_TEX:
- emit_tex(pc, dst, mask, src[0], unit,
- inst->Texture.Texture, FALSE, 0);
- break;
- case TGSI_OPCODE_TXB:
- emit_tex(pc, dst, mask, src[0], unit,
- inst->Texture.Texture, FALSE, -1);
- break;
- case TGSI_OPCODE_TXL:
- emit_tex(pc, dst, mask, src[0], unit,
- inst->Texture.Texture, FALSE, 1);
- break;
- case TGSI_OPCODE_TXP:
- emit_tex(pc, dst, mask, src[0], unit,
- inst->Texture.Texture, TRUE, 0);
- break;
- case TGSI_OPCODE_TRUNC:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, dst[c], src[0][c], -1,
- CVT_TRUNC | CVT_F32_F32 | CVT_RI);
- }
- break;
- case TGSI_OPCODE_U2F:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32);
- }
- break;
- case TGSI_OPCODE_UADD:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_add_b32(pc, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_UMAX:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_minmax(pc, 0x084, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_UMIN:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]);
- }
- break;
- case TGSI_OPCODE_UMAD:
- {
- assert(!temp);
- temp = temp_temp(pc, NULL);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1);
- emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0,
- temp);
- emit_shl_imm(pc, temp, temp, 16);
- emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0,
- temp);
- emit_add_b32(pc, dst[c], temp, src[2][c]);
- }
- }
- break;
- case TGSI_OPCODE_UMUL:
- {
- assert(!temp);
- temp = temp_temp(pc, NULL);
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1);
- emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0,
- temp);
- emit_shl_imm(pc, temp, temp, 16);
- emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0,
- temp);
- }
- }
- break;
- case TGSI_OPCODE_XPD:
- temp = temp_temp(pc, NULL);
- if (mask & (1 << 0)) {
- emit_mul(pc, temp, src[0][2], src[1][1]);
- emit_msb(pc, dst[0], src[0][1], src[1][2], temp);
- }
- if (mask & (1 << 1)) {
- emit_mul(pc, temp, src[0][0], src[1][2]);
- emit_msb(pc, dst[1], src[0][2], src[1][0], temp);
- }
- if (mask & (1 << 2)) {
- emit_mul(pc, temp, src[0][1], src[1][0]);
- emit_msb(pc, dst[2], src[0][0], src[1][1], temp);
- }
- if (mask & (1 << 3))
- emit_mov_immdval(pc, dst[3], 1.0);
- break;
- case TGSI_OPCODE_END:
- if (pc->p->type == PIPE_SHADER_FRAGMENT)
- nv50_fp_move_results(pc);
-
- if (!pc->p->exec_tail ||
- is_immd(pc->p->exec_tail) ||
- is_join(pc->p->exec_tail) ||
- is_control_flow(pc->p->exec_tail))
- emit_nop(pc);
-
- /* last insn must be long so it can have the exit bit set */
- if (!is_long(pc->p->exec_tail))
- convert_to_long(pc, pc->p->exec_tail);
-
- pc->p->exec_tail->inst[1] |= 1; /* set exit bit */
-
- terminate_mbb(pc);
- break;
- default:
- NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
- return FALSE;
- }
-
- if (brdc) {
- if (sat)
- emit_sat(pc, brdc, brdc);
- for (c = 0; c < 4; c++)
- if ((mask & (1 << c)) && dst[c] != brdc)
- emit_mov(pc, dst[c], brdc);
- } else
- if (sat) {
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- /* In this case we saturate later, and dst[c] won't
- * be another temp_temp (and thus lost), since rdst
- * already is TEMP (see above). */
- if (rdst[c]->type == P_TEMP && rdst[c]->index < 0)
- continue;
- emit_sat(pc, rdst[c], dst[c]);
- }
- }
-
- kill_temp_temp(pc, NULL);
- pc->reg_instance_nr = 0;
-
- return TRUE;
-}
-
-static void
-prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
-{
- struct nv50_reg *r, *reg = NULL;
- const struct tgsi_full_src_register *src;
- const struct tgsi_dst_register *dst;
- unsigned i, c, k, mask;
-
- dst = &insn->Dst[0].Register;
- mask = dst->WriteMask;
-
- if (dst->File == TGSI_FILE_TEMPORARY)
- reg = pc->temp;
- else
- if (dst->File == TGSI_FILE_OUTPUT) {
- reg = pc->result;
-
- if (insn->Instruction.Opcode == TGSI_OPCODE_MOV &&
- dst->Index == pc->edgeflag_out &&
- insn->Src[0].Register.File == TGSI_FILE_INPUT)
- pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index;
- }
-
- if (reg) {
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- reg[dst->Index * 4 + c].acc = pc->insn_nr;
- }
- }
-
- for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
- src = &insn->Src[i];
-
- if (src->Register.File == TGSI_FILE_TEMPORARY)
- reg = pc->temp;
- else
- if (src->Register.File == TGSI_FILE_INPUT)
- reg = pc->attr;
- else
- continue;
-
- mask = nv50_tgsi_src_mask(insn, i);
-
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- k = tgsi_util_get_full_src_register_swizzle(src, c);
-
- r = &reg[src->Register.Index * 4 + k];
-
- /* If used before written, pre-allocate the reg,
- * lest we overwrite results from a subroutine.
- */
- if (!r->acc && r->type == P_TEMP)
- alloc_reg(pc, r);
-
- r->acc = pc->insn_nr;
- }
- }
-}
-
-/* Returns a bitmask indicating which dst components need to be
- * written to temporaries first to avoid 'corrupting' sources.
- *
- * m[i] (out) indicates the component to write in the i-th position
- * rdep[c] (in) bitmask of the dst components that require dst[c] as source
- */
-static unsigned
-nv50_revdep_reorder(unsigned m[4], unsigned rdep[4])
-{
- unsigned i, c, x, unsafe = 0;
-
- for (c = 0; c < 4; c++)
- m[c] = c;
-
- /* Swap as long as a dst component written earlier is depended on
- * by one written later, but the next one isn't depended on by it.
- */
- for (c = 0; c < 3; c++) {
- if (rdep[m[c + 1]] & (1 << m[c]))
- continue; /* if next one is depended on by us */
- for (i = c + 1; i < 4; i++)
- /* if we are depended on by a later one */
- if (rdep[m[c]] & (1 << m[i]))
- break;
- if (i == 4)
- continue;
- /* now, swap */
- x = m[c];
- m[c] = m[c + 1];
- m[c + 1] = x;
-
- /* restart */
- c = 0;
- }
-
- /* mark dependencies that could not be resolved by reordering */
- for (i = 0; i < 3; ++i)
- for (c = i + 1; c < 4; ++c)
- if (rdep[m[i]] & (1 << m[c]))
- unsafe |= (1 << i);
-
- /* NOTE: $unsafe is with respect to order, not component */
- return unsafe;
-}
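
/* Editor's illustrative sketch (not part of this change): a standalone copy
 * of the reordering above with one worked example.  If dst.y reads the old
 * value of dst.x (rdep[0] = 0x2), writing y before x (m = {1,0,2,3}) removes
 * the hazard without a temporary, so the returned unsafe mask is 0.
 */
#include <assert.h>

static unsigned revdep_reorder(unsigned m[4], unsigned rdep[4])
{
        unsigned i, c, x, unsafe = 0;

        for (c = 0; c < 4; c++)
                m[c] = c;
        for (c = 0; c < 3; c++) {
                if (rdep[m[c + 1]] & (1 << m[c]))
                        continue;
                for (i = c + 1; i < 4; i++)
                        if (rdep[m[c]] & (1 << m[i]))
                                break;
                if (i == 4)
                        continue;
                x = m[c];
                m[c] = m[c + 1];
                m[c + 1] = x;
                c = 0;
        }
        for (i = 0; i < 3; ++i)
                for (c = i + 1; c < 4; ++c)
                        if (rdep[m[i]] & (1 << m[c]))
                                unsafe |= (1 << i);
        return unsafe;
}

int main(void)
{
        unsigned m[4], rdep[4] = { 0x2, 0, 0, 0 }; /* dst.y depends on dst.x */

        assert(revdep_reorder(m, rdep) == 0);
        assert(m[0] == 1 && m[1] == 0); /* y is now written before x */
        return 0;
}
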
-
-/* Select a suitable dst register for broadcasting scalar results,
- * or return NULL if we have to allocate an extra TEMP.
- *
- * If e.g. only 1 component is written, we may also emit the final
- * result to a write-only register.
- */
-static struct nv50_reg *
-tgsi_broadcast_dst(struct nv50_pc *pc,
- const struct tgsi_full_dst_register *fd, unsigned mask)
-{
- if (fd->Register.File == TGSI_FILE_TEMPORARY) {
- int c = ffs(~mask & fd->Register.WriteMask);
- if (c)
- return tgsi_dst(pc, c - 1, fd);
- } else {
- int c = ffs(fd->Register.WriteMask) - 1;
- if ((1 << c) == fd->Register.WriteMask)
- return tgsi_dst(pc, c, fd);
- }
-
- return NULL;
-}
-
-/* Scan source swizzles and return a bitmask indicating dst regs that
- * also occur among the src regs, and fill rdep for nv50_revdep_reoder.
- */
-static unsigned
-nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
- unsigned rdep[4])
-{
- const struct tgsi_full_dst_register *fd = &insn->Dst[0];
- const struct tgsi_full_src_register *fs;
- unsigned i, deqs = 0;
-
- for (i = 0; i < 4; ++i)
- rdep[i] = 0;
-
- for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
- unsigned chn, mask = nv50_tgsi_src_mask(insn, i);
- int ms = get_supported_mods(insn, i);
-
- fs = &insn->Src[i];
- if (fs->Register.File != fd->Register.File ||
- fs->Register.Index != fd->Register.Index)
- continue;
-
- for (chn = 0; chn < 4; ++chn) {
- unsigned s, c;
-
- if (!(mask & (1 << chn))) /* src is not read */
- continue;
- c = tgsi_util_get_full_src_register_swizzle(fs, chn);
- s = tgsi_util_get_full_src_register_sign_mode(fs, chn);
-
- if (!(fd->Register.WriteMask & (1 << c)))
- continue;
-
- if (s == TGSI_UTIL_SIGN_TOGGLE && !(ms & NV50_MOD_NEG))
- continue;
- if (s == TGSI_UTIL_SIGN_CLEAR && !(ms & NV50_MOD_ABS))
- continue;
- if ((s == TGSI_UTIL_SIGN_SET) && ((ms & 3) != 3))
- continue;
-
- rdep[c] |= nv50_tgsi_dst_revdep(
- insn->Instruction.Opcode, i, chn);
- deqs |= (1 << c);
- }
- }
-
- return deqs;
-}
-
-static boolean
-nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
-{
- struct tgsi_full_instruction insn = tok->FullInstruction;
- const struct tgsi_full_dst_register *fd;
- unsigned i, deqs, rdep[4], m[4];
-
- fd = &tok->FullInstruction.Dst[0];
- deqs = nv50_tgsi_scan_swizzle(&insn, rdep);
-
- if (is_scalar_op(insn.Instruction.Opcode)) {
- pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs);
- if (!pc->r_brdc)
- pc->r_brdc = temp_temp(pc, NULL);
- return nv50_program_tx_insn(pc, &insn);
- }
- pc->r_brdc = NULL;
-
- if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3]))
- return nv50_program_tx_insn(pc, &insn);
-
- deqs = nv50_revdep_reorder(m, rdep);
-
- for (i = 0; i < 4; ++i) {
- assert(pc->r_dst[m[i]] == NULL);
-
- insn.Dst[0].Register.WriteMask =
- fd->Register.WriteMask & (1 << m[i]);
-
- if (!insn.Dst[0].Register.WriteMask)
- continue;
-
- if (deqs & (1 << i))
- pc->r_dst[m[i]] = alloc_temp(pc, NULL);
-
- if (!nv50_program_tx_insn(pc, &insn))
- return FALSE;
- }
-
- for (i = 0; i < 4; i++) {
- struct nv50_reg *reg = pc->r_dst[i];
- if (!reg)
- continue;
- pc->r_dst[i] = NULL;
-
- if (insn.Instruction.Saturate == TGSI_SAT_ZERO_ONE)
- emit_sat(pc, tgsi_dst(pc, i, fd), reg);
- else
- emit_mov(pc, tgsi_dst(pc, i, fd), reg);
- free_temp(pc, reg);
- }
-
- return TRUE;
-}
-
-static void
-load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
-{
- struct nv50_reg *iv, **ppiv;
- unsigned mode = pc->interp_mode[reg->index];
-
- ppiv = (mode & INTERP_CENTROID) ? &pc->iv_c : &pc->iv_p;
- iv = *ppiv;
-
- if ((mode & INTERP_PERSPECTIVE) && !iv) {
- iv = *ppiv = alloc_temp(pc, NULL);
- iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1;
-
- emit_interp(pc, iv, NULL, mode & INTERP_CENTROID);
- emit_flop(pc, NV50_FLOP_RCP, iv, iv);
-
- /* XXX: when loading interpolants dynamically, move these
- * to the program head, or make sure they can't be skipped.
- */
- }
-
- emit_interp(pc, reg, iv, mode);
-}
-
-/* The face input is always at v[255] (varying space), with a
- * value of 0 for back-facing, and 0xffffffff for front-facing.
- */
-static void
-load_frontfacing(struct nv50_pc *pc, struct nv50_reg *sv)
-{
- struct nv50_reg *temp = alloc_temp(pc, NULL);
- int r_pred = 0;
-
- temp->rhw = 255;
- emit_interp(pc, temp, NULL, INTERP_FLAT);
-
- emit_cvt(pc, sv, temp, r_pred, CVT_ABS | CVT_F32_S32);
-
- emit_not(pc, temp, temp);
- set_pred(pc, 0x2, r_pred, pc->p->exec_tail);
- emit_cvt(pc, sv, temp, -1, CVT_F32_S32);
- set_pred(pc, 0x2, r_pred, pc->p->exec_tail);
-
- free_temp(pc, temp);
-}
-
-static void
-load_instance_id(struct nv50_pc *pc, unsigned index)
-{
- struct nv50_reg reg, mem;
-
- ctor_reg(&reg, P_TEMP, -1, -1);
- ctor_reg(&mem, P_CONST, -1, 24); /* startInstance */
- mem.buf_index = 2;
-
- emit_add_b32(pc, &reg, &pc->sysval[index], &mem);
- pc->sysval[index] = reg;
-}
-
-static void
-copy_semantic_info(struct nv50_program *p)
-{
- unsigned i, id;
-
- for (i = 0; i < p->cfg.in_nr; ++i) {
- id = p->cfg.in[i].id;
- p->cfg.in[i].sn = p->info.input_semantic_name[id];
- p->cfg.in[i].si = p->info.input_semantic_index[id];
- }
-
- for (i = 0; i < p->cfg.out_nr; ++i) {
- id = p->cfg.out[i].id;
- p->cfg.out[i].sn = p->info.output_semantic_name[id];
- p->cfg.out[i].si = p->info.output_semantic_index[id];
- }
-}
-
-static boolean
-nv50_program_tx_prep(struct nv50_pc *pc)
-{
- struct tgsi_parse_context tp;
- struct nv50_program *p = pc->p;
- boolean ret = FALSE;
- unsigned i, c, instance_id = 0, vertex_id = 0, flat_nr = 0;
-
- tgsi_parse_init(&tp, pc->p->pipe.tokens);
- while (!tgsi_parse_end_of_tokens(&tp)) {
- const union tgsi_full_token *tok = &tp.FullToken;
-
- tgsi_parse_token(&tp);
- switch (tok->Token.Type) {
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- {
- const struct tgsi_full_immediate *imm =
- &tp.FullToken.FullImmediate;
-
- ctor_immd_4f32(pc, imm->u[0].Float,
- imm->u[1].Float,
- imm->u[2].Float,
- imm->u[3].Float);
- }
- break;
- case TGSI_TOKEN_TYPE_DECLARATION:
- {
- const struct tgsi_full_declaration *d;
- unsigned si, last, first, mode;
-
- d = &tp.FullToken.FullDeclaration;
- first = d->Range.First;
- last = d->Range.Last;
-
- switch (d->Declaration.File) {
- case TGSI_FILE_TEMPORARY:
- break;
- case TGSI_FILE_OUTPUT:
- if (!d->Declaration.Semantic ||
- p->type == PIPE_SHADER_FRAGMENT)
- break;
-
- si = d->Semantic.Index;
- switch (d->Semantic.Name) {
- case TGSI_SEMANTIC_BCOLOR:
- p->cfg.two_side[si].hw = first;
- if (p->cfg.out_nr > first)
- p->cfg.out_nr = first;
- break;
- case TGSI_SEMANTIC_PSIZE:
- p->cfg.psiz = first;
- if (p->cfg.out_nr > first)
- p->cfg.out_nr = first;
- break;
- case TGSI_SEMANTIC_EDGEFLAG:
- pc->edgeflag_out = first;
- break;
- /*
- case TGSI_SEMANTIC_CLIP_DISTANCE:
- p->cfg.clpd = MIN2(p->cfg.clpd, first);
- break;
- */
- default:
- break;
- }
- break;
- case TGSI_FILE_INPUT:
- {
- if (p->type != PIPE_SHADER_FRAGMENT)
- break;
-
- switch (d->Declaration.Interpolate) {
- case TGSI_INTERPOLATE_CONSTANT:
- mode = INTERP_FLAT;
- flat_nr++;
- break;
- case TGSI_INTERPOLATE_PERSPECTIVE:
- mode = INTERP_PERSPECTIVE;
- p->cfg.regs[1] |= 0x08 << 24;
- break;
- default:
- mode = INTERP_LINEAR;
- break;
- }
- if (d->Declaration.Centroid)
- mode |= INTERP_CENTROID;
-
- assert(last < 32);
- for (i = first; i <= last; i++)
- pc->interp_mode[i] = mode;
- }
- break;
- case TGSI_FILE_SYSTEM_VALUE:
- assert(d->Declaration.Semantic);
- switch (d->Semantic.Name) {
- case TGSI_SEMANTIC_FACE:
- assert(p->type == PIPE_SHADER_FRAGMENT);
- load_frontfacing(pc,
- &pc->sysval[first]);
- break;
- case TGSI_SEMANTIC_INSTANCEID:
- assert(p->type == PIPE_SHADER_VERTEX);
- instance_id = first;
- p->cfg.regs[0] |= (1 << 4);
- break;
- case TGSI_SEMANTIC_PRIMID:
- assert(p->type != PIPE_SHADER_VERTEX);
- p->cfg.prim_id = first;
- break;
- /*
- case TGSI_SEMANTIC_PRIMIDIN:
- assert(p->type == PIPE_SHADER_GEOMETRY);
- pc->sysval[first].hw = 6;
- p->cfg.regs[0] |= (1 << 8);
- break;
- case TGSI_SEMANTIC_VERTEXID:
- assert(p->type == PIPE_SHADER_VERTEX);
- vertex_id = first;
- p->cfg.regs[0] |= (1 << 12) | (1 << 0);
- break;
- */
- }
- break;
- case TGSI_FILE_ADDRESS:
- case TGSI_FILE_CONSTANT:
- case TGSI_FILE_SAMPLER:
- break;
- default:
- NOUVEAU_ERR("bad decl file %d\n",
- d->Declaration.File);
- goto out_err;
- }
- }
- break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- pc->insn_nr++;
- prep_inspect_insn(pc, &tok->FullInstruction);
- break;
- default:
- break;
- }
- }
-
- if (p->type == PIPE_SHADER_VERTEX || p->type == PIPE_SHADER_GEOMETRY) {
- int rid = 0;
-
- if (p->type == PIPE_SHADER_GEOMETRY) {
- for (i = 0; i < pc->attr_nr; ++i) {
- p->cfg.in[i].hw = rid;
- p->cfg.in[i].id = i;
-
- for (c = 0; c < 4; ++c) {
- int n = i * 4 + c;
- if (!pc->attr[n].acc)
- continue;
- pc->attr[n].hw = rid++;
- p->cfg.in[i].mask |= 1 << c;
- }
- }
- } else {
- for (i = 0; i < pc->attr_nr * 4; ++i) {
- if (pc->attr[i].acc) {
- pc->attr[i].hw = rid++;
- p->cfg.attr[i / 32] |= 1 << (i % 32);
- }
- }
- if (p->cfg.regs[0] & (1 << 0))
- pc->sysval[vertex_id].hw = rid++;
- if (p->cfg.regs[0] & (1 << 4)) {
- pc->sysval[instance_id].hw = rid++;
- load_instance_id(pc, instance_id);
- }
- }
-
- for (i = 0, rid = 0; i < pc->result_nr; ++i) {
- p->cfg.out[i].hw = rid;
- p->cfg.out[i].id = i;
-
- for (c = 0; c < 4; ++c) {
- int n = i * 4 + c;
- if (!pc->result[n].acc)
- continue;
- pc->result[n].hw = rid++;
- p->cfg.out[i].mask |= 1 << c;
- }
- }
- if (p->cfg.prim_id < 0x40) {
- /* GP has to write to PrimitiveID */
- ctor_reg(&pc->sysval[p->cfg.prim_id],
- P_RESULT, p->cfg.prim_id, rid);
- p->cfg.prim_id = rid++;
- }
-
- for (c = 0; c < 2; ++c)
- if (p->cfg.two_side[c].hw < 0x40)
- p->cfg.two_side[c] = p->cfg.out[
- p->cfg.two_side[c].hw];
-
- if (p->cfg.psiz < 0x40)
- p->cfg.psiz = p->cfg.out[p->cfg.psiz].hw;
-
- copy_semantic_info(p);
- } else
- if (p->type == PIPE_SHADER_FRAGMENT) {
- int rid = 0, aid;
- unsigned n = 0, m = pc->attr_nr - flat_nr;
-
- pc->allow32 = TRUE;
-
- /* do we read FragCoord ? */
- if (pc->attr_nr &&
- p->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
- /* select FCRD components we want accessible */
- for (c = 0; c < 4; ++c)
- if (pc->attr[c].acc)
- p->cfg.regs[1] |= 1 << (24 + c);
- aid = 0;
- } else /* offset by 1 if FCRD.w is needed for pinterp */
- aid = popcnt4(p->cfg.regs[1] >> 24);
-
- /* non-flat interpolants have to be mapped to
- * the lower hardware IDs, so sort them:
- */
- for (i = 0; i < pc->attr_nr; i++) {
- if (pc->interp_mode[i] == INTERP_FLAT)
- p->cfg.in[m++].id = i;
- else {
- if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE))
- p->cfg.in[n].linear = TRUE;
- p->cfg.in[n++].id = i;
- }
- }
- copy_semantic_info(p);
-
- for (n = 0; n < pc->attr_nr; ++n) {
- p->cfg.in[n].hw = rid = aid;
- i = p->cfg.in[n].id;
-
- if (p->info.input_semantic_name[i] ==
- TGSI_SEMANTIC_FACE) {
- load_frontfacing(pc, &pc->attr[i * 4]);
- continue;
- }
-
- for (c = 0; c < 4; ++c) {
- if (!pc->attr[i * 4 + c].acc)
- continue;
- pc->attr[i * 4 + c].rhw = rid++;
- p->cfg.in[n].mask |= 1 << c;
-
- load_interpolant(pc, &pc->attr[i * 4 + c]);
- }
- aid += popcnt4(p->cfg.in[n].mask);
- }
-
- m = popcnt4(p->cfg.regs[1] >> 24);
-
- /* set count of non-position inputs and of non-flat
- * non-position inputs for FP_INTERPOLANT_CTRL
- */
- p->cfg.regs[1] |= aid - m;
-
- if (flat_nr) {
- i = p->cfg.in[pc->attr_nr - flat_nr].hw;
- p->cfg.regs[1] |= (i - m) << 16;
- } else
- p->cfg.regs[1] |= p->cfg.regs[1] << 16;
-
- /* mark color semantic for light-twoside */
- n = 0x80;
- for (i = 0; i < p->cfg.in_nr; i++) {
- if (p->cfg.in[i].sn == TGSI_SEMANTIC_COLOR) {
- n = MIN2(n, p->cfg.in[i].hw - m);
- p->cfg.two_side[p->cfg.in[i].si] = p->cfg.in[i];
-
- p->cfg.regs[0] += /* increase colour count */
- popcnt4(p->cfg.in[i].mask) << 16;
- }
- }
- if (n < 0x80)
- p->cfg.regs[0] += n;
-
- if (p->cfg.prim_id < 0x40) {
- pc->sysval[p->cfg.prim_id].rhw = rid++;
- emit_interp(pc, &pc->sysval[p->cfg.prim_id], NULL,
- INTERP_FLAT);
- /* increase FP_INTERPOLANT_CTRL_COUNT */
- p->cfg.regs[1] += 1;
- }
-
- /* Initialize FP results:
- * FragDepth is always first TGSI and last hw output
- */
- i = p->info.writes_z ? 4 : 0;
- for (rid = 0; i < pc->result_nr * 4; i++)
- pc->result[i].rhw = rid++;
- if (p->info.writes_z)
- pc->result[2].rhw = rid++;
-
- p->cfg.high_result = rid;
-
- /* separate/different colour results for MRTs ? */
- if (pc->result_nr - (p->info.writes_z ? 1 : 0) > 1)
- p->cfg.regs[2] |= 1;
- }
-
- if (pc->immd_nr) {
- int rid = 0;
-
- pc->immd = MALLOC(pc->immd_nr * 4 * sizeof(struct nv50_reg));
- if (!pc->immd)
- goto out_err;
-
- for (i = 0; i < pc->immd_nr; i++) {
- for (c = 0; c < 4; c++, rid++)
- ctor_reg(&pc->immd[rid], P_IMMD, i, rid);
- }
- }
-
- ret = TRUE;
-out_err:
- if (pc->iv_p)
- free_temp(pc, pc->iv_p);
- if (pc->iv_c)
- free_temp(pc, pc->iv_c);
-
- tgsi_parse_free(&tp);
- return ret;
-}
-
-static void
-free_nv50_pc(struct nv50_pc *pc)
-{
- if (pc->immd)
- FREE(pc->immd);
- if (pc->param)
- FREE(pc->param);
- if (pc->result)
- FREE(pc->result);
- if (pc->attr)
- FREE(pc->attr);
- if (pc->temp)
- FREE(pc->temp);
- if (pc->sysval)
- FREE(pc->sysval);
- if (pc->insn_pos)
- FREE(pc->insn_pos);
-
- FREE(pc);
-}
-
-static INLINE uint32_t
-nv50_map_gs_output_prim(unsigned pprim)
-{
- switch (pprim) {
- case PIPE_PRIM_POINTS:
- return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
- case PIPE_PRIM_LINE_STRIP:
- return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
- case PIPE_PRIM_TRIANGLE_STRIP:
- return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
- default:
- NOUVEAU_ERR("invalid GS_OUTPUT_PRIMITIVE: %u\n", pprim);
- abort();
- return 0;
- }
-}
-
-static boolean
-ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
-{
- int i, c;
- unsigned rtype[2] = { P_ATTR, P_RESULT };
-
- pc->p = p;
- pc->temp_nr = p->info.file_max[TGSI_FILE_TEMPORARY] + 1;
- pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1;
- pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1;
- pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
- pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1;
- assert(pc->addr_nr <= 2);
- pc->sysval_nr = p->info.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
-
- p->cfg.high_temp = 4;
-
- p->cfg.two_side[0].hw = 0x40;
- p->cfg.two_side[1].hw = 0x40;
- p->cfg.prim_id = 0x40;
-
- p->cfg.edgeflag_in = pc->edgeflag_out = 0xff;
-
- for (i = 0; i < p->info.num_properties; ++i) {
- unsigned *data = &p->info.properties[i].data[0];
-
- switch (p->info.properties[i].name) {
- case TGSI_PROPERTY_GS_OUTPUT_PRIM:
- p->cfg.prim_type = nv50_map_gs_output_prim(data[0]);
- break;
- case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
- p->cfg.vert_count = data[0];
- break;
- default:
- break;
- }
- }
-
- switch (p->type) {
- case PIPE_SHADER_VERTEX:
- p->cfg.psiz = 0x40;
- p->cfg.clpd = 0x40;
- p->cfg.out_nr = pc->result_nr;
- break;
- case PIPE_SHADER_GEOMETRY:
- assert(p->cfg.prim_type);
- assert(p->cfg.vert_count);
-
- p->cfg.psiz = 0x80;
- p->cfg.clpd = 0x80;
- p->cfg.prim_id = 0x80;
- p->cfg.out_nr = pc->result_nr;
- p->cfg.in_nr = pc->attr_nr;
-
- p->cfg.two_side[0].hw = 0x80;
- p->cfg.two_side[1].hw = 0x80;
- break;
- case PIPE_SHADER_FRAGMENT:
- rtype[0] = rtype[1] = P_TEMP;
-
- p->cfg.regs[0] = 0x01000004;
- p->cfg.in_nr = pc->attr_nr;
-
- if (p->info.writes_z) {
- p->cfg.regs[2] |= 0x00000100;
- p->cfg.regs[3] |= 0x00000011;
- }
- if (p->info.uses_kill)
- p->cfg.regs[2] |= 0x00100000;
- break;
- }
-
- if (pc->temp_nr) {
- pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg));
- if (!pc->temp)
- return FALSE;
-
- for (i = 0; i < pc->temp_nr * 4; ++i)
- ctor_reg(&pc->temp[i], P_TEMP, i / 4, -1);
- }
-
- if (pc->attr_nr) {
- pc->attr = MALLOC(pc->attr_nr * 4 * sizeof(struct nv50_reg));
- if (!pc->attr)
- return FALSE;
-
- for (i = 0; i < pc->attr_nr * 4; ++i)
- ctor_reg(&pc->attr[i], rtype[0], i / 4, -1);
- }
-
- if (pc->result_nr) {
- unsigned nr = pc->result_nr * 4;
-
- pc->result = MALLOC(nr * sizeof(struct nv50_reg));
- if (!pc->result)
- return FALSE;
-
- for (i = 0; i < nr; ++i)
- ctor_reg(&pc->result[i], rtype[1], i / 4, -1);
- }
-
- if (pc->param_nr) {
- int rid = 0;
-
- pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg));
- if (!pc->param)
- return FALSE;
-
- for (i = 0; i < pc->param_nr; ++i)
- for (c = 0; c < 4; ++c, ++rid)
- ctor_reg(&pc->param[rid], P_CONST, i, rid);
- }
-
- if (pc->addr_nr) {
- pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *));
- if (!pc->addr)
- return FALSE;
- }
- for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
- ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
-
- if (pc->sysval_nr) {
- pc->sysval = CALLOC(pc->sysval_nr, sizeof(struct nv50_reg *));
- if (!pc->sysval)
- return FALSE;
- /* will only ever use SYSTEM_VALUE[i].x (hopefully) */
- for (i = 0; i < pc->sysval_nr; ++i)
- ctor_reg(&pc->sysval[i], rtype[0], i, -1);
- }
-
- return TRUE;
-}
-
-static void
-nv50_program_fixup_insns(struct nv50_pc *pc)
-{
- struct nv50_program_exec *e, **bra_list;
- unsigned i, n, pos;
-
- bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *));
-
- /* Collect branch instructions, we need to adjust their offsets
- * when converting 32 bit instructions to 64 bit ones
- */
- for (n = 0, e = pc->p->exec_head; e; e = e->next)
- if (e->param.index >= 0 && !e->param.mask)
- bra_list[n++] = e;
-
- /* Make sure we don't have any single 32 bit instructions. */
- for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
- pos += is_long(e) ? 2 : 1;
-
- if ((pos & 1) && (!e->next || is_long(e->next))) {
- for (i = 0; i < n; ++i)
- if (bra_list[i]->param.index >= pos)
- bra_list[i]->param.index += 1;
- for (i = 0; i < pc->insn_nr; ++i)
- if (pc->insn_pos[i] >= pos)
- pc->insn_pos[i] += 1;
- convert_to_long(pc, e);
- ++pos;
- }
- }
-
- FREE(bra_list);
-
- if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL])
- return;
-
- /* fill in CALL offsets */
- for (e = pc->p->exec_head; e; e = e->next) {
- if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2)
- e->param.index = pc->insn_pos[e->param.index];
- }
-}
-
-static boolean
-nv50_program_tx(struct nv50_program *p)
-{
- struct tgsi_parse_context parse;
- struct nv50_pc *pc;
- boolean ret;
-
- pc = CALLOC_STRUCT(nv50_pc);
- if (!pc)
- return FALSE;
-
- ret = ctor_nv50_pc(pc, p);
- if (ret == FALSE)
- goto out_cleanup;
-
- ret = nv50_program_tx_prep(pc);
- if (ret == FALSE)
- goto out_cleanup;
-
- pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned));
-
- tgsi_parse_init(&parse, pc->p->pipe.tokens);
- while (!tgsi_parse_end_of_tokens(&parse)) {
- const union tgsi_full_token *tok = &parse.FullToken;
-
- /* previously allow32 was FALSE for first & last instruction */
- pc->allow32 = TRUE;
-
- tgsi_parse_token(&parse);
-
- switch (tok->Token.Type) {
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- pc->insn_pos[pc->insn_cur] = pc->p->exec_size;
- ++pc->insn_cur;
- ret = nv50_tgsi_insn(pc, tok);
- if (ret == FALSE)
- goto out_err;
- break;
- default:
- break;
- }
- }
-
- nv50_program_fixup_insns(pc);
-
- p->param_nr = pc->param_nr * 4;
- p->immd_nr = pc->immd_nr * 4;
- p->immd = pc->immd_buf;
-
-out_err:
- tgsi_parse_free(&parse);
-
-out_cleanup:
- free_nv50_pc(pc);
- return ret;
-}
-
-static void
-nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
-{
- if (nv50_program_tx(p) == FALSE)
- assert(0);
- p->translated = TRUE;
-}
-
-static void
-nv50_program_upload_data(struct nv50_context *nv50, uint32_t *map,
- unsigned start, unsigned count, unsigned cbuf)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
-
- while (count) {
- unsigned nr = count > 2047 ? 2047 : count;
-
- BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
- OUT_RING (chan, (cbuf << 0) | (start << 8));
- BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr);
- OUT_RINGp (chan, map, nr);
-
- map += nr;
- start += nr;
- count -= nr;
- }
-}
-
-static void
-nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
-{
- struct pipe_context *pipe = &nv50->pipe;
- struct pipe_transfer *transfer;
-
- if (!p->data[0] && p->immd_nr) {
- struct nouveau_resource *heap = nv50->screen->immd_heap;
-
- if (nouveau_resource_alloc(heap, p->immd_nr, p, &p->data[0])) {
- while (heap->next && heap->size < p->immd_nr) {
- struct nv50_program *evict = heap->next->priv;
- nouveau_resource_free(&evict->data[0]);
- }
-
- if (nouveau_resource_alloc(heap, p->immd_nr, p,
- &p->data[0]))
- assert(0);
- }
-
- /* immediates only need to be uploaded again when freed */
- nv50_program_upload_data(nv50, p->immd, p->data[0]->start,
- p->immd_nr, NV50_CB_PMISC);
- }
-
- assert(p->param_nr <= 16384);
-
- if (p->param_nr) {
- unsigned cb;
- uint32_t *map = pipe_buffer_map(pipe,
- nv50->constbuf[p->type],
- PIPE_TRANSFER_READ,
- &transfer);
- switch (p->type) {
- case PIPE_SHADER_GEOMETRY: cb = NV50_CB_PGP; break;
- case PIPE_SHADER_FRAGMENT: cb = NV50_CB_PFP; break;
- default:
- cb = NV50_CB_PVP;
- assert(p->type == PIPE_SHADER_VERTEX);
- break;
- }
-
- nv50_program_upload_data(nv50, map, 0, p->param_nr, cb);
- pipe_buffer_unmap(pipe, nv50->constbuf[p->type],
- transfer);
- }
-}
-
-static void
-nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_program_exec *e;
- uint32_t *up, i;
- boolean upload = FALSE;
- unsigned offset;
- int width;
-
- if (!p->bo) {
- nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100,
- p->exec_size * 4, &p->bo);
- upload = TRUE;
- }
-
- if (p->data[0] && p->data[0]->start != p->data_start[0])
- upload = TRUE;
-
- if (!upload)
- return;
-
- up = MALLOC(p->exec_size * 4);
-
- for (i = 0, e = p->exec_head; e; e = e->next) {
- unsigned ei, ci, bs;
-
- if (e->param.index >= 0 && e->param.mask) {
- bs = (e->inst[1] >> 22) & 0x07;
- assert(bs < 2);
- ei = e->param.shift >> 5;
- ci = e->param.index;
- if (bs == 0)
- ci += p->data[bs]->start;
-
- e->inst[ei] &= ~e->param.mask;
- e->inst[ei] |= (ci << e->param.shift);
- } else
- if (e->param.index >= 0) {
- /* zero mask means param is a jump/branch offset */
- assert(!(e->param.index & 1));
- /* seem to be 8 byte steps */
- ei = (e->param.index >> 1) + 0 /* START_ID */;
-
- e->inst[0] &= 0xf0000fff;
- e->inst[0] |= ei << 12;
- }
-
- up[i++] = e->inst[0];
- if (is_long(e))
- up[i++] = e->inst[1];
- }
- assert(i == p->exec_size);
-
- if (p->data[0])
- p->data_start[0] = p->data[0]->start;
-
-#ifdef NV50_PROGRAM_DUMP
- NOUVEAU_ERR("-------\n");
- for (e = p->exec_head; e; e = e->next) {
- NOUVEAU_ERR("0x%08x\n", e->inst[0]);
- if (is_long(e))
- NOUVEAU_ERR("0x%08x\n", e->inst[1]);
- }
-#endif
-
- /* SIFC_HEIGHT/SIFC_WIDTH of 65536 do not work, and are not reported
- * as data error either. hw bug ? */
-#define SIFC_MAX_WIDTH (65536 - 256)
- offset = 0;
- width = p->exec_size * 4;
- while (width > 0) {
- nv50_upload_sifc(nv50, p->bo, offset, NOUVEAU_BO_VRAM,
- NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144,
- &up[offset / 4], NV50_2D_SIFC_FORMAT_R8_UNORM,
- 0, 0, 0, MIN2(SIFC_MAX_WIDTH, width), 1, 1);
- width -= SIFC_MAX_WIDTH;
- offset += SIFC_MAX_WIDTH;
- }
- BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
- OUT_RING (chan, 0);
-
- FREE(up);
-}
-
-struct nouveau_stateobj *
-nv50_vertprog_validate(struct nv50_context *nv50)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_program *p = nv50->vertprog;
- struct nouveau_stateobj *so;
-
- if (!p->translated) {
- nv50_program_validate(nv50, p);
- if (!p->translated)
- assert(0);
- }
-
- nv50_program_validate_data(nv50, p);
- nv50_program_validate_code(nv50, p);
-
- if (!(nv50->dirty & NV50_NEW_VERTPROG))
- return NULL;
-
- so = so_new(5, 7, 2);
- so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
- so_data (so, p->cfg.attr[0]);
- so_data (so, p->cfg.attr[1]);
- so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
- so_data (so, p->cfg.high_result);
- so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
- so_data (so, p->cfg.high_temp);
- so_method(so, tesla, NV50TCL_VP_START_ID, 1);
- so_data (so, 0); /* program start offset */
- return so;
-}
-
-struct nouveau_stateobj *
-nv50_fragprog_validate(struct nv50_context *nv50)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_program *p = nv50->fragprog;
- struct nouveau_stateobj *so;
-
- if (!p->translated) {
- nv50_program_validate(nv50, p);
- if (!p->translated)
- assert(0);
- }
-
- nv50_program_validate_data(nv50, p);
- nv50_program_validate_code(nv50, p);
-
- if (!(nv50->dirty & NV50_NEW_FRAGPROG))
- return NULL;
-
- so = so_new(6, 7, 2);
- so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
- so_data (so, p->cfg.high_temp);
- so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
- so_data (so, p->cfg.high_result);
- so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
- so_data (so, p->cfg.regs[2]);
- so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
- so_data (so, p->cfg.regs[3]);
- so_method(so, tesla, NV50TCL_FP_START_ID, 1);
- so_data (so, 0); /* program start offset */
- return so;
-}
-
-struct nouveau_stateobj *
-nv50_geomprog_validate(struct nv50_context *nv50)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_program *p = nv50->geomprog;
- struct nouveau_stateobj *so;
-
- if (!p->translated) {
- nv50_program_validate(nv50, p);
- if (!p->translated)
- assert(0);
- }
-
- nv50_program_validate_data(nv50, p);
- nv50_program_validate_code(nv50, p);
-
- if (!(nv50->dirty & NV50_NEW_GEOMPROG))
- return NULL;
-
- so = so_new(6, 7, 2);
- so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
- so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
- so_data (so, p->cfg.high_temp);
- so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
- so_data (so, p->cfg.high_result);
- so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
- so_data (so, p->cfg.prim_type);
- so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
- so_data (so, p->cfg.vert_count);
- so_method(so, tesla, NV50TCL_GP_START_ID, 1);
- so_data (so, 0);
- return so;
-}
-
-static uint32_t
-nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
-{
- struct nv50_program *vp;
- struct nv50_program *fp = nv50->fragprog;
- unsigned i, c, m = base;
- uint32_t origin = 0x00000010;
-
- vp = nv50->geomprog ? nv50->geomprog : nv50->vertprog;
-
- /* XXX: this might not work correctly in all cases yet - we'll
- * just assume that an FP generic input that is not written in
- * the VP is PointCoord.
- */
- memset(pntc, 0, 8 * sizeof(uint32_t));
-
- for (i = 0; i < fp->cfg.in_nr; i++) {
- unsigned j, n = popcnt4(fp->cfg.in[i].mask);
-
- if (fp->cfg.in[i].sn != TGSI_SEMANTIC_GENERIC) {
- m += n;
- continue;
- }
-
- for (j = 0; j < vp->cfg.out_nr; ++j)
- if (vp->cfg.out[j].sn == fp->cfg.in[i].sn &&
- vp->cfg.out[j].si == fp->cfg.in[i].si)
- break;
-
- if (j < vp->info.num_outputs) {
- ubyte enable =
- (nv50->rasterizer->pipe.sprite_coord_enable >> vp->cfg.out[j].si) & 1;
-
- if (enable == 0) {
- m += n;
- continue;
- }
- }
-
- /* this is either PointCoord or replaced by sprite coords */
- for (c = 0; c < 4; c++) {
- if (!(fp->cfg.in[i].mask & (1 << c)))
- continue;
- pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
- ++m;
- }
- }
- return (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT ? 0 : origin);
+nv50_fragprog_prepare(struct nv50_translation_info *ti)
+{
+ struct nv50_program *p = ti->p;
+ int i, j, c;
+ unsigned nvary, nintp, depr;
+ unsigned n = 0, m = 0, skip = 0;
+ ubyte sn[16], si[16];
+
+ /* FP flags */
+
+ if (ti->scan.writes_z) {
+ p->fp.flags[1] = 0x11;
+ p->fp.flags[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z;
+ }
+
+ if (ti->scan.uses_kill)
+ p->fp.flags[0] |= NV50TCL_FP_CONTROL_USES_KIL;
+
+ /* FP inputs */
+
+ ti->input_file = NV_FILE_MEM_V;
+ ti->output_file = NV_FILE_GPR;
+
+ /* count non-flat inputs, save semantic info */
+ for (i = 0; i < p->in_nr; ++i) {
+ m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1;
+ sn[i] = p->in[i].sn;
+ si[i] = p->in[i].si;
+ }
+
+ /* reorder p->in[] so that non-flat inputs are first and
+ * kick out special inputs that don't use VP/GP_RESULT_MAP
+ */
+ nintp = 0;
+ for (i = 0; i < p->in_nr; ++i) {
+ if (sn[i] == TGSI_SEMANTIC_POSITION) {
+ for (c = 0; c < 4; ++c) {
+ ti->input_map[i][c] = nintp;
+ if (ti->input_access[i][c]) {
+ p->fp.interp |= 1 << (24 + c);
+ ++nintp;
+ }
+ }
+ skip++;
+ continue;
+ } else
+ if (sn[i] == TGSI_SEMANTIC_FACE) {
+ ti->input_map[i][0] = 255;
+ skip++;
+ continue;
+ }
+
+ j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++;
+
+ if (sn[i] == TGSI_SEMANTIC_COLOR)
+ p->vp.bfc[si[i]] = j;
+
+ p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0;
+ p->in[j].id = i;
+ p->in[j].sn = sn[i];
+ p->in[j].si = si[i];
+ }
+ assert(n <= m);
+ p->in_nr -= skip;
+
+ if (!(p->fp.interp & (8 << 24))) {
+ p->fp.interp |= (8 << 24);
+ ++nintp;
+ }
+
+ p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */
+
+ for (i = 0; i < p->in_nr; ++i) {
+ int j = p->in[i].id;
+ p->in[i].hw = nintp;
+
+ for (c = 0; c < 4; ++c) {
+ if (!ti->input_access[j][c])
+ continue;
+ p->in[i].mask |= 1 << c;
+ ti->input_map[j][c] = nintp++;
+ }
+ /* count color inputs */
+ if (i == p->vp.bfc[0] || i == p->vp.bfc[1])
+ p->fp.colors += bitcount4(p->in[i].mask) << 16;
+ }
+ nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */
+ nvary = nintp;
+ if (n < m)
+ nvary -= p->in[n].hw;
+
+ p->fp.interp |= nvary << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT;
+ p->fp.interp |= nintp << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT;
+
+ /* FP outputs */
+
+ if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0)))
+ p->fp.flags[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS;
+
+ depr = p->out_nr;
+ for (i = 0; i < p->out_nr; ++i) {
+ p->out[i].id = i;
+ if (p->out[i].sn == TGSI_SEMANTIC_POSITION) {
+ depr = i;
+ continue;
+ }
+ p->out[i].hw = p->max_out;
+ p->out[i].mask = 0xf;
+
+ for (c = 0; c < 4; ++c)
+ ti->output_map[i][c] = p->max_out++;
+ }
+ if (depr < p->out_nr) {
+ p->out[depr].mask = 0x4;
+ p->out[depr].hw = p->max_out++;
+ }
+
+ return 0;
}
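A standalone sketch (illustration only, not driver code) of the two-cursor placement used in the input loop above: with m pre-counted as the number of non-flat inputs, non-flat entries fill slots 0..m-1 and flat entries follow, preserving relative order inside each group as the hardware expects.

   #include <stdint.h>

   static void
   reorder_nonflat_first(const uint8_t *is_flat, const uint8_t *in_id,
                         uint8_t *out_id, int nr)
   {
      int i, n = 0, m = 0;

      for (i = 0; i < nr; ++i)      /* m = number of non-flat inputs */
         if (!is_flat[i])
            ++m;

      for (i = 0; i < nr; ++i) {
         int j = is_flat[i] ? m++ : n++;   /* same j = flat ? m++ : n++ as above */
         out_id[j] = in_id[i];
      }
   }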
static int
-nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4],
- struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo)
+nv50_geomprog_prepare(struct nv50_translation_info *ti)
{
- int c;
- uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw;
- uint8_t *map = (uint8_t *)map32;
-
- for (c = 0; c < 4; ++c) {
- if (mf & 1) {
- if (fpi->linear == TRUE)
- lin[mid / 32] |= 1 << (mid % 32);
- if (mv & 1)
- map[mid] = oid;
- else
- map[mid] = (c == 3) ? (zval + 1) : zval;
- ++mid;
- }
-
- oid += mv & 1;
- mf >>= 1;
- mv >>= 1;
- }
+ ti->input_file = NV_FILE_MEM_S;
+ ti->output_file = NV_FILE_OUT;
- return mid;
-}
-
-struct nouveau_stateobj *
-nv50_fp_linkage_validate(struct nv50_context *nv50)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_program *vp = nv50->vertprog;
- struct nv50_program *fp = nv50->fragprog;
- struct nouveau_stateobj *so;
- struct nv50_sreg4 dummy;
- int i, n, c, m = 0;
- uint32_t map[16], lin[4], reg[6], pcrd[8];
- uint8_t zval = 0x40;
-
- if (nv50->geomprog) {
- vp = nv50->geomprog;
- zval = 0x80;
- }
- memset(map, 0, sizeof(map));
- memset(lin, 0, sizeof(lin));
-
- reg[1] = 0x00000004; /* low and high clip distance map ids */
- reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */
- reg[3] = 0x00000000; /* point size map id & enable */
- reg[5] = 0x00000000; /* primitive ID map slot */
- reg[0] = fp->cfg.regs[0]; /* colour semantic reg */
- reg[4] = fp->cfg.regs[1]; /* interpolant info */
-
- dummy.linear = FALSE;
- dummy.mask = 0xf; /* map all components of HPOS */
- m = nv50_vec4_map(map, m, zval, lin, &dummy, &vp->cfg.out[0]);
-
- dummy.mask = 0x0;
-
- if (vp->cfg.clpd < 0x40) {
- for (c = 0; c < vp->cfg.clpd_nr; ++c) {
- map[m / 4] |= (vp->cfg.clpd + c) << ((m % 4) * 8);
- ++m;
- }
- reg[1] = (m << 8);
- }
-
- reg[0] |= m << 8; /* adjust BFC0 id */
-
- /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
- if (nv50->rasterizer->pipe.light_twoside) {
- struct nv50_sreg4 *vpo = &vp->cfg.two_side[0];
- struct nv50_sreg4 *fpi = &fp->cfg.two_side[0];
-
- m = nv50_vec4_map(map, m, zval, lin, &fpi[0], &vpo[0]);
- m = nv50_vec4_map(map, m, zval, lin, &fpi[1], &vpo[1]);
- }
-
- reg[0] += m - 4; /* adjust FFC0 id */
- reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */
-
- for (i = 0; i < fp->cfg.in_nr; i++) {
- /* maybe even remove these from cfg.io */
- if (fp->cfg.in[i].sn == TGSI_SEMANTIC_POSITION ||
- fp->cfg.in[i].sn == TGSI_SEMANTIC_FACE)
- continue;
-
- for (n = 0; n < vp->cfg.out_nr; ++n)
- if (vp->cfg.out[n].sn == fp->cfg.in[i].sn &&
- vp->cfg.out[n].si == fp->cfg.in[i].si)
- break;
-
- m = nv50_vec4_map(map, m, zval, lin, &fp->cfg.in[i],
- (n < vp->cfg.out_nr) ?
- &vp->cfg.out[n] : &dummy);
- }
- /* PrimitiveID either is replaced by the system value, or
- * written by the geometry shader into an output register
- */
- if (fp->cfg.prim_id < 0x40) {
- map[m / 4] |= vp->cfg.prim_id << ((m % 4) * 8);
- reg[5] = m++;
- }
-
- if (nv50->rasterizer->pipe.point_size_per_vertex) {
- map[m / 4] |= vp->cfg.psiz << ((m % 4) * 8);
- reg[3] = (m++ << 4) | 1;
- }
-
- /* now fill the stateobj (at most 28 so_data) */
- so = so_new(10, 54, 0);
-
- n = (m + 3) / 4;
- assert(m <= 64);
- if (vp->type == PIPE_SHADER_GEOMETRY) {
- so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
- so_data (so, m);
- so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
- so_datap (so, map, n);
- } else {
- so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
- so_data (so, vp->cfg.regs[0]);
-
- so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
- so_data (so, reg[5]);
-
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
- so_data (so, m);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
- so_datap (so, map, n);
- }
-
- so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
- so_datap (so, reg, 4);
-
- so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
- so_data (so, reg[4]);
-
- so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
- so_datap (so, lin, 4);
-
- if (nv50->rasterizer->pipe.sprite_coord_enable) {
- so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
- so_data (so,
- nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff));
-
- so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
- so_datap (so, pcrd, 8);
- }
-
- so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
- so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
-
- return so;
+ assert(0);
+ return 1;
}
static int
-construct_vp_gp_mapping(uint32_t *map32, int m,
- struct nv50_program *vp, struct nv50_program *gp)
-{
- uint8_t *map = (uint8_t *)map32;
- int i, j, c;
-
- for (i = 0; i < gp->cfg.in_nr; ++i) {
- uint8_t oid = 0, mv = 0, mg = gp->cfg.in[i].mask;
-
- for (j = 0; j < vp->cfg.out_nr; ++j) {
- if (vp->cfg.out[j].sn == gp->cfg.in[i].sn &&
- vp->cfg.out[j].si == gp->cfg.in[i].si) {
- mv = vp->cfg.out[j].mask;
- oid = vp->cfg.out[j].hw;
- break;
- }
- }
-
- for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
- if (mg & mv & 1)
- map[m++] = oid;
- else
- if (mg & 1)
- map[m++] = (c == 3) ? 0x41 : 0x40;
- oid += mv & 1;
- }
- }
- return m;
-}
-
-struct nouveau_stateobj *
-nv50_gp_linkage_validate(struct nv50_context *nv50)
+nv50_prog_scan(struct nv50_translation_info *ti)
+{
+ struct nv50_program *p = ti->p;
+ struct tgsi_parse_context parse;
+ int ret;
+
+ p->vp.edgeflag = 0x40;
+ p->vp.psiz = 0x40;
+ p->vp.bfc[0] = 0x40;
+ p->vp.bfc[1] = 0x40;
+ p->gp.primid = 0x80;
+
+ tgsi_scan_shader(p->pipe.tokens, &ti->scan);
+
+ tgsi_parse_init(&parse, p->pipe.tokens);
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ prog_immediate(ti, &parse.FullToken.FullImmediate);
+ break;
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ prog_decl(ti, &parse.FullToken.FullDeclaration);
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr);
+ break;
+ }
+ }
+
+ p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
+ p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
+
+ switch (p->type) {
+ case PIPE_SHADER_VERTEX:
+ ret = nv50_vertprog_prepare(ti);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ ret = nv50_fragprog_prepare(ti);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ ret = nv50_geomprog_prepare(ti);
+ break;
+ default:
+ assert(!"unsupported program type");
+ ret = -1;
+ break;
+ }
+
+ assert(!ret);
+ return ret;
+}
+
+boolean
+nv50_program_tx(struct nv50_program *p)
{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_stateobj *so;
- struct nv50_program *vp = nv50->vertprog;
- struct nv50_program *gp = nv50->geomprog;
- uint32_t map[16];
- int m = 0;
+ struct nv50_translation_info *ti;
+ int ret;
- if (!gp)
- return NULL;
- memset(map, 0, sizeof(map));
+ ti = CALLOC_STRUCT(nv50_translation_info);
+ ti->p = p;
- m = construct_vp_gp_mapping(map, m, vp, gp);
+ ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
- so = so_new(3, 24 - 3, 0);
+ ret = nv50_prog_scan(ti);
+ if (ret) {
+ NOUVEAU_ERR("unsupported shader program\n");
+ goto out;
+ }
- so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
- so_data (so, vp->cfg.regs[0] | gp->cfg.regs[0]);
+ ret = nv50_generate_code(ti);
+ if (ret) {
+ NOUVEAU_ERR("error during shader translation\n");
+ goto out;
+ }
- assert(m <= 32);
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
- so_data (so, m);
-
- m = (m + 3) / 4;
- so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
- so_datap (so, map, m);
-
- return so;
+out:
+ if (ti->immd32)
+ FREE(ti->immd32);
+ FREE(ti);
+ return ret ? FALSE : TRUE;
}
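The translation entry point is now self-contained; a minimal usage sketch (it mirrors nv50_program_validate() in the new nv50_shader_state.c below, and assumes the state tracker has already filled in p->pipe.tokens and p->type):

   if (!p->translated)
      p->translated = nv50_program_tx(p);   /* FALSE on scan or codegen failure */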
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
- while (p->exec_head) {
- struct nv50_program_exec *e = p->exec_head;
-
- p->exec_head = e->next;
- FREE(e);
- }
- p->exec_tail = NULL;
- p->exec_size = 0;
+ nouveau_bo_ref(NULL, &p->bo);
- nouveau_bo_ref(NULL, &p->bo);
+ so_ref(NULL, &p->so);
- FREE(p->immd);
- nouveau_resource_free(&p->data[0]);
+ if (p->code)
+ FREE(p->code);
- p->translated = 0;
+ p->translated = FALSE;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 1e3ad6bff0..1184d9be3b 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -1,75 +1,127 @@
-#ifndef __NV50_PROGRAM_H__
-#define __NV50_PROGRAM_H__
+/*
+ * Copyright 2010 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NV50_PROG_H__
+#define __NV50_PROG_H__
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
+#include "nouveau/nouveau_class.h"
-struct nv50_program_exec {
- struct nv50_program_exec *next;
+struct nv50_varying {
+ uint8_t id; /* tgsi index */
+ uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
- unsigned inst[2];
- struct {
- int index;
- unsigned mask;
- unsigned shift;
- } param;
+ uint8_t mask : 4;
+ uint8_t linear : 1;
+ uint8_t pad : 3;
+
+ ubyte sn; /* semantic name */
+ ubyte si; /* semantic index */
+};
+
+struct nv50_program {
+ struct pipe_shader_state pipe;
+
+ ubyte type;
+ boolean translated;
+
+ struct nouveau_bo *bo;
+ struct nouveau_stateobj *so;
+
+ uint32_t *code;
+ unsigned code_size;
+ unsigned code_start; /* offset inside bo */
+ uint32_t *immd;
+ unsigned immd_size;
+ unsigned parm_size; /* size limit of uniform buffer */
+
+ ubyte max_gpr; /* REG_ALLOC_TEMP */
+ ubyte max_out; /* REG_ALLOC_RESULT or FP_RESULT_COUNT */
+
+ ubyte in_nr;
+ ubyte out_nr;
+ struct nv50_varying in[16];
+ struct nv50_varying out[16];
+
+ struct {
+ uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */
+ ubyte psiz;
+ ubyte bfc[2];
+ ubyte edgeflag;
+ ubyte clpd;
+ ubyte clpd_nr;
+ } vp;
+
+ struct {
+ uint32_t flags[2]; /* 0x19a8, 196c */
+ uint32_t interp; /* 0x1988 */
+ uint32_t colors; /* 0x1904 */
+ } fp;
+
+ struct {
+ ubyte primid; /* primitive id output register */
+ uint8_t vert_count;
+ uint8_t prim_type; /* point, line strip or tri strip */
+ } gp;
+
+ void *fixups;
+ unsigned num_fixups;
};
-struct nv50_sreg4 {
- uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
- uint8_t id; /* tgsi index */
+#define NV50_INTERP_LINEAR (1 << 0)
+#define NV50_INTERP_FLAT (1 << 1)
+#define NV50_INTERP_CENTROID (1 << 2)
- uint8_t mask;
- boolean linear;
+#define NV50_PROG_MAX_SUBROUTINES 8
- ubyte sn, si; /* semantic name & index */
+/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
+struct nv50_subroutine {
+ int id;
+ uint32_t argv[4][1]; /* 4 bitmasks, for each of xyzw, only allow 32 TEMPs */
+ uint32_t retv[4][1];
};
-struct nv50_program {
- struct pipe_shader_state pipe;
- struct tgsi_shader_info info;
- boolean translated;
-
- unsigned type;
- struct nv50_program_exec *exec_head;
- struct nv50_program_exec *exec_tail;
- unsigned exec_size;
- struct nouveau_resource *data[1];
- unsigned data_start[1];
-
- struct nouveau_bo *bo;
-
- uint32_t *immd;
- unsigned immd_nr;
- unsigned param_nr;
-
- struct {
- unsigned high_temp;
- unsigned high_result;
-
- uint32_t attr[2];
- uint32_t regs[4];
-
- /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */
- unsigned in_nr, out_nr;
- struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS];
- struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS];
-
- /* FP colour inputs, VP/GP back colour outputs */
- struct nv50_sreg4 two_side[2];
-
- /* GP only */
- unsigned vert_count;
- uint8_t prim_type;
-
- /* VP & GP only */
- uint8_t clpd, clpd_nr;
- uint8_t psiz;
- uint8_t edgeflag_in;
-
- /* FP & GP only */
- uint8_t prim_id;
- } cfg;
+struct nv50_translation_info {
+ struct nv50_program *p;
+ unsigned inst_nr;
+ ubyte input_file;
+ ubyte output_file;
+ ubyte input_map[PIPE_MAX_SHADER_INPUTS][4];
+ ubyte output_map[PIPE_MAX_SHADER_OUTPUTS][4];
+ ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
+ int input_access[PIPE_MAX_SHADER_INPUTS][4];
+ int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
+ boolean indirect_inputs;
+ boolean indirect_outputs;
+ struct tgsi_shader_info scan;
+ uint32_t *immd32;
+ unsigned immd32_nr;
+ ubyte edgeflag_out;
+ struct nv50_subroutine subr[NV50_PROG_MAX_SUBROUTINES];
+ int subr_nr;
};
-#endif
+int nv50_generate_code(struct nv50_translation_info *ti);
+boolean nv50_program_tx(struct nv50_program *p);
+
+#endif /* __NV50_PROG_H__ */
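For orientation, a hedged example of filling the new nv50_varying bitfields for a fully read, perspective-interpolated generic input; the values are illustrative and assume the TGSI semantic enums are in scope via the includes above:

   struct nv50_varying v = {
      .id = 3,                      /* TGSI input index */
      .hw = 5,                      /* hardware slot assigned by *_prepare() */
      .mask = 0xf,                  /* x, y, z, w all accessed */
      .linear = 0,                  /* perspective-correct, not linear */
      .sn = TGSI_SEMANTIC_GENERIC,
      .si = 1,
   };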
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
index 6a2ffd5a3c..0091927a98 100644
--- a/src/gallium/drivers/nv50/nv50_push.c
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -227,7 +227,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
ctx.idxbuf = NULL;
ctx.vtx_size = 0;
ctx.edgeflag = 0.5f;
- ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
+ ctx.edgeflag_attr = nv50->vertprog->vp.edgeflag;
/* map vertex buffers, determine vertex size */
for (i = 0; i < nv50->vtxelt->num_elements; i++) {
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index ca4b01b12b..78137d6940 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -34,75 +34,38 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
- unsigned tex_usage, unsigned geom_flags)
+ unsigned usage, unsigned geom_flags)
{
if (sample_count > 1)
return FALSE;
- if (tex_usage & PIPE_BIND_RENDER_TARGET) {
+ if (!util_format_s3tc_enabled) {
switch (format) {
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- case PIPE_FORMAT_R16G16B16A16_UNORM:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- case PIPE_FORMAT_R16G16_SNORM:
- case PIPE_FORMAT_R16G16_UNORM:
- return TRUE;
- default:
- break;
- }
- } else
- if (tex_usage & PIPE_BIND_DEPTH_STENCIL) {
- switch (format) {
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- return TRUE;
- default:
- break;
- }
- } else {
- if (tex_usage & PIPE_BIND_SAMPLER_VIEW) {
- switch (format) {
- case PIPE_FORMAT_DXT1_RGB:
- case PIPE_FORMAT_DXT1_RGBA:
- case PIPE_FORMAT_DXT3_RGBA:
- case PIPE_FORMAT_DXT5_RGBA:
- return util_format_s3tc_enabled;
- default:
- break;
- }
- }
- switch (format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- case PIPE_FORMAT_B8G8R8A8_SRGB:
- case PIPE_FORMAT_B8G8R8X8_SRGB:
- case PIPE_FORMAT_B5G5R5A1_UNORM:
- case PIPE_FORMAT_B4G4R4A4_UNORM:
- case PIPE_FORMAT_B5G6R5_UNORM:
- case PIPE_FORMAT_L8_UNORM:
- case PIPE_FORMAT_A8_UNORM:
- case PIPE_FORMAT_I8_UNORM:
- case PIPE_FORMAT_L8A8_UNORM:
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- case PIPE_FORMAT_Z32_FLOAT:
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- case PIPE_FORMAT_R16G16B16A16_UNORM:
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- case PIPE_FORMAT_R16G16_SNORM:
- case PIPE_FORMAT_R16G16_UNORM:
- return TRUE;
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT5_RGBA:
+ return FALSE;
default:
break;
}
}
- return FALSE;
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ if ((nouveau_screen(pscreen)->device->chipset & 0xf0) != 0xa0)
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+
+ /* transfers & shared are always supported */
+ usage &= ~(PIPE_BIND_TRANSFER_READ |
+ PIPE_BIND_TRANSFER_WRITE |
+ PIPE_BIND_SHARED);
+
+ return (nv50_format_table[format].usage & usage) == usage;
}
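A sketch of how the final table check behaves (not driver code; whether the example format's entry really carries both bind bits is an assumption):

   static boolean
   format_supports(enum pipe_format format, unsigned bind)
   {
      return (nv50_format_table[format].usage & bind) == bind;
   }

   /* e.g. format_supports(PIPE_FORMAT_B8G8R8A8_UNORM,
    *                      PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)
    * is TRUE only if both requested bind bits are set in the table entry. */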
static int
@@ -290,14 +253,23 @@ nv50_screen_relocs(struct nv50_screen *screen)
}
}
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
+extern int nouveau_device_get_param(struct nouveau_device *dev,
+ uint64_t param, uint64_t *value);
+
struct pipe_screen *
nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
{
struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
struct nouveau_channel *chan;
struct pipe_screen *pscreen;
+ uint64_t value;
unsigned chipset = dev->chipset;
unsigned tesla_class = 0;
+ unsigned stack_size;
int ret, i;
const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
@@ -515,6 +487,24 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
OUT_RING (chan, 0x121 | (NV50_CB_PGP << 12));
OUT_RING (chan, 0x131 | (NV50_CB_PFP << 12));
+ /* shader stack */
+ nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+
+ stack_size = util_bitcount(value & 0xffff);
+ stack_size *= util_bitcount((value >> 24) & 0xf);
+ stack_size *= 32 * 64 * 8;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
+ stack_size, &screen->stack_bo);
+ if (ret) {
+ nv50_screen_destroy(pscreen);
+ return NULL;
+ }
+ BEGIN_RING(chan, screen->tesla, NV50TCL_STACK_ADDRESS_HIGH, 3);
+ OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RING (chan, 4);
+
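A worked instance of the stack sizing above, using a hypothetical GRAPH_UNITS value (the field layout is inferred from how the code masks it, not from documentation):

   uint64_t value = 0x03000003;                       /* hypothetical */
   unsigned tps = util_bitcount(value & 0xffff);      /* = 2 */
   unsigned mps = util_bitcount((value >> 24) & 0xf); /* = 2 */
   unsigned stack_size = tps * mps * 32 * 64 * 8;     /* = 65536 bytes */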
/* Vertex array limits - max them out */
for (i = 0; i < 16; i++) {
BEGIN_RING(chan, screen->tesla,
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index fbf15a7596..1517f5608f 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -22,11 +22,11 @@ struct nv50_screen {
struct nouveau_resource *immd_heap;
- struct pipe_resource *strm_vbuf[16];
-
struct nouveau_bo *tic;
struct nouveau_bo *tsc;
+ struct nouveau_bo *stack_bo;
+
boolean force_push;
};
@@ -38,4 +38,13 @@ nv50_screen(struct pipe_screen *screen)
extern void nv50_screen_relocs(struct nv50_screen *);
+struct nv50_format {
+ uint32_t rt;
+ uint32_t tic;
+ uint32_t vtx;
+ uint32_t usage;
+};
+
+extern const struct nv50_format nv50_format_table[];
+
#endif
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
new file mode 100644
index 0000000000..5f70df3662
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -0,0 +1,626 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "nv50_context.h"
+#include "nv50_transfer.h"
+
+static void
+nv50_transfer_constbuf(struct nv50_context *nv50,
+ struct pipe_resource *buf, unsigned size, unsigned cbi)
+{
+ struct pipe_context *pipe = &nv50->pipe;
+ struct pipe_transfer *transfer;
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ uint32_t *map;
+ unsigned count, start;
+
+ map = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &transfer);
+ if (!map)
+ return;
+
+ count = buf->width0; /* MIN2(buf->width0, size); */
+ start = 0;
+
+ while (count) {
+ unsigned nr = count;
+ nr = MIN2(nr, 2047);
+
+ /* FIXME: emit relocs for unsuited MM */
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
+ OUT_RING (chan, (start << 8) | cbi);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
+ OUT_RINGp (chan, map, nr);
+
+ count -= nr;
+ start += nr;
+ map += nr;
+ }
+
+ pipe_buffer_unmap(pipe, buf, transfer);
+}
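The loop above pushes the buffer in CB_DATA bursts of at most 2047 dwords, re-emitting CB_ADDR before each burst so the constant-buffer write position keeps up; a hypothetical count of 5000 entries, for example, would go out as:

   /* burst 1: CB_ADDR = (   0 << 8) | cbi, 2047 dwords
    * burst 2: CB_ADDR = (2047 << 8) | cbi, 2047 dwords
    * burst 3: CB_ADDR = (4094 << 8) | cbi,  906 dwords */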
+
+static void
+nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ unsigned cbi;
+
+ if (p->immd_size) {
+ uint32_t *data = p->immd;
+ unsigned count = p->immd_size / 4;
+ unsigned start = 0;
+
+ while (count) {
+ unsigned nr = count;
+ nr = MIN2(nr, 2047);
+
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
+ OUT_RING (chan, (start << 8) | NV50_CB_PMISC);
+ BEGIN_RING_NI(chan, tesla, NV50TCL_CB_DATA(0), nr);
+ OUT_RINGp (chan, data, nr);
+
+ count -= nr;
+ start += nr;
+ data += nr;
+ }
+ }
+
+ /* If the state tracker doesn't change the constbuf, and it is first
+ * validated with a program that doesn't use it, this check prevents
+ * it from even being uploaded. */
+ /*
+ if (p->parm_size == 0)
+ return;
+ */
+
+ switch (p->type) {
+ case PIPE_SHADER_VERTEX:
+ cbi = NV50_CB_PVP;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ cbi = NV50_CB_PFP;
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ cbi = NV50_CB_PGP;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ nv50_transfer_constbuf(nv50, nv50->constbuf[p->type], p->parm_size, cbi);
+}
+
+static void
+nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+ int ret;
+ unsigned offset;
+ unsigned size = p->code_size;
+ uint32_t *data = p->code;
+
+ assert(p->translated);
+
+ /* TODO: use a single bo (for each type) for shader code */
+ if (p->bo)
+ return;
+ ret = nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, size, &p->bo);
+ assert(!ret);
+
+ offset = p->code_start = 0;
+
+ BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
+ OUT_RING (chan, NV50_2D_DST_FORMAT_R8_UNORM);
+ OUT_RING (chan, 1);
+ BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
+ OUT_RING (chan, 0x40000);
+ BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 2);
+ OUT_RING (chan, 0x10000);
+ OUT_RING (chan, 1);
+
+ while (size) {
+ unsigned nr = size / 4;
+
+ if (AVAIL_RING(chan) < 32)
+ FIRE_RING(chan);
+
+ nr = MIN2(nr, AVAIL_RING(chan) - 18);
+ nr = MIN2(nr, 1792);
+ if (nr < (size / 4))
+ nr &= ~0x3f;
+ assert(!(size & 3));
+
+ BEGIN_RING(chan, eng2d, NV50_2D_DST_ADDRESS_HIGH, 2);
+ OUT_RELOCh(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ OUT_RELOCl(chan, p->bo, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+ BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, NV50_2D_SIFC_FORMAT_R8_UNORM);
+ BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
+ OUT_RING (chan, nr * 4);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, 0);
+
+ BEGIN_RING_NI(chan, eng2d, NV50_2D_SIFC_DATA, nr);
+ OUT_RINGp (chan, data, nr);
+
+ data += nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
+ OUT_RING (chan, 0);
+}
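Each SIFC burst above is capped by ring space and by 1792 dwords, and every burst except the last is rounded down to a multiple of 64 dwords, which keeps the next burst's destination offset 256-byte aligned (presumably to match the 2D engine's addressing); with a hypothetical ring budget of 1000 dwords and 1850 dwords left to upload:

   /* nr = MIN2(1850, 1000) = 1000;  nr &= ~0x3f  ->  nr = 960
    * burst writes 960 * 4 bytes; offset advances by 0xf00, staying 256-byte aligned */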
+
+static void
+nv50_vp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(5, 7, 2);
+
+ nv50_program_validate_code(nv50, p);
+
+ so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2);
+ so_data (so, p->vp.attrs[0]);
+ so_data (so, p->vp.attrs[1]);
+ so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
+ so_data (so, p->max_out);
+ so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1);
+ so_data (so, p->max_gpr);
+ so_method(so, tesla, NV50TCL_VP_START_ID, 1);
+ so_data (so, p->code_start);
+
+ so_ref(so, &p->so);
+ so_ref(NULL, &so);
+}
+
+static void
+nv50_fp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(6, 7, 2);
+
+ nv50_program_validate_code(nv50, p);
+
+ so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1);
+ so_data (so, p->max_gpr);
+ so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
+ so_data (so, p->max_out);
+ so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
+ so_data (so, p->fp.flags[0]);
+ so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
+ so_data (so, p->fp.flags[1]);
+ so_method(so, tesla, NV50TCL_FP_START_ID, 1);
+ so_data (so, p->code_start);
+
+ so_ref(so, &p->so);
+ so_ref(NULL, &so);
+}
+
+static void
+nv50_gp_update_stateobj(struct nv50_context *nv50, struct nv50_program *p)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(6, 7, 2);
+
+ nv50_program_validate_code(nv50, p);
+
+ so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
+ NOUVEAU_BO_LOW, 0, 0);
+ so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1);
+ so_data (so, p->max_gpr);
+ so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1);
+ so_data (so, p->max_out);
+ so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1);
+ so_data (so, p->gp.prim_type);
+ so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1);
+ so_data (so, p->gp.vert_count);
+ so_method(so, tesla, NV50TCL_GP_START_ID, 1);
+ so_data (so, p->code_start);
+
+ so_ref(so, &p->so);
+ so_ref(NULL, &so);
+}
+
+static boolean
+nv50_program_validate(struct nv50_program *p)
+{
+ p->translated = nv50_program_tx(p);
+ assert(p->translated);
+ return p->translated;
+}
+
+struct nouveau_stateobj *
+nv50_vertprog_validate(struct nv50_context *nv50)
+{
+ struct nv50_program *p = nv50->vertprog;
+ struct nouveau_stateobj *so = NULL;
+
+ if (!p->translated) {
+ if (nv50_program_validate(p))
+ nv50_vp_update_stateobj(nv50, p);
+ else
+ return NULL;
+ }
+
+ if (nv50->dirty & NV50_NEW_VERTPROG_CB)
+ nv50_program_validate_data(nv50, p);
+
+ if (!(nv50->dirty & NV50_NEW_VERTPROG))
+ return NULL;
+
+ nv50_program_validate_code(nv50, p);
+
+ so_ref(p->so, &so);
+ return so;
+}
+
+struct nouveau_stateobj *
+nv50_fragprog_validate(struct nv50_context *nv50)
+{
+ struct nv50_program *p = nv50->fragprog;
+ struct nouveau_stateobj *so = NULL;
+
+ if (!p->translated) {
+ if (nv50_program_validate(p))
+ nv50_fp_update_stateobj(nv50, p);
+ else
+ return NULL;
+ }
+
+ if (nv50->dirty & NV50_NEW_FRAGPROG_CB)
+ nv50_program_validate_data(nv50, p);
+
+ if (!(nv50->dirty & NV50_NEW_FRAGPROG))
+ return NULL;
+
+ nv50_program_validate_code(nv50, p);
+
+ so_ref(p->so, &so);
+ return so;
+}
+
+struct nouveau_stateobj *
+nv50_geomprog_validate(struct nv50_context *nv50)
+{
+ struct nv50_program *p = nv50->geomprog;
+ struct nouveau_stateobj *so = NULL;
+
+ if (!p->translated) {
+ if (nv50_program_validate(p))
+ nv50_gp_update_stateobj(nv50, p);
+ else
+ return NULL;
+ }
+
+ if (nv50->dirty & NV50_NEW_GEOMPROG_CB)
+ nv50_program_validate_data(nv50, p);
+
+ if (!(nv50->dirty & NV50_NEW_GEOMPROG))
+ return NULL;
+
+ nv50_program_validate_code(nv50, p);
+
+ so_ref(p->so, &so);
+ return so;
+}
+
+/* XXX: this might not work correctly in all cases yet: we assume that
+ * an FP generic input that is not written in the VP is gl_PointCoord.
+ */
+static uint32_t
+nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned m)
+{
+ struct nv50_program *vp = nv50->vertprog;
+ struct nv50_program *fp = nv50->fragprog;
+ unsigned i, c;
+
+ memset(pntc, 0, 8 * sizeof(uint32_t));
+
+ if (nv50->geomprog)
+ vp = nv50->geomprog;
+
+ for (i = 0; i < fp->in_nr; i++) {
+ unsigned j, n = util_bitcount(fp->in[i].mask);
+
+ if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
+ m += n;
+ continue;
+ }
+
+ for (j = 0; j < vp->out_nr; ++j)
+ if (vp->out[j].sn == fp->in[i].sn && vp->out[j].si == fp->in[i].si)
+ break;
+
+ if (j < vp->out_nr) {
+ ubyte en = nv50->rasterizer->pipe.sprite_coord_enable;
+
+ if (!(en & (1 << vp->out[j].si))) {
+ m += n;
+ continue;
+ }
+ }
+
+ /* this is either PointCoord or replaced by sprite coords */
+ for (c = 0; c < 4; c++) {
+ if (!(fp->in[i].mask & (1 << c)))
+ continue;
+ pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
+ ++m;
+ }
+ }
+ if (nv50->rasterizer->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+ return 0;
+ return (1 << 4);
+}
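The replace map built above packs one nibble per FP input slot: 0 leaves the input alone, and 1..4 appear to select which point-coordinate component substitutes it (inferred from the packing, not from documentation). Replacing component y of the input occupying slot 10, for example:

   /* m = 10, c = 1  ->  pntc[10 / 8] |= (1 + 1) << ((10 % 8) * 4)
    *                    i.e. pntc[1] |= 0x00000200 */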
+
+static int
+nv50_vec4_map(uint32_t *map32, int mid, uint32_t lin[4],
+ struct nv50_varying *in, struct nv50_varying *out)
+{
+ int c;
+ uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
+ uint8_t *map = (uint8_t *)map32;
+
+ for (c = 0; c < 4; ++c) {
+ if (mf & 1) {
+ if (in->linear)
+ lin[mid / 32] |= 1 << (mid % 32);
+ if (mv & 1)
+ map[mid] = oid;
+ else
+ if (c == 3)
+ map[mid] |= 1;
+ ++mid;
+ }
+
+ oid += mv & 1;
+ mf >>= 1;
+ mv >>= 1;
+ }
+
+ return mid;
+}
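A worked pass through nv50_vec4_map(), assuming the 0x40 (VP) / 0x80 (GP) memset value in the callers stands for an unwritten slot and that setting the low bit for component w yields the constant 1.0: an FP input with mask 0xf linked to a VP output with mask 0x7 at hw slot 4 yields

   /* map[mid + 0] = 4     (x, taken from the VP output)
    * map[mid + 1] = 5     (y)
    * map[mid + 2] = 6     (z)
    * map[mid + 3] = 0x41  (w unwritten: 0x40 default | 1) */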
+
+struct nouveau_stateobj *
+nv50_fp_linkage_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *vp;
+ struct nv50_program *fp = nv50->fragprog;
+ struct nouveau_stateobj *so;
+ struct nv50_varying dummy;
+ int i, n, c, m;
+
+ uint32_t map[16], lin[4], pntc[8];
+
+ uint32_t interp = fp->fp.interp;
+ uint32_t colors = fp->fp.colors;
+ uint32_t clip = 0x04;
+ uint32_t psiz = 0x000;
+ uint32_t primid = 0;
+ uint32_t sysval = 0;
+
+ if (nv50->geomprog) {
+ vp = nv50->geomprog;
+ memset(map, 0x80, sizeof(map));
+ } else {
+ vp = nv50->vertprog;
+ memset(map, 0x40, sizeof(map));
+ }
+ memset(lin, 0, sizeof(lin));
+
+ dummy.linear = 0;
+ dummy.mask = 0xf; /* map all components of HPOS */
+ m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
+
+ if (vp->vp.clpd < 0x40) {
+ for (c = 0; c < vp->vp.clpd_nr; ++c) {
+ map[m / 4] |= (vp->vp.clpd + c) << ((m % 4) * 8);
+ ++m;
+ }
+ clip |= vp->vp.clpd_nr << 8;
+ }
+
+ colors |= m << 8; /* adjust BFC0 id */
+
+ /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
+ if (nv50->rasterizer->pipe.light_twoside) {
+ for (i = 0; i < 2; ++i)
+ m = nv50_vec4_map(map, m, lin,
+ &fp->in[fp->vp.bfc[i]],
+ &vp->out[vp->vp.bfc[i]]);
+ }
+
+ colors += m - 4; /* adjust FFC0 id */
+ interp |= m << 8; /* set mid where 'normal' FP inputs start */
+
+ dummy.mask = 0x0;
+ for (i = 0; i < fp->in_nr; i++) {
+ for (n = 0; n < vp->out_nr; ++n)
+ if (vp->out[n].sn == fp->in[i].sn &&
+ vp->out[n].si == fp->in[i].si)
+ break;
+
+ m = nv50_vec4_map(map, m, lin,
+ &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
+ }
+
+ /* PrimitiveID either is replaced by the system value, or
+ * written by the geometry shader into an output register
+ */
+ if (fp->gp.primid < 0x40) {
+ i = (m % 4) * 8;
+ map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->gp.primid << i);
+ primid = m++;
+ }
+
+ if (nv50->rasterizer->pipe.point_size_per_vertex) {
+ i = (m % 4) * 8;
+ map[m / 4] = (map[m / 4] & ~(0xff << i)) | (vp->vp.psiz << i);
+ psiz = (m++ << 4) | 1;
+ }
+
+	/* now fill the stateobj */
+ so = so_new(10, 54, 0);
+
+ n = (m + 3) / 4;
+ assert(m <= 64);
+ if (vp->type == PIPE_SHADER_GEOMETRY) {
+ so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+ so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n);
+ so_datap (so, map, n);
+ } else {
+ so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
+ so_data (so, vp->vp.attrs[2]);
+
+ so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1);
+ so_data (so, primid);
+
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n);
+ so_datap (so, map, n);
+ }
+
+ so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4);
+ so_data (so, colors);
+ so_data (so, clip);
+ so_data (so, sysval);
+ so_data (so, psiz);
+
+ so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
+ so_data (so, interp);
+
+ so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
+ so_datap (so, lin, 4);
+
+ if (nv50->rasterizer->pipe.sprite_coord_enable) {
+ so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1);
+ so_data (so,
+ nv50_pntc_replace(nv50, pntc, (interp >> 8) & 0xff));
+
+ so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
+ so_datap (so, pntc, 8);
+ }
+
+ so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
+ so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
+
+ return so;
+}
+
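+/* Build the VP result map as consumed by the GP: for every GP input
+ * component, record the hw register of the VP output that writes it,
+ * or 0x40 (0x41 for W) if the VP does not write it.
+ */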
+static int
+nv50_vp_gp_mapping(uint32_t *map32, int m,
+ struct nv50_program *vp, struct nv50_program *gp)
+{
+ uint8_t *map = (uint8_t *)map32;
+ int i, j, c;
+
+ for (i = 0; i < gp->in_nr; ++i) {
+ uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
+
+ for (j = 0; j < vp->out_nr; ++j) {
+ if (vp->out[j].sn == gp->in[i].sn &&
+ vp->out[j].si == gp->in[i].si) {
+ mv = vp->out[j].mask;
+ oid = vp->out[j].hw;
+ break;
+ }
+ }
+
+ for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
+ if (mg & mv & 1)
+ map[m++] = oid;
+ else
+ if (mg & 1)
+ map[m++] = (c == 3) ? 0x41 : 0x40;
+ oid += mv & 1;
+ }
+ }
+ return m;
+}
+
+struct nouveau_stateobj *
+nv50_gp_linkage_validate(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so;
+ struct nv50_program *vp = nv50->vertprog;
+ struct nv50_program *gp = nv50->geomprog;
+ uint32_t map[16];
+ int m = 0;
+
+ if (!gp)
+ return NULL;
+ memset(map, 0, sizeof(map));
+
+ m = nv50_vp_gp_mapping(map, m, vp, gp);
+
+ so = so_new(3, 24 - 3, 0);
+
+ so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1);
+ so_data (so, vp->vp.attrs[2] | gp->vp.attrs[2]);
+
+ assert(m <= 32);
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
+ so_data (so, m);
+
+ m = (m + 3) / 4;
+ so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
+ so_datap (so, map, m);
+
+ return so;
+}
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index ec0c0ff283..3afce06557 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -48,6 +48,53 @@ nv50_colormask(unsigned mask)
return cmask;
}
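+
+/* Translate gallium blend factors directly to NV50TCL_BLEND_FUNC_* values;
+ * this replaces the nvgl_blend_func() | 0x4000 combination used previously.
+ */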
+static INLINE uint32_t
+nv50_blend_func(unsigned factor)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ZERO;
+ case PIPE_BLENDFACTOR_ONE:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_COLOR;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_SRC1_ALPHA;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC1_ALPHA;
+ default:
+ return NV50TCL_BLEND_FUNC_SRC_RGB_ZERO;
+ }
+}
+
static void *
nv50_blend_state_create(struct pipe_context *pipe,
const struct pipe_blend_state *cso)
@@ -80,12 +127,12 @@ nv50_blend_state_create(struct pipe_context *pipe,
if (blend_enabled) {
so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func));
- so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_src_factor));
- so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].rgb_dst_factor));
+ so_data (so, nv50_blend_func(cso->rt[0].rgb_src_factor));
+ so_data (so, nv50_blend_func(cso->rt[0].rgb_dst_factor));
so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func));
- so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_src_factor));
+ so_data (so, nv50_blend_func(cso->rt[0].alpha_src_factor));
so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
- so_data (so, 0x4000 | nvgl_blend_func(cso->rt[0].alpha_dst_factor));
+ so_data (so, nv50_blend_func(cso->rt[0].alpha_dst_factor));
}
if (cso->logicop_enable == 0 ) {
@@ -546,7 +593,6 @@ nv50_vp_state_create(struct pipe_context *pipe,
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
p->type = PIPE_SHADER_VERTEX;
- tgsi_scan_shader(p->pipe.tokens, &p->info);
return (void *)p;
}
@@ -578,7 +624,6 @@ nv50_fp_state_create(struct pipe_context *pipe,
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
p->type = PIPE_SHADER_FRAGMENT;
- tgsi_scan_shader(p->pipe.tokens, &p->info);
return (void *)p;
}
@@ -610,7 +655,6 @@ nv50_gp_state_create(struct pipe_context *pipe,
p->pipe.tokens = tgsi_dup_tokens(cso->tokens);
p->type = PIPE_SHADER_GEOMETRY;
- tgsi_scan_shader(p->pipe.tokens, &p->info);
return (void *)p;
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 524696f35d..f1d8202dff 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -56,6 +56,8 @@ validate_fb(struct nv50_context *nv50)
assert(h == fb->cbufs[i]->height);
}
+ assert(nv50_format_table[fb->cbufs[i]->format].rt);
+
so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2);
so_data (so, fb->cbufs[i]->width);
so_data (so, fb->cbufs[i]->height);
@@ -65,39 +67,9 @@ validate_fb(struct nv50_context *nv50)
NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
- switch (fb->cbufs[i]->format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM);
- break;
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
- break;
- case PIPE_FORMAT_B5G6R5_UNORM:
- so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM);
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- so_data(so, NV50TCL_RT_FORMAT_R16G16B16A16_SNORM);
- break;
- case PIPE_FORMAT_R16G16B16A16_UNORM:
- so_data(so, NV50TCL_RT_FORMAT_R16G16B16A16_UNORM);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- so_data(so, NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT);
- break;
- case PIPE_FORMAT_R16G16_SNORM:
- so_data(so, NV50TCL_RT_FORMAT_R16G16_SNORM);
- break;
- case PIPE_FORMAT_R16G16_UNORM:
- so_data(so, NV50TCL_RT_FORMAT_R16G16_UNORM);
- break;
- default:
- NOUVEAU_ERR("AIIII unknown format %s\n",
- util_format_name(fb->cbufs[i]->format));
- so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
- break;
- }
- so_data(so, nv50_miptree(pt)->
- level[fb->cbufs[i]->level].tile_mode << 4);
+ so_data (so, nv50_format_table[fb->cbufs[i]->format].rt);
+ so_data (so, nv50_miptree(pt)->
+ level[fb->cbufs[i]->level].tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
@@ -117,33 +89,17 @@ validate_fb(struct nv50_context *nv50)
assert(h == fb->zsbuf->height);
}
+ assert(nv50_format_table[fb->zsbuf->format].rt);
+
so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5);
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_HIGH | NOUVEAU_BO_RDWR, 0, 0);
so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
- switch (fb->zsbuf->format) {
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM);
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM);
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT);
- break;
- default:
- NOUVEAU_ERR("AIIII unknown format %s\n",
- util_format_name(fb->zsbuf->format));
- so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
- break;
- }
- so_data(so, nv50_miptree(pt)->
- level[fb->zsbuf->level].tile_mode << 4);
- so_data(so, 0x00000000);
+ so_data (so, nv50_format_table[fb->zsbuf->format].rt);
+ so_data (so, nv50_miptree(pt)->
+ level[fb->zsbuf->level].tile_mode << 4);
+ so_data (so, 0x00000000);
so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
so_data (so, 1);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 5ea0c1d726..5535818370 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -29,56 +29,6 @@
#include "util/u_format.h"
-#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \
-[PIPE_FORMAT_##pf] = ( \
- NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
- NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
- NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
- NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
- NV50TIC_0_0_FMT_##f)
-
-#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f)
-
-static const uint32_t nv50_texture_formats[PIPE_FORMAT_COUNT] =
-{
- _(B8G8R8A8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8),
- _(B8G8R8A8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8),
- _(B8G8R8X8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8),
- _(B8G8R8X8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8),
- _(B5G5R5A1_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5),
- _(B4G4R4A4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4),
-
- _(B5G6R5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5),
-
- _(L8_UNORM, UNORM, C0, C0, C0, ONE, 8),
- _(L8_SRGB, UNORM, C0, C0, C0, ONE, 8),
- _(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8),
- _(I8_UNORM, UNORM, C0, C0, C0, C0, 8),
-
- _(L8A8_UNORM, UNORM, C0, C0, C0, C1, 8_8),
- _(L8A8_SRGB, UNORM, C0, C0, C0, C1, 8_8),
-
- _(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1),
- _(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1),
- _(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3),
- _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
-
- _MIXED(S8_USCALED_Z24_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
- _MIXED(Z24_UNORM_S8_USCALED, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
-
- _(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
- _(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
- _(R32G32B32A32_FLOAT, FLOAT, C0, C1, C2, C3, 32_32_32_32),
-
- _(R16G16_SNORM, SNORM, C0, C1, ZERO, ONE, 16_16),
- _(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16),
-
- _MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH)
-};
-
-#undef _
-#undef _MIXED
-
static INLINE uint32_t
nv50_tic_swizzle(uint32_t tc, unsigned swz)
{
@@ -106,7 +56,7 @@ nv50_tex_construct(struct nv50_sampler_view *view)
struct nv50_miptree *mt = nv50_miptree(view->pipe.texture);
uint32_t swz[4], *tic = view->tic;
- tic[0] = nv50_texture_formats[view->pipe.format];
+ tic[0] = nv50_format_table[view->pipe.format].tic;
swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r);
swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index 3475d3e432..b4939943e8 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -45,24 +45,32 @@
#define NV50TIC_0_0_TYPEA_SNORM 0x00008000
#define NV50TIC_0_0_TYPEA_SINT 0x00018000
#define NV50TIC_0_0_TYPEA_UINT 0x00020000
+#define NV50TIC_0_0_TYPEA_SSCALED 0x00028000
+#define NV50TIC_0_0_TYPEA_USCALED 0x00030000
#define NV50TIC_0_0_TYPEA_FLOAT 0x00038000
#define NV50TIC_0_0_TYPEB_MASK 0x00007000
#define NV50TIC_0_0_TYPEB_UNORM 0x00002000
#define NV50TIC_0_0_TYPEB_SNORM 0x00001000
#define NV50TIC_0_0_TYPEB_SINT 0x00003000
#define NV50TIC_0_0_TYPEB_UINT 0x00004000
+#define NV50TIC_0_0_TYPEB_SSCALED 0x00005000
+#define NV50TIC_0_0_TYPEB_USCALED 0x00006000
#define NV50TIC_0_0_TYPEB_FLOAT 0x00007000
#define NV50TIC_0_0_TYPEG_MASK 0x00000e00
#define NV50TIC_0_0_TYPEG_UNORM 0x00000400
#define NV50TIC_0_0_TYPEG_SNORM 0x00000200
#define NV50TIC_0_0_TYPEG_SINT 0x00000600
#define NV50TIC_0_0_TYPEG_UINT 0x00000800
+#define NV50TIC_0_0_TYPEG_SSCALED 0x00000a00
+#define NV50TIC_0_0_TYPEG_USCALED 0x00000c00
#define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00
#define NV50TIC_0_0_TYPER_MASK 0x000001c0
#define NV50TIC_0_0_TYPER_UNORM 0x00000080
#define NV50TIC_0_0_TYPER_SNORM 0x00000040
#define NV50TIC_0_0_TYPER_SINT 0x000000c0
#define NV50TIC_0_0_TYPER_UINT 0x00000100
+#define NV50TIC_0_0_TYPER_SSCALED 0x00000140
+#define NV50TIC_0_0_TYPER_USCALED 0x00000180
#define NV50TIC_0_0_TYPER_FLOAT 0x000001c0
#define NV50TIC_0_0_FMT_MASK 0x0000003f
#define NV50TIC_0_0_FMT_32_32_32_32 0x00000001
@@ -90,6 +98,7 @@
#define NV50TIC_0_0_FMT_8_24 0x0000002a
#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f
#define NV50TIC_0_0_FMT_32_8 0x00000030
+#define NV50TIC_0_0_FMT_16_DEPTH 0x0000003a
#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
new file mode 100644
index 0000000000..dafff725b8
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -0,0 +1,1661 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* XXX: need to clean this up so we get the typecasting right more naturally */
+
+#include <unistd.h>
+
+#include "nv50_context.h"
+#include "nv50_pc.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+
+#include "util/u_simple_list.h"
+#include "tgsi/tgsi_dump.h"
+
+#define BLD_MAX_TEMPS 64
+#define BLD_MAX_ADDRS 4
+#define BLD_MAX_PREDS 4
+#define BLD_MAX_IMMDS 128
+
+#define BLD_MAX_COND_NESTING 4
+#define BLD_MAX_LOOP_NESTING 4
+#define BLD_MAX_CALL_NESTING 2
+
+/* collects all values assigned to the same TGSI register */
+struct bld_value_stack {
+ struct nv_value *top;
+ struct nv_value **body;
+ unsigned size;
+ uint16_t loop_use; /* 1 bit per loop level, indicates if used/defd */
+ uint16_t loop_def;
+};
+
+static INLINE void
+bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
+{
+ assert(!stk->size || (stk->body[stk->size - 1] != val));
+
+ if (!(stk->size % 8)) {
+ unsigned old_sz = (stk->size + 0) * sizeof(struct nv_value *);
+ unsigned new_sz = (stk->size + 8) * sizeof(struct nv_value *);
+ stk->body = (struct nv_value **)REALLOC(stk->body, old_sz, new_sz);
+ }
+ stk->body[stk->size++] = val;
+}
+
+static INLINE boolean
+bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val)
+{
+   int i;
+
+   for (i = (int)stk->size - 1; i >= 0; --i)
+ if (stk->body[i] == val)
+ break;
+ if (i < 0)
+ return FALSE;
+
+ if (i != stk->size - 1)
+ stk->body[i] = stk->body[stk->size - 1];
+
+ --stk->size; /* XXX: old size in REALLOC */
+ return TRUE;
+}
+
+static INLINE void
+bld_vals_push(struct bld_value_stack *stk)
+{
+ bld_vals_push_val(stk, stk->top);
+ stk->top = NULL;
+}
+
+static INLINE void
+bld_push_values(struct bld_value_stack *stacks, int n)
+{
+ int i, c;
+
+ for (i = 0; i < n; ++i)
+ for (c = 0; c < 4; ++c)
+ if (stacks[i * 4 + c].top)
+ bld_vals_push(&stacks[i * 4 + c]);
+}
+
+struct bld_context {
+ struct nv50_translation_info *ti;
+
+ struct nv_pc *pc;
+ struct nv_basic_block *b;
+
+ struct tgsi_parse_context parse[BLD_MAX_CALL_NESTING];
+ int call_lvl;
+
+ struct nv_basic_block *cond_bb[BLD_MAX_COND_NESTING];
+ struct nv_basic_block *join_bb[BLD_MAX_COND_NESTING];
+ struct nv_basic_block *else_bb[BLD_MAX_COND_NESTING];
+ int cond_lvl;
+ struct nv_basic_block *loop_bb[BLD_MAX_LOOP_NESTING];
+ struct nv_basic_block *brkt_bb[BLD_MAX_LOOP_NESTING];
+ int loop_lvl;
+
+ struct bld_value_stack tvs[BLD_MAX_TEMPS][4]; /* TGSI_FILE_TEMPORARY */
+ struct bld_value_stack avs[BLD_MAX_ADDRS][4]; /* TGSI_FILE_ADDRESS */
+ struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
+ struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4];
+
+ uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 31) / 32];
+
+ struct nv_value *frgcrd[4];
+ struct nv_value *sysval[4];
+
+ /* wipe on new BB */
+ struct nv_value *saved_addr[4][2];
+ struct nv_value *saved_inputs[128];
+ struct nv_value *saved_immd[BLD_MAX_IMMDS];
+ uint num_immds;
+};
+
+static INLINE ubyte
+bld_stack_file(struct bld_context *bld, struct bld_value_stack *stk)
+{
+ if (stk < &bld->avs[0][0])
+ return NV_FILE_GPR;
+ else
+ if (stk < &bld->pvs[0][0])
+ return NV_FILE_ADDR;
+ else
+ if (stk < &bld->ovs[0][0])
+ return NV_FILE_FLAGS;
+ else
+ return NV_FILE_OUT;
+}
+
+static INLINE struct nv_value *
+bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
+{
+ stk[i * 4 + c].loop_use |= 1 << bld->loop_lvl;
+
+ return stk[i * 4 + c].top;
+}
+
+static struct nv_value *
+bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *);
+
+/* If a variable is defined in a loop without prior use, we don't need
+ * a phi in the loop header to account for backwards flow.
+ *
+ * However, if this variable is then also used outside the loop, we do
+ * need a phi after all. But we must not use this phi's def inside the
+ * loop, so we can eliminate the phi if it is unused later.
+ */
+static INLINE void
+bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,
+ struct nv_value *val)
+{
+ const uint16_t m = 1 << bld->loop_lvl;
+
+ stk = &stk[i * 4 + c];
+
+ if (bld->loop_lvl && !(m & (stk->loop_def | stk->loop_use)))
+ bld_loop_phi(bld, stk, val);
+
+ stk->top = val;
+ stk->loop_def |= 1 << bld->loop_lvl;
+}
+
+static INLINE void
+bld_clear_def_use(struct bld_value_stack *stk, int n, int lvl)
+{
+ int i;
+ const uint16_t mask = ~(1 << lvl);
+
+ for (i = 0; i < n * 4; ++i) {
+ stk[i].loop_def &= mask;
+ stk[i].loop_use &= mask;
+ }
+}
+
+#define FETCH_TEMP(i, c) bld_fetch(bld, &bld->tvs[0][0], i, c)
+#define STORE_TEMP(i, c, v) bld_store(bld, &bld->tvs[0][0], i, c, (v))
+#define FETCH_ADDR(i, c) bld_fetch(bld, &bld->avs[0][0], i, c)
+#define STORE_ADDR(i, c, v) bld_store(bld, &bld->avs[0][0], i, c, (v))
+#define FETCH_PRED(i, c) bld_fetch(bld, &bld->pvs[0][0], i, c)
+#define STORE_PRED(i, c, v) bld_store(bld, &bld->pvs[0][0], i, c, (v))
+
+#define STORE_OUTR(i, c, v) \
+ do { \
+ bld->ovs[i][c].top = (v); \
+ bld->outputs_written[(i) / 8] |= 1 << (((i) * 4 + (c)) % 32); \
+ } while (0)
+
+static INLINE void
+bld_warn_uninitialized(struct bld_context *bld, int kind,
+ struct bld_value_stack *stk, struct nv_basic_block *b)
+{
+ long i = (stk - &bld->tvs[0][0]) / 4;
+ long c = (stk - &bld->tvs[0][0]) & 3;
+
+ if (c == 3)
+ c = -1;
+
+ debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
+ i, (int)('x' + c), kind ? "may be" : "is", b->id);
+}
+
+static INLINE struct nv_value *
+bld_def(struct nv_instruction *i, int c, struct nv_value *value)
+{
+ i->def[c] = value;
+ value->insn = i;
+ return value;
+}
+
+static INLINE struct nv_value *
+find_by_bb(struct bld_value_stack *stack, struct nv_basic_block *b)
+{
+ int i;
+
+ if (stack->top && stack->top->insn->bb == b)
+ return stack->top;
+
+ for (i = stack->size - 1; i >= 0; --i)
+ if (stack->body[i]->insn->bb == b)
+ return stack->body[i];
+ return NULL;
+}
+
+/* fetch value from stack that was defined in the specified basic block,
+ * or search for first definitions in all of its predecessors
+ */
+static void
+fetch_by_bb(struct bld_value_stack *stack,
+ struct nv_value **vals, int *n,
+ struct nv_basic_block *b)
+{
+ int i;
+ struct nv_value *val;
+
+ assert(*n < 16); /* MAX_COND_NESTING */
+
+ val = find_by_bb(stack, b);
+ if (val) {
+ for (i = 0; i < *n; ++i)
+ if (vals[i] == val)
+ return;
+ vals[(*n)++] = val;
+ return;
+ }
+ for (i = 0; i < b->num_in; ++i)
+ if (b->in_kind[i] != CFG_EDGE_BACK)
+ fetch_by_bb(stack, vals, n, b->in[i]);
+}
+
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u);
+
+static INLINE struct nv_value *
+bld_undef(struct bld_context *bld, ubyte file)
+{
+ struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);
+
+ return bld_def(nvi, 0, new_value(bld->pc, file, NV_TYPE_U32));
+}
+
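+/* Resolve the value of a TGSI register in basic block b: gather the reaching
+ * definitions from b and its predecessors and, if there is more than one,
+ * merge them with a phi, placing additional phis at dominance frontiers
+ * where required.
+ */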
+static struct nv_value *
+bld_phi(struct bld_context *bld, struct nv_basic_block *b,
+ struct bld_value_stack *stack)
+{
+ struct nv_basic_block *in;
+ struct nv_value *vals[16], *val;
+ struct nv_instruction *phi;
+ int i, j, n;
+
+ do {
+ i = n = 0;
+ fetch_by_bb(stack, vals, &n, b);
+
+ if (!n) {
+ bld_warn_uninitialized(bld, 0, stack, b);
+ return NULL;
+ }
+
+ if (n == 1) {
+ if (nvbb_dominated_by(b, vals[0]->insn->bb))
+ break;
+
+ bld_warn_uninitialized(bld, 1, stack, b);
+
+ /* back-tracking to insert missing value of other path */
+ in = b;
+ while (in->in[0]) {
+ if (in->num_in == 1) {
+ in = in->in[0];
+ } else {
+ if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b))
+ in = in->in[0];
+ else
+ if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b))
+ in = in->in[1];
+ else
+ in = in->in[0];
+ }
+ }
+ bld->pc->current_block = in;
+
+ /* should make this a no-op */
+ bld_vals_push_val(stack, bld_undef(bld, vals[0]->reg.file));
+ continue;
+ }
+
+ for (i = 0; i < n; ++i) {
+ /* if value dominates b, continue to the redefinitions */
+ if (nvbb_dominated_by(b, vals[i]->insn->bb))
+ continue;
+
+ /* if value dominates any in-block, b should be the dom frontier */
+ for (j = 0; j < b->num_in; ++j)
+ if (nvbb_dominated_by(b->in[j], vals[i]->insn->bb))
+ break;
+ /* otherwise, find the dominance frontier and put the phi there */
+ if (j == b->num_in) {
+ in = nvbb_dom_frontier(vals[i]->insn->bb);
+ val = bld_phi(bld, in, stack);
+ bld_vals_push_val(stack, val);
+ break;
+ }
+ }
+   } while (i < n);
+
+ bld->pc->current_block = b;
+
+ if (n == 1)
+ return vals[0];
+
+ phi = new_instruction(bld->pc, NV_OP_PHI);
+
+ bld_def(phi, 0, new_value(bld->pc, vals[0]->reg.file, vals[0]->reg.type));
+ for (i = 0; i < n; ++i)
+ phi->src[i] = new_ref(bld->pc, vals[i]);
+
+ return phi->def[0];
+}
+
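+/* Insert a phi into the current loop's header block; src[0] is the value
+ * from before the loop, src[1] the definition from inside it (patched up
+ * later in bld_loop_end).
+ */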
+static struct nv_value *
+bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack,
+ struct nv_value *def)
+{
+ struct nv_basic_block *bb = bld->pc->current_block;
+ struct nv_instruction *phi;
+ struct nv_value *val;
+
+ val = bld_phi(bld, bld->pc->current_block, stack);
+ if (!val) {
+ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
+
+ val = bld_undef(bld, bld_stack_file(bld, stack));
+ }
+
+ bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1];
+
+ phi = new_instruction(bld->pc, NV_OP_PHI);
+
+ bld_def(phi, 0, new_value_like(bld->pc, val));
+ if (!def)
+ def = phi->def[0];
+
+ bld_vals_push_val(stack, phi->def[0]);
+
+ phi->target = (struct nv_basic_block *)stack; /* cheat */
+
+ nv_reference(bld->pc, &phi->src[0], val);
+ nv_reference(bld->pc, &phi->src[1], def);
+
+ bld->pc->current_block = bb;
+
+ return phi->def[0];
+}
+
+static INLINE struct nv_value *
+bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
+{
+ const uint16_t m = 1 << bld->loop_lvl;
+ const uint16_t use = stack->loop_use;
+
+ stack->loop_use |= m;
+
+   /* If the value is neither used nor defined inside the loop yet, build the
+    * phi up front, so we don't have to track and replace references later.
+    */
+ if (bld->loop_lvl && !((use | stack->loop_def) & m))
+ return bld_loop_phi(bld, stack, NULL);
+
+ return bld_phi(bld, bld->pc->current_block, stack);
+}
+
+static INLINE struct nv_value *
+bld_imm_u32(struct bld_context *bld, uint32_t u)
+{
+ int i;
+ unsigned n = bld->num_immds;
+
+ for (i = 0; i < n; ++i)
+ if (bld->saved_immd[i]->reg.imm.u32 == u)
+ return bld->saved_immd[i];
+ assert(n < BLD_MAX_IMMDS);
+
+ bld->num_immds++;
+
+ bld->saved_immd[n] = new_value(bld->pc, NV_FILE_IMM, NV_TYPE_U32);
+ bld->saved_immd[n]->reg.imm.u32 = u;
+ return bld->saved_immd[n];
+}
+
+static void
+bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
+ struct nv_value *);
+
+/* Replace the source of the phi in the loop header by the last assignment,
+ * or eliminate the phi function if there is no assignment inside the loop.
+ *
+ * Redundancy situation 1 - (used) but (not redefined) value:
+ *  %3 = phi %0, %3 : the loop-side source is the phi itself, %3 is only used
+ *  %3 = phi %0, %4 : %4 is a new definition made inside the loop
+ *
+ * Redundancy situation 2 - (not used) but (redefined) value:
+ *  %3 = phi %0, %2 : %2 is used, %3 could be used outside, deleted by DCE
+ */
+static void
+bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
+{
+ struct nv_instruction *phi, *next;
+ struct nv_value *val;
+ struct bld_value_stack *stk;
+ int s;
+
+ for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
+ next = phi->next;
+
+ stk = (struct bld_value_stack *)phi->target;
+ phi->target = NULL;
+
+ val = bld_fetch_global(bld, stk);
+
+ nv_reference(bld->pc, &phi->src[1], val);
+
+ s = -1;
+ if (phi->src[0]->value == phi->def[0] ||
+ phi->src[0]->value == phi->src[1]->value)
+ s = 1;
+ else
+ if (phi->src[1]->value == phi->def[0])
+ s = 0;
+
+ if (s >= 0) {
+ bld_vals_del_val(stk, phi->def[0]);
+
+ ++bld->pc->pass_seq;
+ bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);
+
+ nv_nvi_delete(phi);
+ }
+ }
+}
+
+static INLINE struct nv_value *
+bld_imm_f32(struct bld_context *bld, float f)
+{
+ return bld_imm_u32(bld, fui(f));
+}
+
+#define SET_TYPE(v, t) ((v)->reg.type = NV_TYPE_##t)
+
+static struct nv_value *
+bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+ assert(insn);
+
+ nv_reference(bld->pc, &insn->src[0], src0); /* NOTE: new_ref would suffice */
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
+}
+
+static struct nv_value *
+bld_insn_2(struct bld_context *bld, uint opcode,
+ struct nv_value *src0, struct nv_value *src1)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+ nv_reference(bld->pc, &insn->src[0], src0);
+ nv_reference(bld->pc, &insn->src[1], src1);
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
+}
+
+static struct nv_value *
+bld_insn_3(struct bld_context *bld, uint opcode,
+ struct nv_value *src0, struct nv_value *src1,
+ struct nv_value *src2)
+{
+ struct nv_instruction *insn = new_instruction(bld->pc, opcode);
+
+ nv_reference(bld->pc, &insn->src[0], src0);
+ nv_reference(bld->pc, &insn->src[1], src1);
+ nv_reference(bld->pc, &insn->src[2], src2);
+
+ return bld_def(insn, 0, new_value(bld->pc, NV_FILE_GPR, src0->reg.type));
+}
+
+#define BLD_INSN_1_EX(d, op, dt, s0, s0t) \
+ do { \
+ (d) = bld_insn_1(bld, (NV_OP_##op), (s0)); \
+ (d)->reg.type = NV_TYPE_##dt; \
+ (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
+ } while(0)
+
+#define BLD_INSN_2_EX(d, op, dt, s0, s0t, s1, s1t) \
+ do { \
+ (d) = bld_insn_2(bld, (NV_OP_##op), (s0), (s1)); \
+ (d)->reg.type = NV_TYPE_##dt; \
+ (d)->insn->src[0]->typecast = NV_TYPE_##s0t; \
+ (d)->insn->src[1]->typecast = NV_TYPE_##s1t; \
+ } while(0)
+
+static struct nv_value *
+bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e)
+{
+ struct nv_value *val;
+
+ BLD_INSN_1_EX(val, LG2, F32, x, F32);
+ BLD_INSN_2_EX(val, MUL, F32, e, F32, val, F32);
+ val = bld_insn_1(bld, NV_OP_PREEX2, val);
+ val = bld_insn_1(bld, NV_OP_EX2, val);
+
+ return val;
+}
+
+static INLINE struct nv_value *
+bld_load_imm_f32(struct bld_context *bld, float f)
+{
+ return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f));
+}
+
+static INLINE struct nv_value *
+bld_load_imm_u32(struct bld_context *bld, uint32_t u)
+{
+ return bld_insn_1(bld, NV_OP_MOV, bld_imm_u32(bld, u));
+}
+
+static struct nv_value *
+bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect)
+{
+ int i;
+ struct nv_instruction *nvi;
+
+ for (i = 0; i < 4; ++i) {
+ if (!bld->saved_addr[i][0])
+ break;
+ if (bld->saved_addr[i][1] == indirect) {
+ nvi = bld->saved_addr[i][0]->insn;
+ if (nvi->src[0]->value->reg.imm.u32 == id)
+ return bld->saved_addr[i][0];
+ }
+ }
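+   /* take the first free slot; if all four are in use, the mask below wraps
+    * i from 4 to 0 and slot 0 is overwritten */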
+ i &= 3;
+
+ bld->saved_addr[i][0] = bld_load_imm_u32(bld, id);
+ bld->saved_addr[i][0]->reg.file = NV_FILE_ADDR;
+ bld->saved_addr[i][1] = indirect;
+ return bld->saved_addr[i][0];
+}
+
+
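+/* Return the condition code flags produced by the instruction defining src,
+ * creating a flags def (and an extra CVT, if src comes from another block or
+ * from an LDA/PHI) when necessary.
+ */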
+static struct nv_value *
+bld_predicate(struct bld_context *bld, struct nv_value *src, boolean bool_only)
+{
+ struct nv_instruction *nvi = src->insn;
+
+ if (nvi->opcode == NV_OP_LDA ||
+ nvi->opcode == NV_OP_PHI ||
+ nvi->bb != bld->pc->current_block) {
+ nvi = new_instruction(bld->pc, NV_OP_CVT);
+ nv_reference(bld->pc, &nvi->src[0], src);
+ } else
+ if (bool_only) {
+ while (nvi->opcode == NV_OP_ABS || nvi->opcode == NV_OP_CVT ||
+ nvi->opcode == NV_OP_NEG) {
+ /* TGSI SET gets conversion to f32, we only need source 0/~0 */
+         if (nvi->def[0]->insn->flags_src)
+            break; /* already has flags, stop to avoid looping forever */
+         nvi = nvi->src[0]->value->insn;
+ }
+ }
+
+ if (!nvi->flags_def) {
+ nvi->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
+ nvi->flags_def->insn = nvi;
+ }
+ return nvi->flags_def;
+}
+
+static void
+bld_kil(struct bld_context *bld, struct nv_value *src)
+{
+ struct nv_instruction *nvi;
+
+ src = bld_predicate(bld, src, FALSE);
+ nvi = new_instruction(bld->pc, NV_OP_KIL);
+ nvi->fixed = 1;
+ nvi->flags_src = new_ref(bld->pc, src);
+ nvi->cc = NV_CC_LT;
+}
+
+static void
+bld_flow(struct bld_context *bld, uint opcode, ubyte cc,
+ struct nv_value *src, struct nv_basic_block *target,
+ boolean plan_reconverge)
+{
+ struct nv_instruction *nvi;
+
+ if (plan_reconverge)
+ new_instruction(bld->pc, NV_OP_JOINAT)->fixed = 1;
+
+ nvi = new_instruction(bld->pc, opcode);
+ nvi->is_terminator = 1;
+ nvi->cc = cc;
+ nvi->target = target;
+ if (src)
+ nvi->flags_src = new_ref(bld->pc, src);
+}
+
+static ubyte
+translate_setcc(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_SLT: return NV_CC_LT;
+ case TGSI_OPCODE_SGE: return NV_CC_GE;
+ case TGSI_OPCODE_SEQ: return NV_CC_EQ;
+ case TGSI_OPCODE_SGT: return NV_CC_GT;
+ case TGSI_OPCODE_SLE: return NV_CC_LE;
+ case TGSI_OPCODE_SNE: return NV_CC_NE | NV_CC_U;
+ case TGSI_OPCODE_STR: return NV_CC_TR;
+ case TGSI_OPCODE_SFL: return NV_CC_FL;
+
+ case TGSI_OPCODE_ISLT: return NV_CC_LT;
+ case TGSI_OPCODE_ISGE: return NV_CC_GE;
+ case TGSI_OPCODE_USEQ: return NV_CC_EQ;
+ case TGSI_OPCODE_USGE: return NV_CC_GE;
+ case TGSI_OPCODE_USLT: return NV_CC_LT;
+ case TGSI_OPCODE_USNE: return NV_CC_NE;
+ default:
+ assert(0);
+ return NV_CC_FL;
+ }
+}
+
+static uint
+translate_opcode(uint opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_ABS: return NV_OP_ABS;
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_UADD: return NV_OP_ADD;
+ case TGSI_OPCODE_AND: return NV_OP_AND;
+ case TGSI_OPCODE_EX2: return NV_OP_EX2;
+ case TGSI_OPCODE_CEIL: return NV_OP_CEIL;
+ case TGSI_OPCODE_FLR: return NV_OP_FLOOR;
+ case TGSI_OPCODE_TRUNC: return NV_OP_TRUNC;
+ case TGSI_OPCODE_COS: return NV_OP_COS;
+ case TGSI_OPCODE_SIN: return NV_OP_SIN;
+ case TGSI_OPCODE_DDX: return NV_OP_DFDX;
+ case TGSI_OPCODE_DDY: return NV_OP_DFDY;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_U2F: return NV_OP_CVT;
+ case TGSI_OPCODE_INEG: return NV_OP_NEG;
+ case TGSI_OPCODE_LG2: return NV_OP_LG2;
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_USHR: return NV_OP_SHR;
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_UMAD: return NV_OP_MAD;
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_UMAX: return NV_OP_MAX;
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_UMIN: return NV_OP_MIN;
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_UMUL: return NV_OP_MUL;
+ case TGSI_OPCODE_OR: return NV_OP_OR;
+ case TGSI_OPCODE_RCP: return NV_OP_RCP;
+ case TGSI_OPCODE_RSQ: return NV_OP_RSQ;
+ case TGSI_OPCODE_SAD: return NV_OP_SAD;
+ case TGSI_OPCODE_SHL: return NV_OP_SHL;
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE: return NV_OP_SET;
+ case TGSI_OPCODE_TEX: return NV_OP_TEX;
+ case TGSI_OPCODE_TXP: return NV_OP_TEX;
+ case TGSI_OPCODE_TXB: return NV_OP_TXB;
+ case TGSI_OPCODE_TXL: return NV_OP_TXL;
+ case TGSI_OPCODE_XOR: return NV_OP_XOR;
+ default:
+ return NV_OP_NOP;
+ }
+}
+
+static ubyte
+infer_src_type(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_USHR:
+ return NV_TYPE_U32;
+ case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_ISLT:
+ return NV_TYPE_S32;
+ default:
+ return NV_TYPE_F32;
+ }
+}
+
+static ubyte
+infer_dst_type(unsigned opcode)
+{
+ switch (opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_F2U:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ case TGSI_OPCODE_UADD:
+ case TGSI_OPCODE_UDIV:
+ case TGSI_OPCODE_UMOD:
+ case TGSI_OPCODE_UMAD:
+ case TGSI_OPCODE_UMUL:
+ case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_USHR:
+ return NV_TYPE_U32;
+ case TGSI_OPCODE_F2I:
+ case TGSI_OPCODE_IDIV:
+ case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_INEG:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_ISLT:
+ return NV_TYPE_S32;
+ default:
+ return NV_TYPE_F32;
+ }
+}
+
+static void
+emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst,
+ unsigned chan, struct nv_value *value)
+{
+ const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+
+ assert(chan < 4);
+
+ if (inst->Instruction.Opcode != TGSI_OPCODE_MOV)
+ value->reg.type = infer_dst_type(inst->Instruction.Opcode);
+
+ switch (inst->Instruction.Saturate) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ BLD_INSN_1_EX(value, SAT, F32, value, F32);
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ value = bld_insn_2(bld, NV_OP_MAX, value, bld_load_imm_f32(bld, -1.0f));
+ value = bld_insn_2(bld, NV_OP_MIN, value, bld_load_imm_f32(bld, 1.0f));
+ value->reg.type = NV_TYPE_F32;
+ break;
+ }
+
+ switch (reg->Register.File) {
+ case TGSI_FILE_OUTPUT:
+ value = bld_insn_1(bld, NV_OP_MOV, value);
+ value->reg.file = bld->ti->output_file;
+
+ if (bld->ti->p->type == PIPE_SHADER_FRAGMENT) {
+ STORE_OUTR(reg->Register.Index, chan, value);
+ } else {
+ value->insn->fixed = 1;
+ value->reg.id = bld->ti->output_map[reg->Register.Index][chan];
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ assert(reg->Register.Index < BLD_MAX_TEMPS);
+ value->reg.file = NV_FILE_GPR;
+ if (value->insn->bb != bld->pc->current_block)
+ value = bld_insn_1(bld, NV_OP_MOV, value);
+ STORE_TEMP(reg->Register.Index, chan, value);
+ break;
+ case TGSI_FILE_ADDRESS:
+ assert(reg->Register.Index < BLD_MAX_ADDRS);
+ value->reg.file = NV_FILE_ADDR;
+ STORE_ADDR(reg->Register.Index, chan, value);
+ break;
+ }
+}
+
+static INLINE uint32_t
+bld_is_output_written(struct bld_context *bld, int i, int c)
+{
+ if (c < 0)
+ return bld->outputs_written[i / 8] & (0xf << ((i * 4) % 32));
+ return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32));
+}
+
+static void
+bld_export_outputs(struct bld_context *bld)
+{
+ struct nv_value *vals[4];
+ struct nv_instruction *nvi;
+ int i, c, n;
+
+ bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
+
+ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) {
+ if (!bld_is_output_written(bld, i, -1))
+ continue;
+ for (n = 0, c = 0; c < 4; ++c) {
+ if (!bld_is_output_written(bld, i, c))
+ continue;
+ vals[n] = bld_fetch_global(bld, &bld->ovs[i][c]);
+ assert(vals[n]);
+ vals[n] = bld_insn_1(bld, NV_OP_MOV, vals[n]);
+ vals[n++]->reg.id = bld->ti->output_map[i][c];
+ }
+ assert(n);
+
+ (nvi = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1;
+
+ for (c = 0; c < n; ++c)
+ nvi->src[c] = new_ref(bld->pc, vals[c]);
+ }
+}
+
+static void
+bld_new_block(struct bld_context *bld, struct nv_basic_block *b)
+{
+ int i;
+
+ bld_push_values(&bld->tvs[0][0], BLD_MAX_TEMPS);
+ bld_push_values(&bld->avs[0][0], BLD_MAX_ADDRS);
+ bld_push_values(&bld->pvs[0][0], BLD_MAX_PREDS);
+ bld_push_values(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
+
+ bld->pc->current_block = b;
+
+ for (i = 0; i < 4; ++i)
+ bld->saved_addr[i][0] = NULL;
+
+ for (i = 0; i < 128; ++i)
+ bld->saved_inputs[i] = NULL;
+}
+
+static struct nv_value *
+bld_saved_input(struct bld_context *bld, unsigned i, unsigned c)
+{
+ unsigned idx = bld->ti->input_map[i][c];
+
+ if (bld->ti->p->type != PIPE_SHADER_FRAGMENT)
+ return NULL;
+ if (bld->saved_inputs[idx])
+ return bld->saved_inputs[idx];
+ return NULL;
+}
+
+static struct nv_value *
+bld_interpolate(struct bld_context *bld, unsigned mode, struct nv_value *val)
+{
+ if (mode & (NV50_INTERP_LINEAR | NV50_INTERP_FLAT))
+ val = bld_insn_1(bld, NV_OP_LINTERP, val);
+ else
+ val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frgcrd[3]);
+
+ val->insn->flat = (mode & NV50_INTERP_FLAT) ? 1 : 0;
+ val->insn->centroid = (mode & NV50_INTERP_CENTROID) ? 1 : 0;
+ return val;
+}
+
+static struct nv_value *
+emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn,
+ const unsigned s, const unsigned chan)
+{
+ const struct tgsi_full_src_register *src = &insn->Src[s];
+ struct nv_value *res;
+ unsigned idx, swz, dim_idx, ind_idx, ind_swz;
+ ubyte type = infer_src_type(insn->Instruction.Opcode);
+
+ idx = src->Register.Index;
+ swz = tgsi_util_get_full_src_register_swizzle(src, chan);
+ dim_idx = -1;
+ ind_idx = -1;
+ ind_swz = 0;
+
+ if (src->Register.Indirect) {
+ ind_idx = src->Indirect.Index;
+ ind_swz = tgsi_util_get_src_register_swizzle(&src->Indirect, 0);
+ }
+
+ switch (src->Register.File) {
+ case TGSI_FILE_CONSTANT:
+ dim_idx = src->Dimension.Index ? src->Dimension.Index + 2 : 1;
+ assert(dim_idx < 14);
+ assert(dim_idx == 1); /* for now */
+
+ res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type);
+ res->reg.type = type;
+ res->reg.id = (idx * 4 + swz) & 127;
+ res = bld_insn_1(bld, NV_OP_LDA, res);
+
+ if (src->Register.Indirect)
+ res->insn->src[4] = new_ref(bld->pc, FETCH_ADDR(ind_idx, ind_swz));
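+      /* reg.id above is masked to 7 bits; constants beyond that range are
+       * reached through an address reg holding the 512 byte aligned base */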
+ if (idx >= (128 / 4))
+ res->insn->src[4] =
+ new_ref(bld->pc, bld_get_address(bld, (idx * 16) & ~0x1ff, NULL));
+ break;
+ case TGSI_FILE_IMMEDIATE:
+ assert(idx < bld->ti->immd32_nr);
+ res = bld_load_imm_u32(bld, bld->ti->immd32[idx * 4 + swz]);
+ res->reg.type = type;
+ break;
+ case TGSI_FILE_INPUT:
+ res = bld_saved_input(bld, idx, swz);
+ if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP))
+ return res;
+
+ res = new_value(bld->pc, bld->ti->input_file, type);
+ res->reg.id = bld->ti->input_map[idx][swz];
+
+ if (res->reg.file == NV_FILE_MEM_V) {
+ res = bld_interpolate(bld, bld->ti->interp_mode[idx], res);
+ } else {
+ assert(src->Dimension.Dimension == 0);
+ res = bld_insn_1(bld, NV_OP_LDA, res);
+ }
+ assert(res->reg.type == type);
+
+ bld->saved_inputs[bld->ti->input_map[idx][swz]] = res;
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* this should be load from l[], with reload elimination later on */
+ res = bld_fetch_global(bld, &bld->tvs[idx][swz]);
+ break;
+ case TGSI_FILE_ADDRESS:
+ res = bld_fetch_global(bld, &bld->avs[idx][swz]);
+ break;
+ case TGSI_FILE_PREDICATE:
+ res = bld_fetch_global(bld, &bld->pvs[idx][swz]);
+ break;
+ default:
+ NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File);
+ abort();
+ break;
+ }
+ if (!res) {
+ debug_printf("WARNING: undefined source value in TGSI instruction\n");
+ return bld_load_imm_u32(bld, 0);
+ }
+
+ switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) {
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ case TGSI_UTIL_SIGN_CLEAR:
+ res = bld_insn_1(bld, NV_OP_ABS, res);
+ break;
+ case TGSI_UTIL_SIGN_TOGGLE:
+ res = bld_insn_1(bld, NV_OP_NEG, res);
+ break;
+ case TGSI_UTIL_SIGN_SET:
+ res = bld_insn_1(bld, NV_OP_ABS, res);
+ res = bld_insn_1(bld, NV_OP_NEG, res);
+ break;
+ default:
+ NOUVEAU_ERR("illegal/unhandled src reg sign mode\n");
+ abort();
+ break;
+ }
+
+ return res;
+}
+
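+/* TGSI LIT: dst = (1, max(src.x, 0), src.x > 0 ? pow(max(src.y, 0),
+ * clamp(src.w, -128, 128)) : 0, 1); the Z result is selected via the
+ * condition flags of the MAX that clamps src.x.
+ */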
+static void
+bld_lit(struct bld_context *bld, struct nv_value *dst0[4],
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *val0, *zero;
+ unsigned mask = insn->Dst[0].Register.WriteMask;
+
+ if (mask & ((1 << 0) | (1 << 3)))
+ dst0[3] = dst0[0] = bld_load_imm_f32(bld, 1.0f);
+
+ if (mask & (3 << 1)) {
+ zero = bld_load_imm_f32(bld, 0.0f);
+ val0 = bld_insn_2(bld, NV_OP_MAX, emit_fetch(bld, insn, 0, 0), zero);
+
+ if (mask & (1 << 1))
+ dst0[1] = val0;
+ }
+
+ if (mask & (1 << 2)) {
+ struct nv_value *val1, *val3, *src1, *src3;
+ struct nv_value *pos128 = bld_load_imm_f32(bld, 127.999999f);
+ struct nv_value *neg128 = bld_load_imm_f32(bld, -127.999999f);
+
+ src1 = emit_fetch(bld, insn, 0, 1);
+ src3 = emit_fetch(bld, insn, 0, 3);
+
+ val0->insn->flags_def = new_value(bld->pc, NV_FILE_FLAGS, NV_TYPE_U16);
+ val0->insn->flags_def->insn = val0->insn;
+
+ val1 = bld_insn_2(bld, NV_OP_MAX, src1, zero);
+ val3 = bld_insn_2(bld, NV_OP_MAX, src3, neg128);
+ val3 = bld_insn_2(bld, NV_OP_MIN, val3, pos128);
+ val3 = bld_pow(bld, val1, val3);
+
+ dst0[2] = bld_insn_1(bld, NV_OP_MOV, zero);
+ dst0[2]->insn->cc = NV_CC_LE;
+ dst0[2]->insn->flags_src = new_ref(bld->pc, val0->insn->flags_def);
+
+ dst0[2] = bld_insn_2(bld, NV_OP_SELECT, val3, dst0[2]);
+ }
+}
+
+static INLINE void
+get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg)
+{
+ switch (insn->Texture.Texture) {
+ case TGSI_TEXTURE_1D:
+ *arg = *dim = 1;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ *dim = 1;
+ *arg = 2;
+ break;
+ case TGSI_TEXTURE_UNKNOWN:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ *arg = *dim = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ *dim = 2;
+ *arg = 3;
+ break;
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ *dim = *arg = 3;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
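+/* For TXP, divide the coordinates by q: interpolants are switched to
+ * PINTERP with 1/q as the perspective factor, anything else gets an
+ * explicit multiply by 1/q.
+ */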
+static void
+load_proj_tex_coords(struct bld_context *bld,
+ struct nv_value *t[4], int dim,
+ const struct tgsi_full_instruction *insn)
+{
+ int c, mask = 0;
+
+ t[3] = emit_fetch(bld, insn, 0, 3);
+
+ if (t[3]->insn->opcode == NV_OP_PINTERP) {
+ t[3]->insn->opcode = NV_OP_LINTERP;
+ nv_reference(bld->pc, &t[3]->insn->src[1], NULL);
+ }
+
+ t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]);
+
+ for (c = 0; c < dim; ++c) {
+ t[c] = emit_fetch(bld, insn, 0, c);
+ if (t[c]->insn->opcode == NV_OP_LINTERP)
+ t[c]->insn->opcode = NV_OP_PINTERP;
+
+ if (t[c]->insn->opcode == NV_OP_PINTERP)
+ nv_reference(bld->pc, &t[c]->insn->src[1], t[3]);
+ else
+ mask |= 1 << c;
+ }
+
+ for (c = 0; mask; ++c, mask >>= 1) {
+ if (!(mask & 1))
+ continue;
+ t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], t[3]);
+ }
+}
+
+static void
+bld_tex(struct bld_context *bld, struct nv_value *dst0[4],
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *t[4];
+ struct nv_instruction *nvi;
+ uint opcode = translate_opcode(insn->Instruction.Opcode);
+ int arg, dim, c;
+
+ get_tex_dim(insn, &dim, &arg);
+
+ if (insn->Texture.Texture == TGSI_TEXTURE_CUBE) {
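+      /* XXX: cube map coordinates not lowered here yet */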
+ }
+ // else
+ if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
+ load_proj_tex_coords(bld, t, dim, insn);
+ } else
+ for (c = 0; c < dim; ++c)
+ t[c] = emit_fetch(bld, insn, 0, c);
+
+ if (arg != dim)
+ t[dim] = emit_fetch(bld, insn, 0, 2);
+
+ if (insn->Instruction.Opcode == TGSI_OPCODE_TXB ||
+ insn->Instruction.Opcode == TGSI_OPCODE_TXL) {
+ t[arg++] = emit_fetch(bld, insn, 0, 3);
+ }
+
+ for (c = 0; c < arg; ++c) {
+ t[c] = bld_insn_1(bld, NV_OP_MOV, t[c]);
+ t[c]->reg.type = NV_TYPE_F32;
+ }
+
+ nvi = new_instruction(bld->pc, opcode);
+
+ for (c = 0; c < 4; ++c) {
+ nvi->def[c] = dst0[c] = new_value(bld->pc, NV_FILE_GPR, NV_TYPE_F32);
+ nvi->def[c]->insn = nvi;
+ }
+ for (c = 0; c < arg; ++c)
+ nvi->src[c] = new_ref(bld->pc, t[c]);
+
+ nvi->tex_t = insn->Src[1].Register.Index;
+ nvi->tex_s = 0;
+ nvi->tex_mask = 0xf;
+ nvi->tex_cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0;
+ nvi->tex_live = 0;
+ nvi->tex_argc = arg;
+}
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL(chan, inst) \
+ for (chan = 0; chan < 4; ++chan) \
+ if ((inst)->Dst[0].Register.WriteMask & (1 << chan))
+
+static void
+bld_instruction(struct bld_context *bld,
+ const struct tgsi_full_instruction *insn)
+{
+ struct nv_value *src0;
+ struct nv_value *src1;
+ struct nv_value *src2;
+ struct nv_value *dst0[4];
+ struct nv_value *temp;
+ int c;
+ uint opcode = translate_opcode(insn->Instruction.Opcode);
+
+ tgsi_dump_instruction(insn, 1);
+
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MUL:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, opcode, src0, src1);
+ }
+ break;
+ case TGSI_OPCODE_ARL:
+ src1 = bld_imm_u32(bld, 4);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+         (temp = bld_insn_1(bld, NV_OP_FLOOR, src0))->reg.type = NV_TYPE_S32;
+ dst0[c] = bld_insn_2(bld, NV_OP_SHL, temp, src1);
+ }
+ break;
+ case TGSI_OPCODE_CMP:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ src0 = bld_predicate(bld, src0, FALSE);
+
+ src1 = bld_insn_1(bld, NV_OP_MOV, src1);
+ src1->insn->flags_src = new_ref(bld->pc, src0);
+ src1->insn->cc = NV_CC_LT;
+
+ src2 = bld_insn_1(bld, NV_OP_MOV, src2);
+ src2->insn->flags_src = new_ref(bld->pc, src0);
+ src2->insn->cc = NV_CC_GE;
+
+ dst0[c] = bld_insn_2(bld, NV_OP_SELECT, src1, src2);
+ }
+ break;
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ if (insn->Dst[0].Register.WriteMask & 7)
+ temp = bld_insn_1(bld, opcode, temp);
+ for (c = 0; c < 3; ++c)
+ if (insn->Dst[0].Register.WriteMask & (1 << c))
+ dst0[c] = temp;
+ if (!(insn->Dst[0].Register.WriteMask & (1 << 3)))
+ break;
+ src0 = emit_fetch(bld, insn, 0, 3);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ dst0[3] = bld_insn_1(bld, opcode, temp);
+ break;
+ case TGSI_OPCODE_DP3:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src1 = emit_fetch(bld, insn, 1, 0);
+ temp = bld_insn_2(bld, NV_OP_MUL, src0, src1);
+ for (c = 1; c < 3; ++c) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ temp = bld_insn_3(bld, NV_OP_MAD, src0, src1, temp);
+ }
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_DP4:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src1 = emit_fetch(bld, insn, 1, 0);
+ temp = bld_insn_2(bld, NV_OP_MUL, src0, src1);
+ for (c = 1; c < 4; ++c) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ temp = bld_insn_3(bld, NV_OP_MAD, src0, src1, temp);
+ }
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_EX2:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PREEX2, src0);
+ temp = bld_insn_1(bld, NV_OP_EX2, temp);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_FRC:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ dst0[c] = bld_insn_1(bld, NV_OP_FLOOR, src0);
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB, src0, dst0[c]);
+ }
+ break;
+ case TGSI_OPCODE_KIL:
+ for (c = 0; c < 4; ++c) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ bld_kil(bld, src0);
+ }
+ break;
+ case TGSI_OPCODE_KILP:
+ (new_instruction(bld->pc, NV_OP_KIL))->fixed = 1;
+ break;
+ case TGSI_OPCODE_IF:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+
+ nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);
+
+ bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
+ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
+
+ src1 = bld_predicate(bld, emit_fetch(bld, insn, 0, 0), TRUE);
+
+ bld_flow(bld, NV_OP_BRA, NV_CC_EQ, src1, NULL, (bld->cond_lvl == 0));
+
+ ++bld->cond_lvl;
+ bld_new_block(bld, b);
+ }
+ break;
+ case TGSI_OPCODE_ELSE:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+
+ --bld->cond_lvl;
+ nvbb_attach_block(bld->join_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
+
+ bld->cond_bb[bld->cond_lvl]->exit->target = b;
+ bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
+
+ new_instruction(bld->pc, NV_OP_BRA)->is_terminator = 1;
+
+ ++bld->cond_lvl;
+ bld_new_block(bld, b);
+ }
+ break;
+ case TGSI_OPCODE_ENDIF:
+ {
+ struct nv_basic_block *b = new_basic_block(bld->pc);
+
+ --bld->cond_lvl;
+ nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD);
+ nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD);
+
+ bld->cond_bb[bld->cond_lvl]->exit->target = b;
+
+ bld_new_block(bld, b);
+
+ if (!bld->cond_lvl && bld->join_bb[bld->cond_lvl]) {
+ bld->join_bb[bld->cond_lvl]->exit->prev->target = b;
+ new_instruction(bld->pc, NV_OP_JOIN)->is_join = TRUE;
+ }
+ }
+ break;
+ case TGSI_OPCODE_BGNLOOP:
+ {
+ struct nv_basic_block *bl = new_basic_block(bld->pc);
+ struct nv_basic_block *bb = new_basic_block(bld->pc);
+
+ bld->loop_bb[bld->loop_lvl] = bl;
+ bld->brkt_bb[bld->loop_lvl] = bb;
+
+ bld_flow(bld, NV_OP_BREAKADDR, NV_CC_TR, NULL, bb, FALSE);
+
+ nvbb_attach_block(bld->pc->current_block, bl, CFG_EDGE_LOOP_ENTER);
+
+ bld_new_block(bld, bld->loop_bb[bld->loop_lvl++]);
+
+ if (bld->loop_lvl == bld->pc->loop_nesting_bound)
+ bld->pc->loop_nesting_bound++;
+
+ bld_clear_def_use(&bld->tvs[0][0], BLD_MAX_TEMPS, bld->loop_lvl);
+ bld_clear_def_use(&bld->avs[0][0], BLD_MAX_ADDRS, bld->loop_lvl);
+ bld_clear_def_use(&bld->pvs[0][0], BLD_MAX_PREDS, bld->loop_lvl);
+ }
+ break;
+ case TGSI_OPCODE_BRK:
+ {
+ struct nv_basic_block *bb = bld->brkt_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BREAK, NV_CC_TR, NULL, bb, FALSE);
+
+ /* XXX: don't do this for redundant BRKs */
+ nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_LOOP_LEAVE);
+ }
+ break;
+ case TGSI_OPCODE_CONT:
+ {
+ struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1];
+
+ bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);
+
+ nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ {
+ struct nv_basic_block *bb = bld->loop_bb[--bld->loop_lvl];
+
+ bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE);
+
+ nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK);
+
+ bld_loop_end(bld, bb); /* replace loop-side operand of the phis */
+
+ bld_new_block(bld, bld->brkt_bb[bld->loop_lvl]);
+ }
+ break;
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_TRUNC:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ dst0[c] = bld_insn_1(bld, opcode, src0);
+ }
+ break;
+ case TGSI_OPCODE_LIT:
+ bld_lit(bld, dst0, insn);
+ break;
+ case TGSI_OPCODE_LRP:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ dst0[c] = bld_insn_2(bld, NV_OP_SUB, src1, src2);
+ dst0[c] = bld_insn_3(bld, NV_OP_MAD, dst0[c], src0, src2);
+ }
+ break;
+ case TGSI_OPCODE_MOV:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = emit_fetch(bld, insn, 0, c);
+ break;
+ case TGSI_OPCODE_MAD:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ src2 = emit_fetch(bld, insn, 2, c);
+ dst0[c] = bld_insn_3(bld, opcode, src0, src1, src2);
+ }
+ break;
+ case TGSI_OPCODE_POW:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ src1 = emit_fetch(bld, insn, 1, 0);
+ temp = bld_pow(bld, src0, src1);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_LG2:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, opcode, src0);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_RSQ:
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_ABS, src0);
+ temp = bld_insn_1(bld, NV_OP_RSQ, temp);
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ dst0[c] = temp;
+ break;
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_USNE:
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, NV_OP_SET, src0, src1);
+ dst0[c]->insn->set_cond = translate_setcc(insn->Instruction.Opcode);
+ dst0[c]->reg.type = infer_dst_type(insn->Instruction.Opcode);
+
+ dst0[c]->insn->src[0]->typecast =
+ dst0[c]->insn->src[1]->typecast =
+ infer_src_type(insn->Instruction.Opcode);
+
+ if (dst0[c]->reg.type != NV_TYPE_F32)
+ break;
+ dst0[c] = bld_insn_1(bld, NV_OP_ABS, dst0[c]);
+ dst0[c]->insn->src[0]->typecast = NV_TYPE_S32;
+ dst0[c]->reg.type = NV_TYPE_S32;
+ dst0[c] = bld_insn_1(bld, NV_OP_CVT, dst0[c]);
+ dst0[c]->reg.type = NV_TYPE_F32;
+ }
+ break;
+ case TGSI_OPCODE_SCS:
+ if (insn->Dst[0].Register.WriteMask & 0x3) {
+ src0 = emit_fetch(bld, insn, 0, 0);
+ temp = bld_insn_1(bld, NV_OP_PRESIN, src0);
+ if (insn->Dst[0].Register.WriteMask & 0x1)
+ dst0[0] = bld_insn_1(bld, NV_OP_COS, temp);
+ if (insn->Dst[0].Register.WriteMask & 0x2)
+ dst0[1] = bld_insn_1(bld, NV_OP_SIN, temp);
+ }
+ if (insn->Dst[0].Register.WriteMask & 0x4)
+ dst0[2] = bld_imm_f32(bld, 0.0f);
+ if (insn->Dst[0].Register.WriteMask & 0x8)
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ case TGSI_OPCODE_SUB:
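+      /* implemented as ADD with the negate modifier toggled on the second source */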
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ src0 = emit_fetch(bld, insn, 0, c);
+ src1 = emit_fetch(bld, insn, 1, c);
+ dst0[c] = bld_insn_2(bld, NV_OP_ADD, src0, src1);
+ dst0[c]->insn->src[1]->mod ^= NV_MOD_NEG;
+ }
+ break;
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ bld_tex(bld, dst0, insn);
+ break;
+ case TGSI_OPCODE_XPD:
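+      /* per-channel cross product: MUL followed by MAD with the accumulated
+       * term negated; the w channel is forced to 1.0
+       */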
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) {
+ if (c == 3) {
+ dst0[3] = bld_imm_f32(bld, 1.0f);
+ break;
+ }
+ src0 = emit_fetch(bld, insn, 0, (c + 1) % 3);
+ src1 = emit_fetch(bld, insn, 1, (c + 2) % 3);
+ dst0[c] = bld_insn_2(bld, NV_OP_MUL, src0, src1);
+
+ src0 = emit_fetch(bld, insn, 0, (c + 2) % 3);
+ src1 = emit_fetch(bld, insn, 1, (c + 1) % 3);
+ dst0[c] = bld_insn_3(bld, NV_OP_MAD, src0, src1, dst0[c]);
+
+ dst0[c]->insn->src[2]->mod ^= NV_MOD_NEG;
+ }
+ break;
+ case TGSI_OPCODE_RET:
+ (new_instruction(bld->pc, NV_OP_RET))->fixed = 1;
+ break;
+ case TGSI_OPCODE_END:
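+      /* fragment programs export their outputs explicitly at the end */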
+ if (bld->ti->p->type == PIPE_SHADER_FRAGMENT)
+ bld_export_outputs(bld);
+ break;
+ default:
+ NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode);
+ abort();
+ break;
+ }
+
+ FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
+ emit_store(bld, insn, c, dst0[c]);
+}
+
+static INLINE void
+bld_free_value_trackers(struct bld_value_stack *base, int n)
+{
+ int i, c;
+
+ for (i = 0; i < n; ++i)
+ for (c = 0; c < 4; ++c)
+ if (base[i * 4 + c].body)
+ FREE(base[i * 4 + c].body);
+}
+
+int
+nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
+{
+ struct bld_context *bld = CALLOC_STRUCT(bld_context);
+ int c;
+
+ pc->root = pc->current_block = new_basic_block(pc);
+
+ bld->pc = pc;
+ bld->ti = ti;
+
+ pc->loop_nesting_bound = 1;
+
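+   /* fragment programs: interpolate the position input and keep its reciprocal
+    * in frgcrd[3] (assumed here to be the w coordinate) for later use
+    */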
+ c = util_bitcount(bld->ti->p->fp.interp >> 24);
+ if (c && ti->p->type == PIPE_SHADER_FRAGMENT) {
+ bld->frgcrd[3] = new_value(pc, NV_FILE_MEM_V, NV_TYPE_F32);
+ bld->frgcrd[3]->reg.id = c - 1;
+ bld->frgcrd[3] = bld_insn_1(bld, NV_OP_LINTERP, bld->frgcrd[3]);
+ bld->frgcrd[3] = bld_insn_1(bld, NV_OP_RCP, bld->frgcrd[3]);
+ }
+
+ tgsi_parse_init(&bld->parse[0], ti->p->pipe.tokens);
+
+ while (!tgsi_parse_end_of_tokens(&bld->parse[bld->call_lvl])) {
+ const union tgsi_full_token *tok = &bld->parse[bld->call_lvl].FullToken;
+
+ tgsi_parse_token(&bld->parse[bld->call_lvl]);
+
+ switch (tok->Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ bld_instruction(bld, &tok->FullInstruction);
+ break;
+ default:
+ break;
+ }
+ }
+
+ bld_free_value_trackers(&bld->tvs[0][0], BLD_MAX_TEMPS);
+ bld_free_value_trackers(&bld->avs[0][0], BLD_MAX_ADDRS);
+ bld_free_value_trackers(&bld->pvs[0][0], BLD_MAX_PREDS);
+
+ bld_free_value_trackers(&bld->ovs[0][0], PIPE_MAX_SHADER_OUTPUTS);
+
+ FREE(bld);
+ return 0;
+}
+
+/* If a variable is assigned in a loop, replace all references to the value
+ * from outside the loop with a phi value.
+ */
+static void
+bld_replace_value(struct nv_pc *pc, struct nv_basic_block *b,
+ struct nv_value *old_val,
+ struct nv_value *new_val)
+{
+ struct nv_instruction *nvi;
+
+ for (nvi = b->entry; nvi; nvi = nvi->next) {
+ int s;
+ for (s = 0; s < 5; ++s) {
+ if (!nvi->src[s])
+ continue;
+ if (nvi->src[s]->value == old_val)
+ nv_reference(pc, &nvi->src[s], new_val);
+ }
+ if (nvi->flags_src && nvi->flags_src->value == old_val)
+ nv_reference(pc, &nvi->flags_src, new_val);
+ }
+
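+   /* mark this block as visited so the recursive walk over the successors
+    * terminates on cycles in the CFG
+    */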
+ b->pass_seq = pc->pass_seq;
+
+ if (b->out[0] && b->out[0]->pass_seq < pc->pass_seq)
+ bld_replace_value(pc, b->out[0], old_val, new_val);
+
+ if (b->out[1] && b->out[1]->pass_seq < pc->pass_seq)
+ bld_replace_value(pc, b->out[1], old_val, new_val);
+}
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 1f11950199..d41a59d05d 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -29,96 +29,6 @@
#include "nv50_context.h"
#include "nv50_resource.h"
-static INLINE uint32_t
-nv50_vbo_type_to_hw(enum pipe_format format)
-{
- const struct util_format_description *desc;
-
- desc = util_format_description(format);
- assert(desc);
-
- switch (desc->channel[0].type) {
- case UTIL_FORMAT_TYPE_FLOAT:
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
- case UTIL_FORMAT_TYPE_UNSIGNED:
- if (desc->channel[0].normalized) {
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
- }
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
- case UTIL_FORMAT_TYPE_SIGNED:
- if (desc->channel[0].normalized) {
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
- }
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
- /*
- case PIPE_FORMAT_TYPE_UINT:
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
- case PIPE_FORMAT_TYPE_SINT:
- return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SINT; */
- default:
- return 0;
- }
-}
-
-static INLINE uint32_t
-nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
-{
- static const uint32_t hw_values[] = {
- 0, 0, 0, 0,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
- 0, 0, 0, 0,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
- NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
-
- /* we'd also have R11G11B10 and R10G10B10A2 */
-
- assert(nr_c > 0 && nr_c <= 4);
-
- if (size > 32)
- return 0;
- size >>= (3 - 2);
-
- return hw_values[size + (nr_c - 1)];
-}
-
-static INLINE uint32_t
-nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
-{
- uint32_t hw_type, hw_size;
- enum pipe_format pf = ve->src_format;
- const struct util_format_description *desc;
- unsigned size, nr_components;
-
- desc = util_format_description(pf);
- assert(desc);
-
- size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
- nr_components = util_format_get_nr_components(pf);
-
- hw_type = nv50_vbo_type_to_hw(pf);
- hw_size = nv50_vbo_size_to_hw(size, nr_components);
-
- if (!hw_type || !hw_size) {
- NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
- abort();
- return 0x24e80000;
- }
-
- if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
- hw_size |= (1 << 31); /* no real swizzle bits :-( */
-
- return (hw_type | hw_size);
-}
-
struct instance {
struct nouveau_bo *bo;
unsigned delta;
@@ -533,7 +443,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
so_data (so, fui(v[1]));
break;
case 1:
- if (attrib == nv50->vertprog->cfg.edgeflag_in) {
+ if (attrib == nv50->vertprog->vp.edgeflag) {
so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
so_data (so, v[0] ? 1 : 0);
}
@@ -554,11 +464,8 @@ nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
{
unsigned i;
- for (i = 0; i < cso->num_elements; ++i) {
- struct pipe_vertex_element *ve = &cso->pipe[i];
-
- cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve);
- }
+ for (i = 0; i < cso->num_elements; ++i)
+ cso->hw[i] = nv50_format_table[cso->pipe[i].src_format].vtx;
}
struct nouveau_stateobj *
@@ -574,7 +481,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
nv50->vbo_fifo = 0;
if (nv50->screen->force_push ||
- nv50->vertprog->cfg.edgeflag_in < 16)
+ nv50->vertprog->vp.edgeflag < 16)
nv50->vbo_fifo = 0xffff;
for (i = 0; i < nv50->vtxbuf_nr; i++) {