diff options
Diffstat (limited to 'src/gallium/drivers/r300')
24 files changed, 1087 insertions, 1994 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index faceec9842..d7a2c8c462 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -9,7 +9,6 @@ C_SOURCES = \ r300_chipset.c \ r300_clear.c \ r300_context.c \ - r300_debug.c \ r300_emit.c \ r300_flush.c \ r300_fs.c \ @@ -21,6 +20,22 @@ C_SOURCES = \ r300_state_invariant.c \ r300_vs.c \ r300_surface.c \ - r300_texture.c + r300_texture.c \ + r300_tgsi_to_rc.c + +LIBRARY_INCLUDES = \ + -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ + -I$(TOP)/src/mesa \ + -I$(TOP)/include + +COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a + +EXTRA_OBJECTS = \ + $(COMPILER_ARCHIVE) include ../../Makefile.template + +.PHONY : $(COMPILER_ARCHIVE) + +$(COMPILER_ARCHIVE): + cd $(TOP)/src/mesa/drivers/dri/r300/compiler; make diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 233a32b53c..c8510bc63e 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -52,7 +52,7 @@ static boolean r300_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_constant_buffer(r300->draw, r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].user_count * + r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); draw_arrays(r300->draw, mode, start, count); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index d891fd6265..fc8a449893 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -34,6 +34,9 @@ #include "r300_screen.h" #include "r300_winsys.h" +struct r300_fragment_shader; +struct r300_vertex_shader; + struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ @@ -143,71 +146,10 @@ struct r300_constant_buffer { /* Buffer of 
constants */ /* XXX first number should be raised */ float constants[32][4]; - /* Number of user-defined constants */ - unsigned user_count; /* Total number of constants */ unsigned count; }; -struct r300_fragment_shader { - /* Parent class */ - struct pipe_shader_state state; - struct tgsi_shader_info info; - - /* Has this shader been translated yet? */ - boolean translated; - - /* Pixel stack size */ - int stack_size; - - /* Are there immediates in this shader? - * If not, we can heavily optimize recompilation. */ - boolean uses_imms; -}; - -struct r3xx_fragment_shader { - /* Parent class */ - struct r300_fragment_shader shader; - - /* Number of ALU instructions */ - int alu_instruction_count; - - /* Number of texture instructions */ - int tex_instruction_count; - - /* Number of texture indirections */ - int indirections; - - /* Indirection node offsets */ - int alu_offset[4]; - - /* Machine instructions */ - struct { - uint32_t alu_rgb_inst; - uint32_t alu_rgb_addr; - uint32_t alu_alpha_inst; - uint32_t alu_alpha_addr; - } instructions[64]; /* XXX magic num */ -}; - -struct r5xx_fragment_shader { - /* Parent class */ - struct r300_fragment_shader shader; - - /* Number of used instructions */ - int instruction_count; - - /* Machine instructions */ - struct { - uint32_t inst0; - uint32_t inst1; - uint32_t inst2; - uint32_t inst3; - uint32_t inst4; - uint32_t inst5; - } instructions[256]; /*< XXX magic number */ -}; - struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -242,33 +184,6 @@ struct r300_vertex_format { int fs_tab[16]; }; -struct r300_vertex_shader { - /* Parent class */ - struct pipe_shader_state state; - struct tgsi_shader_info info; - - /* Fallback shader, because Draw has issues */ - struct draw_vertex_shader* draw; - - /* Has this shader been translated yet? */ - boolean translated; - - /* Are there immediates in this shader? - * If not, we can heavily optimize recompilation. 
*/ - boolean uses_imms; - - /* Number of used instructions */ - int instruction_count; - - /* Machine instructions */ - struct { - uint32_t inst0; - uint32_t inst1; - uint32_t inst2; - uint32_t inst3; - } instructions[128]; /*< XXX magic number */ -}; - static struct pipe_viewport_state r300_viewport_identity = { .scale = {1.0, 1.0, 1.0, 1.0}, .translate = {0.0, 0.0, 0.0, 0.0}, diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c deleted file mode 100644 index c83e8526cf..0000000000 --- a/src/gallium/drivers/r300/r300_debug.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "r300_debug.h" - -void r3xx_dump_fs(struct r3xx_fragment_shader* fs) -{ - int i; - - for (i = 0; i < fs->alu_instruction_count; i++) { - } -} - -void r5xx_fs_dump(struct r5xx_fragment_shader* fs) -{ - int i; - uint32_t inst; - - for (i = 0; i < fs->instruction_count; i++) { - inst = fs->instructions[i].inst0; - debug_printf("%d: 0: CMN_INST 0x%08x:", i, inst); - switch (inst & 0x3) { - case R500_INST_TYPE_ALU: - debug_printf("ALU "); - break; - case R500_INST_TYPE_OUT: - debug_printf("OUT "); - break; - case R500_INST_TYPE_FC: - debug_printf("FC "); - break; - case R500_INST_TYPE_TEX: - debug_printf("TEX "); - break; - } - debug_printf("%s %s %s %s ", - inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", - inst & R500_INST_LAST ? "LAST" : "", - inst & R500_INST_NOP ? "NOP" : "", - inst & R500_INST_ALU_WAIT ? "ALU_WAIT" : ""); - debug_printf("wmask: %s omask: %s\n", - r5xx_fs_mask[(inst >> 11) & 0xf], - r5xx_fs_mask[(inst >> 15) & 0xf]); - switch (inst & 0x3) { - case R500_INST_TYPE_ALU: - case R500_INST_TYPE_OUT: - inst = fs->instructions[i].inst1; - debug_printf(" 1: RGB_ADDR 0x%08x:", inst); - debug_printf("Addr0: %d%c, Addr1: %d%c, " - "Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1 << 8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1 << 28)) ? 'c' : 't', - (inst >> 30)); - - inst = fs->instructions[i].inst2; - debug_printf(" 2: ALPHA_ADDR 0x%08x:", inst); - debug_printf("Addr0: %d%c, Addr1: %d%c, " - "Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1 << 8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1 << 18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1 << 28)) ? 
'c' : 't', - (inst >> 30)); - - inst = fs->instructions[i].inst3; - debug_printf(" 3: RGB_INST 0x%08x:", inst); - debug_printf("rgb_A_src:%d %s/%s/%s %d " - "rgb_B_src:%d %s/%s/%s %d\n", - inst & 0x3, r5xx_fs_swiz[(inst >> 2) & 0x7], - r5xx_fs_swiz[(inst >> 5) & 0x7], - r5xx_fs_swiz[(inst >> 8) & 0x7], - (inst >> 11) & 0x3, (inst >> 13) & 0x3, - r5xx_fs_swiz[(inst >> 15) & 0x7], - r5xx_fs_swiz[(inst >> 18) & 0x7], - r5xx_fs_swiz[(inst >> 21) & 0x7], - (inst >> 24) & 0x3); - - inst = fs->instructions[i].inst4; - debug_printf(" 4: ALPHA_INST 0x%08x:", inst); - debug_printf("%s dest:%d%s alp_A_src:%d %s %d " - "alp_B_src:%d %s %d w:%d\n", - r5xx_fs_op_alpha[inst & 0xf], (inst >> 4) & 0x7f, - inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, - r5xx_fs_swiz[(inst >> 14) & 0x7], (inst >> 17) & 0x3, - (inst >> 19) & 0x3, r5xx_fs_swiz[(inst >> 21) & 0x7], - (inst >> 24) & 0x3, (inst >> 31) & 0x1); - - inst = fs->instructions[i].inst5; - debug_printf(" 5: RGBA_INST 0x%08x:", inst); - debug_printf("%s dest:%d%s rgb_C_src:%d %s/%s/%s %d " - "alp_C_src:%d %s %d\n", - r5xx_fs_op_rgb[inst & 0xf], (inst >> 4) & 0x7f, - inst & (1 << 11) ? "(rel)":"", (inst >> 12) & 0x3, - r5xx_fs_swiz[(inst >> 14) & 0x7], - r5xx_fs_swiz[(inst >> 17) & 0x7], - r5xx_fs_swiz[(inst >> 20) & 0x7], - (inst >> 23) & 0x3, (inst >> 25) & 0x3, - r5xx_fs_swiz[(inst >> 27) & 0x7], (inst >> 30) & 0x3); - break; - case R500_INST_TYPE_FC: - /* XXX don't even bother yet */ - break; - case R500_INST_TYPE_TEX: - inst = fs->instructions[i].inst1; - debug_printf(" 1: TEX_INST 0x%08x: id: %d " - "op:%s, %s, %s %s\n", - inst, (inst >> 16) & 0xf, - r5xx_fs_tex[(inst >> 22) & 0x7], - (inst & (1 << 25)) ? "ACQ" : "", - (inst & (1 << 26)) ? "IGNUNC" : "", - (inst & (1 << 27)) ? "UNSCALED" : "SCALED"); - - inst = fs->instructions[i].inst2; - debug_printf(" 2: TEX_ADDR 0x%08x: " - "src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", - inst, inst & 0x7f, inst & (1 << 7) ? 
"(rel)" : "", - r5xx_fs_swiz[(inst >> 8) & 0x3], - r5xx_fs_swiz[(inst >> 10) & 0x3], - r5xx_fs_swiz[(inst >> 12) & 0x3], - r5xx_fs_swiz[(inst >> 14) & 0x3], - (inst >> 16) & 0x7f, inst & (1 << 23) ? "(rel)" : "", - r5xx_fs_swiz[(inst >> 24) & 0x3], - r5xx_fs_swiz[(inst >> 26) & 0x3], - r5xx_fs_swiz[(inst >> 28) & 0x3], - r5xx_fs_swiz[(inst >> 30) & 0x3]); - - inst = fs->instructions[i].inst3; - debug_printf(" 3: TEX_DXDY 0x%08x\n", inst); - break; - } - } -} - -static void r300_vs_op_dump(uint32_t op) -{ - debug_printf(" dst: %d%s op: ", - (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); - if (op & 0x80) { - if (op & 0x1) { - debug_printf("PVS_MACRO_OP_2CLK_M2X_ADD\n"); - } else { - debug_printf(" PVS_MACRO_OP_2CLK_MADD\n"); - } - } else if (op & 0x40) { - debug_printf("%s\n", r300_vs_me_ops[op & 0x1f]); - } else { - debug_printf("%s\n", r300_vs_ve_ops[op & 0x1f]); - } -} - -void r300_vs_src_dump(uint32_t src) -{ - debug_printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", - (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], - src & (1 << 25) ? "-" : " ", - r300_vs_swiz_debug[(src >> 13) & 0x7], - src & (1 << 26) ? "-" : " ", - r300_vs_swiz_debug[(src >> 16) & 0x7], - src & (1 << 27) ? "-" : " ", - r300_vs_swiz_debug[(src >> 19) & 0x7], - src & (1 << 28) ? 
"-" : " ", - r300_vs_swiz_debug[(src >> 22) & 0x7]); -} - -void r300_vs_dump(struct r300_vertex_shader* vs) -{ - int i; - - for (i = 0; i < vs->instruction_count; i++) { - debug_printf("%d: op: 0x%08x", i, vs->instructions[i].inst0); - r300_vs_op_dump(vs->instructions[i].inst0); - debug_printf(" src0: 0x%08x", vs->instructions[i].inst1); - r300_vs_src_dump(vs->instructions[i].inst1); - debug_printf(" src1: 0x%08x", vs->instructions[i].inst2); - r300_vs_src_dump(vs->instructions[i].inst2); - debug_printf(" src2: 0x%08x", vs->instructions[i].inst3); - r300_vs_src_dump(vs->instructions[i].inst3); - } -} diff --git a/src/gallium/drivers/r300/r300_debug.h b/src/gallium/drivers/r300/r300_debug.h deleted file mode 100644 index 6b58c1e250..0000000000 --- a/src/gallium/drivers/r300/r300_debug.h +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_DEBUG_H -#define R300_DEBUG_H - -#include "r300_reg.h" -#include "r300_fs.h" -#include "r300_vs.h" - -static char* r5xx_fs_swiz[] = { - " R", - " G", - " B", - " A", - " 0", - ".5", - " 1", - " U", -}; - -static char* r5xx_fs_op_rgb[] = { - "MAD", - "DP3", - "DP4", - "D2A", - "MIN", - "MAX", - "---", - "CND", - "CMP", - "FRC", - "SOP", - "MDH", - "MDV", -}; - -static char* r5xx_fs_op_alpha[] = { - "MAD", - " DP", - "MIN", - "MAX", - "---", - "CND", - "CMP", - "FRC", - "EX2", - "LN2", - "RCP", - "RSQ", - "SIN", - "COS", - "MDH", - "MDV", -}; - -static char* r5xx_fs_mask[] = { - "NONE", - "R ", - " G ", - "RG ", - " B ", - "R B ", - " GB ", - "RGB ", - " A", - "R A", - " G A", - "RG A", - " BA", - "R BA", - " GBA", - "RGBA", -}; - -static char* r5xx_fs_tex[] = { - " NOP", - " LD", - "TEXKILL", - " PROJ", - "LODBIAS", - " LOD", - " DXDY", -}; - -static char* r300_vs_ve_ops[] = { - /* R300 vector ops */ - " VE_NO_OP", - " VE_DOT_PRODUCT", - " VE_MULTIPLY", - " VE_ADD", - " VE_MULTIPLY_ADD", - " VE_DISTANCE_FACTOR", - " VE_FRACTION", - " VE_MAXIMUM", - " VE_MINIMUM", - "VE_SET_GREATER_THAN_EQUAL", - " VE_SET_LESS_THAN", - " VE_MULTIPLYX2_ADD", - " VE_MULTIPLY_CLAMP", - " VE_FLT2FIX_DX", - " VE_FLT2FIX_DX_RND", - /* R500 vector ops */ - " VE_PRED_SET_EQ_PUSH", - " VE_PRED_SET_GT_PUSH", - " VE_PRED_SET_GTE_PUSH", - " VE_PRED_SET_NEQ_PUSH", - " VE_COND_WRITE_EQ", - " VE_COND_WRITE_GT", - " VE_COND_WRITE_GTE", - " VE_COND_WRITE_NEQ", - " VE_SET_GREATER_THAN", - " VE_SET_EQUAL", - " VE_SET_NOT_EQUAL", - " (reserved)", - " (reserved)", - " (reserved)", -}; - -static char* r300_vs_me_ops[] = { - /* R300 math ops */ - " ME_NO_OP", - " ME_EXP_BASE2_DX", - " ME_LOG_BASE2_DX", - " 
ME_EXP_BASEE_FF", - " ME_LIGHT_COEFF_DX", - " ME_POWER_FUNC_FF", - " ME_RECIP_DX", - " ME_RECIP_FF", - " ME_RECIP_SQRT_DX", - " ME_RECIP_SQRT_FF", - " ME_MULTIPLY", - " ME_EXP_BASE2_FULL_DX", - " ME_LOG_BASE2_FULL_DX", - " ME_POWER_FUNC_FF_CLAMP_B", - "ME_POWER_FUNC_FF_CLAMP_B1", - "ME_POWER_FUNC_FF_CLAMP_01", - " ME_SIN", - " ME_COS", - /* R500 math ops */ - " ME_LOG_BASE2_IEEE", - " ME_RECIP_IEEE", - " ME_RECIP_SQRT_IEEE", - " ME_PRED_SET_EQ", - " ME_PRED_SET_GT", - " ME_PRED_SET_GTE", - " ME_PRED_SET_NEQ", - " ME_PRED_SET_CLR", - " ME_PRED_SET_INV", - " ME_PRED_SET_POP", - " ME_PRED_SET_RESTORE", - " (reserved)", - " (reserved)", - " (reserved)", -}; - -/* XXX refactor to avoid clashing symbols */ -static char* r300_vs_src_debug[] = { - "t", - "i", - "c", - "a", -}; - -static char* r300_vs_dst_debug[] = { - "t", - "a0", - "o", - "ox", - "a", - "i", - "u", - "u", -}; - -static char* r300_vs_swiz_debug[] = { - "X", - "Y", - "Z", - "W", - "0", - "1", - "U", - "U", -}; - -void r5xx_fs_dump(struct r5xx_fragment_shader* fs); -void r3xx_dump_fs(struct r3xx_fragment_shader* fs); - -void r300_vs_dump(struct r300_vertex_shader* vs); - -#endif /* R300_DEBUG_H */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 7ba56cdc1d..53256fc6dd 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -24,6 +24,9 @@ #include "r300_emit.h" +#include "r300_fs.h" +#include "r300_vs.h" + void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend) { @@ -109,73 +112,158 @@ void r300_emit_dsa_state(struct r300_context* r300, END_CS; } -void r300_emit_fragment_shader(struct r300_context* r300, - struct r3xx_fragment_shader* fs) +static const float * get_shader_constant( + struct r300_context * r300, + struct rc_constant * constant, + struct r300_constant_buffer * externals) +{ + static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 }; + switch(constant->Type) { + case 
RC_CONSTANT_EXTERNAL: + return externals->constants[constant->u.External]; + + case RC_CONSTANT_IMMEDIATE: + return constant->u.Immediate; + + default: + debug_printf("r300: Implementation error: Unhandled constant type %i\n", + constant->Type); + return zero; + } +} + +/* Convert a normal single-precision float into the 7.16 format + * used by the R300 fragment shader. + */ +static uint32_t pack_float24(float f) { + union { + float fl; + uint32_t u; + } u; + float mantissa; + int exponent; + uint32_t float24 = 0; + + if (f == 0.0) + return 0; + + u.fl = f; + + mantissa = frexpf(f, &exponent); + + /* Handle -ve */ + if (mantissa < 0) { + float24 |= (1 << 23); + mantissa = mantissa * -1.0; + } + /* Handle exponent, bias of 63 */ + exponent += 62; + float24 |= (exponent << 16); + /* Kill 7 LSB of mantissa */ + float24 |= (u.u & 0x7FFFFF) >> 7; + + return float24; +} + +void r300_emit_fragment_program_code(struct r300_context* r300, + struct rX00_fragment_program_code* generic_code, + struct r300_constant_buffer* externals) +{ + struct r300_fragment_program_code * code = &generic_code->code.r300; + struct rc_constant_list * constants = &generic_code->constants; int i; CS_LOCALS(r300); - BEGIN_CS(22); - - OUT_CS_REG(R300_US_CONFIG, fs->indirections); - OUT_CS_REG(R300_US_PIXSIZE, fs->shader.stack_size); - /* XXX figure out exactly how big the sizes are on this reg */ - OUT_CS_REG(R300_US_CODE_OFFSET, 0x40); - /* XXX figure these ones out a bit better kthnx */ - OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_3, 0x40 | R300_RGBA_OUT); - - for (i = 0; i < fs->alu_instruction_count; i++) { - OUT_CS_REG(R300_US_ALU_RGB_INST_0 + (4 * i), - fs->instructions[i].alu_rgb_inst); - OUT_CS_REG(R300_US_ALU_RGB_ADDR_0 + (4 * i), - fs->instructions[i].alu_rgb_addr); - OUT_CS_REG(R300_US_ALU_ALPHA_INST_0 + (4 * i), - fs->instructions[i].alu_alpha_inst); - 
OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0 + (4 * i), - fs->instructions[i].alu_alpha_addr); + BEGIN_CS(15 + + code->alu.length * 4 + + (code->tex.length ? (1 + code->tex.length) : 0) + + (constants->Count ? (1 + constants->Count * 4) : 0)); + + OUT_CS_REG(R300_US_CONFIG, code->config); + OUT_CS_REG(R300_US_PIXSIZE, code->pixsize); + OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset); + + OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4); + for(i = 0; i < 4; ++i) + OUT_CS(code->code_addr[i]); + + OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CS(code->alu.inst[i].rgb_inst); + + OUT_CS_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CS(code->alu.inst[i].rgb_addr); + + OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CS(code->alu.inst[i].alpha_inst); + + OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CS(code->alu.inst[i].alpha_addr); + + if (code->tex.length) { + OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); + for(i = 0; i < code->tex.length; ++i) + OUT_CS(code->tex.inst[i]); + } + + if (constants->Count) { + OUT_CS_ONE_REG(R300_PFS_PARAM_0_X, constants->Count * 4); + for(i = 0; i < constants->Count; ++i) { + const float * data = get_shader_constant(r300, &constants->Constants[i], externals); + OUT_CS(pack_float24(data[0])); + OUT_CS(pack_float24(data[1])); + OUT_CS(pack_float24(data[2])); + OUT_CS(pack_float24(data[3])); + } } END_CS; } -void r500_emit_fragment_shader(struct r300_context* r300, - struct r5xx_fragment_shader* fs) +void r500_emit_fragment_program_code(struct r300_context* r300, + struct rX00_fragment_program_code* generic_code, + struct r300_constant_buffer* externals) { + struct r500_fragment_program_code * code = &generic_code->code.r500; + struct rc_constant_list * constants = &generic_code->constants; int i; - struct 
r300_constant_buffer* constants = - &r300->shader_constants[PIPE_SHADER_FRAGMENT]; CS_LOCALS(r300); - BEGIN_CS(9 + (fs->instruction_count * 6) + (constants->count ? 3 : 0) + - (constants->count * 4)); - OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - OUT_CS_REG(R500_US_PIXSIZE, fs->shader.stack_size); - OUT_CS_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) | - R500_US_CODE_END_ADDR(fs->instruction_count)); + BEGIN_CS(13 + + ((code->inst_end + 1) * 6) + + (constants->Count ? (3 + (constants->Count * 4)) : 0)); + OUT_CS_REG(R500_US_CONFIG, 0); + OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx); + OUT_CS_REG(R500_US_CODE_RANGE, + R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); + OUT_CS_REG(R500_US_CODE_OFFSET, 0); + OUT_CS_REG(R500_US_CODE_ADDR, + R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end)); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, fs->instruction_count * 6); - for (i = 0; i < fs->instruction_count; i++) { - OUT_CS(fs->instructions[i].inst0); - OUT_CS(fs->instructions[i].inst1); - OUT_CS(fs->instructions[i].inst2); - OUT_CS(fs->instructions[i].inst3); - OUT_CS(fs->instructions[i].inst4); - OUT_CS(fs->instructions[i].inst5); - } - - if (constants->count) { - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, - R500_GA_US_VECTOR_INDEX_TYPE_CONST); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->count * 4); - for (i = 0; i < constants->count; i++) { - OUT_CS_32F(constants->constants[i][0]); - OUT_CS_32F(constants->constants[i][1]); - OUT_CS_32F(constants->constants[i][2]); - OUT_CS_32F(constants->constants[i][3]); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6); + for (i = 0; i <= code->inst_end; i++) { + OUT_CS(code->inst[i].inst0); + OUT_CS(code->inst[i].inst1); + OUT_CS(code->inst[i].inst2); + OUT_CS(code->inst[i].inst3); + OUT_CS(code->inst[i].inst4); + OUT_CS(code->inst[i].inst5); + } + + if (constants->Count) { + 
OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4); + for (i = 0; i < constants->Count; i++) { + const float * data = get_shader_constant(r300, &constants->Constants[i], externals); + OUT_CS_32F(data[0]); + OUT_CS_32F(data[1]); + OUT_CS_32F(data[2]); + OUT_CS_32F(data[3]); } } @@ -190,7 +278,7 @@ void r300_emit_fb_state(struct r300_context* r300, int i; CS_LOCALS(r300); - BEGIN_CS((8 * fb->nr_cbufs) + (fb->zsbuf ? 8 : 0) + 4); + BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4); for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); @@ -199,8 +287,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), pixpitch | - r300_translate_colorformat(tex->tex.format)); + OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); + OUT_CS_RELOC(tex->buffer, pixpitch | + r300_translate_colorformat(tex->tex.format), 0, + RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(fb->cbufs[i]->format)); @@ -216,7 +306,8 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); - OUT_CS_REG(R300_ZB_DEPTHPITCH, pixpitch); + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); + OUT_CS_RELOC(tex->buffer, pixpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); } OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, @@ -380,13 +471,13 @@ void r300_emit_vertex_format_state(struct r300_context* r300) END_CS; } -void r300_emit_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs) +void r300_emit_vertex_program_code(struct r300_context* r300, + struct r300_vertex_program_code* code, + struct r300_constant_buffer* constants) { int i; struct r300_screen* r300screen = 
r300_screen(r300->context.screen); - struct r300_constant_buffer* constants = - &r300->shader_constants[PIPE_SHADER_VERTEX]; + unsigned instruction_count = code->length / 4; CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { @@ -395,10 +486,10 @@ void r300_emit_vertex_shader(struct r300_context* r300, return; } - if (constants->count) { - BEGIN_CS(14 + (vs->instruction_count * 4) + (constants->count * 4)); + if (code->constants.Count) { + BEGIN_CS(14 + code->length + (code->constants.Count * 4)); } else { - BEGIN_CS(11 + (vs->instruction_count * 4)); + BEGIN_CS(11 + code->length); } /* R300_VAP_PVS_CODE_CNTL_0 @@ -408,30 +499,27 @@ void r300_emit_vertex_shader(struct r300_context* r300, * XXX these could be optimized to select better values... */ OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3); OUT_CS(R300_PVS_FIRST_INST(0) | - R300_PVS_XYZW_VALID_INST(vs->instruction_count - 1) | - R300_PVS_LAST_INST(vs->instruction_count - 1)); - OUT_CS(R300_PVS_MAX_CONST_ADDR(constants->count - 1)); - OUT_CS(vs->instruction_count - 1); + R300_PVS_XYZW_VALID_INST(instruction_count - 1) | + R300_PVS_LAST_INST(instruction_count - 1)); + OUT_CS(R300_PVS_MAX_CONST_ADDR(code->constants.Count - 1)); + OUT_CS(instruction_count - 1); OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, vs->instruction_count * 4); - for (i = 0; i < vs->instruction_count; i++) { - OUT_CS(vs->instructions[i].inst0); - OUT_CS(vs->instructions[i].inst1); - OUT_CS(vs->instructions[i].inst2); - OUT_CS(vs->instructions[i].inst3); - } + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); + for (i = 0; i < code->length; i++) + OUT_CS(code->body.d[i]); - if (constants->count) { + if (code->constants.Count) { OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, (r300screen->caps->is_r500 ? 
R500_PVS_CONST_START : R300_PVS_CONST_START)); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, constants->count * 4); - for (i = 0; i < constants->count; i++) { - OUT_CS_32F(constants->constants[i][0]); - OUT_CS_32F(constants->constants[i][1]); - OUT_CS_32F(constants->constants[i][2]); - OUT_CS_32F(constants->constants[i][3]); + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->constants.Count * 4); + for (i = 0; i < code->constants.Count; i++) { + const float * data = get_shader_constant(r300, &code->constants.Constants[i], constants); + OUT_CS_32F(data[0]); + OUT_CS_32F(data[1]); + OUT_CS_32F(data[2]); + OUT_CS_32F(data[3]); } } @@ -443,6 +531,12 @@ void r300_emit_vertex_shader(struct r300_context* r300, END_CS; } +void r300_emit_vertex_shader(struct r300_context* r300, + struct r300_vertex_shader* vs) +{ + r300_emit_vertex_program_code(r300, &vs->code, &r300->shader_constants[PIPE_SHADER_VERTEX]); +} + void r300_emit_viewport_state(struct r300_context* r300, struct r300_viewport_state* viewport) { @@ -531,10 +625,11 @@ validate: } else { debug_printf("No VBO while emitting dirty state!\n"); } - if (r300->winsys->validate(r300->winsys)) { + if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. 
*/ + debug_printf("r300: Stuck in validation loop, gonna quit now."); exit(1); } invalid = TRUE; @@ -563,11 +658,9 @@ validate: if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { if (r300screen->caps->is_r500) { - r500_emit_fragment_shader(r300, - (struct r5xx_fragment_shader*)r300->fs); + r500_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]); } else { - r300_emit_fragment_shader(r300, - (struct r3xx_fragment_shader*)r300->fs); + r300_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]); } r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index fda26f3948..350691d592 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -30,6 +30,9 @@ #include "r300_screen.h" #include "r300_state_inlines.h" +struct rX00_fragment_program_code; +struct r300_vertex_program_code; + void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend); @@ -42,11 +45,13 @@ void r300_emit_clip_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); -void r300_emit_fragment_shader(struct r300_context* r300, - struct r3xx_fragment_shader* fs); +void r300_emit_fragment_program_code(struct r300_context* r300, + struct rX00_fragment_program_code* generic_code, + struct r300_constant_buffer* externals); -void r500_emit_fragment_shader(struct r300_context* r300, - struct r5xx_fragment_shader* fs); +void r500_emit_fragment_program_code(struct r300_context* r300, + struct rX00_fragment_program_code* generic_code, + struct r300_constant_buffer* externals); void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); @@ -68,6 +73,10 @@ void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_format_state(struct r300_context* r300); +void 
r300_emit_vertex_program_code(struct r300_context* r300, + struct r300_vertex_program_code* code, + struct r300_constant_buffer* constants); + void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4b304306d0..36463b9a2e 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -23,87 +23,115 @@ #include "r300_fs.h" -void r300_translate_fragment_shader(struct r300_context* r300, - struct r300_fragment_shader* fs) +#include "r300_tgsi_to_rc.h" + +#include "radeon_compiler.h" + +static void find_output_registers(struct r300_fragment_program_compiler * compiler, + struct r300_fragment_shader * fs) { - struct tgsi_parse_context parser; - int i; - boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; - struct r300_constant_buffer* consts = - &r300->shader_constants[PIPE_SHADER_FRAGMENT]; + unsigned i; - struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm); - if (assembler == NULL) { - return; - } - /* Setup starting offset for immediates. */ - assembler->imm_offset = consts->user_count; - /* Enable depth writes, if needed. */ - assembler->writes_depth = fs->info.writes_z; - - /* Make sure we start at the beginning of the shader. */ - if (is_r500) { - ((struct r5xx_fragment_shader*)fs)->instruction_count = 0; - } + /* Mark the outputs as not present initially */ + compiler->OutputColor = fs->info.num_outputs; + compiler->OutputDepth = fs->info.num_outputs; - tgsi_parse_init(&parser, fs->state.tokens); + /* Now see where they really are. 
*/ + for(i = 0; i < fs->info.num_outputs; ++i) { + switch(fs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + compiler->OutputColor = i; + break; + case TGSI_SEMANTIC_POSITION: + compiler->OutputDepth = i; + break; + } + } +} - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); +static void allocate_hardware_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) +{ + struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info; + int total_colors = 0; + int colors = 0; + int total_generic = 0; + int generic = 0; + int i; - /* This is seriously the lamest way to create fragment programs ever. - * I blame TGSI. */ - switch (parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Allocated registers sitting at the beginning - * of the program. */ - r300_fs_declare(assembler, &parser.FullToken.FullDeclaration); + for (i = 0; i < info->num_inputs; i++) { + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + total_colors++; break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - debug_printf("r300: Emitting immediate to constant buffer, " - "position %d\n", - assembler->imm_offset + assembler->imm_count); - /* I am not amused by the length of these. 
*/ - for (i = 0; i < 4; i++) { - consts->constants[assembler->imm_offset + - assembler->imm_count][i] = - parser.FullToken.FullImmediate.u.ImmediateFloat32[i] - .Float; - } - assembler->imm_count++; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + total_generic++; break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (is_r500) { - r5xx_fs_instruction((struct r5xx_fragment_shader*)fs, - assembler, &parser.FullToken.FullInstruction); - } else { - r3xx_fs_instruction((struct r3xx_fragment_shader*)fs, - assembler, &parser.FullToken.FullInstruction); - } + } + } + + for(i = 0; i < info->num_inputs; i++) { + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + allocate(mydata, i, colors); + colors++; + break; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + allocate(mydata, i, total_colors + generic); + generic++; break; } } +} + +void r300_translate_fragment_shader(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + struct r300_fragment_program_compiler compiler; + struct tgsi_to_rc ttr; + + memset(&compiler, 0, sizeof(compiler)); + rc_init(&compiler.Base); + compiler.Base.Debug = 1; + + compiler.code = &fs->code; + compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500; + compiler.AllocateHwInputs = &allocate_hardware_inputs; + compiler.UserData = fs; + + /* TODO: Program compilation depends on texture compare modes, + * which are sampler state. Therefore, programs need to be recompiled + * depending on this state as in the classic Mesa driver. + * + * This is not yet handled correctly. 
+ */ - debug_printf("r300: fs: %d texs and %d colors, first free reg is %d\n", - assembler->tex_count, assembler->color_count, - assembler->tex_count + assembler->color_count); - - consts->count = consts->user_count + assembler->imm_count; - fs->uses_imms = assembler->imm_count; - debug_printf("r300: fs: %d total constants, " - "%d from user and %d from immediates\n", consts->count, - consts->user_count, assembler->imm_count); - r3xx_fs_finalize(fs, assembler); - if (is_r500) { - r5xx_fs_finalize((struct r5xx_fragment_shader*)fs, assembler); + find_output_registers(&compiler, fs); + + if (compiler.Base.Debug) { + debug_printf("r300: Initial fragment program\n"); + tgsi_dump(fs->state.tokens, 0); } - tgsi_dump(fs->state.tokens, 0); - /* XXX finish r300 dumper too */ - if (is_r500) { - r5xx_fs_dump((struct r5xx_fragment_shader*)fs); + /* Translate TGSI to our internal representation */ + ttr.compiler = &compiler.Base; + ttr.info = &fs->info; + + r300_tgsi_to_rc(&ttr, fs->state.tokens); + + /* Invoke the compiler */ + r3xx_compile_fragment_program(&compiler); + if (compiler.Base.Error) { + /* Todo: Fail gracefully */ + fprintf(stderr, "r300 FP: Compiler error\n"); + abort(); } - tgsi_parse_free(&parser); - FREE(assembler); + /* And, finally... */ + rc_destroy(&compiler.Base); + fs->translated = TRUE; } diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 18deb7a05e..9fab789402 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -30,6 +30,21 @@ #include "r3xx_fs.h" #include "r5xx_fs.h" +#include "radeon_code.h" + +struct r300_fragment_shader { + /* Parent class */ + struct pipe_shader_state state; + struct tgsi_shader_info info; + + /* Has this shader been translated yet? 
*/ + boolean translated; + + /* Compiled code */ + struct rX00_fragment_program_code code; +}; + + void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_shader* fs); diff --git a/src/gallium/drivers/r300/r300_fs_inlines.h b/src/gallium/drivers/r300/r300_fs_inlines.h deleted file mode 100644 index be4be9465e..0000000000 --- a/src/gallium/drivers/r300/r300_fs_inlines.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * Joakim Sindholt <opensource@zhasha.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef R300_FS_INLINES_H -#define R300_FS_INLINES_H - -#include "tgsi/tgsi_parse.h" - -#include "r300_context.h" -#include "r300_debug.h" -#include "r300_reg.h" -#include "r300_screen.h" -#include "r300_shader_inlines.h" - -/* Temporary struct used to hold assembly state while putting together - * fragment programs. */ -struct r300_fs_asm { - /* Pipe context. */ - struct r300_context* r300; - /* Number of colors. */ - unsigned color_count; - /* Number of texcoords. */ - unsigned tex_count; - /* Offset for temporary registers. Inputs and temporaries have no - * distinguishing markings, so inputs start at 0 and the first usable - * temporary register is after all inputs. */ - unsigned temp_offset; - /* Number of requested temporary registers. */ - unsigned temp_count; - /* Offset for immediate constants. Neither R300 nor R500 can do four - * inline constants per source, so instead we copy immediates into the - * constant buffer. */ - unsigned imm_offset; - /* Number of immediate constants. */ - unsigned imm_count; - /* Are depth writes enabled? */ - boolean writes_depth; - /* Depth write offset. This is the TGSI output that corresponds to - * depth writes. */ - unsigned depth_output; -}; - -static INLINE void r300_fs_declare(struct r300_fs_asm* assembler, - struct tgsi_full_declaration* decl) -{ - switch (decl->Declaration.File) { - case TGSI_FILE_INPUT: - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_COLOR: - assembler->color_count++; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - assembler->tex_count++; - break; - default: - debug_printf("r300: fs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - break; - } - break; - case TGSI_FILE_OUTPUT: - /* Depth write. Mark the position of the output so we can - * identify it later. 
*/ - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { - assembler->depth_output = decl->DeclarationRange.First; - } - break; - case TGSI_FILE_CONSTANT: - break; - case TGSI_FILE_TEMPORARY: - assembler->temp_count++; - break; - default: - debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File); - break; - } - - assembler->temp_offset = assembler->color_count + assembler->tex_count; -} - -static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler, - struct tgsi_src_register* src) -{ - switch (src->File) { - case TGSI_FILE_NULL: - return 0; - case TGSI_FILE_INPUT: - /* XXX may be wrong */ - return src->Index; - break; - case TGSI_FILE_TEMPORARY: - return src->Index + assembler->temp_offset; - break; - case TGSI_FILE_IMMEDIATE: - return (src->Index + assembler->imm_offset) | (1 << 8); - break; - case TGSI_FILE_CONSTANT: - /* XXX magic */ - return src->Index | (1 << 8); - break; - default: - debug_printf("r300: fs: Unimplemented src %d\n", src->File); - break; - } - return 0; -} - -static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler, - struct tgsi_dst_register* dst) -{ - switch (dst->File) { - case TGSI_FILE_NULL: - /* This happens during KIL instructions. 
*/ - return 0; - break; - case TGSI_FILE_OUTPUT: - return 0; - break; - case TGSI_FILE_TEMPORARY: - return dst->Index + assembler->temp_offset; - break; - default: - debug_printf("r300: fs: Unimplemented dst %d\n", dst->File); - break; - } - return 0; -} - -static INLINE boolean r300_fs_is_depr(struct r300_fs_asm* assembler, - struct tgsi_dst_register* dst) -{ - return (assembler->writes_depth && - (dst->File == TGSI_FILE_OUTPUT) && - (dst->Index == assembler->depth_output)); -} - -#endif /* R300_FS_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index da1d5ffe2f..96a7304621 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -147,6 +147,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_TGSI_CONT_SUPPORTED: /* XXX */ return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return 1; default: debug_printf("r300: Implementation error: Bad param %d\n", param); @@ -320,13 +322,14 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans = CALLOC_STRUCT(r300_transfer); if (trans) { pipe_texture_reference(&trans->transfer.texture, texture); - trans->transfer.format = trans->transfer.format; + trans->transfer.format = texture->format; trans->transfer.width = w; trans->transfer.height = h; trans->transfer.block = texture->block; trans->transfer.nblocksx = texture->nblocksx[level]; trans->transfer.nblocksy = texture->nblocksy[level]; - trans->transfer.stride = tex->stride; + trans->transfer.stride = align(pf_get_stride(&trans->transfer.block, + texture->width[level]), 32); trans->transfer.usage = usage; trans->offset = offset; } diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 68da0aa4cb..a02fb34b2a 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -32,6 +32,7 @@ #include "r300_reg.h" #include "r300_state_inlines.h" #include "r300_fs.h" 
+#include "r300_vs.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -137,7 +138,6 @@ static void const struct pipe_constant_buffer* buffer) { struct r300_context* r300 = r300_context(pipe); - int i = r300->shader_constants[shader].user_count; /* This entire chunk of code seems ever-so-slightly baked. * It's as if I've got pipe_buffer* matryoshkas... */ @@ -148,26 +148,13 @@ static void map, buffer->buffer->size); pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer); - r300->shader_constants[shader].user_count = + r300->shader_constants[shader].count = buffer->buffer->size / (sizeof(float) * 4); } else { - r300->shader_constants[shader].user_count = 0; + r300->shader_constants[shader].count = 0; } r300->dirty_state |= R300_NEW_CONSTANTS; - - /* If the number of constants have changed, invalidate the shader. */ - if (r300->shader_constants[shader].user_count != i) { - if (shader == PIPE_SHADER_FRAGMENT && r300->fs && - r300->fs->uses_imms) { - r300->fs->translated = FALSE; - r300_translate_fragment_shader(r300, r300->fs); - } else if (shader == PIPE_SHADER_VERTEX && r300->vs && - r300->vs->uses_imms) { - r300->vs->translated = FALSE; - r300_translate_vertex_shader(r300, r300->vs); - } - } } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -284,14 +271,9 @@ static void static void* r300_create_fs_state(struct pipe_context* pipe, const struct pipe_shader_state* shader) { - struct r300_context* r300 = r300_context(pipe); struct r300_fragment_shader* fs = NULL; - if (r300_screen(r300->context.screen)->caps->is_r500) { - fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r5xx_fragment_shader); - } else { - fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r3xx_fragment_shader); - } + fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader); /* Copy state directly into shader. 
*/ fs->state = *shader; @@ -315,7 +297,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_translate_fragment_shader(r300, fs); } - fs->translated = TRUE; r300->fs = fs; r300->dirty_state |= R300_NEW_FRAGMENT_SHADER; @@ -325,6 +306,7 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) { struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; + rc_constants_destroy(&fs->code.constants); FREE(fs->state.tokens); FREE(shader); } @@ -688,6 +670,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) if (r300_screen(pipe->screen)->caps->has_tcl) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; + rc_constants_destroy(&vs->code.constants); draw_delete_vertex_shader(r300->draw, vs->draw); FREE(vs->state.tokens); FREE(shader); diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2477b30822..ea670f41fb 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -22,6 +22,9 @@ #include "r300_state_derived.h" +#include "r300_fs.h" +#include "r300_vs.h" + /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 430129d5bd..1e92374a4e 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -34,7 +34,7 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(22 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(24 + (caps->has_tcl ? 
2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -56,6 +56,7 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); + OUT_CS_REG(R300_US_W_FMT, 0x0); /*** VAP ***/ /* Max and min vertex index clamp. */ @@ -72,7 +73,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(71 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); + BEGIN_CS(64 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -132,11 +133,5 @@ void r300_emit_invariant_state(struct r300_context* r300) /* XXX */ OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa); - OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); - OUT_CS(R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS(R300_US_OUT_FMT_UNUSED); - OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0); END_CS; } diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index fdabe4d9cf..a093f83945 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -37,7 +37,7 @@ static void r300_surface_setup(struct r300_context* r300, r300_emit_dsa_state(r300, &dsa_clear_state); r300_emit_rs_state(r300, &rs_clear_state); - BEGIN_CS(24); + BEGIN_CS(26); /* Viewport setup */ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); @@ -78,8 +78,10 @@ static void r300_surface_setup(struct r300_context* r300, /* Setup colorbuffer. 
*/ OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | - r300_translate_colorformat(dest->tex.format)); + OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0, 1); + OUT_CS_RELOC(dest->buffer, pixpitch | + r300_translate_colorformat(dest->tex.format), 0, + RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); END_CS; @@ -125,9 +127,10 @@ validate: r300->context.flush(&r300->context, 0, NULL); goto validate; } - if (r300->winsys->validate(r300->winsys)) { + if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { + debug_printf("r300: Stuck in validation loop, gonna fallback."); goto fallback; } invalid = TRUE; @@ -138,10 +141,14 @@ validate: /* Vertex shader setup */ if (caps->has_tcl) { - r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader); + r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0); } else { BEGIN_CS(4); - OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS); + OUT_CS_REG(R300_VAP_CNTL_STATUS, +#ifdef PIPE_ARCH_BIG_ENDIAN + R300_VC_32BIT_SWAP | +#endif + R300_VAP_TCL_BYPASS); OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) | R300_PVS_NUM_CNTLRS(5) | R300_PVS_NUM_FPUS(caps->num_vert_fpus) | @@ -151,10 +158,10 @@ validate: /* Fragment shader setup */ if (caps->is_r500) { - r500_emit_fragment_shader(r300, &r5xx_passthrough_fragment_shader); + r500_emit_fragment_program_code(r300, &r5xx_passthrough_fragment_shader, 0); r300_emit_rs_block_state(r300, &r5xx_rs_block_clear_state); } else { - r300_emit_fragment_shader(r300, &r3xx_passthrough_fragment_shader); + r300_emit_fragment_program_code(r300, &r3xx_passthrough_fragment_shader, 0); r300_emit_rs_block_state(r300, &r3xx_rs_block_clear_state); } @@ -256,9 +263,10 @@ validate: r300->context.flush(&r300->context, 0, NULL); goto validate; } - if (r300->winsys->validate(r300->winsys)) { + if (!r300->winsys->validate(r300->winsys)) { 
r300->context.flush(&r300->context, 0, NULL); if (invalid) { + debug_printf("r300: Stuck in validation loop, gonna fallback."); goto fallback; } invalid = TRUE; @@ -275,10 +283,14 @@ validate: /* Vertex shader setup */ if (caps->has_tcl) { - r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader); + r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0); } else { BEGIN_CS(4); - OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS); + OUT_CS_REG(R300_VAP_CNTL_STATUS, +#ifdef PIPE_ARCH_BIG_ENDIAN + R300_VC_32BIT_SWAP | +#endif + R300_VAP_TCL_BYPASS); OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) | R300_PVS_NUM_CNTLRS(5) | R300_PVS_NUM_FPUS(caps->num_vert_fpus) | @@ -288,10 +300,10 @@ validate: /* Fragment shader setup */ if (caps->is_r500) { - r500_emit_fragment_shader(r300, &r5xx_texture_fragment_shader); + r500_emit_fragment_program_code(r300, &r5xx_texture_fragment_shader, 0); r300_emit_rs_block_state(r300, &r5xx_rs_block_copy_state); } else { - r300_emit_fragment_shader(r300, &r3xx_texture_fragment_shader); + r300_emit_fragment_program_code(r300, &r3xx_texture_fragment_shader, 0); r300_emit_rs_block_state(r300, &r3xx_rs_block_copy_state); } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 11c7858d42..590052509c 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -22,13 +22,6 @@ #include "r300_texture.h" -/* XXX maths need to go to util */ - -static int minify(int i) -{ - return MAX2(1, i >> 1); -} - static void r300_setup_texture_state(struct r300_texture* tex, unsigned width, unsigned height, @@ -55,6 +48,9 @@ static void r300_setup_texture_state(struct r300_texture* tex, if (height > 2048) { state->format2 |= R500_TXHEIGHT_BIT11; } + + debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n", + width, height, pitch, levels); } static void r300_setup_miptree(struct r300_texture* tex) @@ -71,19 +67,23 @@ static void 
r300_setup_miptree(struct r300_texture* tex) } base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); - base->nblocksy[i] = pf_get_nblocksy(&base->block, base->width[i]); + base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); /* Radeons enjoy things in multiples of 64. * * XXX * POT, uncompressed, unmippmapped textures can be aligned to 32, * instead of 64. */ - stride = align(base->nblocksx[i] * base->block.size, 64); + stride = align(pf_get_stride(&base->block, base->width[i]), 32); size = stride * base->nblocksy[i] * base->depth[i]; - tex->offset[i] = align(tex->size, 64); + tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; + debug_printf("r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes)\n", + i, base->width[i], base->height[i], base->depth[i], + stride); /* Save stride of first level to the texture. */ if (i == 0) { tex->stride = stride; @@ -111,7 +111,7 @@ static struct pipe_texture* r300_setup_texture_state(tex, template->width[0], template->height[0], template->width[0], template->last_level); - tex->buffer = screen->buffer_create(screen, 64, + tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, tex->size); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c new file mode 100644 index 0000000000..3adbb715f3 --- /dev/null +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -0,0 +1,337 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_tgsi_to_rc.h" + +#include "radeon_compiler.h" +#include "radeon_program.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_util.h" + + +static unsigned translate_opcode(unsigned opcode) +{ + switch(opcode) { + case TGSI_OPCODE_ARL: return OPCODE_ARL; + case TGSI_OPCODE_MOV: return OPCODE_MOV; + case TGSI_OPCODE_LIT: return OPCODE_LIT; + case TGSI_OPCODE_RCP: return OPCODE_RCP; + case TGSI_OPCODE_RSQ: return OPCODE_RSQ; + case TGSI_OPCODE_EXP: return OPCODE_EXP; + case TGSI_OPCODE_LOG: return OPCODE_LOG; + case TGSI_OPCODE_MUL: return OPCODE_MUL; + case TGSI_OPCODE_ADD: return OPCODE_ADD; + case TGSI_OPCODE_DP3: return OPCODE_DP3; + case TGSI_OPCODE_DP4: return OPCODE_DP4; + case TGSI_OPCODE_DST: return OPCODE_DST; + case TGSI_OPCODE_MIN: return OPCODE_MIN; + case TGSI_OPCODE_MAX: return OPCODE_MAX; + case TGSI_OPCODE_SLT: return OPCODE_SLT; + case TGSI_OPCODE_SGE: return OPCODE_SGE; + case TGSI_OPCODE_MAD: return OPCODE_MAD; + case TGSI_OPCODE_SUB: return OPCODE_SUB; + case TGSI_OPCODE_LRP: return OPCODE_LRP; + /* case TGSI_OPCODE_CND: return OPCODE_CND; */ + /* case TGSI_OPCODE_CND0: return OPCODE_CND0; */ + case TGSI_OPCODE_DP2A: return OPCODE_DP2A; + /* gap */ + case TGSI_OPCODE_FRC: return OPCODE_FRC; + /* case TGSI_OPCODE_CLAMP: return OPCODE_CLAMP; */ + 
case TGSI_OPCODE_FLR: return OPCODE_FLR; + /* case TGSI_OPCODE_ROUND: return OPCODE_ROUND; */ + case TGSI_OPCODE_EX2: return OPCODE_EX2; + case TGSI_OPCODE_LG2: return OPCODE_LG2; + case TGSI_OPCODE_POW: return OPCODE_POW; + case TGSI_OPCODE_XPD: return OPCODE_XPD; + /* gap */ + case TGSI_OPCODE_ABS: return OPCODE_ABS; + case TGSI_OPCODE_RCC: return OPCODE_RCC; + case TGSI_OPCODE_DPH: return OPCODE_DPH; + case TGSI_OPCODE_COS: return OPCODE_COS; + case TGSI_OPCODE_DDX: return OPCODE_DDX; + case TGSI_OPCODE_DDY: return OPCODE_DDY; + /* case TGSI_OPCODE_KILP: return OPCODE_KILP; */ + case TGSI_OPCODE_PK2H: return OPCODE_PK2H; + case TGSI_OPCODE_PK2US: return OPCODE_PK2US; + case TGSI_OPCODE_PK4B: return OPCODE_PK4B; + case TGSI_OPCODE_PK4UB: return OPCODE_PK4UB; + case TGSI_OPCODE_RFL: return OPCODE_RFL; + case TGSI_OPCODE_SEQ: return OPCODE_SEQ; + case TGSI_OPCODE_SFL: return OPCODE_SFL; + case TGSI_OPCODE_SGT: return OPCODE_SGT; + case TGSI_OPCODE_SIN: return OPCODE_SIN; + case TGSI_OPCODE_SLE: return OPCODE_SLE; + case TGSI_OPCODE_SNE: return OPCODE_SNE; + case TGSI_OPCODE_STR: return OPCODE_STR; + case TGSI_OPCODE_TEX: return OPCODE_TEX; + case TGSI_OPCODE_TXD: return OPCODE_TXD; + case TGSI_OPCODE_TXP: return OPCODE_TXP; + case TGSI_OPCODE_UP2H: return OPCODE_UP2H; + case TGSI_OPCODE_UP2US: return OPCODE_UP2US; + case TGSI_OPCODE_UP4B: return OPCODE_UP4B; + case TGSI_OPCODE_UP4UB: return OPCODE_UP4UB; + case TGSI_OPCODE_X2D: return OPCODE_X2D; + case TGSI_OPCODE_ARA: return OPCODE_ARA; + case TGSI_OPCODE_ARR: return OPCODE_ARR; + case TGSI_OPCODE_BRA: return OPCODE_BRA; + case TGSI_OPCODE_CAL: return OPCODE_CAL; + case TGSI_OPCODE_RET: return OPCODE_RET; + case TGSI_OPCODE_SSG: return OPCODE_SSG; + case TGSI_OPCODE_CMP: return OPCODE_CMP; + case TGSI_OPCODE_SCS: return OPCODE_SCS; + case TGSI_OPCODE_TXB: return OPCODE_TXB; + /* case TGSI_OPCODE_NRM: return OPCODE_NRM; */ + /* case TGSI_OPCODE_DIV: return OPCODE_DIV; */ + case TGSI_OPCODE_DP2: return OPCODE_DP2; 
+ case TGSI_OPCODE_TXL: return OPCODE_TXL; + case TGSI_OPCODE_BRK: return OPCODE_BRK; + case TGSI_OPCODE_IF: return OPCODE_IF; + /* case TGSI_OPCODE_LOOP: return OPCODE_LOOP; */ + /* case TGSI_OPCODE_REP: return OPCODE_REP; */ + case TGSI_OPCODE_ELSE: return OPCODE_ELSE; + case TGSI_OPCODE_ENDIF: return OPCODE_ENDIF; + case TGSI_OPCODE_ENDLOOP: return OPCODE_ENDLOOP; + /* case TGSI_OPCODE_ENDREP: return OPCODE_ENDREP; */ + case TGSI_OPCODE_PUSHA: return OPCODE_PUSHA; + case TGSI_OPCODE_POPA: return OPCODE_POPA; + /* case TGSI_OPCODE_CEIL: return OPCODE_CEIL; */ + /* case TGSI_OPCODE_I2F: return OPCODE_I2F; */ + case TGSI_OPCODE_NOT: return OPCODE_NOT; + case TGSI_OPCODE_TRUNC: return OPCODE_TRUNC; + /* case TGSI_OPCODE_SHL: return OPCODE_SHL; */ + /* case TGSI_OPCODE_SHR: return OPCODE_SHR; */ + case TGSI_OPCODE_AND: return OPCODE_AND; + case TGSI_OPCODE_OR: return OPCODE_OR; + /* case TGSI_OPCODE_MOD: return OPCODE_MOD; */ + case TGSI_OPCODE_XOR: return OPCODE_XOR; + /* case TGSI_OPCODE_SAD: return OPCODE_SAD; */ + /* case TGSI_OPCODE_TXF: return OPCODE_TXF; */ + /* case TGSI_OPCODE_TXQ: return OPCODE_TXQ; */ + case TGSI_OPCODE_CONT: return OPCODE_CONT; + /* case TGSI_OPCODE_EMIT: return OPCODE_EMIT; */ + /* case TGSI_OPCODE_ENDPRIM: return OPCODE_ENDPRIM; */ + /* case TGSI_OPCODE_BGNLOOP2: return OPCODE_BGNLOOP2; */ + case TGSI_OPCODE_BGNSUB: return OPCODE_BGNSUB; + /* case TGSI_OPCODE_ENDLOOP2: return OPCODE_ENDLOOP2; */ + case TGSI_OPCODE_ENDSUB: return OPCODE_ENDSUB; + case TGSI_OPCODE_NOISE1: return OPCODE_NOISE1; + case TGSI_OPCODE_NOISE2: return OPCODE_NOISE2; + case TGSI_OPCODE_NOISE3: return OPCODE_NOISE3; + case TGSI_OPCODE_NOISE4: return OPCODE_NOISE4; + case TGSI_OPCODE_NOP: return OPCODE_NOP; + /* gap */ + case TGSI_OPCODE_NRM4: return OPCODE_NRM4; + /* case TGSI_OPCODE_CALLNZ: return OPCODE_CALLNZ; */ + /* case TGSI_OPCODE_IFC: return OPCODE_IFC; */ + /* case TGSI_OPCODE_BREAKC: return OPCODE_BREAKC; */ + case TGSI_OPCODE_KIL: return OPCODE_KIL; + 
case TGSI_OPCODE_END: return OPCODE_END; + case TGSI_OPCODE_SWZ: return OPCODE_SWZ; + } + + fprintf(stderr, "Unknown opcode: %i\n", opcode); + abort(); +} + +static unsigned translate_saturate(unsigned saturate) +{ + switch(saturate) { + case TGSI_SAT_NONE: return SATURATE_OFF; + case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE; + case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE; + } + + fprintf(stderr, "Unknown saturate mode: %i\n", saturate); + abort(); +} + +static unsigned translate_register_file(unsigned file) +{ + switch(file) { + case TGSI_FILE_CONSTANT: return PROGRAM_CONSTANT; + case TGSI_FILE_IMMEDIATE: return PROGRAM_CONSTANT; + case TGSI_FILE_INPUT: return PROGRAM_INPUT; + case TGSI_FILE_OUTPUT: return PROGRAM_OUTPUT; + case TGSI_FILE_TEMPORARY: return PROGRAM_TEMPORARY; + case TGSI_FILE_ADDRESS: return PROGRAM_ADDRESS; + } + + fprintf(stderr, "Unhandled register file: %i\n", file); + abort(); +} + +static int translate_register_index( + struct tgsi_to_rc * ttr, + unsigned file, + int index) +{ + if (file == TGSI_FILE_IMMEDIATE) + return ttr->immediate_offset + index; + + return index; +} + +static void transform_dstreg( + struct tgsi_to_rc * ttr, + struct prog_dst_register * dst, + struct tgsi_full_dst_register * src) +{ + dst->File = translate_register_file(src->DstRegister.File); + dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index); + dst->WriteMask = src->DstRegister.WriteMask; + dst->RelAddr = src->DstRegister.Indirect; +} + +static void transform_srcreg( + struct tgsi_to_rc * ttr, + struct prog_src_register * dst, + struct tgsi_full_src_register * src) +{ + dst->File = translate_register_file(src->SrcRegister.File); + dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index); + dst->RelAddr = src->SrcRegister.Indirect; + dst->Swizzle = tgsi_util_get_full_src_register_extswizzle(src, 0); + dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 1) << 3; + 
dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 2) << 6; + dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 3) << 9; + dst->Abs = src->SrcRegisterExtMod.Absolute; + dst->Negate = + src->SrcRegisterExtSwz.NegateX | + (src->SrcRegisterExtSwz.NegateY << 1) | + (src->SrcRegisterExtSwz.NegateZ << 2) | + (src->SrcRegisterExtSwz.NegateW << 3); + dst->Negate ^= src->SrcRegister.Negate ? NEGATE_XYZW : 0; +} + +static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) +{ + switch(src.Texture) { + case TGSI_TEXTURE_1D: + dst->I.TexSrcTarget = TEXTURE_1D_INDEX; + break; + case TGSI_TEXTURE_2D: + dst->I.TexSrcTarget = TEXTURE_2D_INDEX; + break; + case TGSI_TEXTURE_3D: + dst->I.TexSrcTarget = TEXTURE_3D_INDEX; + break; + case TGSI_TEXTURE_CUBE: + dst->I.TexSrcTarget = TEXTURE_CUBE_INDEX; + break; + case TGSI_TEXTURE_RECT: + dst->I.TexSrcTarget = TEXTURE_RECT_INDEX; + break; + case TGSI_TEXTURE_SHADOW1D: + dst->I.TexSrcTarget = TEXTURE_1D_INDEX; + dst->I.TexShadow = 1; + break; + case TGSI_TEXTURE_SHADOW2D: + dst->I.TexSrcTarget = TEXTURE_2D_INDEX; + dst->I.TexShadow = 1; + break; + case TGSI_TEXTURE_SHADOWRECT: + dst->I.TexSrcTarget = TEXTURE_RECT_INDEX; + dst->I.TexShadow = 1; + break; + } +} + +static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src) +{ + if (src->Instruction.Opcode == TGSI_OPCODE_END) + return; + + struct rc_instruction * dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); + int i; + + dst->I.Opcode = translate_opcode(src->Instruction.Opcode); + dst->I.SaturateMode = translate_saturate(src->Instruction.Saturate); + + if (src->Instruction.NumDstRegs) + transform_dstreg(ttr, &dst->I.DstReg, &src->FullDstRegisters[0]); + + for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { + if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) + dst->I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; + else + 
transform_srcreg(ttr, &dst->I.SrcReg[i], &src->FullSrcRegisters[i]); + } + + /* Texturing. */ + transform_texture(dst, src->InstructionExtTexture); +} + +static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) +{ + struct rc_constant constant; + int i; + + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + for(i = 0; i < 4; ++i) + constant.u.Immediate[i] = imm->u[i].Float; + rc_constants_add(&ttr->compiler->Program.Constants, &constant); +} + +void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) +{ + struct tgsi_parse_context parser; + int i; + + /* Allocate constants placeholders. + * + * Note: What if declared constants are not contiguous? */ + for(i = 0; i <= ttr->info->file_max[TGSI_FILE_CONSTANT]; ++i) { + struct rc_constant constant; + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + rc_constants_add(&ttr->compiler->Program.Constants, &constant); + } + + ttr->immediate_offset = ttr->compiler->Program.Constants.Count; + + tgsi_parse_init(&parser, tokens); + + while (!tgsi_parse_end_of_tokens(&parser)) { + tgsi_parse_token(&parser); + + switch (parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + handle_immediate(ttr, &parser.FullToken.FullImmediate); + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + transform_instruction(ttr, &parser.FullToken.FullInstruction); + break; + } + } + + tgsi_parse_free(&parser); + + rc_calculate_inputs_outputs(ttr->compiler); +} + diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h new file mode 100644 index 0000000000..93e90ec6d2 --- /dev/null +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -0,0 +1,41 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef R300_TGSI_TO_RC_H +#define R300_TGSI_TO_RC_H + +struct radeon_compiler; + +struct tgsi_full_declaration; +struct tgsi_shader_info; +struct tgsi_token; + +struct tgsi_to_rc { + struct radeon_compiler * compiler; + const struct tgsi_shader_info * info; + + int immediate_offset; +}; + +void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); + +#endif /* R300_TGSI_TO_RC_H */ diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index f87435f9f0..2cb903bba2 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -22,391 +22,213 @@ #include "r300_vs.h" -static void r300_vs_declare(struct r300_vs_asm* assembler, - struct tgsi_full_declaration* decl) -{ - switch (decl->Declaration.File) { - case TGSI_FILE_INPUT: - break; - case TGSI_FILE_OUTPUT: - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: - assembler->tab[decl->DeclarationRange.First] = 0; - break; - case TGSI_SEMANTIC_COLOR: - assembler->tab[decl->DeclarationRange.First] = - (assembler->point_size ? 1 : 0) + - assembler->out_colors; - break; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - /* XXX multiple? */ - assembler->tab[decl->DeclarationRange.First] = - (assembler->point_size ? 
1 : 0) + - assembler->out_colors + - assembler->out_texcoords; - break; - case TGSI_SEMANTIC_PSIZE: - assembler->tab[decl->DeclarationRange.First] = 1; - break; - default: - debug_printf("r300: vs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - break; - } - break; - case TGSI_FILE_CONSTANT: - break; - case TGSI_FILE_TEMPORARY: - assembler->temp_count++; - break; - default: - debug_printf("r300: vs: Bad file %d\n", decl->Declaration.File); - break; - } -} +#include "r300_context.h" +#include "r300_tgsi_to_rc.h" -static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler, - struct tgsi_src_register* src) -{ - switch (src->File) { - case TGSI_FILE_NULL: - case TGSI_FILE_INPUT: - /* Probably a zero or one swizzle */ - return R300_PVS_SRC_REG_INPUT; - case TGSI_FILE_TEMPORARY: - return R300_PVS_SRC_REG_TEMPORARY; - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - return R300_PVS_SRC_REG_CONSTANT; - default: - debug_printf("r300: vs: Unimplemented src type %d\n", src->File); - break; - } - return 0; -} +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" -static INLINE unsigned r300_vs_src(struct r300_vs_asm* assembler, - struct tgsi_src_register* src) -{ - switch (src->File) { - case TGSI_FILE_NULL: - case TGSI_FILE_INPUT: - case TGSI_FILE_TEMPORARY: - case TGSI_FILE_CONSTANT: - return src->Index; - case TGSI_FILE_IMMEDIATE: - return src->Index + assembler->imm_offset; - default: - debug_printf("r300: vs: Unimplemented src type %d\n", src->File); - break; - } - return 0; -} +#include "radeon_compiler.h" -static INLINE unsigned r300_vs_dst_type(struct r300_vs_asm* assembler, - struct tgsi_dst_register* dst) -{ - switch (dst->File) { - case TGSI_FILE_TEMPORARY: - return R300_PVS_DST_REG_TEMPORARY; - case TGSI_FILE_OUTPUT: - return R300_PVS_DST_REG_OUT; - default: - debug_printf("r300: vs: Unimplemented dst type %d\n", dst->File); - break; - } - return 0; -} -static INLINE unsigned r300_vs_dst(struct r300_vs_asm* assembler, - 
struct tgsi_dst_register* dst) +static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { - switch (dst->File) { - case TGSI_FILE_TEMPORARY: - return dst->Index; - case TGSI_FILE_OUTPUT: - return assembler->tab[dst->Index]; - default: - debug_printf("r300: vs: Unimplemented dst %d\n", dst->File); - break; - } - return 0; -} + struct r300_vertex_shader * vs = c->UserData; + struct tgsi_shader_info* info = &vs->info; + boolean pointsize = false; + int out_colors = 0; + int colors = 0; + int out_generic = 0; + int generic = 0; + int i; -static uint32_t r300_vs_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - return R300_VE_DOT_PRODUCT; - case TGSI_OPCODE_MUL: - return R300_VE_MULTIPLY; - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_SWZ: - return R300_VE_ADD; - case TGSI_OPCODE_MAX: - return R300_VE_MAXIMUM; - case TGSI_OPCODE_SLT: - return R300_VE_SET_LESS_THAN; - case TGSI_OPCODE_RSQ: - return R300_PVS_DST_MATH_INST | R300_ME_RECIP_DX; - case TGSI_OPCODE_MAD: - return R300_PVS_DST_MACRO_INST | R300_PVS_MACRO_OP_2CLK_MADD; - default: - break; - } - return 0; -} + /* Fill in the input mapping */ + for (i = 0; i < info->num_inputs; i++) + c->code->inputs[i] = i; -static uint32_t r300_vs_swiz(struct tgsi_full_src_register* reg) -{ - if (reg->SrcRegister.Extended) { - return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | - reg->SrcRegisterExtSwz.ExtSwizzleX | - (reg->SrcRegisterExtSwz.ExtSwizzleY << 3) | - (reg->SrcRegisterExtSwz.ExtSwizzleZ << 6) | - (reg->SrcRegisterExtSwz.ExtSwizzleW << 9); - } else { - return (reg->SrcRegister.Negate ? 
(0xf << 12) : 0) | - reg->SrcRegister.SwizzleX | - (reg->SrcRegister.SwizzleY << 3) | - (reg->SrcRegister.SwizzleZ << 6) | - (reg->SrcRegister.SwizzleW << 9); + /* Fill in the output mapping */ + for (i = 0; i < info->num_outputs; i++) { + switch (info->output_semantic_name[i]) { + case TGSI_SEMANTIC_PSIZE: + pointsize = true; + break; + case TGSI_SEMANTIC_COLOR: + out_colors++; + break; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + out_generic++; + break; + } } -} -/* XXX icky icky icky icky */ -static uint32_t r300_vs_scalar_swiz(struct tgsi_full_src_register* reg) -{ - if (reg->SrcRegister.Extended) { - return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | - reg->SrcRegisterExtSwz.ExtSwizzleX | - (reg->SrcRegisterExtSwz.ExtSwizzleX << 3) | - (reg->SrcRegisterExtSwz.ExtSwizzleX << 6) | - (reg->SrcRegisterExtSwz.ExtSwizzleX << 9); - } else { - return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | - reg->SrcRegister.SwizzleX | - (reg->SrcRegister.SwizzleX << 3) | - (reg->SrcRegister.SwizzleX << 6) | - (reg->SrcRegister.SwizzleX << 9); - } -} + struct tgsi_parse_context parser; -/* XXX scalar stupidity */ -static void r300_vs_emit_inst(struct r300_vertex_shader* vs, - struct r300_vs_asm* assembler, - struct tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - unsigned op, - unsigned count, - boolean is_scalar) -{ - int i = vs->instruction_count; - vs->instructions[i].inst0 = R300_PVS_DST_OPCODE(r300_vs_op(op)) | - R300_PVS_DST_REG_TYPE(r300_vs_dst_type(assembler, &dst->DstRegister)) | - R300_PVS_DST_OFFSET(r300_vs_dst(assembler, &dst->DstRegister)) | - R300_PVS_DST_WE(dst->DstRegister.WriteMask); - switch (count) { - case 3: - vs->instructions[i].inst3 = - R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, - &src[2].SrcRegister)) | - R300_PVS_SRC_OFFSET(r300_vs_src(assembler, - &src[2].SrcRegister)) | - R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[2])); - /* Fall through */ - case 2: - vs->instructions[i].inst2 = - 
R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, - &src[1].SrcRegister)) | - R300_PVS_SRC_OFFSET(r300_vs_src(assembler, - &src[1].SrcRegister)) | - R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[1])); - /* Fall through */ - case 1: - vs->instructions[i].inst1 = - R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, - &src[0].SrcRegister)) | - R300_PVS_SRC_OFFSET(r300_vs_src(assembler, - &src[0].SrcRegister)) | - /* XXX the icky, it burns */ - R300_PVS_SRC_SWIZZLE(is_scalar ? r300_vs_scalar_swiz(&src[0]) - : r300_vs_swiz(&src[0])); - break; - } - vs->instruction_count++; -} + tgsi_parse_init(&parser, vs->state.tokens); -static void r300_vs_instruction(struct r300_vertex_shader* vs, - struct r300_vs_asm* assembler, - struct tgsi_full_instruction* inst) -{ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_RSQ: - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 1, TRUE); - break; - case TGSI_OPCODE_SUB: - inst->FullSrcRegisters[1].SrcRegister.Negate = - !inst->FullSrcRegisters[1].SrcRegister.Negate; - /* Fall through */ - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_MAX: - case TGSI_OPCODE_SLT: - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2, FALSE); - break; - case TGSI_OPCODE_DP3: - /* Set alpha swizzle to zero for src0 and src1 */ - if (!inst->FullSrcRegisters[0].SrcRegister.Extended) { - inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX = - inst->FullSrcRegisters[0].SrcRegister.SwizzleX; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY = - inst->FullSrcRegisters[0].SrcRegister.SwizzleY; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ = - inst->FullSrcRegisters[0].SrcRegister.SwizzleZ; - } - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = - TGSI_EXTSWIZZLE_ZERO; - if (!inst->FullSrcRegisters[1].SrcRegister.Extended) { - 
inst->FullSrcRegisters[1].SrcRegister.Extended = TRUE; - inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleX = - inst->FullSrcRegisters[1].SrcRegister.SwizzleX; - inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleY = - inst->FullSrcRegisters[1].SrcRegister.SwizzleY; - inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleZ = - inst->FullSrcRegisters[1].SrcRegister.SwizzleZ; - } - inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleW = - TGSI_EXTSWIZZLE_ZERO; - /* Fall through */ - case TGSI_OPCODE_DP4: - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2, FALSE); - break; - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - inst->FullSrcRegisters[1] = r300_constant_zero; - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2, FALSE); - break; - case TGSI_OPCODE_MAD: - r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 3, FALSE); - break; - case TGSI_OPCODE_END: - break; - default: - debug_printf("r300: vs: Bad opcode %d\n", - inst->Instruction.Opcode); - break; - } -} + while (!tgsi_parse_end_of_tokens(&parser)) { + tgsi_parse_token(&parser); -static void r300_vs_init(struct r300_vertex_shader* vs, - struct r300_vs_asm* assembler) -{ - struct tgsi_shader_info* info = &vs->info; - int i; + if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) + continue; - for (i = 0; i < info->num_outputs; i++) { - switch (info->output_semantic_name[i]) { + struct tgsi_full_declaration * decl = &parser.FullToken.FullDeclaration; + + if (decl->Declaration.File != TGSI_FILE_OUTPUT) + continue; + + switch (decl->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + c->code->outputs[decl->DeclarationRange.First] = 0; + break; case TGSI_SEMANTIC_PSIZE: - assembler->point_size = TRUE; + c->code->outputs[decl->DeclarationRange.First] = 1; break; case TGSI_SEMANTIC_COLOR: - 
assembler->out_colors++; + c->code->outputs[decl->DeclarationRange.First] = 1 + + (pointsize ? 1 : 0) + + colors++; break; case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_GENERIC: - assembler->out_texcoords++; + c->code->outputs[decl->DeclarationRange.First] = 1 + + (pointsize ? 1 : 0) + + out_colors + + generic++; + break; + default: + debug_printf("r300: vs: Bad semantic declaration %d\n", + decl->Semantic.SemanticName); break; } } - vs->instruction_count = 0; + tgsi_parse_free(&parser); } + void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs) { - struct tgsi_parse_context parser; - int i; - struct r300_constant_buffer* consts = - &r300->shader_constants[PIPE_SHADER_VERTEX]; + struct r300_vertex_program_compiler compiler; + struct tgsi_to_rc ttr; - struct r300_vs_asm* assembler = CALLOC_STRUCT(r300_vs_asm); - if (assembler == NULL) { - return; - } + /* Setup the compiler */ + rc_init(&compiler.Base); - /* Init assembler. */ - r300_vs_init(vs, assembler); + compiler.Base.Debug = 1; + compiler.code = &vs->code; + compiler.UserData = vs; - /* Setup starting offset for immediates. */ - assembler->imm_offset = consts->user_count; + if (compiler.Base.Debug) { + debug_printf("r300: Initial vertex program\n"); + tgsi_dump(vs->state.tokens, 0); + } - tgsi_parse_init(&parser, vs->state.tokens); + /* Translate TGSI to our internal representation */ + ttr.compiler = &compiler.Base; + ttr.info = &vs->info; - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); + r300_tgsi_to_rc(&ttr, vs->state.tokens); - /* This is seriously the lamest way to create fragment programs ever. - * I blame TGSI. */ - switch (parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Allocated registers sitting at the beginning - * of the program. 
*/ - r300_vs_declare(assembler, &parser.FullToken.FullDeclaration); - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - debug_printf("r300: Emitting immediate to constant buffer, " - "position %d\n", - assembler->imm_offset + assembler->imm_count); - /* I am not amused by the length of these. */ - for (i = 0; i < 4; i++) { - consts->constants[assembler->imm_offset + - assembler->imm_count][i] = - parser.FullToken.FullImmediate.u.ImmediateFloat32[i] - .Float; - } - assembler->imm_count++; - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - r300_vs_instruction(vs, assembler, - &parser.FullToken.FullInstruction); - break; - } - } + compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs); + compiler.SetHwInputOutput = &set_vertex_inputs_outputs; - debug_printf("r300: vs: %d texs and %d colors, first free reg is %d\n", - assembler->tex_count, assembler->color_count, - assembler->tex_count + assembler->color_count); + /* Invoke the compiler */ + r3xx_compile_vertex_program(&compiler); + if (compiler.Base.Error) { + /* Todo: Fail gracefully */ + fprintf(stderr, "r300 VP: Compiler error\n"); + abort(); + } - consts->count = consts->user_count + assembler->imm_count; - vs->uses_imms = assembler->imm_count; - debug_printf("r300: vs: %d total constants, " - "%d from user and %d from immediates\n", consts->count, - consts->user_count, assembler->imm_count); + /* And, finally... 
*/ + rc_destroy(&compiler.Base); + vs->translated = TRUE; +} - debug_printf("r300: vs: tab: %d %d %d %d\n", assembler->tab[0], - assembler->tab[1], assembler->tab[2], assembler->tab[3]); - tgsi_dump(vs->state.tokens, 0); - /* XXX finish r300 vertex shader dumper */ - r300_vs_dump(vs); +/* XXX get these to r300_reg */ +#define R300_PVS_DST_OPCODE(x) ((x) << 0) +# define R300_VE_DOT_PRODUCT 1 +# define R300_VE_MULTIPLY 2 +# define R300_VE_ADD 3 +# define R300_VE_MAXIMUM 7 +# define R300_VE_SET_LESS_THAN 10 +#define R300_PVS_DST_MATH_INST (1 << 6) +# define R300_ME_RECIP_DX 6 +#define R300_PVS_DST_MACRO_INST (1 << 7) +# define R300_PVS_MACRO_OP_2CLK_MADD 0 +#define R300_PVS_DST_REG_TYPE(x) ((x) << 8) +# define R300_PVS_DST_REG_TEMPORARY 0 +# define R300_PVS_DST_REG_A0 1 +# define R300_PVS_DST_REG_OUT 2 +# define R300_PVS_DST_REG_OUT_REPL_X 3 +# define R300_PVS_DST_REG_ALT_TEMPORARY 4 +# define R300_PVS_DST_REG_INPUT 5 +#define R300_PVS_DST_OFFSET(x) ((x) << 13) +#define R300_PVS_DST_WE(x) ((x) << 20) +#define R300_PVS_DST_WE_XYZW (0xf << 20) + +#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0) +# define R300_PVS_SRC_REG_TEMPORARY 0 +# define R300_PVS_SRC_REG_INPUT 1 +# define R300_PVS_SRC_REG_CONSTANT 2 +# define R300_PVS_SRC_REG_ALT_TEMPORARY 3 +#define R300_PVS_SRC_OFFSET(x) ((x) << 5) +#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13) +# define R300_PVS_SRC_SELECT_X 0 +# define R300_PVS_SRC_SELECT_Y 1 +# define R300_PVS_SRC_SELECT_Z 2 +# define R300_PVS_SRC_SELECT_W 3 +# define R300_PVS_SRC_SELECT_FORCE_0 4 +# define R300_PVS_SRC_SELECT_FORCE_1 5 +# define R300_PVS_SRC_SWIZZLE_XYZW \ + ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \ + (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13) +# define R300_PVS_SRC_SWIZZLE_ZERO \ + ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \ + (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \ + (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13) +# define R300_PVS_SRC_SWIZZLE_ONE \ + ((R300_PVS_SRC_SELECT_FORCE_1 | 
(R300_PVS_SRC_SELECT_FORCE_1 << 3) | \ + (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \ + (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13) +#define R300_PVS_MODIFIER_X (1 << 25) +#define R300_PVS_MODIFIER_Y (1 << 26) +#define R300_PVS_MODIFIER_Z (1 << 27) +#define R300_PVS_MODIFIER_W (1 << 28) +#define R300_PVS_NEGATE_XYZW \ + (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \ + R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W) + +struct r300_vertex_program_code r300_passthrough_vertex_shader = { + .length = 8, /* two instructions */ + + /* MOV out[0], in[0] */ + .body.d[0] = R300_PVS_DST_OPCODE(R300_VE_ADD) | + R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | + R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW, + .body.d[1] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | + R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW, + .body.d[2] = R300_PVS_SRC_SWIZZLE_ZERO, + .body.d[3] = 0x0, + + /* MOV out[1], in[1] */ + .body.d[4] = R300_PVS_DST_OPCODE(R300_VE_ADD) | + R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | + R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW, + .body.d[5] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | + R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW, + .body.d[6] = R300_PVS_SRC_SWIZZLE_ZERO, + .body.d[7] = 0x0, + + .inputs[0] = 0, + .inputs[1] = 1, + .outputs[0] = 0, + .outputs[1] = 1, + + .InputsRead = 3, + .OutputsWritten = 3 +}; - tgsi_parse_free(&parser); - FREE(assembler); -} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 165d717812..2a4ce315e3 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -23,134 +23,31 @@ #ifndef R300_VS_H #define R300_VS_H -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_dump.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" -#include "r300_context.h" -#include "r300_debug.h" -#include "r300_reg.h" -#include "r300_screen.h" -#include "r300_shader_inlines.h" +#include "radeon_code.h" -/* XXX get these to r300_reg */ -#define R300_PVS_DST_OPCODE(x) 
((x) << 0) -# define R300_VE_DOT_PRODUCT 1 -# define R300_VE_MULTIPLY 2 -# define R300_VE_ADD 3 -# define R300_VE_MAXIMUM 7 -# define R300_VE_SET_LESS_THAN 10 -#define R300_PVS_DST_MATH_INST (1 << 6) -# define R300_ME_RECIP_DX 6 -#define R300_PVS_DST_MACRO_INST (1 << 7) -# define R300_PVS_MACRO_OP_2CLK_MADD 0 -#define R300_PVS_DST_REG_TYPE(x) ((x) << 8) -# define R300_PVS_DST_REG_TEMPORARY 0 -# define R300_PVS_DST_REG_A0 1 -# define R300_PVS_DST_REG_OUT 2 -# define R300_PVS_DST_REG_OUT_REPL_X 3 -# define R300_PVS_DST_REG_ALT_TEMPORARY 4 -# define R300_PVS_DST_REG_INPUT 5 -#define R300_PVS_DST_OFFSET(x) ((x) << 13) -#define R300_PVS_DST_WE(x) ((x) << 20) -#define R300_PVS_DST_WE_XYZW (0xf << 20) +struct r300_context; -#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0) -# define R300_PVS_SRC_REG_TEMPORARY 0 -# define R300_PVS_SRC_REG_INPUT 1 -# define R300_PVS_SRC_REG_CONSTANT 2 -# define R300_PVS_SRC_REG_ALT_TEMPORARY 3 -#define R300_PVS_SRC_OFFSET(x) ((x) << 5) -#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13) -# define R300_PVS_SRC_SELECT_X 0 -# define R300_PVS_SRC_SELECT_Y 1 -# define R300_PVS_SRC_SELECT_Z 2 -# define R300_PVS_SRC_SELECT_W 3 -# define R300_PVS_SRC_SELECT_FORCE_0 4 -# define R300_PVS_SRC_SELECT_FORCE_1 5 -# define R300_PVS_SRC_SWIZZLE_XYZW \ - ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \ - (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13) -# define R300_PVS_SRC_SWIZZLE_ZERO \ - ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \ - (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \ - (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13) -# define R300_PVS_SRC_SWIZZLE_ONE \ - ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \ - (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \ - (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13) -#define R300_PVS_MODIFIER_X (1 << 25) -#define R300_PVS_MODIFIER_Y (1 << 26) -#define R300_PVS_MODIFIER_Z (1 << 27) -#define R300_PVS_MODIFIER_W (1 << 28) -#define R300_PVS_NEGATE_XYZW \ - 
(R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \ - R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W) +struct r300_vertex_shader { + /* Parent class */ + struct pipe_shader_state state; + struct tgsi_shader_info info; -/* Temporary struct used to hold assembly state while putting together - * fragment programs. */ -struct r300_vs_asm { - /* Pipe context. */ - struct r300_context* r300; - /* Number of colors. */ - unsigned color_count; - /* Number of texcoords. */ - unsigned tex_count; - /* Number of requested temporary registers. */ - unsigned temp_count; - /* Offset for immediate constants. Neither R300 nor R500 can do four - * inline constants per source, so instead we copy immediates into the - * constant buffer. */ - unsigned imm_offset; - /* Number of immediate constants. */ - unsigned imm_count; - /* Number of colors to write. */ - unsigned out_colors; - /* Number of texcoords to write. */ - unsigned out_texcoords; - /* Whether to emit point size. */ - boolean point_size; - /* Tab of declared outputs to OVM outputs. 
*/ - unsigned tab[16]; -}; + /* Fallback shader, because Draw has issues */ + struct draw_vertex_shader* draw; -static struct r300_vertex_shader r300_passthrough_vertex_shader = { - /* XXX translate these back into normal instructions */ - .instruction_count = 2, - .instructions[0].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW, - .instructions[0].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW, - .instructions[0].inst2 = R300_PVS_SRC_SWIZZLE_ZERO, - .instructions[0].inst3 = 0x0, - .instructions[1].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW, - .instructions[1].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW, - .instructions[1].inst2 = R300_PVS_SRC_SWIZZLE_ZERO, - .instructions[1].inst3 = 0x0, -}; + /* Has this shader been translated yet? 
*/ + boolean translated; -static struct r300_vertex_shader r300_texture_vertex_shader = { - /* XXX translate these back into normal instructions */ - .instruction_count = 2, - .instructions[0].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW, - .instructions[0].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW, - .instructions[0].inst2 = R300_PVS_SRC_SWIZZLE_ZERO, - .instructions[0].inst3 = 0x0, - .instructions[1].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW, - .instructions[1].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW, - .instructions[1].inst2 = R300_PVS_SRC_SWIZZLE_ZERO, - .instructions[1].inst3 = 0x0, + /* Machine code (if translated) */ + struct r300_vertex_program_code code; }; + +extern struct r300_vertex_program_code r300_passthrough_vertex_shader; + void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r3xx_fs.c b/src/gallium/drivers/r300/r3xx_fs.c index 6e05d76977..c1c1194d58 100644 --- a/src/gallium/drivers/r300/r3xx_fs.c +++ b/src/gallium/drivers/r300/r3xx_fs.c @@ -23,74 +23,52 @@ #include "r3xx_fs.h" -static INLINE uint32_t r3xx_rgb_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_MOV: - return R300_ALU_OUTC_CMP; - default: - return 0; - } -} +#include "r300_reg.h" -static INLINE uint32_t r3xx_alpha_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_MOV: - return R300_ALU_OUTA_CMP; - default: - return 0; - } -} +struct rX00_fragment_program_code r3xx_passthrough_fragment_shader = { + .code.r300.alu.length = 1, + .code.r300.tex.length = 0, -static INLINE void r3xx_emit_maths(struct r3xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct 
tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - unsigned op, - unsigned count) -{ - int i = fs->alu_instruction_count; + .code.r300.config = 0, + .code.r300.pixsize = 0, + .code.r300.code_offset = 0, + .code.r300.code_addr[3] = R300_RGBA_OUT, - fs->instructions[i].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | + .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - r3xx_rgb_op(op); - fs->instructions[i].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ; - fs->instructions[i].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | + R300_ALU_OUTC_CMP, + .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | + R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, + .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - r3xx_alpha_op(op); - fs->instructions[i].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT; + R300_ALU_OUTA_CMP, + .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) | + R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, +}; - fs->alu_instruction_count++; -} +struct rX00_fragment_program_code r3xx_texture_fragment_shader = { + .code.r300.alu.length = 1, + .code.r300.tex.length = 1, -void r3xx_fs_finalize(struct r300_fragment_shader* fs, - struct r300_fs_asm* assembler) -{ - fs->stack_size = assembler->temp_count + assembler->temp_offset + 1; -} + .code.r300.config = R300_PFS_CNTL_FIRST_NODE_HAS_TEX, + .code.r300.pixsize = 0, + .code.r300.code_offset = 0, + .code.r300.code_addr[3] = R300_RGBA_OUT, -void r3xx_fs_instruction(struct r3xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst) -{ - switch (inst->Instruction.Opcode) { - case 
TGSI_OPCODE_MOV: - /* src0 -> src1 and src2 forced to zero */ - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[2] = r300_constant_zero; - r3xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - case TGSI_OPCODE_END: - break; - default: - debug_printf("r300: fs: Bad opcode %d\n", - inst->Instruction.Opcode); - break; - } -} + .code.r300.tex.inst[0] = R300_TEX_OP_LD << R300_TEX_INST_SHIFT, + + .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | + R300_ALU_OUTC_CMP, + .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | + R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, + .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | + R300_ALU_OUTA_CMP, + .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) | + R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, +}; diff --git a/src/gallium/drivers/r300/r3xx_fs.h b/src/gallium/drivers/r300/r3xx_fs.h index 3da39ec252..51cd245724 100644 --- a/src/gallium/drivers/r300/r3xx_fs.h +++ b/src/gallium/drivers/r300/r3xx_fs.h @@ -24,53 +24,9 @@ #ifndef R3XX_FS_H #define R3XX_FS_H -#include "r300_fs_inlines.h" +#include "radeon_code.h" -static struct r3xx_fragment_shader r3xx_passthrough_fragment_shader = { - .alu_instruction_count = 1, - .tex_instruction_count = 0, - .indirections = 0, - .shader.stack_size = 1, - - .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - 
R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; - -static struct r3xx_fragment_shader r3xx_texture_fragment_shader = { - .alu_instruction_count = 1, - .tex_instruction_count = 0, - .indirections = 0, - .shader.stack_size = 1, - - .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; - -void r3xx_fs_finalize(struct r300_fragment_shader* fs, - struct r300_fs_asm* assembler); - -void r3xx_fs_instruction(struct r3xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst); +struct rX00_fragment_program_code r3xx_passthrough_fragment_shader; +struct rX00_fragment_program_code r3xx_texture_fragment_shader; #endif /* R3XX_FS_H */ diff --git a/src/gallium/drivers/r300/r5xx_fs.c b/src/gallium/drivers/r300/r5xx_fs.c index 99d826278c..f072deab0d 100644 --- a/src/gallium/drivers/r300/r5xx_fs.c +++ b/src/gallium/drivers/r300/r5xx_fs.c @@ -23,445 +23,103 @@ #include "r5xx_fs.h" -static INLINE unsigned r5xx_fix_swiz(unsigned s) -{ - /* For historical reasons, the swizzle values x, y, z, w, and 0 are - * equivalent to the actual machine code, but 1 is not. Thus, we just - * adjust it a bit... 
*/ - if (s == TGSI_EXTSWIZZLE_ONE) { - return R500_SWIZZLE_ONE; - } else { - return s; - } -} - -static uint32_t r5xx_rgba_swiz(struct tgsi_full_src_register* reg) -{ - if (reg->SrcRegister.Extended) { - return r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) | - (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) | - (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) | - (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9); - } else { - return reg->SrcRegister.SwizzleX | - (reg->SrcRegister.SwizzleY << 3) | - (reg->SrcRegister.SwizzleZ << 6) | - (reg->SrcRegister.SwizzleW << 9); - } -} - -static uint32_t r5xx_strq_swiz(struct tgsi_full_src_register* reg) -{ - return reg->SrcRegister.SwizzleX | - (reg->SrcRegister.SwizzleY << 2) | - (reg->SrcRegister.SwizzleZ << 4) | - (reg->SrcRegister.SwizzleW << 6); -} - -static INLINE uint32_t r5xx_rgb_swiz(struct tgsi_full_src_register* reg) -{ - /* Only the first 9 bits... */ - return (r5xx_rgba_swiz(reg) & 0x1ff) | - (reg->SrcRegister.Negate ? (1 << 9) : 0) | - (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0); -} - -static INLINE uint32_t r5xx_alpha_swiz(struct tgsi_full_src_register* reg) -{ - /* Only the last 3 bits... */ - return (r5xx_rgba_swiz(reg) >> 9) | - (reg->SrcRegister.Negate ? (1 << 9) : 0) | - (reg->SrcRegisterExtMod.Absolute ? 
(1 << 10) : 0); -} - -static INLINE uint32_t r5xx_rgba_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_COS: - case TGSI_OPCODE_EX2: - case TGSI_OPCODE_LG2: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SIN: - return R500_ALU_RGBA_OP_SOP; - case TGSI_OPCODE_DDX: - return R500_ALU_RGBA_OP_MDH; - case TGSI_OPCODE_DDY: - return R500_ALU_RGBA_OP_MDV; - case TGSI_OPCODE_FRC: - return R500_ALU_RGBA_OP_FRC; - case TGSI_OPCODE_DP3: - return R500_ALU_RGBA_OP_DP3; - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - return R500_ALU_RGBA_OP_DP4; - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_CMP: - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - return R500_ALU_RGBA_OP_CMP; - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MAD: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_SUB: - return R500_ALU_RGBA_OP_MAD; - default: - return 0; - } -} - -static INLINE uint32_t r5xx_alpha_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_COS: - return R500_ALPHA_OP_COS; - case TGSI_OPCODE_EX2: - return R500_ALPHA_OP_EX2; - case TGSI_OPCODE_LG2: - return R500_ALPHA_OP_LN2; - case TGSI_OPCODE_RCP: - return R500_ALPHA_OP_RCP; - case TGSI_OPCODE_RSQ: - return R500_ALPHA_OP_RSQ; - case TGSI_OPCODE_FRC: - return R500_ALPHA_OP_FRC; - case TGSI_OPCODE_SIN: - return R500_ALPHA_OP_SIN; - case TGSI_OPCODE_DDX: - return R500_ALPHA_OP_MDH; - case TGSI_OPCODE_DDY: - return R500_ALPHA_OP_MDV; - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - return R500_ALPHA_OP_DP; - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_CMP: - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - return R500_ALPHA_OP_CMP; - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_MAD: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_SUB: - return R500_ALPHA_OP_MAD; - default: - return 0; - } -} - -static INLINE uint32_t r5xx_tex_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_KIL: - return R500_TEX_INST_TEXKILL; - case TGSI_OPCODE_TEX: - return R500_TEX_INST_LD; - case TGSI_OPCODE_TXB: - return R500_TEX_INST_LODBIAS; - 
case TGSI_OPCODE_TXP: - return R500_TEX_INST_PROJ; - default: - return 0; - } -} - -/* Setup an ALU operation. */ -static INLINE void r5xx_emit_maths(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - unsigned op, - unsigned count) -{ - int i = fs->instruction_count; - - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - fs->instructions[i].inst0 = R500_INST_TYPE_OUT; - if (r300_fs_is_depr(assembler, dst)) { - fs->instructions[i].inst4 = R500_W_OMASK; - } else { - fs->instructions[i].inst0 |= - R500_ALU_OMASK(dst->DstRegister.WriteMask); - } - } else { - fs->instructions[i].inst0 = R500_INST_TYPE_ALU | - R500_ALU_WMASK(dst->DstRegister.WriteMask); - } - - fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT; - - fs->instructions[i].inst4 |= - R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); - fs->instructions[i].inst5 = - R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); - - switch (count) { - case 3: - fs->instructions[i].inst1 = - R500_RGB_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); - fs->instructions[i].inst2 = - R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); - fs->instructions[i].inst5 |= - R500_ALU_RGBA_SEL_C_SRC2 | - R500_SWIZ_RGBA_C(r5xx_rgb_swiz(&src[2])) | - R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | - R500_SWIZ_ALPHA_C(r5xx_alpha_swiz(&src[2])); - case 2: - fs->instructions[i].inst1 |= - R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); - fs->instructions[i].inst2 |= - R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); - fs->instructions[i].inst3 = - R500_ALU_RGB_SEL_B_SRC1 | - R500_SWIZ_RGB_B(r5xx_rgb_swiz(&src[1])); - fs->instructions[i].inst4 |= - R500_ALPHA_SEL_B_SRC1 | - R500_SWIZ_ALPHA_B(r5xx_alpha_swiz(&src[1])); - case 1: - case 0: - default: - fs->instructions[i].inst1 |= - R500_RGB_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); - fs->instructions[i].inst2 |= - 
R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); - fs->instructions[i].inst3 |= - R500_ALU_RGB_SEL_A_SRC0 | - R500_SWIZ_RGB_A(r5xx_rgb_swiz(&src[0])); - fs->instructions[i].inst4 |= - R500_ALPHA_SEL_A_SRC0 | - R500_SWIZ_ALPHA_A(r5xx_alpha_swiz(&src[0])); - break; - } - - fs->instructions[i].inst4 |= r5xx_alpha_op(op); - fs->instructions[i].inst5 |= r5xx_rgba_op(op); - - fs->instruction_count++; -} - -static INLINE void r5xx_emit_tex(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - uint32_t op) -{ - int i = fs->instruction_count; - - fs->instructions[i].inst0 = R500_INST_TYPE_TEX | - R500_TEX_WMASK(dst->DstRegister.WriteMask) | - R500_INST_TEX_SEM_WAIT; - fs->instructions[i].inst1 = R500_TEX_ID(0) | - R500_TEX_SEM_ACQUIRE | //R500_TEX_IGNORE_UNCOVERED | - r5xx_tex_op(op); - fs->instructions[i].inst2 = - R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) | - R500_SWIZ_TEX_STRQ(r5xx_strq_swiz(src)) | - R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) | +#include "r300_reg.h" + +/* XXX this all should find its way back to r300_reg */ +/* Swizzle tools */ +#define R500_SWIZZLE_ZERO 4 +#define R500_SWIZZLE_HALF 5 +#define R500_SWIZZLE_ONE 6 +#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) +#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) +#define R500_SWIZ_MOD_NEG 1 +#define R500_SWIZ_MOD_ABS 2 +#define R500_SWIZ_MOD_NEG_ABS 3 +/* Swizzles for inst2 */ +#define R500_SWIZ_TEX_STRQ(x) ((x) << 8) +#define R500_SWIZ_TEX_RGBA(x) ((x) << 24) +/* Swizzles for inst3 */ +#define R500_SWIZ_RGB_A(x) ((x) << 2) +#define R500_SWIZ_RGB_B(x) ((x) << 15) +/* Swizzles for inst4 */ +#define R500_SWIZ_ALPHA_A(x) ((x) << 14) +#define R500_SWIZ_ALPHA_B(x) ((x) << 21) +/* Swizzle for inst5 */ +#define R500_SWIZ_RGBA_C(x) ((x) << 14) +#define R500_SWIZ_ALPHA_C(x) ((x) << 27) +/* 
Writemasks */ +#define R500_TEX_WMASK(x) ((x) << 11) +#define R500_ALU_WMASK(x) ((x) << 11) +#define R500_ALU_OMASK(x) ((x) << 15) +#define R500_W_OMASK (1 << 31) + +struct rX00_fragment_program_code r5xx_passthrough_fragment_shader = { + .code.r500.max_temp_idx = 0, + .code.r500.inst_end = 0, + + .code.r500.inst[0].inst0 = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | R500_INST_LAST | + R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, + .code.r500.inst[0].inst1 = + R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, + .code.r500.inst[0].inst2 = + R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, + .code.r500.inst[0].inst3 = + R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, + .code.r500.inst[0].inst4 = + R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, + .code.r500.inst[0].inst5 = + R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0, +}; + +struct rX00_fragment_program_code r5xx_texture_fragment_shader = { + .code.r500.max_temp_idx = 0, + .code.r500.inst_end = 1, + + .code.r500.inst[0].inst0 = R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, + .code.r500.inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED, + .code.r500.inst[0].inst2 = R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | + R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A | + R500_TEX_DST_ADDR(0) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - - if 
(dst->DstRegister.File == TGSI_FILE_OUTPUT) { - fs->instructions[i].inst2 |= - R500_TEX_DST_ADDR(assembler->temp_count + - assembler->temp_offset); - - fs->instruction_count++; - - /* Setup and emit a MOV. */ - src[0].SrcRegister.Index = assembler->temp_count; - src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - - src[1] = src[0]; - src[2] = r300_constant_zero; - r5xx_emit_maths(fs, assembler, src, dst, TGSI_OPCODE_MOV, 3); - } else { - fs->instruction_count++; - } -} - -void r5xx_fs_finalize(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler) -{ - /* XXX should this just go with OPCODE_END? */ - fs->instructions[fs->instruction_count - 1].inst0 |= - R500_INST_LAST; -} - -void r5xx_fs_instruction(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst) -{ - /* Switch between opcodes. When possible, prefer using the official - * AMD/ATI names for opcodes, please, as it facilitates using the - * documentation. */ - switch (inst->Instruction.Opcode) { - /* XXX trig needs extra prep */ - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - /* The simple scalar ops. */ - case TGSI_OPCODE_EX2: - case TGSI_OPCODE_LG2: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - /* Copy red swizzle to alpha for src0 */ - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; - inst->FullSrcRegisters[0].SrcRegister.SwizzleW = - inst->FullSrcRegisters[0].SrcRegister.SwizzleX; - /* Fall through */ - case TGSI_OPCODE_DDX: - case TGSI_OPCODE_DDY: - case TGSI_OPCODE_FRC: - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1); - break; - - /* The dot products. 
*/ - case TGSI_OPCODE_DPH: - /* Set alpha swizzle to one for src0 */ - if (!inst->FullSrcRegisters[0].SrcRegister.Extended) { - inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX = - inst->FullSrcRegisters[0].SrcRegister.SwizzleX; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY = - inst->FullSrcRegisters[0].SrcRegister.SwizzleY; - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ = - inst->FullSrcRegisters[0].SrcRegister.SwizzleZ; - } - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = - TGSI_EXTSWIZZLE_ONE; - /* Fall through */ - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2); - break; - - /* Simple three-source operations. */ - case TGSI_OPCODE_CMP: - /* Swap src0 and src2 */ - inst->FullSrcRegisters[3] = inst->FullSrcRegisters[2]; - inst->FullSrcRegisters[2] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[0] = inst->FullSrcRegisters[3]; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - - /* The MAD variants. 
*/ - case TGSI_OPCODE_SUB: - /* Just like ADD, but flip the negation on src1 first */ - inst->FullSrcRegisters[1].SrcRegister.Negate = - !inst->FullSrcRegisters[1].SrcRegister.Negate; - /* Fall through */ - case TGSI_OPCODE_ADD: - /* Force src0 to one, move all registers over */ - inst->FullSrcRegisters[2] = inst->FullSrcRegisters[1]; - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[0] = r300_constant_one; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - case TGSI_OPCODE_MUL: - /* Force our src2 to zero */ - inst->FullSrcRegisters[2] = r300_constant_zero; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - case TGSI_OPCODE_MAD: - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - - /* The MOV variants. */ - case TGSI_OPCODE_ABS: - /* Set absolute value modifiers. */ - inst->FullSrcRegisters[0].SrcRegisterExtMod.Absolute = TRUE; - /* Fall through */ - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - /* src0 -> src1 and src2 forced to zero */ - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[2] = r300_constant_zero; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - - /* The compound and hybrid insts. 
*/ - case TGSI_OPCODE_LRP: - /* LRP DST A, B, C -> MAD TMP -A, C, C; MAD DST A, B, TMP */ - inst->FullSrcRegisters[3] = inst->FullSrcRegisters[1]; - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[2]; - inst->FullSrcRegisters[0].SrcRegister.Negate = - !(inst->FullSrcRegisters[0].SrcRegister.Negate); - inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; - inst->FullDstRegisters[0].DstRegister.Index = - assembler->temp_count; - inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); - inst->FullSrcRegisters[2].SrcRegister.Index = - assembler->temp_count; - inst->FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst->FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst->FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst->FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; - inst->FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[3]; - inst->FullSrcRegisters[0].SrcRegister.Negate = - !(inst->FullSrcRegisters[0].SrcRegister.Negate); - inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); - break; - case TGSI_OPCODE_POW: - /* POW DST A, B -> LG2 TMP A; MUL TMP TMP, B; EX2 DST TMP */ - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = - inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; - inst->FullSrcRegisters[0].SrcRegister.SwizzleW = - inst->FullSrcRegisters[0].SrcRegister.SwizzleX; - inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; - inst->FullDstRegisters[0].DstRegister.Index = - assembler->temp_count; - inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_LG2, 1); - 
inst->FullSrcRegisters[0].SrcRegister.Index = - assembler->temp_count; - inst->FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst->FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst->FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst->FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; - inst->FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - inst->FullSrcRegisters[2] = r300_constant_zero; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_MUL, 3); - inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; - r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], TGSI_OPCODE_EX2, 1); - break; - - /* The texture instruction set. */ - case TGSI_OPCODE_KIL: - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXP: - r5xx_emit_tex(fs, assembler, &inst->FullSrcRegisters[0], - &inst->FullDstRegisters[0], inst->Instruction.Opcode); - break; - - /* This is the end. My only friend, the end. */ - case TGSI_OPCODE_END: - break; - default: - debug_printf("r300: fs: Bad opcode %d\n", - inst->Instruction.Opcode); - break; - } - - /* Clamp, if saturation flags are set. 
*/ - if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) { - fs->instructions[fs->instruction_count - 1].inst0 |= - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; - } -} + R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A, + .code.r500.inst[0].inst3 = 0x0, + .code.r500.inst[0].inst4 = 0x0, + .code.r500.inst[0].inst5 = 0x0, + + .code.r500.inst[1].inst0 = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | R500_INST_LAST | + R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, + .code.r500.inst[1].inst1 = + R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, + .code.r500.inst[1].inst2 = + R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, + .code.r500.inst[1].inst3 = + R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, + .code.r500.inst[1].inst4 = + R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, + .code.r500.inst[1].inst5 = + R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0, +}; diff --git a/src/gallium/drivers/r300/r5xx_fs.h b/src/gallium/drivers/r300/r5xx_fs.h index 629e587be4..a4addde32b 100644 --- a/src/gallium/drivers/r300/r5xx_fs.h +++ b/src/gallium/drivers/r300/r5xx_fs.h @@ -24,109 +24,9 @@ #ifndef R5XX_FS_H #define R5XX_FS_H -#include "r300_fs_inlines.h" +#include "radeon_code.h" -/* XXX this all should find its way back to r300_reg */ -/* Swizzle tools */ -#define R500_SWIZZLE_ZERO 4 -#define R500_SWIZZLE_HALF 5 -#define R500_SWIZZLE_ONE 6 -#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) -#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) -#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) -#define R500_SWIZ_MOD_NEG 1 -#define 
R500_SWIZ_MOD_ABS 2 -#define R500_SWIZ_MOD_NEG_ABS 3 -/* Swizzles for inst2 */ -#define R500_SWIZ_TEX_STRQ(x) ((x) << 8) -#define R500_SWIZ_TEX_RGBA(x) ((x) << 24) -/* Swizzles for inst3 */ -#define R500_SWIZ_RGB_A(x) ((x) << 2) -#define R500_SWIZ_RGB_B(x) ((x) << 15) -/* Swizzles for inst4 */ -#define R500_SWIZ_ALPHA_A(x) ((x) << 14) -#define R500_SWIZ_ALPHA_B(x) ((x) << 21) -/* Swizzle for inst5 */ -#define R500_SWIZ_RGBA_C(x) ((x) << 14) -#define R500_SWIZ_ALPHA_C(x) ((x) << 27) -/* Writemasks */ -#define R500_TEX_WMASK(x) ((x) << 11) -#define R500_ALU_WMASK(x) ((x) << 11) -#define R500_ALU_OMASK(x) ((x) << 15) -#define R500_W_OMASK (1 << 31) - -static struct r5xx_fragment_shader r5xx_passthrough_fragment_shader = { - .shader.stack_size = 0, - .instruction_count = 1, - .instructions[0].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .instructions[0].inst1 = - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, - .instructions[0].inst2 = - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, - .instructions[0].inst3 = - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, - .instructions[0].inst4 = - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, - .instructions[0].inst5 = - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0, -}; - -static struct r5xx_fragment_shader r5xx_texture_fragment_shader = { - .shader.stack_size = 1, - .instruction_count = 2, - .instructions[0].inst0 = R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK | - 
R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .instructions[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED, - .instructions[0].inst2 = R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | - R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A, - .instructions[0].inst3 = 0x0, - .instructions[0].inst4 = 0x0, - .instructions[0].inst5 = 0x0, - .instructions[1].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .instructions[1].inst1 = - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, - .instructions[1].inst2 = - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, - .instructions[1].inst3 = - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, - .instructions[1].inst4 = - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, - .instructions[1].inst5 = - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0, -}; - -void r5xx_fs_finalize(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler); - -void r5xx_fs_instruction(struct r5xx_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst); +extern struct rX00_fragment_program_code r5xx_passthrough_fragment_shader; /* declaration only; defined in r5xx_fs.c */ +extern struct rX00_fragment_program_code r5xx_texture_fragment_shader; /* declaration only; defined in r5xx_fs.c */ #endif /* R5XX_FS_H */ |