diff options
author | Alex Deucher <alex@botch2.com> | 2008-05-12 11:58:35 -0400 |
---|---|---|
committer | Alex Deucher <alex@botch2.com> | 2008-05-12 11:58:35 -0400 |
commit | e000f2ab6e8491bf8175c38c42fabb04833d5209 (patch) | |
tree | 17c99b7dcb8358c59eecab2cafc64fb01f71968f | |
parent | 2a4d1085cb9d2d03e6aeb2c71a59888826c31afd (diff) | |
parent | 3d1528027889d67ca98002833dcb42b3f2f48067 (diff) |
Merge branch 'r500-support' of git+ssh://agd5f@git.freedesktop.org/git/mesa/mesa into r500-support
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_cmdbuf.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.h | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_reg.h | 33 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.c | 97 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_swtcl.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r500_fragprog.c | 324 |
6 files changed, 270 insertions, 201 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index df7f29a2ce..8668dba9f0 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -54,7 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" // Set this to 1 for extremely verbose debugging of command buffers -#define DEBUG_CMDBUF 0 +#define DEBUG_CMDBUF 1 /** * Send the current command buffer via ioctl to the hardware. @@ -412,6 +412,10 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { + ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; + ALLOC_STATE(r500fp, r500fp, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); ALLOC_STATE(r500fp_const, r500fp_const, R300_FPP_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index bb5f5c35f0..815a729969 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -354,6 +354,11 @@ struct r300_state_atom { #define R300_FP_NODE3 8 #define R300_FP_CMDSIZE 9 +#define R500_FP_CMD_0 0 +#define R500_FP_CNTL 1 +#define R500_FP_PIXSIZE 2 +#define R500_FP_CMDSIZE 3 + #define R300_FPT_CMD_0 0 #define R300_FPT_INSTR_0 1 #define R300_FPT_CMDSIZE 65 @@ -802,10 +807,7 @@ struct r500_fragment_program { int cur_node; int first_node_has_tex; - int alu_offset; - int alu_end; - int tex_offset; - int tex_end; + int temp_reg_offset; /* Hardware constants. * Contains a pointer to the value. The destination of the pointer diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 8e3fe0c524..b404e515df 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -721,23 +721,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_13 0x40A8 #define R500_RS_IP_14 0x40AC #define R500_RS_IP_15 0x40B0 +#define R500_RS_IP_PTR_K0 62 +#define R500_RS_IP_PTR_K1 63 #define R500_RS_IP_TEX_PTR_S_SHIFT 0 #define R500_RS_IP_TEX_PTR_T_SHIFT 6 #define R500_RS_IP_TEX_PTR_R_SHIFT 12 #define R500_RS_IP_TEX_PTR_Q_SHIFT 18 #define R500_RS_IP_COL_PTR_SHIFT 24 #define R500_RS_IP_COL_FMT_SHIFT 27 -#define R500_RS_IP_COL_FMT_RGBA (0 << 27) -#define R500_RS_IP_COL_FMT_RGB0 (1 << 27) -#define R500_RS_IP_COL_FMT_RGB1 (2 << 27) -/* gap */ -#define R500_RS_IP_COL_FMT_000A (4 << 27) -#define R500_RS_IP_COL_FMT_0000 (5 << 27) -#define R500_RS_IP_COL_FMT_0001 (6 << 27) -/* gap */ -#define R500_RS_IP_COL_FMT_111A (8 << 27) -#define R500_RS_IP_COL_FMT_1110 (9 << 27) -#define R500_RS_IP_COL_FMT_1111 (10 << 27) +# define R500_RS_COL_PTR(x) (x << 24) +# define R500_RS_COL_FMT(x) (x << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) #define R500_RS_IP_OFFSET_EN (1 << 31) @@ -1177,15 +1170,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_TEX_PTR(x) (x << 0)
# define R300_RS_COL_PTR(x) (x << 6)
# define R300_RS_COL_FMT(x) (x << 9)
-# define R300_RS_COL_FMT_RGBA 0
-# define R300_RS_COL_FMT_RGB0 2
-# define R300_RS_COL_FMT_RGB1 3
-# define R300_RS_COL_FMT_000A 4
-# define R300_RS_COL_FMT_0000 5
-# define R300_RS_COL_FMT_0001 6
-# define R300_RS_COL_FMT_111A 8
-# define R300_RS_COL_FMT_1110 9
-# define R300_RS_COL_FMT_1111 10
+# define R300_RS_COL_FMT_RGBA 0 +# define R300_RS_COL_FMT_RGB0 2 +# define R300_RS_COL_FMT_RGB1 3 +# define R300_RS_COL_FMT_000A 4 +# define R300_RS_COL_FMT_0000 5 +# define R300_RS_COL_FMT_0001 6 +# define R300_RS_COL_FMT_111A 8 +# define R300_RS_COL_FMT_1110 9 +# define R300_RS_COL_FMT_1111 10 # define R300_RS_SEL_S(x) (x << 13) # define R300_RS_SEL_T(x) (x << 16) # define R300_RS_SEL_R(x) (x << 19) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 10002e3c4f..298de096fb 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1501,7 +1501,7 @@ static void r300SetupRSUnit(GLcontext * ctx) int rs_tex_count = 0, rs_col_count = 0; int i, count; - memset(interp_col, 0, 8); + memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; @@ -1640,22 +1640,17 @@ static void r500SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); /* I'm still unsure if these are needed */ - GLuint interp_magic[8] = { - 0x00, - 1 << 24, - 2 << 24, - 3 << 24, - 0x00, - 0x00, - 0x00, - 0x00 - }; + GLuint interp_col[8]; union r300_outputs_written OutputsWritten; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; GLuint InputsRead; int fp_reg, high_rr; + int rs_tex_count = 0, rs_col_count = 0; int in_texcoords, col_interp_nr; - int i; + int i, count; + memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; else @@ -1672,7 +1667,7 @@ static void r500SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + fp_reg = col_interp_nr = high_rr = in_texcoords = 0; r300->hw.rr.cmd[R300_RR_INST_1] = 0; @@ -1690,15 +1685,51 @@ static void r500SetupRSUnit(GLcontext * ctx) InputsRead &= ~FRAG_BIT_WPOS; } + if (InputsRead & FRAG_BIT_COL0) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + interp_col[0] |= R500_RS_COL_PTR(rs_col_count); + if (count == 3) + interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + else + interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + + if (InputsRead & FRAG_BIT_COL1) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + interp_col[1] |= R500_RS_COL_PTR(1); + if (count == 3) + interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - - // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | - (in_texcoords << 0) | interp_magic[i]; + GLuint swiz; + + /* with TCL we always seem to route 4 components */ + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + swiz = 0; + if (count == 4) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT; + else + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + + if (count >= 3) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + + /* always have a least 2 tex coords */ + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; + } + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count | swiz; r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { @@ -1715,16 +1746,11 @@ static void r500SetupRSUnit(GLcontext * ctx) WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); } } - /* Need to count all coords enabled at vof */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - in_texcoords++; - } } if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1734,7 +1760,6 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) @@ -1751,7 +1776,7 @@ static void r500SetupRSUnit(GLcontext * ctx) col_interp_nr++; } - r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT) + r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_interp_nr << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; @@ -2323,10 +2348,11 @@ void r300UpdateShaders(r300ContextPtr rmesa) hw_tcl_on = future_hw_tcl_on = 0; r300ResetHwState(rmesa); + r300UpdateStateParameters(ctx, _NEW_PROGRAM); return; } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); } + r300UpdateStateParameters(ctx, _NEW_PROGRAM); } static void r300SetupPixelShader(r300ContextPtr rmesa) @@ -2433,6 +2459,9 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) return; } + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx; + R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... */ for (i = 0; i < fp->cs->nrslots; i++) { @@ -2449,10 +2478,10 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(fp->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(fp->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(fp->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(fp->constant[i][3]); } bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index a41fa1023a..8aebd9be3e 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -575,6 +575,7 @@ static void r300RenderStart(GLcontext *ctx) r300ChooseRenderState(ctx); r300SetVertexFormat(ctx); + r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b08beb617f..f9ef582d0a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -65,6 +65,9 @@ #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs +#define R500_US_NUM_TEMP_REGS 128 +#define R500_US_NUM_CONST_REGS 256 + /* "Register" flags */ #define REG_CONSTANT (1 << 8) #define REG_SRC_REL (1 << 9) @@ -76,6 +79,7 @@ #define R500_SWIZZLE_ONE 6 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) /* Swizzles for inst2 */ #define MAKE_SWIZ_TEX_STRQ(x) (x << 8) #define MAKE_SWIZ_TEX_RGBA(x) (x << 24) @@ -120,6 +124,30 @@ static inline GLuint make_strq_swizzle(struct prog_src_register src) { return swiz; } +static int get_temp(struct r500_fragment_program *fp, int slot) { + + COMPILE_STATE; + + int r = slot; + + while (cs->inputs[r].refcount != 0) { + /* Crap, taken. */ + r++; + } + + fp->temp_reg_offset = r - slot; + + if (r >= R500_US_NUM_TEMP_REGS) { + ERROR("Out of hardware temps!\n"); + return 0; + } + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + /* Borrowed verbatim from r300_fragprog since it hasn't changed. */ static GLuint emit_const4fv(struct r500_fragment_program *fp, const GLfloat * cp) @@ -133,8 +161,7 @@ static GLuint emit_const4fv(struct r500_fragment_program *fp, } if (index >= fp->const_nr) { - /* TODO: This should be r5xx nums, not r300 */ - if (index >= PFS_NUM_CONST_REGS) { + if (index >= R500_US_NUM_CONST_REGS) { ERROR("Out of hw constants!\n"); return reg; } @@ -148,16 +175,17 @@ static GLuint emit_const4fv(struct r500_fragment_program *fp, } static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) { + COMPILE_STATE; GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - reg = (src.Index << 0x1) | 0x1; + reg = src.Index + fp->temp_reg_offset; break; case PROGRAM_INPUT: - /* Ugly hack needed to work around Mesa; - * fragments don't get loaded right otherwise! */ - reg = 0x0; + reg = cs->inputs[src.Index].reg; break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> ParameterValues[src.Index]); @@ -174,7 +202,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - reg = (dest.Index << 0x1) | 0x1; + reg = dest.Index + fp->temp_reg_offset; break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -189,6 +217,53 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist return reg; } +static void emit_tex(struct r500_fragment_program *fp, + struct prog_instruction *fpi, int opcode, int dest, int counter) +{ + int hwsrc, hwdest; + GLuint mask; + + mask = fpi->DstReg.WriteMask << 11; + hwsrc = make_src(fp, fpi->SrcReg[0]); + + fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask + | R500_INST_TEX_SEM_WAIT; + + fp->inst[counter].inst1 = fpi->TexSrcUnit + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) + fp->inst[counter].inst1 |= R500_TEX_UNSCALED; + + switch (opcode) { + case OPCODE_TEX: + fp->inst[counter].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXB: + fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS; + break; + case OPCODE_TXP: + fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; + break; + default: + ERROR("emit_tex can't handle opcode %x\n", opcode); + } + + fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) + /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ + | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A + | R500_TEX_DST_ADDR(dest) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + + + fp->inst[counter].inst3 = 0x0; + fp->inst[counter].inst4 = 0x0; + fp->inst[counter].inst5 = 0x0; +} + static void dumb_shader(struct r500_fragment_program *fp) { fp->inst[0].inst0 = R500_INST_TYPE_TEX @@ -264,7 +339,30 @@ static void dumb_shader(struct r500_fragment_program *fp) fp->translated = GL_TRUE; } -static void emit_alu(struct r500_fragment_program *fp) { +/* static void emit_alu(struct r500_fragment_program *fp) { + * } */ + +static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) { + /* The r3xx shader uses MAD to implement MOV. We are using CMP, since + * it is technically more accurate and recommended by ATI/AMD. */ + GLuint src_reg = make_src(fp, src); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src)) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src)) + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src)) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src)) + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); } static GLboolean parse_program(struct r500_fragment_program *fp) @@ -289,23 +387,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) switch (fpi->Opcode) { case OPCODE_ABS: - src[0] = make_src(fp, fpi->SrcReg[0]); - /* Variation on MOV */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 - | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_MOD_A_ABS - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); + emit_mov(fp, counter, fpi->SrcReg[0], dest); + fp->inst[counter].inst0 |= mask; + fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS + | R500_ALU_RGB_MOD_B_ABS; + fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS + | R500_ALPHA_MOD_B_ABS; break; case OPCODE_ADD: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -314,9 +401,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0); fp->inst[counter].inst3 = /* 1 */ MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); @@ -334,13 +421,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_DP3: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); @@ -349,23 +435,18 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_DP4: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); /* Based on DP3 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); @@ -374,11 +455,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MAD: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -439,31 +516,15 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MOV: - src[0] = make_src(fp, fpi->SrcReg[0]); - /* We use MAX, but MIN, CND, and CMP also work. - * Just remember to disable the OMOD! */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B - | R500_ALU_RGB_SEL_B_SRC0 - | R500_ALU_RGB_R_SWIZ_B_R | R500_ALU_RGB_G_SWIZ_B_G | R500_ALU_RGB_B_SWIZ_B_B - | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 - | R500_ALPHA_OMOD_DISABLE; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); + emit_mov(fp, counter, fpi->SrcReg[0], dest); + fp->inst[counter].inst0 |= mask; break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: src0*src1+0 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -509,37 +570,13 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_MOD_C_NEG; break; case OPCODE_TEX: - src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask - | R500_INST_TEX_SEM_WAIT; - fp->inst[counter].inst1 = fpi->TexSrcUnit - | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) - /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ - | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A - | R500_TEX_DST_ADDR(dest) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; + emit_tex(fp, fpi, OPCODE_TEX, dest, counter); + break; + case OPCODE_TXB: + emit_tex(fp, fpi, OPCODE_TXB, dest, counter); break; case OPCODE_TXP: - src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask; - fp->inst[counter].inst1 = fpi->TexSrcUnit - | R500_TEX_INST_PROJ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) - /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ - | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A - | R500_TEX_DST_ADDR(dest) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; + emit_tex(fp, fpi, OPCODE_TXP, dest, counter); break; default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); @@ -563,15 +600,37 @@ static GLboolean parse_program(struct r500_fragment_program *fp) } - fp->cs->nrslots = counter; - - /* Finish him! (If it's an output instruction...) - * Yes, I know it's ugly... */ + /* Finish him! (If it's an ALU/OUT instruction...) */ if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) { fp->inst[counter].inst0 |= R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; + | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; + } else { + /* We still need to put an output inst, right? */ + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | R500_INST_LAST + | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G + | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR0(dest); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(0) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 + | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(0) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); } + fp->cs->nrslots = counter; + + fp->max_temp_idx++; + return GL_TRUE; } @@ -593,7 +652,10 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; - fp->max_temp_idx = 0; + /* Size of pixel stack, plus 1. */ + fp->max_temp_idx = 1; + /* Temp register offset. */ + fp->temp_reg_offset = 0; fp->node[0].alu_end = -1; fp->node[0].tex_end = -1; @@ -610,16 +672,16 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) * configures itself based on the fragprog's InputsRead * * NOTE: this depends on get_hw_temp() allocating registers in order, - * starting from register 0. + * starting from register 0, so we're just going to do that instead. */ -#if 0 /* Texcoords come first */ for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - get_hw_temp(fp, 0); + fp->temp_reg_offset; + fp->temp_reg_offset++; } } InputsRead &= ~FRAG_BITS_TEX_ANY; @@ -627,22 +689,27 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) /* fragment position treated as a texcoord */ if (InputsRead & FRAG_BIT_WPOS) { cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); - insert_wpos(&mp->Base); + cs->inputs[FRAG_ATTRIB_WPOS].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; } InputsRead &= ~FRAG_BIT_WPOS; /* Then primary colour */ if (InputsRead & FRAG_BIT_COL0) { cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); + cs->inputs[FRAG_ATTRIB_COL0].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; } InputsRead &= ~FRAG_BIT_COL0; /* Secondary color */ if (InputsRead & FRAG_BIT_COL1) { cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); + cs->inputs[FRAG_ATTRIB_COL1].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; } InputsRead &= ~FRAG_BIT_COL1; @@ -654,48 +721,31 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) if (InputsRead & (1 << i)) cs->inputs[i].reg = 0; } -#endif /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. * That way, we can free up the reg when it's no longer needed */ if (!mp->Base.Instructions) { - ERROR("No instructions found in program\n"); + ERROR("No instructions found in program, going to go die now.\n"); return; } +#if 0 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { int idx; - for (i = 0; i < 3; i++) { idx = fpi->SrcReg[i].Index; - switch (fpi->SrcReg[i].File) { - case PROGRAM_TEMPORARY: - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - break; - case PROGRAM_INPUT: + if (fpi->SrcReg[i].File == PROGRAM_INPUT) { cs->inputs[idx].refcount++; - break; - default: - break; + if (fp->max_temp_idx < idx) + fp->max_temp_idx = idx; } } - - idx = fpi->DstReg.Index; - if (fpi->DstReg.File == PROGRAM_TEMPORARY) { - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - } } +#endif + + fp->max_temp_idx = fp->temp_reg_offset + 1; + cs->temp_in_use = temps_used; } @@ -731,16 +781,6 @@ void r500TranslateFragmentShader(r300ContextPtr r300, return; } - /* Finish off */ - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - if (fp->node[fp->cur_node].tex_end < 0) - fp->node[fp->cur_node].tex_end = 0; - fp->alu_offset = 0; - fp->alu_end = cs->nrslots - 1; - //assert(fp->node[fp->cur_node].alu_end >= 0); - //assert(fp->alu_end >= 0); - fp->translated = GL_TRUE; r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); } |