From 7247446ba81b6bafede9ead750e5b5e81f3f1a10 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Tue, 21 Jul 2009 21:12:40 +0200 Subject: radeon: fix colorbuffer pitch emission regarding tiling in KMS/CS case We need to emit a relocation for pitch register so that kernel can check and properly setup tiling on the color buffer. --- src/mesa/drivers/dri/radeon/radeon_state_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index aaa82b1d6a..57aa7f1ca4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -423,7 +423,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) if (drb) dwords += 6; if (rrb) - dwords += 6; + dwords += 8; BEGIN_BATCH_NO_AUTOSTATE(dwords); /* In the CS case we need to split this up */ @@ -449,7 +449,7 @@ static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); - OUT_BATCH(cbpitch); + OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); } // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { -- cgit v1.2.3 From 600a53a32edea7d03efa21103ad7122670c4ed4a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 27 Jul 2009 01:57:25 -0400 Subject: r600: Use R600_SCRATCH_REG_OFFSET rather than RADEON_SCRATCH_REG_OFFSET noticed by vehemens on IRC. --- src/mesa/drivers/dri/radeon/radeon_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 290ef2394d..118e74008f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1025,7 +1025,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); else screen->scratch = (__volatile__ uint32_t *) - ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); + ((GLubyte *)screen->status.map + R600_SCRATCH_REG_OFFSET); screen->buffers = drmMapBufs( sPriv->fd ); if ( !screen->buffers ) { -- cgit v1.2.3 From 6b142866abb3097a84cf4b6dae69423154ab40a6 Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Mon, 27 Jul 2009 17:13:48 +0300 Subject: radeon: Add r6xx/r7xx chip family to get_chip_family_name This fixes problem that glxinfo was reporting r600+ cards as unknown. Signed-off-by: Pauli Nieminen --- src/mesa/drivers/dri/radeon/radeon_common_context.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 285e015c92..a50cd056e1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -85,6 +85,17 @@ static const char* get_chip_family_name(int chip_family) case CHIP_FAMILY_R580: return "R580"; case CHIP_FAMILY_RV560: return "RV560"; case CHIP_FAMILY_RV570: return "RV570"; + case CHIP_FAMILY_R600: return "R600"; + case CHIP_FAMILY_RV610: return "RV610"; + case CHIP_FAMILY_RV630: return "RV630"; + case CHIP_FAMILY_RV670: return "RV670"; + case CHIP_FAMILY_RV620: return "RV620"; + case CHIP_FAMILY_RV635: return "RV635"; + case CHIP_FAMILY_RS780: return "RS780"; + case CHIP_FAMILY_RV770: return "RV770"; + case CHIP_FAMILY_RV730: return "RV730"; + case CHIP_FAMILY_RV710: return "RV710"; + case CHIP_FAMILY_RV740: return "RV740"; default: return "unknown"; } } -- cgit v1.2.3 From e5bed439be4fd7c3a349aedc4bff7eec4e4d363e Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Wed, 15 Jul 2009 17:36:42 +0200 Subject: r300: Detangle fragment program compiler from driver-specific structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in preparation of sharing the fragment program compiler with Gallium: Compiler code is moved into its own directory and modified so that it no longer depends on driver structures. Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/Makefile | 22 +- src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 450 +++++++++ src/mesa/drivers/dri/r300/compiler/r300_fragprog.h | 49 + .../drivers/dri/r300/compiler/r300_fragprog_emit.c | 345 +++++++ .../dri/r300/compiler/r300_fragprog_swizzle.c | 225 +++++ .../dri/r300/compiler/r300_fragprog_swizzle.h | 42 + src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 315 ++++++ src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 482 ++++++++++ src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 52 + .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 329 +++++++ .../drivers/dri/r300/compiler/radeon_compiler.h | 162 ++++ .../drivers/dri/r300/compiler/radeon_nqssadce.c | 297 ++++++ .../drivers/dri/r300/compiler/radeon_nqssadce.h | 93 ++ .../drivers/dri/r300/compiler/radeon_program.c | 128 +++ .../drivers/dri/r300/compiler/radeon_program.h | 131 +++ .../drivers/dri/r300/compiler/radeon_program_alu.c | 635 +++++++++++++ .../drivers/dri/r300/compiler/radeon_program_alu.h | 53 ++ .../dri/r300/compiler/radeon_program_pair.c | 1004 ++++++++++++++++++++ .../dri/r300/compiler/radeon_program_pair.h | 126 +++ src/mesa/drivers/dri/r300/r300_context.h | 136 +-- src/mesa/drivers/dri/r300/r300_fragprog.c | 451 --------- src/mesa/drivers/dri/r300/r300_fragprog.h | 111 --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 280 +----- src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 344 ------- src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c | 225 ----- src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h | 42 - src/mesa/drivers/dri/r300/r300_ioctl.c | 63 +- src/mesa/drivers/dri/r300/r300_state.c | 16 +- src/mesa/drivers/dri/r300/r300_swtcl.c | 8 +- src/mesa/drivers/dri/r300/r300_vertprog.c | 14 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 480 ---------- src/mesa/drivers/dri/r300/r500_fragprog.h | 52 - src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 327 ------- src/mesa/drivers/dri/r300/radeon_nqssadce.c | 297 ------ src/mesa/drivers/dri/r300/radeon_nqssadce.h | 93 -- src/mesa/drivers/dri/r300/radeon_program.c | 128 --- src/mesa/drivers/dri/r300/radeon_program.h | 131 --- src/mesa/drivers/dri/r300/radeon_program_alu.c | 635 ------------- src/mesa/drivers/dri/r300/radeon_program_alu.h | 53 -- src/mesa/drivers/dri/r300/radeon_program_pair.c | 1004 -------------------- src/mesa/drivers/dri/r300/radeon_program_pair.h | 126 --- src/mesa/drivers/dri/radeon/radeon_screen.c | 4 +- 42 files changed, 5026 insertions(+), 4934 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog.h create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h create mode 100644 src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog.h create mode 100644 src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_compiler.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog.c delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog.h delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_emit.c delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c delete mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.c delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.h delete mode 100644 src/mesa/drivers/dri/r300/r500_fragprog_emit.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_nqssadce.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_nqssadce.h delete mode 100644 src/mesa/drivers/dri/r300/radeon_program.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_program.h delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_alu.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_alu.h delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_pair.c delete mode 100644 src/mesa/drivers/dri/r300/radeon_program_pair.h (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 7460410ee6..3a7de6d5be 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -38,6 +38,18 @@ RADEON_COMMON_SOURCES = \ radeon_span.c \ radeon_fbo.c +RADEON_COMPILER_SOURCES = \ + compiler/radeon_nqssadce.c \ + compiler/radeon_program.c \ + compiler/radeon_program_alu.c \ + compiler/radeon_program_pair.c \ + compiler/r3xx_fragprog.c \ + compiler/r300_fragprog.c \ + compiler/r300_fragprog_swizzle.c \ + compiler/r300_fragprog_emit.c \ + compiler/r500_fragprog.c \ + compiler/r500_fragprog_emit.c \ + DRIVER_SOURCES = \ radeon_screen.c \ r300_context.c \ @@ -48,21 +60,13 @@ DRIVER_SOURCES = \ r300_render.c \ r300_tex.c \ r300_texstate.c \ - radeon_program.c \ - radeon_program_alu.c \ - radeon_program_pair.c \ - radeon_nqssadce.c \ r300_vertprog.c \ r300_fragprog_common.c \ - r300_fragprog.c \ - r300_fragprog_swizzle.c \ - r300_fragprog_emit.c \ - r500_fragprog.c \ - r500_fragprog_emit.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ $(RADEON_COMMON_SOURCES) \ + $(RADEON_COMPILER_SOURCES) \ $(EGL_SOURCES) \ $(CS_SOURCES) diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c new file mode 100644 index 0000000000..00ef964571 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -0,0 +1,450 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "compiler/r300_fragprog.h" + +#include "shader/prog_parameter.h" + +#include "r300_reg.h" + +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + +static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +{ + gl_state_index fail_value_tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 + }; + struct prog_src_register reg = { 0, }; + + fail_value_tokens[2] = tmu; + reg.File = PROGRAM_STATE_VAR; + reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + * \todo If/when r5xx uses the radeon_program architecture, this can probably + * be reused. + */ +GLboolean r300_transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = inst.DstReg; + if (comparefunc == GL_ALWAYS) { + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + } + return GL_TRUE; + } + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = radeonFindFreeTemporary(t); + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } + + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + + int tempreg = radeonFindFreeTemporary(t); + int factor_index; + + tokens[2] = inst.TexSrcUnit; + factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); + + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MUL; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_STATE_VAR; + tgt->SrcReg[1].Index = factor_index; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; + } + + if (inst.Opcode != OPCODE_KIL) { + if (inst.DstReg.File != PROGRAM_TEMPORARY || + inst.DstReg.WriteMask != WRITEMASK_XYZW) { + int tempreg = radeonFindFreeTemporary(t); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } else if (inst.SaturateMode) { + destredirect = GL_TRUE; + } + } + + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode; + int rcptemp = radeonFindFreeTemporary(t); + int pass, fail; + + tgt = radeonAppendInstructions(t->Program, 3); + + tgt[0].Opcode = OPCODE_RCP; + tgt[0].DstReg.File = PROGRAM_TEMPORARY; + tgt[0].DstReg.Index = rcptemp; + tgt[0].DstReg.WriteMask = WRITEMASK_W; + tgt[0].SrcReg[0] = inst.SrcReg[0]; + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + + tgt[1].Opcode = OPCODE_MAD; + tgt[1].DstReg = inst.DstReg; + tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[1].SrcReg[0] = inst.SrcReg[0]; + tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[1].Index = rcptemp; + tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; + tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[2].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; + else + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; + + tgt[2].Opcode = OPCODE_CMP; + tgt[2].DstReg = orig_inst->DstReg; + tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; + tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; + tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); + } else if (destredirect) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = orig_inst->DstReg; + tgt->SaturateMode = inst.SaturateMode; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + } + + return GL_TRUE; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r300_fragment_program_code *code = &c->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n < (code->cur_node + 1); n++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d, flags: %08x\n", n, + code->node[n].alu_offset, + code->node[n].tex_offset, + code->node[n].alu_end, code->node[n].tex_end, + code->node[n].flags); + + if (n > 0 || code->first_node_has_tex) { + fprintf(stderr, " TEX:\n"); + for (i = code->node[n].tex_offset; + i <= code->node[n].tex_offset + code->node[n].tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = code->node[n].alu_offset; + i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].inst1 >> (j * 6); + int rega = code->alu.inst[i].inst3 >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (code->alu.inst[i]. + inst1 >> R300_ALU_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + inst1 >> R300_ALU_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (code->alu.inst[i]. + inst3 >> R300_ALU_DSTA_SHIFT) & 31); + } + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + inst3 >> R300_ALU_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + code->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].inst3); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].inst0 >> (j * 7); + int rega = code->alu.inst[i].inst2 >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].inst0, arga[0], arga[1], + arga[2], code->alu.inst[i].inst2); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h new file mode 100644 index 0000000000..186fad6490 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + * Jerome Glisse + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_program.h" + + +extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c new file mode 100644 index 0000000000..1cfb565b6e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -0,0 +1,345 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs + * + * \author Jerome Glisse + * + * \todo FogOption + */ + +#include "compiler/r300_fragprog.h" + +#include "r300_reg.h" + +#include "compiler/radeon_program_pair.h" +#include "compiler/r300_fragprog_swizzle.h" + + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r300_fragment_program_code *code = &c->code->code.r300 + +#define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) +{ + PROG_CODE; + + for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { + if (code->constant[*hwindex].File == file && + code->constant[*hwindex].Index == index) + break; + } + + if (*hwindex >= code->const_nr) { + if (*hwindex >= R300_PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } + + code->const_nr++; + code->constant[*hwindex].File = file; + code->constant[*hwindex].Index = index; + } + + return GL_TRUE; +} + + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + + +static GLuint translate_rgb_opcode(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTC_CMP; + case OPCODE_DP3: return R300_ALU_OUTC_DP3; + case OPCODE_DP4: return R300_ALU_OUTC_DP4; + case OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTC_MAD; + case OPCODE_MAX: return R300_ALU_OUTC_MAX; + case OPCODE_MIN: return R300_ALU_OUTC_MIN; + case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static GLuint translate_alpha_opcode(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTA_CMP; + case OPCODE_DP3: return R300_ALU_OUTA_DP4; + case OPCODE_DP4: return R300_ALU_OUTA_DP4; + case OPCODE_EX2: return R300_ALU_OUTA_EX2; + case OPCODE_FRC: return R300_ALU_OUTA_FRC; + case OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTA_MAD; + case OPCODE_MAX: return R300_ALU_OUTA_MAX; + case OPCODE_MIN: return R300_ALU_OUTA_MIN; + case OPCODE_RCP: return R300_ALU_OUTA_RCP; + case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) +{ + PROG_CODE; + + if (code->alu.length >= R300_PFS_MAX_ALU_INST) { + error("Too many ALU instructions"); + return GL_FALSE; + } + + int ip = code->alu.length++; + int j; + code->node[code->cur_node].alu_end++; + + code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); + code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); + if (!inst->RGB.Src[j].Constant) + use_temporary(code, inst->RGB.Src[j].Index); + code->alu.inst[ip].inst1 |= src << (6*j); + + src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); + if (!inst->Alpha.Src[j].Constant) + use_temporary(code, inst->Alpha.Src[j].Index); + code->alu.inst[ip].inst3 |= src << (6*j); + + GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].inst0 |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].inst2 |= arg << (7*j); + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + code->alu.inst[ip].inst1 |= + (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + code->node[code->cur_node].flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + code->alu.inst[ip].inst3 |= + (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; + code->node[code->cur_node].flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; + code->node[code->cur_node].flags |= R300_W_OUT; + c->code->writes_depth = GL_TRUE; + } + + return GL_TRUE; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static GLboolean finish_node(struct r300_fragment_program_compiler *c) +{ + struct r300_fragment_program_code *code = &c->code->code.r300; + struct r300_fragment_program_node *node = &code->node[code->cur_node]; + + if (node->alu_end < 0) { + /* Generate a single NOP for this node */ + struct radeon_pair_instruction inst; + _mesa_bzero(&inst, sizeof(inst)); + if (!emit_alu(c, &inst)) + return GL_FALSE; + } + + if (node->tex_end < 0) { + if (code->cur_node == 0) { + node->tex_end = 0; + } else { + error("Node %i has no TEX instructions", code->cur_node); + return GL_FALSE; + } + } else { + if (code->cur_node == 0) + code->first_node_has_tex = 1; + } + + return GL_TRUE; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static GLboolean begin_tex(void* data) +{ + PROG_CODE; + + if (code->cur_node == 0) { + if (code->node[0].alu_end < 0 && + code->node[0].tex_end < 0) + return GL_TRUE; + } + + if (code->cur_node == 3) { + error("Too many texture indirections"); + return GL_FALSE; + } + + if (!finish_node(c)) + return GL_FALSE; + + struct r300_fragment_program_node *node = &code->node[++code->cur_node]; + node->alu_offset = code->alu.length; + node->alu_end = -1; + node->tex_offset = code->tex.length; + node->tex_end = -1; + return GL_TRUE; +} + + +static GLboolean emit_tex(void* data, struct prog_instruction* inst) +{ + PROG_CODE; + + if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + error("Too many TEX instructions"); + return GL_FALSE; + } + + GLuint unit = inst->TexSrcUnit; + GLuint dest = inst->DstReg.Index; + GLuint opcode; + + switch(inst->Opcode) { + case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %i", inst->Opcode); + return GL_FALSE; + } + + if (inst->Opcode == OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->SrcReg[0].Index); + + code->node[code->cur_node].tex_end++; + code->tex.inst[code->tex.length++] = + (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | + (dest << R300_DST_ADDR_SHIFT) | + (unit << R300_TEX_ID_SHIFT) | + (opcode << R300_TEX_INST_SHIFT); + return GL_TRUE; +} + + +static const struct radeon_pair_handler pair_handler = { + .EmitConst = &emit_const, + .EmitPaired = &emit_alu, + .EmitTex = &emit_tex, + .BeginTexBlock = &begin_tex, + .MaxHwTemps = R300_PFS_NUM_TEMP_REGS +}; + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +{ + struct r300_fragment_program_code *code = &compiler->code->code.r300; + + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + code->node[0].alu_end = -1; + code->node[0].tex_end = -1; + + if (!radeonPairProgram(compiler->ctx, compiler->program, &pair_handler, compiler)) + return GL_FALSE; + + if (!finish_node(compiler)) + return GL_FALSE; + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c new file mode 100644 index 0000000000..fc9d855bce --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include "r300_reg.h" +#include "radeon_nqssadce.h" + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) + +struct swizzle_data { + GLuint hash; /**< swizzle value this matches */ + GLuint base; /**< base value for hw swizzle */ + GLuint stride; /**< difference in base between arg0/1/2 */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + GLuint swz = GET_SWZ(swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +{ + if (reg.Abs) + reg.Negate = NEGATE_NONE; + + if (opcode == OPCODE_KIL || + opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP) { + int j; + + if (reg.Abs || reg.Negate) + return GL_FALSE; + + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(reg.Swizzle, j); + if (swz == SWIZZLE_NIL) + continue; + if (swz != j) + return GL_FALSE; + } + + return GL_TRUE; + } + + GLuint relevant = 0; + int j; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) + relevant |= 1 << j; + + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return GL_FALSE; + + if (!lookup_native_swizzle(reg.Swizzle)) + return GL_FALSE; + + return GL_TRUE; +} + + +/** + * Generate MOV dst, src using only native swizzles. + */ +void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +{ + if (src.Abs) + src.Negate = NEGATE_NONE; + + while(dst.WriteMask) { + const struct swizzle_data *best_swizzle = 0; + GLuint best_matchcount = 0; + GLuint best_matchmask = 0; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + GLuint matchcount = 0; + GLuint matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + if (!GET_BIT(dst.WriteMask, comp)) + continue; + GLuint swz = GET_SWZ(src.Swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_swizzle = sd; + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) + break; + } + } + + struct prog_instruction *inst; + + _mesa_insert_instructions(s->Program, s->IP, 1); + inst = s->Program->Instructions + s->IP++; + inst->Opcode = OPCODE_MOV; + inst->DstReg = dst; + inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; + /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ + + dst.WriteMask &= ~inst->DstReg.WriteMask; + } +} + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd) { + _mesa_printf("Not a native swizzle: %08x\n", swizzle); + return 0; + } + + return sd->base + src*sd->stride; +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) +{ + if (swizzle < 3) + return swizzle + 3*src; + + switch(swizzle) { + case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h new file mode 100644 index 0000000000..231bf4eef5 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "main/glheader.h" +#include "shader/prog_instruction.h" + +struct nqssadce_state; + +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); +void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c new file mode 100644 index 0000000000..75abdcfc42 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,315 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +} + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + */ +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->program->InputsRead; + + if (!(InputsRead & FRAG_BIT_WPOS)) { + compiler->code->wpos_attr = FRAG_ATTRIB_MAX; + return; + } + + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(InputsRead & (1 << i))) { + InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); + InputsRead |= 1 << i; + compiler->program->InputsRead = InputsRead; + compiler->code->wpos_attr = i; + break; + } + } + + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); + + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; + i = 0; + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = compiler->code->wpos_attr; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = compiler->code->wpos_attr; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } + } + } +} + + +/** + * Rewrite fragment.fogcoord to use a texture coordinate slot. + * Note that fogcoord is forced into an X001 pattern, and this enforcement + * is done here. + * + * See also the counterpart rewriting for vertex programs. + */ +static void rewriteFog(struct r300_fragment_program_compiler *compiler) +{ + struct rX00_fragment_program_code *code = compiler->code; + GLuint InputsRead = compiler->program->InputsRead; + int i; + + if (!(InputsRead & FRAG_BIT_FOGC)) { + code->fog_attr = FRAG_ATTRIB_MAX; + return; + } + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(InputsRead & (1 << i))) { + InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); + InputsRead |= 1 << i; + compiler->program->InputsRead = InputsRead; + code->fog_attr = i; + break; + } + } + + { + struct prog_instruction *inst; + + inst = compiler->program->Instructions; + while (inst->Opcode != OPCODE_END) { + const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < src_regs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { + inst->SrcReg[i].Index = code->fog_attr; + inst->SrcReg[i].Swizzle = combine_swizzles( + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), + inst->SrcReg[i].Swizzle); + } + } + ++inst; + } + } +} + + +static void rewrite_depth_out(struct gl_program *prog) +{ + struct prog_instruction *inst; + + for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { + if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) + continue; + + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } + } +} + +GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + GLboolean success = GL_FALSE; + + if (c->debug) { + fflush(stdout); + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(c->program); + fflush(stdout); + } + + insert_WPOS_trailer(c); + + rewriteFog(c); + + rewrite_depth_out(c->program); + + if (c->is_r500) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(c->ctx, c->program, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(c->ctx, c->program, 3, transformations); + } + + if (c->debug) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(c->program); + fflush(stdout); + } + + if (c->is_r500) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle + }; + radeonNqssaDce(c->ctx, c->program, &nqssadce); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle + }; + radeonNqssaDce(c->ctx, c->program, &nqssadce); + } + + if (c->debug) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(c->program); + fflush(stdout); + } + + if (c->is_r500) { + success = r500BuildFragmentProgramHwCode(c); + } else { + success = r300BuildFragmentProgramHwCode(c); + } + + if (!success || c->debug) { + if (c->is_r500) { + r500FragmentProgramDump(c->code); + } else { + r300FragmentProgramDump(c->code); + } + } + + return success; +} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c new file mode 100644 index 0000000000..fdc18caacb --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -0,0 +1,482 @@ +/* + * Copyright 2008 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "compiler/r500_fragprog.h" + +#include "r300_reg.h" + +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + +static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +{ + gl_state_index fail_value_tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 + }; + struct prog_src_register reg = { 0, }; + + fail_value_tokens[2] = tmu; + reg.File = PROGRAM_STATE_VAR; + reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + */ +GLboolean r500_transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = inst.DstReg; + if (comparefunc == GL_ALWAYS) { + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + } + return GL_TRUE; + } + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = radeonFindFreeTemporary(t); + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { + int tempreg = radeonFindFreeTemporary(t); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } + + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst.TexSrcUnit].depth_texture_mode; + int rcptemp = radeonFindFreeTemporary(t); + int pass, fail; + + tgt = radeonAppendInstructions(t->Program, 3); + + tgt[0].Opcode = OPCODE_RCP; + tgt[0].DstReg.File = PROGRAM_TEMPORARY; + tgt[0].DstReg.Index = rcptemp; + tgt[0].DstReg.WriteMask = WRITEMASK_W; + tgt[0].SrcReg[0] = inst.SrcReg[0]; + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + + tgt[1].Opcode = OPCODE_MAD; + tgt[1].DstReg = inst.DstReg; + tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[1].SrcReg[0] = inst.SrcReg[0]; + tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[1].Index = rcptemp; + tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; + tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[2].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; + else + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; + + tgt[2].Opcode = OPCODE_CMP; + tgt[2].DstReg = orig_inst->DstReg; + tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; + tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; + tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); + } else if (destredirect) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + } + + return GL_TRUE; +} + +GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +{ + GLuint relevant; + int i; + + if (opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP || + opcode == OPCODE_KIL) { + if (reg.Abs) + return GL_FALSE; + + if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) + return GL_FALSE; + + if (reg.Negate) + reg.Negate ^= NEGATE_XYZW; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz == SWIZZLE_NIL) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return GL_FALSE; + } + + if (reg.Negate) + return GL_FALSE; + + return GL_TRUE; + } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) + return GL_TRUE; + + return GL_FALSE; + } else { + /* ALU instructions support almost everything */ + if (reg.Abs) + return GL_TRUE; + + relevant = 0; + for(i = 0; i < 3; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return GL_FALSE; + + return GL_TRUE; + } +} + +/** + * Implement a MOV with a potentially non-native swizzle. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. Therefore, we split the MOV into two instructions when necessary. + */ +void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +{ + struct prog_instruction *inst; + GLuint negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(src.Swizzle, i); + if (swz == SWIZZLE_NIL) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } + + _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); + inst = s->Program->Instructions + s->IP; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + inst->Opcode = OPCODE_MOV; + inst->DstReg = dst; + inst->DstReg.WriteMask = negatebase[i]; + inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; + inst++; + s->IP++; + } +} + + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "1/2"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +void r500FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r500_fragment_program_code *code = &c->code.r500; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + int n; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + + if (code->const_nr) { + fprintf(stderr, "--------\nConstants:\n"); + for (n = 0; n < code->const_nr; n++) { + fprintf(stderr, "Constant %d: %i[%i]\n", n, + code->constant[n].File, code->constant[n].Index); + } + fprintf(stderr, "--------\n"); + } + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case 0: + case 1: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case 2: + break; + case 3: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h new file mode 100644 index 0000000000..232993f581 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + * Jerome Glisse + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "shader/prog_parameter.h" +#include "shader/prog_instruction.h" + +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_nqssadce.h" + +extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); + +extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); + +extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c new file mode 100644 index 0000000000..237489e119 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -0,0 +1,329 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs + * + * \author Jerome Glisse + * + * \author Corbin Simpson + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + */ + +#include "compiler/r500_fragprog.h" + +#include "r300_reg.h" + +#include "compiler/radeon_program_pair.h" + + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r500_fragment_program_code *code = &c->code->code.r500 + +#define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +/** + * Callback to register hardware constants. + */ +static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) +{ + PROG_CODE; + + for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { + if (code->constant[*hwindex].File == file && + code->constant[*hwindex].Index == idx) + break; + } + + if (*hwindex >= code->const_nr) { + if (*hwindex >= R500_PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } + + code->const_nr++; + code->constant[*hwindex].File = file; + code->constant[*hwindex].Index = idx; + } + + return GL_TRUE; +} + +static GLuint translate_rgb_op(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static GLuint translate_alpha_op(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALPHA_OP_CMP; + case OPCODE_COS: return R500_ALPHA_OP_COS; + case OPCODE_DDX: return R500_ALPHA_OP_MDH; + case OPCODE_DDY: return R500_ALPHA_OP_MDV; + case OPCODE_DP3: return R500_ALPHA_OP_DP; + case OPCODE_DP4: return R500_ALPHA_OP_DP; + case OPCODE_EX2: return R500_ALPHA_OP_EX2; + case OPCODE_FRC: return R500_ALPHA_OP_FRC; + case OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALPHA_OP_MAD; + case OPCODE_MAX: return R500_ALPHA_OP_MAX; + case OPCODE_MIN: return R500_ALPHA_OP_MIN; + case OPCODE_RCP: return R500_ALPHA_OP_RCP; + case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static GLuint fix_hw_swizzle(GLuint swz) +{ + if (swz == 5) swz = 6; + if (swz == SWIZZLE_NIL) swz = 4; + return swz; +} + +static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +{ + GLuint t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +{ + GLuint t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (!src.Constant) + use_temporary(code, src.Index); + return src.Index | src.Constant << 8; +} + + +/** + * Emit a paired ALU instruction. + */ +static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_alu: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + else + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = GL_TRUE; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + return GL_TRUE; +} + +static GLuint translate_strq_swizzle(struct prog_src_register src) +{ + GLuint swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static GLboolean emit_tex(void *data, struct prog_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_tex: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + default: + error("emit_tex can't handle opcode %x\n", inst->Opcode); + } + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0]) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + return GL_TRUE; +} + +static const struct radeon_pair_handler pair_handler = { + .EmitConst = emit_const, + .EmitPaired = emit_paired, + .EmitTex = emit_tex, + .MaxHwTemps = 128 +}; + +GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +{ + struct r500_fragment_program_code *code = &compiler->code->code.r500; + + _mesa_bzero(code, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_offset = 0; + code->inst_end = -1; + + if (!radeonPairProgram(compiler->ctx, compiler->program, &pair_handler, compiler)) + return GL_FALSE; + + if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= 511) { + error("Introducing fake OUT: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h new file mode 100644 index 0000000000..e1a691db4f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -0,0 +1,162 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) +#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) + + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ + GLuint inst[R300_PFS_MAX_TEX_INST]; + } tex; + + struct { + int length; /**< total # of ALU instructions used */ + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + } inst[R300_PFS_MAX_ALU_INST]; + } alu; + + struct r300_fragment_program_node node[4]; + int cur_node; + int first_node_has_tex; + + /** + * Remember which program register a given hardware constant + * belongs to. + */ + struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; +}; + + +struct r500_fragment_program_code { + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + GLuint inst4; + GLuint inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_offset; + int inst_end; + + /** + * Remember which program register a given hardware constant + * belongs to. + */ + struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + GLboolean writes_depth; + + /* attribute that we are sending the WPOS in */ + gl_frag_attrib wpos_attr; + /* attribute that we are sending the fog coordinate in */ + gl_frag_attrib fog_attr; +}; + +struct r300_fragment_program_compiler { + GLcontext * ctx; + struct rX00_fragment_program_code *code; + struct gl_program *program; + struct r300_fragment_program_external_state state; + GLboolean is_r500; + GLboolean debug; +}; + +GLboolean r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c new file mode 100644 index 0000000000..202a8532b6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c @@ -0,0 +1,297 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * "Not-quite SSA" and Dead-Code Elimination. + * + * @note This code uses SWIZZLE_NIL in a source register to indicate that + * the corresponding component is ignored by the corresponding instruction. + */ + +#include "radeon_nqssadce.h" + + +/** + * Return the @ref register_state for the given register (or 0 for untracked + * registers, i.e. constants). + */ +static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_OUTPUT: return &s->Outputs[index]; + case PROGRAM_ADDRESS: return &s->Address; + default: return 0; + } +} + + +/** + * Left multiplication of a register with a swizzle + * + * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. + */ +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) +{ + struct prog_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = NEGATE_NONE; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + + +static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, + struct prog_instruction *inst, GLint src, GLuint sourced) +{ + int i; + GLuint deswz_source = 0; + + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) { + GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); + deswz_source |= 1 << swz; + } else { + inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + } + + if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { + struct prog_dst_register dstreg = inst->DstReg; + dstreg.File = PROGRAM_TEMPORARY; + dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + dstreg.WriteMask = sourced; + + s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); + + inst = s->Program->Instructions + s->IP; + inst->SrcReg[src].File = PROGRAM_TEMPORARY; + inst->SrcReg[src].Index = dstreg.Index; + inst->SrcReg[src].Swizzle = 0; + inst->SrcReg[src].Negate = NEGATE_NONE; + inst->SrcReg[src].Abs = 0; + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) + inst->SrcReg[src].Swizzle |= i << (3*i); + else + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + deswz_source = sourced; + } + + struct register_state *regstate; + + if (inst->SrcReg[src].RelAddr) { + regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); + if (regstate) + regstate->Sourced |= WRITEMASK_X; + } else { + regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + } + + return inst; +} + +static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) +{ + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; ++i) + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) + inst->SrcReg[i].Index = newindex; +} + +static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) +{ + GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + int ip; + for(ip = 0; ip < s->IP; ++ip) { + struct prog_instruction* inst = s->Program->Instructions + ip; + if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) + inst->DstReg.Index = newindex; + unalias_srcregs(inst, oldindex, newindex); + } + unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); +} + + +/** + * Handle one instruction. + */ +static void process_instruction(struct nqssadce_state* s) +{ + struct prog_instruction *inst = s->Program->Instructions + s->IP; + + if (inst->Opcode == OPCODE_END) + return; + + if (inst->Opcode != OPCODE_KIL) { + struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); + if (!regstate) { + _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", + inst->DstReg.File, inst->DstReg.Index); + return; + } + + inst->DstReg.WriteMask &= regstate->Sourced; + regstate->Sourced &= ~inst->DstReg.WriteMask; + + if (inst->DstReg.WriteMask == 0) { + _mesa_delete_instructions(s->Program, s->IP, 1); + return; + } + + if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) + unalias_temporary(s, inst->DstReg.Index); + } + + /* Attention: Due to swizzle emulation code, the following + * might change the instruction stream under us, so we have + * to be careful with the inst pointer. */ + switch (inst->Opcode) { + case OPCODE_ARL: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MOV: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + case OPCODE_SGE: + case OPCODE_SLT: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + inst = track_used_srcreg(s, inst, 0, 0x1); + break; + case OPCODE_DP3: + inst = track_used_srcreg(s, inst, 0, 0x7); + inst = track_used_srcreg(s, inst, 1, 0x7); + break; + case OPCODE_DP4: + inst = track_used_srcreg(s, inst, 0, 0xf); + inst = track_used_srcreg(s, inst, 1, 0xf); + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + inst = track_used_srcreg(s, inst, 0, 0xf); + break; + case OPCODE_DST: + inst = track_used_srcreg(s, inst, 0, 0x6); + inst = track_used_srcreg(s, inst, 1, 0xa); + break; + case OPCODE_EXP: + case OPCODE_LOG: + case OPCODE_POW: + inst = track_used_srcreg(s, inst, 0, 0x3); + break; + case OPCODE_LIT: + inst = track_used_srcreg(s, inst, 0, 0xb); + break; + default: + _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + return; + } +} + +static void calculateInputsOutputs(struct gl_program *p) +{ + struct prog_instruction *inst; + GLuint InputsRead, OutputsWritten; + + inst = p->Instructions; + InputsRead = 0; + OutputsWritten = 0; + while (inst->Opcode != OPCODE_END) + { + int i, num_src_regs; + + num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < num_src_regs; ++i) { + if (inst->SrcReg[i].File == PROGRAM_INPUT) + InputsRead |= 1 << inst->SrcReg[i].Index; + } + + if (inst->DstReg.File == PROGRAM_OUTPUT) + OutputsWritten |= 1 << inst->DstReg.Index; + + ++inst; + } + + p->InputsRead = InputsRead; + p->OutputsWritten = OutputsWritten; +} + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) +{ + struct nqssadce_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = p; + s.Descr = descr; + s.Descr->Init(&s); + s.IP = p->NumInstructions; + + while(s.IP > 0) { + s.IP--; + process_instruction(&s); + } + + calculateInputsOutputs(p); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h new file mode 100644 index 0000000000..8626f21c25 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_NQSSADCE_H_ +#define __RADEON_PROGRAM_NQSSADCE_H_ + +#include "radeon_program.h" + + +struct register_state { + /** + * Bitmask indicating which components of the register are sourced + * by later instructions. + */ + GLuint Sourced : 4; +}; + +/** + * Maintain state such as which registers are used, which registers are + * read from, etc. + */ +struct nqssadce_state { + GLcontext *Ctx; + struct gl_program *Program; + struct radeon_nqssadce_descr *Descr; + + /** + * All instructions after this instruction pointer have been dealt with. + */ + int IP; + + /** + * Which registers are read by subsequent instructions? + */ + struct register_state Temps[MAX_PROGRAM_TEMPS]; + struct register_state Outputs[VERT_RESULT_MAX]; + struct register_state Address; +}; + + +/** + * This structure contains a description of the hardware in-so-far as + * it is required for the NqSSA-DCE pass. + */ +struct radeon_nqssadce_descr { + /** + * Fill in which outputs + */ + void (*Init)(struct nqssadce_state *); + + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + */ + GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); + + /** + * Emit (at the current IP) the instruction MOV dst, src; + * The transformation will work recursively on the emitted instruction(s). + */ + void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + + void *Data; +}; + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); +struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); + +#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c new file mode 100644 index 0000000000..da5e7aefce --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include "shader/prog_print.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + GLcontext *Ctx, + struct gl_program *program, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct radeon_transform_context ctx; + int ip; + + ctx.Ctx = Ctx; + ctx.Program = program; + ctx.OldInstructions = program->Instructions; + ctx.OldNumInstructions = program->NumInstructions; + + program->Instructions = 0; + program->NumInstructions = 0; + + for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { + struct prog_instruction *instr = ctx.OldInstructions + ip; + int i; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(&ctx, instr, t->userData)) + break; + } + + if (i >= num_transformations) { + struct prog_instruction* dest = radeonAppendInstructions(program, 1); + _mesa_copy_instructions(dest, instr, 1); + } + } + + _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); +} + + +static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) +{ + GLuint i; + for (i = 0; i < count; i++) { + const struct prog_instruction *inst = insts + i; + const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < n; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + } +} + +GLint radeonFindFreeTemporary(struct radeon_transform_context *t) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + _mesa_memset(used, 0, sizeof(used)); + scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); + scan_instructions(used, t->OldInstructions, t->OldNumInstructions); + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; +} + + +/** + * Append the given number of instructions to the program and return a + * pointer to the first new instruction. + */ +struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) +{ + int oldnum = program->NumInstructions; + _mesa_insert_instructions(program, oldnum, count); + return program->Instructions + oldnum; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h new file mode 100644 index 0000000000..88474d43a2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + + +enum { + CLAUSE_MIXED = 0, + CLAUSE_ALU, + CLAUSE_TEX +}; + +enum { + PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +}; + +enum { + OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ +}; + +#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) +#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) + +static inline GLuint get_swz(GLuint swz, GLuint idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +{ + GLuint ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +static inline GLuint combine_swizzles(GLuint src, GLuint swz) +{ + GLuint ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; + + return ret; +} + + +/** + * Transformation context that is passed to local transformations. + * + * Care must be taken with some operations during transformation, + * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary + */ +struct radeon_transform_context { + GLcontext *Ctx; + struct gl_program *Program; + struct prog_instruction *OldInstructions; + GLuint OldNumInstructions; +}; + +/** + * A transformation that can be passed to \ref radeonLocalTransform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + GLboolean (*function)( + struct radeon_transform_context*, + struct prog_instruction*, + void*); + void *userData; +}; + +void radeonLocalTransform( + GLcontext* ctx, + struct gl_program *program, + int num_transformations, + struct radeon_program_transformation* transformations); + +/** + * Find a usable free temporary register during program transformation + */ +GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); + +struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c new file mode 100644 index 0000000000..8283723bad --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -0,0 +1,635 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "shader/prog_parameter.h" + + +static struct prog_instruction *emit1(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg; + return fpi; +} + +static struct prog_instruction *emit2(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + return fpi; +} + +static struct prog_instruction *emit3(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, + struct prog_src_register SrcReg2) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + fpi->SrcReg[2] = SrcReg2; + return fpi; +} + +static struct prog_dst_register dstreg(int file, int index) +{ + struct prog_dst_register dst; + dst.File = file; + dst.Index = index; + dst.WriteMask = WRITEMASK_XYZW; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static struct prog_dst_register dstregtmpmask(int index, int mask) +{ + struct prog_dst_register dst; + dst.File = PROGRAM_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static const struct prog_src_register builtin_zero = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_0000 +}; +static const struct prog_src_register builtin_one = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_1111 +}; +static const struct prog_src_register srcreg_undefined = { + .File = PROGRAM_UNDEFINED, + .Index = 0, + .Swizzle = SWIZZLE_NOOP +}; + +static struct prog_src_register srcreg(int file, int index) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct prog_src_register srcregswz(int file, int index, int swz) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct prog_src_register absolute(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Abs = 1; + newreg.Negate = NEGATE_NONE; + return newreg; +} + +static struct prog_src_register negate(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ NEGATE_XYZW; + return newreg; +} + +static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +{ + struct prog_src_register swizzled = reg; + swizzled.Swizzle = MAKE_SWIZZLE4( + x >= 4 ? x : GET_SWZ(reg.Swizzle, x), + y >= 4 ? y : GET_SWZ(reg.Swizzle, y), + z >= 4 ? z : GET_SWZ(reg.Swizzle, z), + w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); + return swizzled; +} + +static struct prog_src_register scalar(struct prog_src_register reg) +{ + return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); +} + +static void transform_ABS(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + struct prog_src_register src = inst->SrcReg[0]; + src.Abs = 1; + src.Negate = NEGATE_NONE; + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); +} + +static void transform_DPH(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ONE << (3 * 3); + emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), + swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); +} + +static void transform_FLR(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); + emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, + inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + static const GLfloat LitConst[4] = { -127.999999 }; + + GLuint constant; + GLuint constant_swizzle; + GLuint temp; + int needTemporary = 0; + struct prog_src_register srctemp; + + constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + + if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = radeonFindFreeTemporary(t); + } else { + temp = inst->DstReg.Index; + } + srctemp = srcreg(PROGRAM_TEMPORARY, temp); + + // tmp.x = max(0.0, Src.x); + // tmp.y = max(0.0, Src.y); + // tmp.w = clamp(Src.z, -128+eps, 128-eps); + emit2(t->Program, OPCODE_MAX, 0, + dstregtmpmask(temp, WRITEMASK_XYW), + inst->SrcReg[0], + swizzle(srcreg(PROGRAM_CONSTANT, constant), + SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); + emit2(t->Program, OPCODE_MIN, 0, + dstregtmpmask(temp, WRITEMASK_Z), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + + // tmp.w = Pow(tmp.y, tmp.w) + emit1(t->Program, OPCODE_LG2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit2(t->Program, OPCODE_MUL, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); + emit1(t->Program, OPCODE_EX2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + + // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, + dstregtmpmask(temp, WRITEMASK_Z), + negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + builtin_zero); + + // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, + dstregtmpmask(temp, WRITEMASK_XYW), + swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + + if (needTemporary) + emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); +} + +static void transform_LRP(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, + dstreg(PROGRAM_TEMPORARY, tempreg), + inst->SrcReg[1], negate(inst->SrcReg[2])); + emit3(t->Program, OPCODE_MAD, inst->SaturateMode, + inst->DstReg, + inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); +} + +static void transform_POW(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); + struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); + tempdst.WriteMask = WRITEMASK_W; + tempsrc.Swizzle = SWIZZLE_WWWW; + + emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); + emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); + emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); +} + +static void transform_RSQ(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); +} + +static void transform_SGE(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); +} + +static void transform_SLT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); +} + +static void transform_SUB(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); +} + +static void transform_SWZ(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); +} + +static void transform_XPD(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +GLboolean radeonTransformALU(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + switch(inst->Opcode) { + case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; + case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; + case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + + +static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) +{ + static const GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.5, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } + }; + int i; + + for(i = 0; i < 2; ++i) { + GLuint swz; + constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); + ASSERT(swz == SWIZZLE_NOOP); + } +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx(struct radeon_transform_context* t, + struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) +{ + GLuint tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcreg(PROGRAM_CONSTANT, constants[0])); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); + emit3(t->Program, OPCODE_MAD, 0, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + GLuint constants[2]; + GLuint tempreg = radeonFindFreeTemporary(t); + + sincos_constants(t, constants); + + if (inst->Opcode == OPCODE_COS) { + // MAD tmp.x, src, 1/(2*PI), 0.75 + // FRC tmp.x, tmp.x + // MAD tmp.z, tmp.x, 2*PI, -PI + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else if (inst->Opcode == OPCODE_SIN) { + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else { + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + struct prog_dst_register dst = inst->DstReg; + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + constants); + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + constants); + } + + return GL_TRUE; +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; + GLuint temp; + GLuint constant; + GLuint constant_swizzle; + + temp = radeonFindFreeTemporary(t); + constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); + + emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), + srcreg(PROGRAM_TEMPORARY, temp)); + + if (inst->Opcode == OPCODE_COS) { + emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SIN) { + emit1(t->Program, OPCODE_SIN, inst->SaturateMode, + inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->DstReg; + + if (inst->DstReg.WriteMask & WRITEMASK_X) { + moddst.WriteMask = WRITEMASK_X; + emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + if (inst->DstReg.WriteMask & WRITEMASK_Y) { + moddst.WriteMask = WRITEMASK_Y; + emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + } + + return GL_TRUE; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +GLboolean radeonTransformDeriv(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) + return GL_FALSE; + + struct prog_src_register B = inst->SrcReg[1]; + + B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, + SWIZZLE_ONE, SWIZZLE_ONE); + B.Negate = NEGATE_XYZW; + + emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, + inst->SrcReg[0], B); + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h new file mode 100644 index 0000000000..b45958115c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +GLboolean radeonTransformALU( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformTrigSimple( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformTrigScale( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformDeriv( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c new file mode 100644 index 0000000000..d6fb474cf2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -0,0 +1,1004 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Perform temporary register allocation and attempt to pair off instructions + * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction + * vs. ALU instruction scheduling. + */ + +#include "radeon_program_pair.h" + +#include "radeon_common.h" + +#include "shader/prog_print.h" + +#define error(fmt, args...) do { \ + _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + s->Error = GL_TRUE; \ +} while(0) + +struct pair_state_instruction { + GLuint IsTex:1; /**< Is a texture instruction */ + GLuint NeedRGB:1; /**< Needs the RGB ALU */ + GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ + GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + GLuint NumDependencies:5; + + /** + * Next instruction in the linked list of ready instructions. + */ + struct pair_state_instruction *NextReady; + + /** + * Values that this instruction writes + */ + struct reg_value *Values[4]; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + GLuint IP; /**< IP of the instruction that performs this access */ + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * PROGRAM_TEMPORARY. + */ +struct reg_value { + GLuint IP; /**< IP of the instruction that writes this value */ + struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ + + /** + * Unordered linked list of instructions that read from this value. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is calculated during @ref scan_instructions + * and continually decremented during code emission. + * When this count reaches zero, the instruction that writes the @ref Next value + * can be scheduled. + */ + GLuint NumReaders; +}; + +/** + * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register + * to the proper hardware temporary. + */ +struct pair_register_translation { + GLuint Allocated:1; + GLuint HwIndex:8; + GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ + + /** + * Notes the value that is currently contained in each component + * (only used for PROGRAM_TEMPORARY registers). + */ + struct reg_value *Value[4]; +}; + +struct pair_state { + GLcontext *Ctx; + struct gl_program *Program; + const struct radeon_pair_handler *Handler; + GLboolean Error; + GLboolean Debug; + GLboolean Verbose; + void *UserData; + + /** + * Translate Mesa registers to hardware registers + */ + struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; + struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; + + /** + * Derived information about program instructions. + */ + struct pair_state_instruction *Instructions; + + struct { + GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ + } HwTemps[128]; + + /** + * Linked list of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + struct pair_state_instruction *ReadyFullALU; + struct pair_state_instruction *ReadyRGB; + struct pair_state_instruction *ReadyAlpha; + struct pair_state_instruction *ReadyTEX; + + /** + * Pool of @ref reg_value structures for fast allocation. + */ + struct reg_value *ValuePool; + GLuint ValuePoolUsed; + struct reg_value_reader *ReaderPool; + GLuint ReaderPoolUsed; +}; + + +static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_INPUT: return &s->Inputs[index]; + default: return 0; + } +} + +static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) +{ + struct pair_register_translation *t = get_register(s, file, index); + ASSERT(!s->HwTemps[hwindex].RefCount); + ASSERT(!t->Allocated); + s->HwTemps[hwindex].RefCount = t->RefCount; + t->Allocated = 1; + t->HwIndex = hwindex; +} + +static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) +{ + GLuint hwindex; + + struct pair_register_translation *t = get_register(s, file, index); + if (!t) { + _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); + return 0; + } + + if (t->Allocated) + return t->HwIndex; + + for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) + if (!s->HwTemps[hwindex].RefCount) + break; + + if (hwindex >= s->Handler->MaxHwTemps) { + error("Ran out of hardware temporaries"); + return 0; + } + + alloc_hw_reg(s, file, index, hwindex); + return hwindex; +} + + +static void deref_hw_reg(struct pair_state *s, GLuint hwindex) +{ + if (!s->HwTemps[hwindex].RefCount) { + error("Hwindex %i refcount error", hwindex); + return; + } + + s->HwTemps[hwindex].RefCount--; +} + +static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) +{ + pairinst->NextReady = *list; + *list = pairinst; +} + +/** + * The instruction at the given IP has become ready. Link it into the ready + * instructions. + */ +static void instruction_ready(struct pair_state *s, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + + if (s->Verbose) + _mesa_printf("instruction_ready(%i)\n", ip); + + if (pairinst->IsTex) + add_pairinst_to_list(&s->ReadyTEX, pairinst); + else if (!pairinst->NeedAlpha) + add_pairinst_to_list(&s->ReadyRGB, pairinst); + else if (!pairinst->NeedRGB) + add_pairinst_to_list(&s->ReadyAlpha, pairinst); + else + add_pairinst_to_list(&s->ReadyFullALU, pairinst); +} + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) +{ + struct prog_src_register tmp; + + switch(inst->Opcode) { + case OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[1].Negate = NEGATE_NONE; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_MUL: + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct pair_state *s, + struct prog_instruction *inst, struct pair_state_instruction *pairinst) +{ + pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; + pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; + + switch(inst->Opcode) { + case OPCODE_ADD: + case OPCODE_CMP: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MAD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + pairinst->IsTranscendent = 1; + pairinst->NeedAlpha = 1; + break; + case OPCODE_DP4: + pairinst->NeedAlpha = 1; + /* fall through */ + case OPCODE_DP3: + pairinst->NeedRGB = 1; + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + case OPCODE_END: + pairinst->IsTex = 1; + break; + default: + error("Unknown opcode %d\n", inst->Opcode); + break; + } +} + + +/** + * Count which (input, temporary) register is read and written how often, + * and scan the instruction stream to find dependencies. + */ +static void scan_instructions(struct pair_state *s) +{ + struct prog_instruction *inst; + struct pair_state_instruction *pairinst; + GLuint ip; + + for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; + inst->Opcode != OPCODE_END; + ++inst, ++pairinst, ++ip) { + final_rewrite(s, inst); + classify_instruction(s, inst, pairinst); + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int j; + for(j = 0; j < nsrc; j++) { + struct pair_register_translation *t = + get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); + if (!t) + continue; + + t->RefCount++; + + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + int i; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); + if (swz >= 4) + continue; /* constant or NIL swizzle */ + if (!t->Value[swz]) + continue; /* this is an undefined read */ + + /* Do not add a dependency if this instruction + * also rewrites the value. The code below adds + * a dependency for the DstReg, which is a superset + * of the SrcReg dependency. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[j].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; + pairinst->NumDependencies++; + t->Value[swz]->NumReaders++; + r->IP = ip; + r->Next = t->Value[swz]->Readers; + t->Value[swz]->Readers = r; + } + } + } + + int ndst = _mesa_num_inst_dst_regs(inst->Opcode); + if (ndst) { + struct pair_register_translation *t = + get_register(s, inst->DstReg.File, inst->DstReg.Index); + if (t) { + t->RefCount++; + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + int j; + for(j = 0; j < 4; ++j) { + if (!GET_BIT(inst->DstReg.WriteMask, j)) + continue; + + struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; + v->IP = ip; + if (t->Value[j]) { + pairinst->NumDependencies++; + t->Value[j]->Next = v; + } + t->Value[j] = v; + pairinst->Values[j] = v; + } + } + } + } + + if (s->Verbose) + _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); + + if (!pairinst->NumDependencies) + instruction_ready(s, ip); + } + + /* Clear the PROGRAM_TEMPORARY state */ + int i, j; + for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { + for(j = 0; j < 4; ++j) + s->Temps[i].Value[j] = 0; + } +} + + +/** + * Reserve hardware temporary registers for the program inputs. + * + * @note This allocation is performed explicitly, because the order of inputs + * is determined by the RS hardware. + */ +static void allocate_input_registers(struct pair_state *s) +{ + GLuint InputsRead = s->Program->InputsRead; + int i; + GLuint hwindex = 0; + + /* Primary colour */ + if (InputsRead & FRAG_BIT_COL0) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); + InputsRead &= ~FRAG_BIT_COL1; + + /* Texcoords */ + for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* Fogcoords treated as a texcoord */ + if (InputsRead & FRAG_BIT_FOGC) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); + InputsRead &= ~FRAG_BIT_FOGC; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; + + /* Anything else */ + if (InputsRead) + error("Don't know how to handle inputs 0x%x\n", InputsRead); +} + + +static void decrement_dependencies(struct pair_state *s, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + ASSERT(pairinst->NumDependencies > 0); + if (!--pairinst->NumDependencies) + instruction_ready(s, ip); +} + +/** + * Update the dependency tracking state based on what the instruction + * at the given IP does. + */ +static void commit_instruction(struct pair_state *s, int ip) +{ + struct prog_instruction *inst = s->Program->Instructions + ip; + struct pair_state_instruction *pairinst = s->Instructions + ip; + + if (s->Verbose) + _mesa_printf("commit_instruction(%i)\n", ip); + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; + deref_hw_reg(s, t->HwIndex); + + int i; + for(i = 0; i < 4; ++i) { + if (!GET_BIT(inst->DstReg.WriteMask, i)) + continue; + + t->Value[i] = pairinst->Values[i]; + if (t->Value[i]->NumReaders) { + struct reg_value_reader *r; + for(r = pairinst->Values[i]->Readers; r; r = r->Next) + decrement_dependencies(s, r->IP); + } else if (t->Value[i]->Next) { + /* This happens when the only reader writes + * the register at the same time */ + decrement_dependencies(s, t->Value[i]->Next->IP); + } + } + } + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; i++) { + struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (!t) + continue; + + deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); + + if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) + continue; + + int j; + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz >= 4) + continue; + if (!t->Value[swz]) + continue; + + /* Do not free a dependency if this instruction + * also rewrites the value. See scan_instructions. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[i].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + if (!--t->Value[swz]->NumReaders) { + if (t->Value[swz]->Next) + decrement_dependencies(s, t->Value[swz]->Next->IP); + } + } + } +} + + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + * + * In R500, we don't really know when the result of a texture instruction + * arrives. So allocate all destinations first, to make sure they do not + * arrive early and overwrite a texture coordinate we're going to use later + * in the block. + */ +static void emit_all_tex(struct pair_state *s) +{ + struct pair_state_instruction *readytex; + struct pair_state_instruction *pairinst; + + ASSERT(s->ReadyTEX); + + // Don't let the ready list change under us! + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + + // Allocate destination hardware registers in one block to avoid conflicts. + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + int ip = pairinst - s->Instructions; + struct prog_instruction *inst = s->Program->Instructions + ip; + if (inst->Opcode != OPCODE_KIL) + get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + } + + if (s->Debug) + _mesa_printf(" BEGIN_TEX\n"); + + if (s->Handler->BeginTexBlock) + s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); + + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + int ip = pairinst - s->Instructions; + struct prog_instruction *inst = s->Program->Instructions + ip; + commit_instruction(s, ip); + + if (inst->Opcode != OPCODE_KIL) + inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); + + if (s->Debug) { + _mesa_printf(" "); + _mesa_print_instruction(inst); + fflush(stdout); + } + s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); + } + + if (s->Debug) + _mesa_printf(" END_TEX\n"); +} + + +static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, + struct prog_src_register src, GLboolean rgb, GLboolean alpha) +{ + int candidate = -1; + int candidate_quality = -1; + int i; + + if (!rgb && !alpha) + return 0; + + GLuint constant; + GLuint index; + + if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { + constant = 0; + index = get_hw_reg(s, src.File, src.Index); + } else { + constant = 1; + s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); + } + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].Constant != constant || + pair->RGB.Src[i].Index != index) + continue; + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].Constant != constant || + pair->Alpha.Src[i].Index != index) + continue; + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (candidate >= 0) { + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].Constant = constant; + pair->RGB.Src[candidate].Index = index; + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].Constant = constant; + pair->Alpha.Src[candidate].Index = index; + } + } + + return candidate; +} + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = s->Program->Instructions + ip; + + ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); + ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); + + if (pairinst->NeedRGB) { + if (pairinst->IsTranscendent) + pair->RGB.Opcode = OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (pairinst->NeedAlpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + int nargs = _mesa_num_inst_src_regs(inst->Opcode); + int i; + + /* Special case for DDX/DDY (MDH/MDV). */ + if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { + if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) + return GL_FALSE; + else + nargs++; + } + + for(i = 0; i < nargs; ++i) { + int source; + if (pairinst->NeedRGB && !pairinst->IsTranscendent) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + int j; + for(j = 0; j < 3; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + } + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); + } + if (pairinst->NeedAlpha) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); + } + } + + return GL_TRUE; +} + + +/** + * Fill in the destination register information. + * + * This is split from filling in source registers because we want + * to avoid allocating hardware temporaries for destinations until + * we are absolutely certain that we're going to emit a certain + * instruction pairing. + */ +static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = s->Program->Instructions + ip; + + if (inst->DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.Index == FRAG_RESULT_COLOR) { + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } else { + GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + if (pairinst->NeedRGB) { + pair->RGB.DestIndex = hwindex; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + } + if (pairinst->NeedAlpha) { + pair->Alpha.DestIndex = hwindex; + pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } +} + + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_alu(struct pair_state *s) +{ + struct radeon_pair_instruction pair; + + if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { + int ip; + if (s->ReadyFullALU) { + ip = s->ReadyFullALU - s->Instructions; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + } else if (s->ReadyRGB) { + ip = s->ReadyRGB - s->Instructions; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else { + ip = s->ReadyAlpha - s->Instructions; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } + + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, ip); + fill_dest_into_pair(s, &pair, ip); + commit_instruction(s, ip); + } else { + struct pair_state_instruction **prgb; + struct pair_state_instruction **palpha; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + int rgbip = *prgb - s->Instructions; + int alphaip = *palpha - s->Instructions; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, rgbip); + if (!fill_instruction_into_pair(s, &pair, alphaip)) + continue; + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + fill_dest_into_pair(s, &pair, rgbip); + fill_dest_into_pair(s, &pair, alphaip); + commit_instruction(s, rgbip); + commit_instruction(s, alphaip); + goto success; + } + } + + /* No success in pairing; just take the first RGB instruction */ + int ip = s->ReadyRGB - s->Instructions; + s->ReadyRGB = s->ReadyRGB->NextReady; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, ip); + fill_dest_into_pair(s, &pair, ip); + commit_instruction(s, ip); + success: ; + } + + if (s->Debug) + radeonPrintPairInstruction(&pair); + + s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); +} + + +GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, + const struct radeon_pair_handler* handler, void *userdata) +{ + struct pair_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = _mesa_clone_program(ctx, program); + s.Handler = handler; + s.UserData = userdata; + s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; + s.Verbose = GL_FALSE && s.Debug; + + s.Instructions = (struct pair_state_instruction*)_mesa_calloc( + sizeof(struct pair_state_instruction)*s.Program->NumInstructions); + s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); + s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( + sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); + + if (s.Debug) + _mesa_printf("Emit paired program\n"); + + scan_instructions(&s); + allocate_input_registers(&s); + + while(!s.Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s); + + while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) + emit_alu(&s); + } + + if (s.Debug) + _mesa_printf(" END\n"); + + _mesa_free(s.Instructions); + _mesa_free(s.ValuePool); + _mesa_free(s.ReaderPool); + + _mesa_reference_program(ctx, &s.Program, NULL); + + return !s.Error; +} + + +static void print_pair_src(int i, struct radeon_pair_instruction_source* src) +{ + _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); +} + +static const char* opcode_string(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return "SOP"; + else + return _mesa_opcode_string(opcode); +} + +static int num_pairinst_args(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return 0; + else + return _mesa_num_inst_src_regs(opcode); +} + +static char swizzle_char(GLuint swz) +{ + switch(swz) { + case SWIZZLE_X: return 'x'; + case SWIZZLE_Y: return 'y'; + case SWIZZLE_Z: return 'z'; + case SWIZZLE_W: return 'w'; + case SWIZZLE_ZERO: return '0'; + case SWIZZLE_ONE: return '1'; + case SWIZZLE_NIL: return '_'; + default: return '?'; + } +} + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) +{ + int nargs; + int i; + + _mesa_printf(" RGB: "); + for(i = 0; i < 3; ++i) { + if (inst->RGB.Src[i].Used) + print_pair_src(i, inst->RGB.Src + i); + } + _mesa_printf("\n"); + _mesa_printf(" Alpha:"); + for(i = 0; i < 3; ++i) { + if (inst->Alpha.Src[i].Used) + print_pair_src(i, inst->Alpha.Src + i); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + _mesa_printf(" COLOR.%s%s%s", + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + nargs = num_pairinst_args(inst->RGB.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), + abs); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + _mesa_printf(" COLOR.w"); + if (inst->Alpha.DepthWriteMask) + _mesa_printf(" DEPTH.w"); + nargs = num_pairinst_args(inst->Alpha.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, + swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); + } + _mesa_printf("\n"); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h new file mode 100644 index 0000000000..4624a24629 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_program.h" + + +/** + * Represents a paired instruction, as found in R300 and R500 + * fragment programs. + */ +struct radeon_pair_instruction_source { + GLuint Index:8; + GLuint Constant:1; + GLuint Used:1; +}; + +struct radeon_pair_instruction_rgb { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:3; + GLuint OutputWriteMask:3; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:9; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction_alpha { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:1; + GLuint OutputWriteMask:1; + GLuint DepthWriteMask:1; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:3; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction { + struct radeon_pair_instruction_rgb RGB; + struct radeon_pair_instruction_alpha Alpha; +}; + + +/** + * + */ +struct radeon_pair_handler { + /** + * Fill in the proper hardware index for the given constant register. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); + + /** + * Write a paired instruction to the hardware. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); + + /** + * Write a texture instruction to the hardware. + * Register indices have already been rewritten to the allocated + * hardware register numbers. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitTex)(void*, struct prog_instruction*); + + /** + * Called before a block of contiguous, independent texture + * instructions is emitted. + */ + GLboolean (*BeginTexBlock)(void*); + + GLuint MaxHwTemps; +}; + +GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, + const struct radeon_pair_handler*, void *userdata); + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index f7af7d4e57..b692f8bf80 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/mtypes.h" #include "shader/prog_instruction.h" +#include "compiler/radeon_compiler.h" struct r300_context; typedef struct r300_context r300ContextRec; @@ -396,9 +397,6 @@ struct r300_hw_state { /* Can be tested with colormat currently. */ #define VSF_MAX_FRAGMENT_TEMPS (14) -#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) -#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" @@ -413,7 +411,7 @@ struct r300_vertex_program { GLuint FogAttr; GLuint WPosAttr; } key; - + struct r300_vertex_shader_hw_code { int length; union { @@ -441,110 +439,6 @@ struct r300_vertex_program_cont { struct r300_vertex_program *progs; }; -#define R300_PFS_MAX_ALU_INST 64 -#define R300_PFS_MAX_TEX_INST 32 -#define R300_PFS_MAX_TEX_INDIRECT 4 -#define R300_PFS_NUM_TEMP_REGS 32 -#define R300_PFS_NUM_CONST_REGS 32 - -#define R500_PFS_MAX_INST 512 -#define R500_PFS_NUM_TEMP_REGS 128 -#define R500_PFS_NUM_CONST_REGS 256 - -struct r300_pfs_compile_state; -struct r500_pfs_compile_state; - -/** - * Stores state that influences the compilation of a fragment program. - */ -struct r300_fragment_program_external_state { - struct { - /** - * If the sampler is used as a shadow sampler, - * this field is: - * 0 - GL_LUMINANCE - * 1 - GL_INTENSITY - * 2 - GL_ALPHA - * depending on the depth texture mode. - */ - GLuint depth_texture_mode : 2; - - /** - * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] - * - * Otherwise, this field is 0. - */ - GLuint texture_compare_func : 3; - } unit[16]; -}; - - -struct r300_fragment_program_node { - int tex_offset; /**< first tex instruction */ - int tex_end; /**< last tex instruction, relative to tex_offset */ - int alu_offset; /**< first ALU instruction */ - int alu_end; /**< last ALU instruction, relative to alu_offset */ - int flags; -}; - -/** - * Stores an R300 fragment program in its compiled-to-hardware form. - */ -struct r300_fragment_program_code { - struct { - int length; /**< total # of texture instructions used */ - GLuint inst[R300_PFS_MAX_TEX_INST]; - } tex; - - struct { - int length; /**< total # of ALU instructions used */ - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - } inst[R300_PFS_MAX_ALU_INST]; - } alu; - - struct r300_fragment_program_node node[4]; - int cur_node; - int first_node_has_tex; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; - - -struct r500_fragment_program_code { - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - GLuint inst4; - GLuint inst5; - } inst[R500_PFS_MAX_INST]; - - int inst_offset; - int inst_end; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; /** * Store everything about a fragment program that is needed @@ -555,22 +449,10 @@ struct r300_fragment_program { GLboolean translated; GLboolean error; - - struct r300_fragment_program_external_state state; - union rX00_fragment_program_code { - struct r300_fragment_program_code r300; - struct r500_fragment_program_code r500; - } code; - - GLboolean writes_depth; - GLuint optimization; - struct r300_fragment_program *next; + struct r300_fragment_program_external_state state; - /* attribute that we are sending the WPOS in */ - gl_frag_attrib wpos_attr; - /* attribute that we are sending the fog coordinate in */ - gl_frag_attrib fog_attr; + struct rX00_fragment_program_code code; }; struct r300_fragment_program_cont { @@ -583,12 +465,6 @@ struct r300_fragment_program_cont { struct r300_fragment_program *progs; }; -struct r300_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - union rX00_fragment_program_code *code; - struct gl_program *program; -}; #define R300_MAX_AOS_ARRAYS 16 @@ -610,8 +486,6 @@ struct r300_swtcl_info { struct r300_vtable { void (* SetupRSUnit)(GLcontext *ctx); void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); - GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler); - void (* FragmentProgramDump)(union rX00_fragment_program_code *code); void (* SetupPixelShader)(GLcontext *ctx); }; @@ -669,7 +543,7 @@ struct r300_context { uint32_t s3tc_force_disabled:1; uint32_t stencil_two_side_disabled:1; } options; - + struct r300_swtcl_info swtcl; struct r300_vertex_buffer vbuf; struct r300_index_buffer ind_buf; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c deleted file mode 100644 index 55c1cfe631..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r300_fragprog.h" - -#include "shader/prog_parameter.h" - -#include "r300_context.h" -#include "r300_fragprog_swizzle.h" - -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} - -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) -{ - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; - struct prog_src_register reg = { 0, }; - - fail_value_tokens[2] = tmu; - reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); - reg.Swizzle = SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * - * \todo If/when r5xx uses the radeon_program architecture, this can probably - * be reused. - */ -GLboolean r300_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) - return GL_FALSE; - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; - if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; - } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); - } - return GL_TRUE; - } - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } - - - /* Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - */ - if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - - int tempreg = radeonFindFreeTemporary(t); - int factor_index; - - tokens[2] = inst.TexSrcUnit; - factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); - - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MUL; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tempreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - tgt->SrcReg[1].File = PROGRAM_STATE_VAR; - tgt->SrcReg[1].Index = factor_index; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tempreg; - } - - if (inst.Opcode != OPCODE_KIL) { - if (inst.DstReg.File != PROGRAM_TEMPORARY || - inst.DstReg.WriteMask != WRITEMASK_XYZW) { - int tempreg = radeonFindFreeTemporary(t); - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } else if (inst.SaturateMode) { - destredirect = GL_TRUE; - } - } - - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; - } - - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SaturateMode = inst.SaturateMode; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; - } - - return GL_TRUE; -} - -/* just some random things... */ -void r300FragmentProgramDump(union rX00_fragment_program_code *c) -{ - struct r300_fragment_program_code *code = &c->r300; - int n, i, j; - static int pc = 0; - - fprintf(stderr, "pc=%d*************************************\n", pc++); - - fprintf(stderr, "Hardware program\n"); - fprintf(stderr, "----------------\n"); - - for (n = 0; n < (code->cur_node + 1); n++) { - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d, flags: %08x\n", n, - code->node[n].alu_offset, - code->node[n].tex_offset, - code->node[n].alu_end, code->node[n].tex_end, - code->node[n].flags); - - if (n > 0 || code->first_node_has_tex) { - fprintf(stderr, " TEX:\n"); - for (i = code->node[n].tex_offset; - i <= code->node[n].tex_offset + code->node[n].tex_end; - ++i) { - const char *instr; - - switch ((code->tex. - inst[i] >> R300_TEX_INST_SHIFT) & - 15) { - case R300_TEX_OP_LD: - instr = "TEX"; - break; - case R300_TEX_OP_KIL: - instr = "KIL"; - break; - case R300_TEX_OP_TXP: - instr = "TXP"; - break; - case R300_TEX_OP_TXB: - instr = "TXB"; - break; - default: - instr = "UNKNOWN"; - } - - fprintf(stderr, - " %s t%i, %c%i, texture[%i] (%08x)\n", - instr, - (code->tex. - inst[i] >> R300_DST_ADDR_SHIFT) & 31, - 't', - (code->tex. - inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (code->tex. - inst[i] & R300_TEX_ID_MASK) >> - R300_TEX_ID_SHIFT, - code->tex.inst[i]); - } - } - - for (i = code->node[n].alu_offset; - i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { - char srcc[3][10], dstc[20]; - char srca[3][10], dsta[20]; - char argc[3][20]; - char arga[3][20]; - char flags[5], tmp[10]; - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst1 >> (j * 6); - int rega = code->alu.inst[i].inst3 >> (j * 6); - - sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); - sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); - } - - dstc[0] = 0; - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - } - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(tmp, "o%i.%s", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - strcat(dstc, tmp); - } - - dsta[0] = 0; - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { - sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { - sprintf(tmp, "o%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - strcat(dsta, tmp); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { - strcat(dsta, "Z"); - } - - fprintf(stderr, - "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" - " w: %3s %3s %3s -> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], dstc, - code->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, code->alu.inst[i].inst3); - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst0 >> (j * 7); - int rega = code->alu.inst[i].inst2 >> (j * 7); - int d; - char buf[20]; - - d = regc & 31; - if (d < 12) { - switch (d % 4) { - case R300_ALU_ARGC_SRC0C_XYZ: - sprintf(buf, "%s.xyz", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_XXX: - sprintf(buf, "%s.xxx", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_YYY: - sprintf(buf, "%s.yyy", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_ZZZ: - sprintf(buf, "%s.zzz", - srcc[d / 4]); - break; - } - } else if (d < 15) { - sprintf(buf, "%s.www", srca[d - 12]); - } else if (d == 20) { - sprintf(buf, "0.0"); - } else if (d == 21) { - sprintf(buf, "1.0"); - } else if (d == 22) { - sprintf(buf, "0.5"); - } else if (d >= 23 && d < 32) { - d -= 23; - switch (d / 3) { - case 0: - sprintf(buf, "%s.yzx", - srcc[d % 3]); - break; - case 1: - sprintf(buf, "%s.zxy", - srcc[d % 3]); - break; - case 2: - sprintf(buf, "%s.Wzy", - srcc[d % 3]); - break; - } - } else { - sprintf(buf, "%i", d); - } - - sprintf(argc[j], "%s%s%s%s", - (regc & 32) ? "-" : "", - (regc & 64) ? "|" : "", - buf, (regc & 64) ? "|" : ""); - - d = rega & 31; - if (d < 9) { - sprintf(buf, "%s.%c", srcc[d / 3], - 'x' + (char)(d % 3)); - } else if (d < 12) { - sprintf(buf, "%s.w", srca[d - 9]); - } else if (d == 16) { - sprintf(buf, "0.0"); - } else if (d == 17) { - sprintf(buf, "1.0"); - } else if (d == 18) { - sprintf(buf, "0.5"); - } else { - sprintf(buf, "%i", d); - } - - sprintf(arga[j], "%s%s%s%s", - (rega & 32) ? "-" : "", - (rega & 64) ? "|" : "", - buf, (rega & 64) ? "|" : ""); - } - - fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" - " w: %8s %8s %8s op: %08x\n", - argc[0], argc[1], argc[2], - code->alu.inst[i].inst0, arga[0], arga[1], - arga[2], code->alu.inst[i].inst2); - } - } -} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h deleted file mode 100644 index 5ce6f33cee..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* - * Authors: - * Ben Skeggs - * Jerome Glisse - */ -#ifndef __R300_FRAGPROG_H_ -#define __R300_FRAGPROG_H_ - -#include "shader/program.h" -#include "shader/prog_instruction.h" - -#include "r300_context.h" -#include "radeon_program.h" - -#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 -#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 - -#if 1 - -/** - * Fragment program helper macros - */ - -/* Produce unshifted source selectors */ -#define FP_TMP(idx) (idx) -#define FP_CONST(idx) ((idx) | (1 << 5)) - -/* Produce source/dest selector dword */ -#define FP_SELC_MASK_NO 0 -#define FP_SELC_MASK_X 1 -#define FP_SELC_MASK_Y 2 -#define FP_SELC_MASK_XY 3 -#define FP_SELC_MASK_Z 4 -#define FP_SELC_MASK_XZ 5 -#define FP_SELC_MASK_YZ 6 -#define FP_SELC_MASK_XYZ 7 - -#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTC_SHIFT) | \ - (FP_SELC_MASK_##regmask << 23) | \ - (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_ALU_SRC0C_SHIFT) | \ - ((src1) << R300_ALU_SRC1C_SHIFT) | \ - ((src2) << R300_ALU_SRC2C_SHIFT)) - -#define FP_SELA_MASK_NO 0 -#define FP_SELA_MASK_W 1 - -#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTA_SHIFT) | \ - (FP_SELA_MASK_##regmask << 23) | \ - (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_ALU_SRC0A_SHIFT) | \ - ((src1) << R300_ALU_SRC1A_SHIFT) | \ - ((src2) << R300_ALU_SRC2A_SHIFT)) - -/* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_ALU_ARGC_##source -#define FP_ARGA(source) R300_ALU_ARGA_##source -#define FP_ABS(arg) ((arg) | (1 << 6)) -#define FP_NEG(arg) ((arg) ^ (1 << 5)) - -/* Produce instruction dword */ -#define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTC_##opcode | \ - ((arg0) << R300_ALU_ARG0C_SHIFT) | \ - ((arg1) << R300_ALU_ARG1C_SHIFT) | \ - ((arg2) << R300_ALU_ARG2C_SHIFT)) - -#define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTA_##opcode | \ - ((arg0) << R300_ALU_ARG0A_SHIFT) | \ - ((arg1) << R300_ALU_ARG1A_SHIFT) | \ - ((arg2) << R300_ALU_ARG2A_SHIFT)) - -#endif - -extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); - -extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); - -extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); - -#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index f5c4c0f4a0..f28162a2b6 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -43,175 +43,13 @@ #include "shader/prog_print.h" #include "r300_state.h" -#include "r300_fragprog.h" -#include "r300_fragprog_swizzle.h" -#include "r500_fragprog.h" -#include "radeon_program.h" -#include "radeon_program_alu.h" +#include "compiler/radeon_program.h" +#include "compiler/radeon_program_alu.h" +#include "compiler/r300_fragprog_swizzle.h" +#include "compiler/r300_fragprog.h" +#include "compiler/r500_fragprog.h" -static void nqssadce_init(struct nqssadce_state* s) -{ - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; -} - -/** - * Transform the program to support fragment.position. - * - * Introduce a small fragment at the start of the program that will be - * the only code that directly reads the FRAG_ATTRIB_WPOS input. - * All other code pieces that reference that input will be rewritten - * to read from a newly allocated temporary. - * - */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) -{ - GLuint InputsRead = compiler->fp->Base->InputsRead; - - if (!(InputsRead & FRAG_BIT_WPOS)) { - compiler->fp->wpos_attr = FRAG_ATTRIB_MAX; - return; - } - - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - - for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) - { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); - InputsRead |= 1 << i; - compiler->fp->Base->InputsRead = InputsRead; - compiler->fp->wpos_attr = i; - break; - } - } - - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - i = 0; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } -} - - -/** - * Rewrite fragment.fogcoord to use a texture coordinate slot. - * Note that fogcoord is forced into an X001 pattern, and this enforcement - * is done here. - * - * See also the counterpart rewriting for vertex programs. - */ -static void rewriteFog(struct r300_fragment_program_compiler *compiler) -{ - struct r300_fragment_program *fp = compiler->fp; - GLuint InputsRead; - int i; - - InputsRead = fp->Base->InputsRead; - - if (!(InputsRead & FRAG_BIT_FOGC)) { - fp->fog_attr = FRAG_ATTRIB_MAX; - return; - } - - for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) - { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); - InputsRead |= 1 << i; - fp->Base->InputsRead = InputsRead; - fp->fog_attr = i; - break; - } - } - - { - struct prog_instruction *inst; - - inst = compiler->program->Instructions; - while (inst->Opcode != OPCODE_END) { - const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { - inst->SrcReg[i].Index = fp->fog_attr; - inst->SrcReg[i].Swizzle = combine_swizzles( - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), - inst->SrcReg[i].Swizzle); - } - } - ++inst; - } - } -} static GLuint build_dtm(GLuint depthmode) { @@ -251,121 +89,23 @@ static void build_state( } } -static void rewrite_depth_out(struct gl_program *prog) -{ - struct prog_instruction *inst; - - for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { - if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) - continue; - - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; - } else { - inst->DstReg.WriteMask = 0; - continue; - } - - switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; - } - } -} void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_fragment_program_compiler compiler; - compiler.r300 = r300; - compiler.fp = fp; + compiler.ctx = ctx; compiler.code = &fp->code; + compiler.state = fp->state; compiler.program = fp->Base; + compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE; + compiler.debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; - if (RADEON_DEBUG & DEBUG_PIXEL) { - fflush(stdout); - _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } - - insert_WPOS_trailer(&compiler); - - rewriteFog(&compiler); - - rewrite_depth_out(compiler.program); - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 4, transformations); - } else { - struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 3, transformations); - } - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } - - if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) + if (!r3xx_compile_fragment_program(&compiler)) fp->error = GL_TRUE; fp->translated = GL_TRUE; - - if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&fp->code); } struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c deleted file mode 100644 index b75656e7ee..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - * - * Emit the r300_fragment_program_code that can be understood by the hardware. - * Input is a pre-transformed radeon_program. - * - * \author Ben Skeggs - * - * \author Jerome Glisse - * - * \todo FogOption - */ - -#include "r300_fragprog.h" - -#include "radeon_program_pair.h" -#include "r300_fragprog_swizzle.h" -#include "r300_reg.h" - - -#define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r300_fragment_program_code *code = &c->code->r300 - -#define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - } while(0) - - -static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == index) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R300_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = index; - } - - return GL_TRUE; -} - - -/** - * Mark a temporary register as used. - */ -static void use_temporary(struct r300_fragment_program_code *code, GLuint index) -{ - if (index > code->max_temp_idx) - code->max_temp_idx = index; -} - - -static GLuint translate_rgb_opcode(GLuint opcode) -{ - switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTC_CMP; - case OPCODE_DP3: return R300_ALU_OUTC_DP3; - case OPCODE_DP4: return R300_ALU_OUTC_DP4; - case OPCODE_FRC: return R300_ALU_OUTC_FRC; - default: - error("translate_rgb_opcode(%i): Unknown opcode", opcode); - /* fall through */ - case OPCODE_NOP: - /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTC_MAD; - case OPCODE_MAX: return R300_ALU_OUTC_MAX; - case OPCODE_MIN: return R300_ALU_OUTC_MIN; - case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; - } -} - -static GLuint translate_alpha_opcode(GLuint opcode) -{ - switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTA_CMP; - case OPCODE_DP3: return R300_ALU_OUTA_DP4; - case OPCODE_DP4: return R300_ALU_OUTA_DP4; - case OPCODE_EX2: return R300_ALU_OUTA_EX2; - case OPCODE_FRC: return R300_ALU_OUTA_FRC; - case OPCODE_LG2: return R300_ALU_OUTA_LG2; - default: - error("translate_rgb_opcode(%i): Unknown opcode", opcode); - /* fall through */ - case OPCODE_NOP: - /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTA_MAD; - case OPCODE_MAX: return R300_ALU_OUTA_MAX; - case OPCODE_MIN: return R300_ALU_OUTA_MIN; - case OPCODE_RCP: return R300_ALU_OUTA_RCP; - case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; - } -} - -/** - * Emit one paired ALU instruction. - */ -static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) -{ - PROG_CODE; - - if (code->alu.length >= R300_PFS_MAX_ALU_INST) { - error("Too many ALU instructions"); - return GL_FALSE; - } - - int ip = code->alu.length++; - int j; - code->node[code->cur_node].alu_end++; - - code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); - code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); - - for(j = 0; j < 3; ++j) { - GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); - if (!inst->RGB.Src[j].Constant) - use_temporary(code, inst->RGB.Src[j].Index); - code->alu.inst[ip].inst1 |= src << (6*j); - - src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); - if (!inst->Alpha.Src[j].Constant) - use_temporary(code, inst->Alpha.Src[j].Index); - code->alu.inst[ip].inst3 |= src << (6*j); - - GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); - arg |= inst->RGB.Arg[j].Abs << 6; - arg |= inst->RGB.Arg[j].Negate << 5; - code->alu.inst[ip].inst0 |= arg << (7*j); - - arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); - arg |= inst->Alpha.Arg[j].Abs << 6; - arg |= inst->Alpha.Arg[j].Negate << 5; - code->alu.inst[ip].inst2 |= arg << (7*j); - } - - if (inst->RGB.Saturate) - code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; - if (inst->Alpha.Saturate) - code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; - - if (inst->RGB.WriteMask) { - use_temporary(code, inst->RGB.DestIndex); - code->alu.inst[ip].inst1 |= - (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | - (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); - } - if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); - code->node[code->cur_node].flags |= R300_RGBA_OUT; - } - - if (inst->Alpha.WriteMask) { - use_temporary(code, inst->Alpha.DestIndex); - code->alu.inst[ip].inst3 |= - (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | - R300_ALU_DSTA_REG; - } - if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; - code->node[code->cur_node].flags |= R300_RGBA_OUT; - } - if (inst->Alpha.DepthWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; - code->node[code->cur_node].flags |= R300_W_OUT; - c->fp->writes_depth = GL_TRUE; - } - - return GL_TRUE; -} - - -/** - * Finish the current node without advancing to the next one. - */ -static GLboolean finish_node(struct r300_fragment_program_compiler *c) -{ - struct r300_fragment_program_code *code = &c->code->r300; - struct r300_fragment_program_node *node = &code->node[code->cur_node]; - - if (node->alu_end < 0) { - /* Generate a single NOP for this node */ - struct radeon_pair_instruction inst; - _mesa_bzero(&inst, sizeof(inst)); - if (!emit_alu(c, &inst)) - return GL_FALSE; - } - - if (node->tex_end < 0) { - if (code->cur_node == 0) { - node->tex_end = 0; - } else { - error("Node %i has no TEX instructions", code->cur_node); - return GL_FALSE; - } - } else { - if (code->cur_node == 0) - code->first_node_has_tex = 1; - } - - return GL_TRUE; -} - - -/** - * Begin a block of texture instructions. - * Create the necessary indirection. - */ -static GLboolean begin_tex(void* data) -{ - PROG_CODE; - - if (code->cur_node == 0) { - if (code->node[0].alu_end < 0 && - code->node[0].tex_end < 0) - return GL_TRUE; - } - - if (code->cur_node == 3) { - error("Too many texture indirections"); - return GL_FALSE; - } - - if (!finish_node(c)) - return GL_FALSE; - - struct r300_fragment_program_node *node = &code->node[++code->cur_node]; - node->alu_offset = code->alu.length; - node->alu_end = -1; - node->tex_offset = code->tex.length; - node->tex_end = -1; - return GL_TRUE; -} - - -static GLboolean emit_tex(void* data, struct prog_instruction* inst) -{ - PROG_CODE; - - if (code->tex.length >= R300_PFS_MAX_TEX_INST) { - error("Too many TEX instructions"); - return GL_FALSE; - } - - GLuint unit = inst->TexSrcUnit; - GLuint dest = inst->DstReg.Index; - GLuint opcode; - - switch(inst->Opcode) { - case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; - default: - error("Unknown texture opcode %i", inst->Opcode); - return GL_FALSE; - } - - if (inst->Opcode == OPCODE_KIL) { - unit = 0; - dest = 0; - } else { - use_temporary(code, dest); - } - - use_temporary(code, inst->SrcReg[0].Index); - - code->node[code->cur_node].tex_end++; - code->tex.inst[code->tex.length++] = - (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | - (dest << R300_DST_ADDR_SHIFT) | - (unit << R300_TEX_ID_SHIFT) | - (opcode << R300_TEX_INST_SHIFT); - return GL_TRUE; -} - - -static const struct radeon_pair_handler pair_handler = { - .EmitConst = &emit_const, - .EmitPaired = &emit_alu, - .EmitTex = &emit_tex, - .BeginTexBlock = &begin_tex, - .MaxHwTemps = R300_PFS_NUM_TEMP_REGS -}; - -/** - * Final compilation step: Turn the intermediate radeon_program into - * machine-readable instructions. - */ -GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) -{ - struct r300_fragment_program_code *code = &compiler->code->r300; - - _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - code->node[0].alu_end = -1; - code->node[0].tex_end = -1; - - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; - - if (!finish_node(compiler)) - return GL_FALSE; - - return GL_TRUE; -} - diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c deleted file mode 100644 index fc9d855bce..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * Utilities to deal with the somewhat odd restriction on R300 fragment - * program swizzles. - */ - -#include "r300_fragprog_swizzle.h" - -#include "r300_reg.h" -#include "radeon_nqssadce.h" - -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) - -struct swizzle_data { - GLuint hash; /**< swizzle value this matches */ - GLuint base; /**< base value for hw swizzle */ - GLuint stride; /**< difference in base between arg0/1/2 */ -}; - -static const struct swizzle_data native_swizzles[] = { - {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, - {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, - {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, - {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, - {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, - {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, - {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, - {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} -}; - -static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); - - -/** - * Find a native RGB swizzle that matches the given swizzle. - * Returns 0 if none found. - */ -static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) -{ - int i, comp; - - for(i = 0; i < num_native_swizzles; ++i) { - const struct swizzle_data* sd = &native_swizzles[i]; - for(comp = 0; comp < 3; ++comp) { - GLuint swz = GET_SWZ(swizzle, comp); - if (swz == SWIZZLE_NIL) - continue; - if (swz != GET_SWZ(sd->hash, comp)) - break; - } - if (comp == 3) - return sd; - } - - return 0; -} - - -/** - * Check whether the given instruction supports the swizzle and negate - * combinations in the given source register. - */ -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) -{ - if (reg.Abs) - reg.Negate = NEGATE_NONE; - - if (opcode == OPCODE_KIL || - opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP) { - int j; - - if (reg.Abs || reg.Negate) - return GL_FALSE; - - for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(reg.Swizzle, j); - if (swz == SWIZZLE_NIL) - continue; - if (swz != j) - return GL_FALSE; - } - - return GL_TRUE; - } - - GLuint relevant = 0; - int j; - - for(j = 0; j < 3; ++j) - if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) - relevant |= 1 << j; - - if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; - - if (!lookup_native_swizzle(reg.Swizzle)) - return GL_FALSE; - - return GL_TRUE; -} - - -/** - * Generate MOV dst, src using only native swizzles. - */ -void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) -{ - if (src.Abs) - src.Negate = NEGATE_NONE; - - while(dst.WriteMask) { - const struct swizzle_data *best_swizzle = 0; - GLuint best_matchcount = 0; - GLuint best_matchmask = 0; - int i, comp; - - for(i = 0; i < num_native_swizzles; ++i) { - const struct swizzle_data *sd = &native_swizzles[i]; - GLuint matchcount = 0; - GLuint matchmask = 0; - for(comp = 0; comp < 3; ++comp) { - if (!GET_BIT(dst.WriteMask, comp)) - continue; - GLuint swz = GET_SWZ(src.Swizzle, comp); - if (swz == SWIZZLE_NIL) - continue; - if (swz == GET_SWZ(sd->hash, comp)) { - /* check if the negate bit of current component - * is the same for already matched components */ - if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) - continue; - - matchcount++; - matchmask |= 1 << comp; - } - } - if (matchcount > best_matchcount) { - best_swizzle = sd; - best_matchcount = matchcount; - best_matchmask = matchmask; - if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) - break; - } - } - - struct prog_instruction *inst; - - _mesa_insert_instructions(s->Program, s->IP, 1); - inst = s->Program->Instructions + s->IP++; - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; - /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ - - dst.WriteMask &= ~inst->DstReg.WriteMask; - } -} - - -/** - * Translate an RGB (XYZ) swizzle into the hardware code for the given - * instruction source. - */ -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) -{ - const struct swizzle_data* sd = lookup_native_swizzle(swizzle); - - if (!sd) { - _mesa_printf("Not a native swizzle: %08x\n", swizzle); - return 0; - } - - return sd->base + src*sd->stride; -} - - -/** - * Translate an Alpha (W) swizzle into the hardware code for the given - * instruction source. - */ -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) -{ - if (swizzle < 3) - return swizzle + 3*src; - - switch(swizzle) { - case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; - case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; - case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; - default: return R300_ALU_ARGA_ONE; - } -} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h deleted file mode 100644 index 231bf4eef5..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __R300_FRAGPROG_SWIZZLE_H_ -#define __R300_FRAGPROG_SWIZZLE_H_ - -#include "main/glheader.h" -#include "shader/prog_instruction.h" - -struct nqssadce_state; - -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); -void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); - -#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ddabd53992..2fa626bab2 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -55,7 +55,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_vertprog.h" #include "radeon_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_context.h" #include "vblank.h" @@ -66,6 +65,66 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define CLEARBUFFER_DEPTH 0x2 #define CLEARBUFFER_STENCIL 0x4 +#if 1 + +/** + * Fragment program helper macros + */ + +/* Produce unshifted source selectors */ +#define FP_TMP(idx) (idx) +#define FP_CONST(idx) ((idx) | (1 << 5)) + +/* Produce source/dest selector dword */ +#define FP_SELC_MASK_NO 0 +#define FP_SELC_MASK_X 1 +#define FP_SELC_MASK_Y 2 +#define FP_SELC_MASK_XY 3 +#define FP_SELC_MASK_Z 4 +#define FP_SELC_MASK_XZ 5 +#define FP_SELC_MASK_YZ 6 +#define FP_SELC_MASK_XYZ 7 + +#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ + (FP_SELC_MASK_##regmask << 23) | \ + (FP_SELC_MASK_##outmask << 26) | \ + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) + +#define FP_SELA_MASK_NO 0 +#define FP_SELA_MASK_W 1 + +#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ + (FP_SELA_MASK_##regmask << 23) | \ + (FP_SELA_MASK_##outmask << 24) | \ + ((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) + +/* Produce unshifted argument selectors */ +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source +#define FP_ABS(arg) ((arg) | (1 << 6)) +#define FP_NEG(arg) ((arg) ^ (1 << 5)) + +/* Produce instruction dword */ +#define FP_INSTRC(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) + +#define FP_INSTRA(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) + +#endif + static void r300EmitClearState(GLcontext * ctx); static void r300ClearBuffer(r300ContextPtr r300, int flags, @@ -546,7 +605,7 @@ static int r300KernelClear(GLcontext *ctx, GLuint flags) /* Make sure it fits there. */ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); - + if (flags & BUFFER_BIT_COLOR0) { rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 12fbf281d9..62a03281ca 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -62,8 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_tex.h" #include "r300_fragprog_common.h" -#include "r300_fragprog.h" -#include "r500_fragprog.h" #include "r300_render.h" #include "r300_vertprog.h" @@ -458,7 +456,7 @@ static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth; + return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth; } static void r300SetEarlyZState(GLcontext * ctx) @@ -1230,7 +1228,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program_code *code = &r300->selected_fp->code.r300; + struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300; R300_STATECHANGE(r300, fpt); @@ -1272,7 +1270,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r500_fragment_program_code *code = &r300->selected_fp->code.r500; + struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -2063,7 +2061,7 @@ static void r300SetupPixelShader(GLcontext *ctx) struct r300_fragment_program_code *code; int i, k; - code = &fp->code.r300; + code = &fp->code.code.r300; R300_STATECHANGE(rmesa, fpi[0]); R300_STATECHANGE(rmesa, fpi[1]); @@ -2137,7 +2135,7 @@ static void r500SetupPixelShader(GLcontext *ctx) ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; - code = &fp->code.r500; + code = &fp->code.code.r500; R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; @@ -2345,13 +2343,9 @@ void r300InitShaderFunctions(r300ContextPtr r300) r300->vtbl.SetupRSUnit = r500SetupRSUnit; r300->vtbl.SetupPixelShader = r500SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r500BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; } else { r300->vtbl.SetupRSUnit = r300SetupRSUnit; r300->vtbl.SetupPixelShader = r300SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r300BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; } } diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 56ed519cf4..a7e8e71149 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -150,16 +150,16 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); } - if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { - int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; + if (rmesa->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0; VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); } - if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { - int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; + if (rmesa->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0; VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index de32013032..ab5ca4322e 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -40,7 +40,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "tnl/tnl.h" -#include "radeon_nqssadce.h" +#include "compiler/radeon_nqssadce.h" #include "r300_context.h" #include "r300_state.h" @@ -1558,12 +1558,12 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, _mesa_insert_mvp_code(ctx, vp->Base); } - if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0); + if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) { + pos_as_texcoord(&vp->Base->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0); } - if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0); + if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) { + fog_as_texcoord(&vp->Base->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0); } addArtificialOutputs(ctx, prog); @@ -1640,8 +1640,8 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; wanted_key.FpReads = r300->selected_fp->Base->InputsRead; - wanted_key.FogAttr = r300->selected_fp->fog_attr; - wanted_key.WPosAttr = r300->selected_fp->wpos_attr; + wanted_key.FogAttr = r300->selected_fp->code.fog_attr; + wanted_key.WPosAttr = r300->selected_fp->code.wpos_attr; for (vp = vpc->progs; vp; vp = vp->next) { if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c deleted file mode 100644 index 4d58cf2162..0000000000 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r500_fragprog.h" - -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} - -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) -{ - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; - struct prog_src_register reg = { 0, }; - - fail_value_tokens[2] = tmu; - reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); - reg.Swizzle = SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * - */ -GLboolean r500_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) - return GL_FALSE; - - /* ARB_shadow & EXT_shadow_funcs */ - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; - if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; - } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); - } - return GL_TRUE; - } - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { - int tempreg = radeonFindFreeTemporary(t); - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } - - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; - } - - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; - } - - return GL_TRUE; -} - -GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) -{ - GLuint relevant; - int i; - - if (opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP || - opcode == OPCODE_KIL) { - if (reg.Abs) - return GL_FALSE; - - if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) - return GL_FALSE; - - if (reg.Negate) - reg.Negate ^= NEGATE_XYZW; - - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz == SWIZZLE_NIL) { - reg.Negate &= ~(1 << i); - continue; - } - if (swz >= 4) - return GL_FALSE; - } - - if (reg.Negate) - return GL_FALSE; - - return GL_TRUE; - } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { - /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; - * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) - return GL_TRUE; - - return GL_FALSE; - } else { - /* ALU instructions support almost everything */ - if (reg.Abs) - return GL_TRUE; - - relevant = 0; - for(i = 0; i < 3; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) - relevant |= 1 << i; - } - if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; - - return GL_TRUE; - } -} - -/** - * Implement a MOV with a potentially non-native swizzle. - * - * The only thing we *cannot* do in an ALU instruction is per-component - * negation. Therefore, we split the MOV into two instructions when necessary. - */ -void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) -{ - struct prog_instruction *inst; - GLuint negatebase[2] = { 0, 0 }; - int i; - - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(src.Swizzle, i); - if (swz == SWIZZLE_NIL) - continue; - negatebase[GET_BIT(src.Negate, i)] |= 1 << i; - } - - _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); - inst = s->Program->Instructions + s->IP; - - for(i = 0; i <= 1; ++i) { - if (!negatebase[i]) - continue; - - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask = negatebase[i]; - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; - inst++; - s->IP++; - } -} - - -static char *toswiz(int swiz_val) { - switch(swiz_val) { - case 0: return "R"; - case 1: return "G"; - case 2: return "B"; - case 3: return "A"; - case 4: return "0"; - case 5: return "1/2"; - case 6: return "1"; - case 7: return "U"; - } - return NULL; -} - -static char *toop(int op_val) -{ - char *str = NULL; - switch (op_val) { - case 0: str = "MAD"; break; - case 1: str = "DP3"; break; - case 2: str = "DP4"; break; - case 3: str = "D2A"; break; - case 4: str = "MIN"; break; - case 5: str = "MAX"; break; - case 6: str = "Reserved"; break; - case 7: str = "CND"; break; - case 8: str = "CMP"; break; - case 9: str = "FRC"; break; - case 10: str = "SOP"; break; - case 11: str = "MDH"; break; - case 12: str = "MDV"; break; - } - return str; -} - -static char *to_alpha_op(int op_val) -{ - char *str = NULL; - switch (op_val) { - case 0: str = "MAD"; break; - case 1: str = "DP"; break; - case 2: str = "MIN"; break; - case 3: str = "MAX"; break; - case 4: str = "Reserved"; break; - case 5: str = "CND"; break; - case 6: str = "CMP"; break; - case 7: str = "FRC"; break; - case 8: str = "EX2"; break; - case 9: str = "LN2"; break; - case 10: str = "RCP"; break; - case 11: str = "RSQ"; break; - case 12: str = "SIN"; break; - case 13: str = "COS"; break; - case 14: str = "MDH"; break; - case 15: str = "MDV"; break; - } - return str; -} - -static char *to_mask(int val) -{ - char *str = NULL; - switch(val) { - case 0: str = "NONE"; break; - case 1: str = "R"; break; - case 2: str = "G"; break; - case 3: str = "RG"; break; - case 4: str = "B"; break; - case 5: str = "RB"; break; - case 6: str = "GB"; break; - case 7: str = "RGB"; break; - case 8: str = "A"; break; - case 9: str = "AR"; break; - case 10: str = "AG"; break; - case 11: str = "ARG"; break; - case 12: str = "AB"; break; - case 13: str = "ARB"; break; - case 14: str = "AGB"; break; - case 15: str = "ARGB"; break; - } - return str; -} - -static char *to_texop(int val) -{ - switch(val) { - case 0: return "NOP"; - case 1: return "LD"; - case 2: return "TEXKILL"; - case 3: return "PROJ"; - case 4: return "LODBIAS"; - case 5: return "LOD"; - case 6: return "DXDY"; - } - return NULL; -} - -void r500FragmentProgramDump(union rX00_fragment_program_code *c) -{ - struct r500_fragment_program_code *code = &c->r500; - fprintf(stderr, "R500 Fragment Program:\n--------\n"); - - int n; - uint32_t inst; - uint32_t inst0; - char *str = NULL; - - if (code->const_nr) { - fprintf(stderr, "--------\nConstants:\n"); - for (n = 0; n < code->const_nr; n++) { - fprintf(stderr, "Constant %d: %i[%i]\n", n, - code->constant[n].File, code->constant[n].Index); - } - fprintf(stderr, "--------\n"); - } - - for (n = 0; n < code->inst_end+1; n++) { - inst0 = inst = code->inst[n].inst0; - fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); - switch(inst & 0x3) { - case R500_INST_TYPE_ALU: str = "ALU"; break; - case R500_INST_TYPE_OUT: str = "OUT"; break; - case R500_INST_TYPE_FC: str = "FC"; break; - case R500_INST_TYPE_TEX: str = "TEX"; break; - }; - fprintf(stderr,"%s %s %s %s %s ", str, - inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", - inst & R500_INST_LAST ? "LAST" : "", - inst & R500_INST_NOP ? "NOP" : "", - inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); - fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), - to_mask((inst >> 15) & 0xf)); - - switch(inst0 & 0x3) { - case 0: - case 1: - fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); - inst = code->inst[n].inst1; - - fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1<<8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', - (inst >> 30)); - - fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); - inst = code->inst[n].inst2; - fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", - inst & 0xff, (inst & (1<<8)) ? 'c' : 't', - (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', - (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', - (inst >> 30)); - fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); - inst = code->inst[n].inst3; - fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", - (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), - (inst >> 11) & 0x3, - (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), - (inst >> 24) & 0x3); - - - fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); - inst = code->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), - (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, - (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, - (inst >> 31) & 0x1); - - fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); - inst = code->inst[n].inst5; - fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), - (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), - (inst >> 23) & 0x3, - (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); - break; - case 2: - break; - case 3: - inst = code->inst[n].inst1; - fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, - to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", - (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); - inst = code->inst[n].inst2; - fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, - inst & 127, inst & (1<<7) ? "(rel)" : "", - toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), - toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), - (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", - toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), - toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); - - fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); - break; - } - fprintf(stderr,"\n"); - } - -} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h deleted file mode 100644 index 1179bf6607..0000000000 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* - * Authors: - * Ben Skeggs - * Jerome Glisse - */ -#ifndef __R500_FRAGPROG_H_ -#define __R500_FRAGPROG_H_ - -#include "shader/prog_parameter.h" -#include "shader/prog_instruction.h" - -#include "r300_context.h" -#include "radeon_nqssadce.h" - -extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); - -extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); - -extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); - -extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); - -extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); - -#endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c deleted file mode 100644 index 30f4514897..0000000000 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * Copyright 2008 Corbin Simpson - * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - * - * \author Ben Skeggs - * - * \author Jerome Glisse - * - * \author Corbin Simpson - * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * - */ - -#include "r500_fragprog.h" - -#include "radeon_program_pair.h" - - -#define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r500_fragment_program_code *code = &c->code->r500 - -#define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - } while(0) - - -/** - * Callback to register hardware constants. - */ -static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == idx) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R500_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = idx; - } - - return GL_TRUE; -} - -static GLuint translate_rgb_op(GLuint opcode) -{ - switch(opcode) { - case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; - case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; - case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; - case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; - case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; - case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; - default: - error("translate_rgb_op(%d): unknown opcode\n", opcode); - /* fall through */ - case OPCODE_NOP: - /* fall through */ - case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; - case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; - case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; - case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; - } -} - -static GLuint translate_alpha_op(GLuint opcode) -{ - switch(opcode) { - case OPCODE_CMP: return R500_ALPHA_OP_CMP; - case OPCODE_COS: return R500_ALPHA_OP_COS; - case OPCODE_DDX: return R500_ALPHA_OP_MDH; - case OPCODE_DDY: return R500_ALPHA_OP_MDV; - case OPCODE_DP3: return R500_ALPHA_OP_DP; - case OPCODE_DP4: return R500_ALPHA_OP_DP; - case OPCODE_EX2: return R500_ALPHA_OP_EX2; - case OPCODE_FRC: return R500_ALPHA_OP_FRC; - case OPCODE_LG2: return R500_ALPHA_OP_LN2; - default: - error("translate_alpha_op(%d): unknown opcode\n", opcode); - /* fall through */ - case OPCODE_NOP: - /* fall through */ - case OPCODE_MAD: return R500_ALPHA_OP_MAD; - case OPCODE_MAX: return R500_ALPHA_OP_MAX; - case OPCODE_MIN: return R500_ALPHA_OP_MIN; - case OPCODE_RCP: return R500_ALPHA_OP_RCP; - case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; - case OPCODE_SIN: return R500_ALPHA_OP_SIN; - } -} - -static GLuint fix_hw_swizzle(GLuint swz) -{ - if (swz == 5) swz = 6; - if (swz == SWIZZLE_NIL) swz = 4; - return swz; -} - -static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) -{ - GLuint t = inst->RGB.Arg[arg].Source; - int comp; - t |= inst->RGB.Arg[arg].Negate << 11; - t |= inst->RGB.Arg[arg].Abs << 12; - - for(comp = 0; comp < 3; ++comp) - t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); - - return t; -} - -static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) -{ - GLuint t = inst->Alpha.Arg[i].Source; - t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; - t |= inst->Alpha.Arg[i].Negate << 5; - t |= inst->Alpha.Arg[i].Abs << 6; - return t; -} - -static void use_temporary(struct r500_fragment_program_code* code, GLuint index) -{ - if (index > code->max_temp_idx) - code->max_temp_idx = index; -} - -static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) -{ - if (!src.Constant) - use_temporary(code, src.Index); - return src.Index | src.Constant << 8; -} - - -/** - * Emit a paired ALU instruction. - */ -static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) -{ - PROG_CODE; - - if (code->inst_end >= 511) { - error("emit_alu: Too many instructions"); - return GL_FALSE; - } - - int ip = ++code->inst_end; - - code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); - code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); - - if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) - code->inst[ip].inst0 = R500_INST_TYPE_OUT; - else - code->inst[ip].inst0 = R500_INST_TYPE_ALU; - code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; - - code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); - code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); - if (inst->Alpha.DepthWriteMask) { - code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->fp->writes_depth = GL_TRUE; - } - - code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); - code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); - use_temporary(code, inst->Alpha.DestIndex); - use_temporary(code, inst->RGB.DestIndex); - - if (inst->RGB.Saturate) - code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; - if (inst->Alpha.Saturate) - code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; - - code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); - code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); - code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); - - code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); - code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); - - code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; - code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; - code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; - - code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; - code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; - code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - - return GL_TRUE; -} - -static GLuint translate_strq_swizzle(struct prog_src_register src) -{ - GLuint swiz = 0; - int i; - for (i = 0; i < 4; i++) - swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; - return swiz; -} - -/** - * Emit a single TEX instruction - */ -static GLboolean emit_tex(void *data, struct prog_instruction *inst) -{ - PROG_CODE; - - if (code->inst_end >= 511) { - error("emit_tex: Too many instructions"); - return GL_FALSE; - } - - int ip = ++code->inst_end; - - code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->DstReg.WriteMask << 11) - | R500_INST_TEX_SEM_WAIT; - code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) - | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - - if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) - code->inst[ip].inst1 |= R500_TEX_UNSCALED; - - switch (inst->Opcode) { - case OPCODE_KIL: - code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; - break; - case OPCODE_TEX: - code->inst[ip].inst1 |= R500_TEX_INST_LD; - break; - case OPCODE_TXB: - code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; - break; - case OPCODE_TXP: - code->inst[ip].inst1 |= R500_TEX_INST_PROJ; - break; - default: - error("emit_tex can't handle opcode %x\n", inst->Opcode); - } - - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) - | (translate_strq_swizzle(inst->SrcReg[0]) << 8) - | R500_TEX_DST_ADDR(inst->DstReg.Index) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - - return GL_TRUE; -} - -static const struct radeon_pair_handler pair_handler = { - .EmitConst = emit_const, - .EmitPaired = emit_paired, - .EmitTex = emit_tex, - .MaxHwTemps = 128 -}; - -GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) -{ - struct r500_fragment_program_code *code = &compiler->code->r500; - - _mesa_bzero(code, sizeof(*code)); - code->max_temp_idx = 1; - code->inst_offset = 0; - code->inst_end = -1; - - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; - - if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { - /* This may happen when dead-code elimination is disabled or - * when most of the fragment program logic is leading to a KIL */ - if (code->inst_end >= 511) { - error("Introducing fake OUT: Too many instructions"); - return GL_FALSE; - } - - int ip = ++code->inst_end; - code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; - } - - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c deleted file mode 100644 index 202a8532b6..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * "Not-quite SSA" and Dead-Code Elimination. - * - * @note This code uses SWIZZLE_NIL in a source register to indicate that - * the corresponding component is ignored by the corresponding instruction. - */ - -#include "radeon_nqssadce.h" - - -/** - * Return the @ref register_state for the given register (or 0 for untracked - * registers, i.e. constants). - */ -static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_OUTPUT: return &s->Outputs[index]; - case PROGRAM_ADDRESS: return &s->Address; - default: return 0; - } -} - - -/** - * Left multiplication of a register with a swizzle - * - * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. - */ -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) -{ - struct prog_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = NEGATE_NONE; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - - -static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, - struct prog_instruction *inst, GLint src, GLuint sourced) -{ - int i; - GLuint deswz_source = 0; - - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) { - GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); - deswz_source |= 1 << swz; - } else { - inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - } - - if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { - struct prog_dst_register dstreg = inst->DstReg; - dstreg.File = PROGRAM_TEMPORARY; - dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - dstreg.WriteMask = sourced; - - s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - - inst = s->Program->Instructions + s->IP; - inst->SrcReg[src].File = PROGRAM_TEMPORARY; - inst->SrcReg[src].Index = dstreg.Index; - inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].Negate = NEGATE_NONE; - inst->SrcReg[src].Abs = 0; - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) - inst->SrcReg[src].Swizzle |= i << (3*i); - else - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - deswz_source = sourced; - } - - struct register_state *regstate; - - if (inst->SrcReg[src].RelAddr) { - regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); - if (regstate) - regstate->Sourced |= WRITEMASK_X; - } else { - regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - } - - return inst; -} - -static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) -{ - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int i; - for(i = 0; i < nsrc; ++i) - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) - inst->SrcReg[i].Index = newindex; -} - -static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) -{ - GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - int ip; - for(ip = 0; ip < s->IP; ++ip) { - struct prog_instruction* inst = s->Program->Instructions + ip; - if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) - inst->DstReg.Index = newindex; - unalias_srcregs(inst, oldindex, newindex); - } - unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); -} - - -/** - * Handle one instruction. - */ -static void process_instruction(struct nqssadce_state* s) -{ - struct prog_instruction *inst = s->Program->Instructions + s->IP; - - if (inst->Opcode == OPCODE_END) - return; - - if (inst->Opcode != OPCODE_KIL) { - struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); - if (!regstate) { - _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", - inst->DstReg.File, inst->DstReg.Index); - return; - } - - inst->DstReg.WriteMask &= regstate->Sourced; - regstate->Sourced &= ~inst->DstReg.WriteMask; - - if (inst->DstReg.WriteMask == 0) { - _mesa_delete_instructions(s->Program, s->IP, 1); - return; - } - - if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) - unalias_temporary(s, inst->DstReg.Index); - } - - /* Attention: Due to swizzle emulation code, the following - * might change the instruction stream under us, so we have - * to be careful with the inst pointer. */ - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MOV: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - case OPCODE_SGE: - case OPCODE_SLT: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - inst = track_used_srcreg(s, inst, 0, 0x1); - break; - case OPCODE_DP3: - inst = track_used_srcreg(s, inst, 0, 0x7); - inst = track_used_srcreg(s, inst, 1, 0x7); - break; - case OPCODE_DP4: - inst = track_used_srcreg(s, inst, 0, 0xf); - inst = track_used_srcreg(s, inst, 1, 0xf); - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - inst = track_used_srcreg(s, inst, 0, 0xf); - break; - case OPCODE_DST: - inst = track_used_srcreg(s, inst, 0, 0x6); - inst = track_used_srcreg(s, inst, 1, 0xa); - break; - case OPCODE_EXP: - case OPCODE_LOG: - case OPCODE_POW: - inst = track_used_srcreg(s, inst, 0, 0x3); - break; - case OPCODE_LIT: - inst = track_used_srcreg(s, inst, 0, 0xb); - break; - default: - _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); - return; - } -} - -static void calculateInputsOutputs(struct gl_program *p) -{ - struct prog_instruction *inst; - GLuint InputsRead, OutputsWritten; - - inst = p->Instructions; - InputsRead = 0; - OutputsWritten = 0; - while (inst->Opcode != OPCODE_END) - { - int i, num_src_regs; - - num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < num_src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) - InputsRead |= 1 << inst->SrcReg[i].Index; - } - - if (inst->DstReg.File == PROGRAM_OUTPUT) - OutputsWritten |= 1 << inst->DstReg.Index; - - ++inst; - } - - p->InputsRead = InputsRead; - p->OutputsWritten = OutputsWritten; -} - -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) -{ - struct nqssadce_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = p; - s.Descr = descr; - s.Descr->Init(&s); - s.IP = p->NumInstructions; - - while(s.IP > 0) { - s.IP--; - process_instruction(&s); - } - - calculateInputsOutputs(p); -} diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h deleted file mode 100644 index 8626f21c25..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_NQSSADCE_H_ -#define __RADEON_PROGRAM_NQSSADCE_H_ - -#include "radeon_program.h" - - -struct register_state { - /** - * Bitmask indicating which components of the register are sourced - * by later instructions. - */ - GLuint Sourced : 4; -}; - -/** - * Maintain state such as which registers are used, which registers are - * read from, etc. - */ -struct nqssadce_state { - GLcontext *Ctx; - struct gl_program *Program; - struct radeon_nqssadce_descr *Descr; - - /** - * All instructions after this instruction pointer have been dealt with. - */ - int IP; - - /** - * Which registers are read by subsequent instructions? - */ - struct register_state Temps[MAX_PROGRAM_TEMPS]; - struct register_state Outputs[VERT_RESULT_MAX]; - struct register_state Address; -}; - - -/** - * This structure contains a description of the hardware in-so-far as - * it is required for the NqSSA-DCE pass. - */ -struct radeon_nqssadce_descr { - /** - * Fill in which outputs - */ - void (*Init)(struct nqssadce_state *); - - /** - * Check whether the given swizzle, absolute and negate combination - * can be implemented natively by the hardware for this opcode. - */ - GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); - - /** - * Emit (at the current IP) the instruction MOV dst, src; - * The transformation will work recursively on the emitted instruction(s). - */ - void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - - void *Data; -}; - -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); - -#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c deleted file mode 100644 index da5e7aefce..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program.h" - -#include "shader/prog_print.h" - - -/** - * Transform the given clause in the following way: - * 1. Replace it with an empty clause - * 2. For every instruction in the original clause, try the given - * transformations in order. - * 3. If one of the transformations returns GL_TRUE, assume that it - * has emitted the appropriate instruction(s) into the new clause; - * otherwise, copy the instruction verbatim. - * - * \note The transformation is currently not recursive; in other words, - * instructions emitted by transformations are not transformed. - * - * \note The transform is called 'local' because it can only look at - * one instruction at a time. - */ -void radeonLocalTransform( - GLcontext *Ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations) -{ - struct radeon_transform_context ctx; - int ip; - - ctx.Ctx = Ctx; - ctx.Program = program; - ctx.OldInstructions = program->Instructions; - ctx.OldNumInstructions = program->NumInstructions; - - program->Instructions = 0; - program->NumInstructions = 0; - - for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { - struct prog_instruction *instr = ctx.OldInstructions + ip; - int i; - - for(i = 0; i < num_transformations; ++i) { - struct radeon_program_transformation* t = transformations + i; - - if (t->function(&ctx, instr, t->userData)) - break; - } - - if (i >= num_transformations) { - struct prog_instruction* dest = radeonAppendInstructions(program, 1); - _mesa_copy_instructions(dest, instr, 1); - } - } - - _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); -} - - -static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) -{ - GLuint i; - for (i = 0; i < count; i++) { - const struct prog_instruction *inst = insts + i; - const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < n; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; - } - } -} - -GLint radeonFindFreeTemporary(struct radeon_transform_context *t) -{ - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; - - _mesa_memset(used, 0, sizeof(used)); - scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); - scan_instructions(used, t->OldInstructions, t->OldNumInstructions); - - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { - if (!used[i]) - return i; - } - - return -1; -} - - -/** - * Append the given number of instructions to the program and return a - * pointer to the first new instruction. - */ -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) -{ - int oldnum = program->NumInstructions; - _mesa_insert_instructions(program, oldnum, count); - return program->Instructions + oldnum; -} diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h deleted file mode 100644 index 88474d43a2..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_H_ -#define __RADEON_PROGRAM_H_ - -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" -#include "shader/prog_instruction.h" - - -enum { - CLAUSE_MIXED = 0, - CLAUSE_ALU, - CLAUSE_TEX -}; - -enum { - PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ -}; - -enum { - OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ -}; - -#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) -#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) - -static inline GLuint get_swz(GLuint swz, GLuint idx) -{ - if (idx & 0x4) - return idx; - return GET_SWZ(swz, idx); -} - -static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) -{ - GLuint ret = 0; - - ret |= get_swz(src, swz_x); - ret |= get_swz(src, swz_y) << 3; - ret |= get_swz(src, swz_z) << 6; - ret |= get_swz(src, swz_w) << 9; - - return ret; -} - -static inline GLuint combine_swizzles(GLuint src, GLuint swz) -{ - GLuint ret = 0; - - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; - - return ret; -} - - -/** - * Transformation context that is passed to local transformations. - * - * Care must be taken with some operations during transformation, - * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary - */ -struct radeon_transform_context { - GLcontext *Ctx; - struct gl_program *Program; - struct prog_instruction *OldInstructions; - GLuint OldNumInstructions; -}; - -/** - * A transformation that can be passed to \ref radeonLocalTransform. - * - * The function will be called once for each instruction. - * It has to either emit the appropriate transformed code for the instruction - * and return GL_TRUE, or return GL_FALSE if it doesn't understand the - * instruction. - * - * The function gets passed the userData as last parameter. - */ -struct radeon_program_transformation { - GLboolean (*function)( - struct radeon_transform_context*, - struct prog_instruction*, - void*); - void *userData; -}; - -void radeonLocalTransform( - GLcontext* ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations); - -/** - * Find a usable free temporary register during program transformation - */ -GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); - -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); - -#endif diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c deleted file mode 100644 index 8283723bad..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ /dev/null @@ -1,635 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * Shareable transformations that transform "special" ALU instructions - * into ALU instructions that are supported by hardware. - * - */ - -#include "radeon_program_alu.h" - -#include "shader/prog_parameter.h" - - -static struct prog_instruction *emit1(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg; - return fpi; -} - -static struct prog_instruction *emit2(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - return fpi; -} - -static struct prog_instruction *emit3(struct gl_program* p, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, - struct prog_src_register SrcReg2) -{ - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - fpi->SrcReg[2] = SrcReg2; - return fpi; -} - -static struct prog_dst_register dstreg(int file, int index) -{ - struct prog_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = WRITEMASK_XYZW; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; - return dst; -} - -static struct prog_dst_register dstregtmpmask(int index, int mask) -{ - struct prog_dst_register dst; - dst.File = PROGRAM_TEMPORARY; - dst.Index = index; - dst.WriteMask = mask; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; - return dst; -} - -static const struct prog_src_register builtin_zero = { - .File = PROGRAM_BUILTIN, - .Index = 0, - .Swizzle = SWIZZLE_0000 -}; -static const struct prog_src_register builtin_one = { - .File = PROGRAM_BUILTIN, - .Index = 0, - .Swizzle = SWIZZLE_1111 -}; -static const struct prog_src_register srcreg_undefined = { - .File = PROGRAM_UNDEFINED, - .Index = 0, - .Swizzle = SWIZZLE_NOOP -}; - -static struct prog_src_register srcreg(int file, int index) -{ - struct prog_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - return src; -} - -static struct prog_src_register srcregswz(int file, int index, int swz) -{ - struct prog_src_register src = srcreg_undefined; - src.File = file; - src.Index = index; - src.Swizzle = swz; - return src; -} - -static struct prog_src_register absolute(struct prog_src_register reg) -{ - struct prog_src_register newreg = reg; - newreg.Abs = 1; - newreg.Negate = NEGATE_NONE; - return newreg; -} - -static struct prog_src_register negate(struct prog_src_register reg) -{ - struct prog_src_register newreg = reg; - newreg.Negate = newreg.Negate ^ NEGATE_XYZW; - return newreg; -} - -static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) -{ - struct prog_src_register swizzled = reg; - swizzled.Swizzle = MAKE_SWIZZLE4( - x >= 4 ? x : GET_SWZ(reg.Swizzle, x), - y >= 4 ? y : GET_SWZ(reg.Swizzle, y), - z >= 4 ? z : GET_SWZ(reg.Swizzle, z), - w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); - return swizzled; -} - -static struct prog_src_register scalar(struct prog_src_register reg) -{ - return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); -} - -static void transform_ABS(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - struct prog_src_register src = inst->SrcReg[0]; - src.Abs = 1; - src.Negate = NEGATE_NONE; - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); -} - -static void transform_DPH(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - src0.Negate &= ~NEGATE_W; - src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= SWIZZLE_ONE << (3 * 3); - emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); -} - -/** - * [1, src0.y*src1.y, src0.z, src1.w] - * So basically MUL with lotsa swizzling. - */ -static void transform_DST(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); -} - -static void transform_FLR(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); -} - -/** - * Definition of LIT (from ARB_fragment_program): - * - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots, if the subsequent optimization passes are clever enough - * to pair instructions correctly. - */ -static void transform_LIT(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - static const GLfloat LitConst[4] = { -127.999999 }; - - GLuint constant; - GLuint constant_swizzle; - GLuint temp; - int needTemporary = 0; - struct prog_src_register srctemp; - - constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); - - if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } - - if (needTemporary) { - temp = radeonFindFreeTemporary(t); - } else { - temp = inst->DstReg.Index; - } - srctemp = srcreg(PROGRAM_TEMPORARY, temp); - - // tmp.x = max(0.0, Src.x); - // tmp.y = max(0.0, Src.y); - // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(t->Program, OPCODE_MAX, 0, - dstregtmpmask(temp, WRITEMASK_XYW), - inst->SrcReg[0], - swizzle(srcreg(PROGRAM_CONSTANT, constant), - SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(t->Program, OPCODE_MIN, 0, - dstregtmpmask(temp, WRITEMASK_Z), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); - - // tmp.w = Pow(tmp.y, tmp.w) - emit1(t->Program, OPCODE_LG2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(t->Program, OPCODE_MUL, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(t->Program, OPCODE_EX2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - - // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, - dstregtmpmask(temp, WRITEMASK_Z), - negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - builtin_zero); - - // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, - dstregtmpmask(temp, WRITEMASK_XYW), - swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); - - if (needTemporary) - emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); -} - -static void transform_LRP(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, - dstreg(PROGRAM_TEMPORARY, tempreg), - inst->SrcReg[1], negate(inst->SrcReg[2])); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, - inst->DstReg, - inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); -} - -static void transform_POW(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); - struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); - tempdst.WriteMask = WRITEMASK_W; - tempsrc.Swizzle = SWIZZLE_WWWW; - - emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); - emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); - emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); -} - -static void transform_RSQ(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); -} - -static void transform_SGE(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); -} - -static void transform_SLT(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); -} - -static void transform_SUB(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); -} - -static void transform_SWZ(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); -} - -static void transform_XPD(struct radeon_transform_context* t, - struct prog_instruction* inst) -{ - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - negate(srcreg(PROGRAM_TEMPORARY, tempreg))); -} - - -/** - * Can be used as a transformation for @ref radeonClauseLocalTransform, - * no userData necessary. - * - * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD - * using: - * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP - * - * Transforms RSQ to Radeon's native RSQ by explicitly setting - * absolute value. - * - * @note should be applicable to R300 and R500 fragment programs. - */ -GLboolean radeonTransformALU(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - switch(inst->Opcode) { - case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; - default: - return GL_FALSE; - } -} - - -static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) -{ - static const GLfloat SinCosConsts[2][4] = { - { - 1.273239545, // 4/PI - -0.405284735, // -4/(PI*PI) - 3.141592654, // PI - 0.2225 // weight - }, - { - 0.75, - 0.5, - 0.159154943, // 1/(2*PI) - 6.283185307 // 2*PI - } - }; - int i; - - for(i = 0; i < 2; ++i) { - GLuint swz; - constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); - ASSERT(swz == SWIZZLE_NOOP); - } -} - -/** - * Approximate sin(x), where x is clamped to (-pi/2, pi/2). - * - * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } - * MAD tmp.x, tmp.y, |src|, tmp.x - * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x - * MAD dest, tmp.y, weight, tmp.x - */ -static void sin_approx(struct radeon_transform_context* t, - struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) -{ - GLuint tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcreg(PROGRAM_CONSTANT, constants[0])); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(t->Program, OPCODE_MAD, 0, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); -} - -/** - * Translate the trigonometric functions COS, SIN, and SCS - * using only the basic instructions - * MOV, ADD, MUL, MAD, FRC - */ -GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) - return GL_FALSE; - - GLuint constants[2]; - GLuint tempreg = radeonFindFreeTemporary(t); - - sincos_constants(t, constants); - - if (inst->Opcode == OPCODE_COS) { - // MAD tmp.x, src, 1/(2*PI), 0.75 - // FRC tmp.x, tmp.x - // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(t, inst->DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - constants); - } else if (inst->Opcode == OPCODE_SIN) { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(t, inst->DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - constants); - } else { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - struct prog_dst_register dst = inst->DstReg; - - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; - sin_approx(t, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - constants); - - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; - sin_approx(t, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - constants); - } - - return GL_TRUE; -} - - -/** - * Transform the trigonometric functions COS, SIN, and SCS - * to include pre-scaling by 1/(2*PI) and taking the fractional - * part, so that the input to COS and SIN is always in the range [0,1). - * SCS is replaced by one COS and one SIN instruction. - * - * @warning This transformation implicitly changes the semantics of SIN and COS! - */ -GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) - return GL_FALSE; - - static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; - GLuint temp; - GLuint constant; - GLuint constant_swizzle; - - temp = radeonFindFreeTemporary(t); - constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); - - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), - srcreg(PROGRAM_TEMPORARY, temp)); - - if (inst->Opcode == OPCODE_COS) { - emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SIN) { - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, - inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->DstReg; - - if (inst->DstReg.WriteMask & WRITEMASK_X) { - moddst.WriteMask = WRITEMASK_X; - emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } - if (inst->DstReg.WriteMask & WRITEMASK_Y) { - moddst.WriteMask = WRITEMASK_Y; - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } - } - - return GL_TRUE; -} - -/** - * Rewrite DDX/DDY instructions to properly work with r5xx shaders. - * The r5xx MDH/MDV instruction provides per-quad partial derivatives. - * It takes the form A*B+C. A and C are set by setting src0. B should be -1. - * - * @warning This explicitly changes the form of DDX and DDY! - */ - -GLboolean radeonTransformDeriv(struct radeon_transform_context* t, - struct prog_instruction* inst, - void* unused) -{ - if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) - return GL_FALSE; - - struct prog_src_register B = inst->SrcReg[1]; - - B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, - SWIZZLE_ONE, SWIZZLE_ONE); - B.Negate = NEGATE_XYZW; - - emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], B); - - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h deleted file mode 100644 index b45958115c..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_ALU_H_ -#define __RADEON_PROGRAM_ALU_H_ - -#include "radeon_program.h" - -GLboolean radeonTransformALU( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformTrigSimple( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformTrigScale( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -GLboolean radeonTransformDeriv( - struct radeon_transform_context *t, - struct prog_instruction*, - void*); - -#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c deleted file mode 100644 index d6fb474cf2..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ /dev/null @@ -1,1004 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * Perform temporary register allocation and attempt to pair off instructions - * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction - * vs. ALU instruction scheduling. - */ - -#include "radeon_program_pair.h" - -#include "radeon_common.h" - -#include "shader/prog_print.h" - -#define error(fmt, args...) do { \ - _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - s->Error = GL_TRUE; \ -} while(0) - -struct pair_state_instruction { - GLuint IsTex:1; /**< Is a texture instruction */ - GLuint NeedRGB:1; /**< Needs the RGB ALU */ - GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ - GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ - - /** - * Number of (read and write) dependencies that must be resolved before - * this instruction can be scheduled. - */ - GLuint NumDependencies:5; - - /** - * Next instruction in the linked list of ready instructions. - */ - struct pair_state_instruction *NextReady; - - /** - * Values that this instruction writes - */ - struct reg_value *Values[4]; -}; - - -/** - * Used to keep track of which instructions read a value. - */ -struct reg_value_reader { - GLuint IP; /**< IP of the instruction that performs this access */ - struct reg_value_reader *Next; -}; - -/** - * Used to keep track which values are stored in each component of a - * PROGRAM_TEMPORARY. - */ -struct reg_value { - GLuint IP; /**< IP of the instruction that writes this value */ - struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ - - /** - * Unordered linked list of instructions that read from this value. - */ - struct reg_value_reader *Readers; - - /** - * Number of readers of this value. This is calculated during @ref scan_instructions - * and continually decremented during code emission. - * When this count reaches zero, the instruction that writes the @ref Next value - * can be scheduled. - */ - GLuint NumReaders; -}; - -/** - * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register - * to the proper hardware temporary. - */ -struct pair_register_translation { - GLuint Allocated:1; - GLuint HwIndex:8; - GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ - - /** - * Notes the value that is currently contained in each component - * (only used for PROGRAM_TEMPORARY registers). - */ - struct reg_value *Value[4]; -}; - -struct pair_state { - GLcontext *Ctx; - struct gl_program *Program; - const struct radeon_pair_handler *Handler; - GLboolean Error; - GLboolean Debug; - GLboolean Verbose; - void *UserData; - - /** - * Translate Mesa registers to hardware registers - */ - struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; - struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; - - /** - * Derived information about program instructions. - */ - struct pair_state_instruction *Instructions; - - struct { - GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ - } HwTemps[128]; - - /** - * Linked list of instructions that can be scheduled right now, - * based on which ALU/TEX resources they require. - */ - struct pair_state_instruction *ReadyFullALU; - struct pair_state_instruction *ReadyRGB; - struct pair_state_instruction *ReadyAlpha; - struct pair_state_instruction *ReadyTEX; - - /** - * Pool of @ref reg_value structures for fast allocation. - */ - struct reg_value *ValuePool; - GLuint ValuePoolUsed; - struct reg_value_reader *ReaderPool; - GLuint ReaderPoolUsed; -}; - - -static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_INPUT: return &s->Inputs[index]; - default: return 0; - } -} - -static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) -{ - struct pair_register_translation *t = get_register(s, file, index); - ASSERT(!s->HwTemps[hwindex].RefCount); - ASSERT(!t->Allocated); - s->HwTemps[hwindex].RefCount = t->RefCount; - t->Allocated = 1; - t->HwIndex = hwindex; -} - -static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) -{ - GLuint hwindex; - - struct pair_register_translation *t = get_register(s, file, index); - if (!t) { - _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); - return 0; - } - - if (t->Allocated) - return t->HwIndex; - - for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) - if (!s->HwTemps[hwindex].RefCount) - break; - - if (hwindex >= s->Handler->MaxHwTemps) { - error("Ran out of hardware temporaries"); - return 0; - } - - alloc_hw_reg(s, file, index, hwindex); - return hwindex; -} - - -static void deref_hw_reg(struct pair_state *s, GLuint hwindex) -{ - if (!s->HwTemps[hwindex].RefCount) { - error("Hwindex %i refcount error", hwindex); - return; - } - - s->HwTemps[hwindex].RefCount--; -} - -static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) -{ - pairinst->NextReady = *list; - *list = pairinst; -} - -/** - * The instruction at the given IP has become ready. Link it into the ready - * instructions. - */ -static void instruction_ready(struct pair_state *s, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - - if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", ip); - - if (pairinst->IsTex) - add_pairinst_to_list(&s->ReadyTEX, pairinst); - else if (!pairinst->NeedAlpha) - add_pairinst_to_list(&s->ReadyRGB, pairinst); - else if (!pairinst->NeedRGB) - add_pairinst_to_list(&s->ReadyAlpha, pairinst); - else - add_pairinst_to_list(&s->ReadyFullALU, pairinst); -} - - -/** - * Finally rewrite ADD, MOV, MUL as the appropriate native instruction - * and reverse the order of arguments for CMP. - */ -static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) -{ - struct prog_src_register tmp; - - switch(inst->Opcode) { - case OPCODE_ADD: - inst->SrcReg[2] = inst->SrcReg[1]; - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].Negate = NEGATE_NONE; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_CMP: - tmp = inst->SrcReg[2]; - inst->SrcReg[2] = inst->SrcReg[0]; - inst->SrcReg[0] = tmp; - break; - case OPCODE_MOV: - /* AMD say we should use CMP. - * However, when we transform - * KIL -r0; - * into - * CMP tmp, -r0, -r0, 0; - * KIL tmp; - * we get incorrect behaviour on R500 when r0 == 0.0. - * It appears that the R500 KIL hardware treats -0.0 as less - * than zero. - */ - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_MUL: - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - default: - /* nothing to do */ - break; - } -} - - -/** - * Classify an instruction according to which ALUs etc. it needs - */ -static void classify_instruction(struct pair_state *s, - struct prog_instruction *inst, struct pair_state_instruction *pairinst) -{ - pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; - - switch(inst->Opcode) { - case OPCODE_ADD: - case OPCODE_CMP: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MAD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MOV: - case OPCODE_MUL: - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - pairinst->IsTranscendent = 1; - pairinst->NeedAlpha = 1; - break; - case OPCODE_DP4: - pairinst->NeedAlpha = 1; - /* fall through */ - case OPCODE_DP3: - pairinst->NeedRGB = 1; - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - case OPCODE_END: - pairinst->IsTex = 1; - break; - default: - error("Unknown opcode %d\n", inst->Opcode); - break; - } -} - - -/** - * Count which (input, temporary) register is read and written how often, - * and scan the instruction stream to find dependencies. - */ -static void scan_instructions(struct pair_state *s) -{ - struct prog_instruction *inst; - struct pair_state_instruction *pairinst; - GLuint ip; - - for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; - inst->Opcode != OPCODE_END; - ++inst, ++pairinst, ++ip) { - final_rewrite(s, inst); - classify_instruction(s, inst, pairinst); - - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int j; - for(j = 0; j < nsrc; j++) { - struct pair_register_translation *t = - get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); - if (!t) - continue; - - t->RefCount++; - - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - int i; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); - if (swz >= 4) - continue; /* constant or NIL swizzle */ - if (!t->Value[swz]) - continue; /* this is an undefined read */ - - /* Do not add a dependency if this instruction - * also rewrites the value. The code below adds - * a dependency for the DstReg, which is a superset - * of the SrcReg dependency. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[j].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) - continue; - - struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; - pairinst->NumDependencies++; - t->Value[swz]->NumReaders++; - r->IP = ip; - r->Next = t->Value[swz]->Readers; - t->Value[swz]->Readers = r; - } - } - } - - int ndst = _mesa_num_inst_dst_regs(inst->Opcode); - if (ndst) { - struct pair_register_translation *t = - get_register(s, inst->DstReg.File, inst->DstReg.Index); - if (t) { - t->RefCount++; - - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - int j; - for(j = 0; j < 4; ++j) { - if (!GET_BIT(inst->DstReg.WriteMask, j)) - continue; - - struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; - v->IP = ip; - if (t->Value[j]) { - pairinst->NumDependencies++; - t->Value[j]->Next = v; - } - t->Value[j] = v; - pairinst->Values[j] = v; - } - } - } - } - - if (s->Verbose) - _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); - - if (!pairinst->NumDependencies) - instruction_ready(s, ip); - } - - /* Clear the PROGRAM_TEMPORARY state */ - int i, j; - for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { - for(j = 0; j < 4; ++j) - s->Temps[i].Value[j] = 0; - } -} - - -/** - * Reserve hardware temporary registers for the program inputs. - * - * @note This allocation is performed explicitly, because the order of inputs - * is determined by the RS hardware. - */ -static void allocate_input_registers(struct pair_state *s) -{ - GLuint InputsRead = s->Program->InputsRead; - int i; - GLuint hwindex = 0; - - /* Primary colour */ - if (InputsRead & FRAG_BIT_COL0) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); - InputsRead &= ~FRAG_BIT_COL1; - - /* Texcoords */ - for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* Fogcoords treated as a texcoord */ - if (InputsRead & FRAG_BIT_FOGC) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); - InputsRead &= ~FRAG_BIT_FOGC; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); - InputsRead &= ~FRAG_BIT_WPOS; - - /* Anything else */ - if (InputsRead) - error("Don't know how to handle inputs 0x%x\n", InputsRead); -} - - -static void decrement_dependencies(struct pair_state *s, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - ASSERT(pairinst->NumDependencies > 0); - if (!--pairinst->NumDependencies) - instruction_ready(s, ip); -} - -/** - * Update the dependency tracking state based on what the instruction - * at the given IP does. - */ -static void commit_instruction(struct pair_state *s, int ip) -{ - struct prog_instruction *inst = s->Program->Instructions + ip; - struct pair_state_instruction *pairinst = s->Instructions + ip; - - if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", ip); - - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; - deref_hw_reg(s, t->HwIndex); - - int i; - for(i = 0; i < 4; ++i) { - if (!GET_BIT(inst->DstReg.WriteMask, i)) - continue; - - t->Value[i] = pairinst->Values[i]; - if (t->Value[i]->NumReaders) { - struct reg_value_reader *r; - for(r = pairinst->Values[i]->Readers; r; r = r->Next) - decrement_dependencies(s, r->IP); - } else if (t->Value[i]->Next) { - /* This happens when the only reader writes - * the register at the same time */ - decrement_dependencies(s, t->Value[i]->Next->IP); - } - } - } - - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int i; - for(i = 0; i < nsrc; i++) { - struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (!t) - continue; - - deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); - - if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) - continue; - - int j; - for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz >= 4) - continue; - if (!t->Value[swz]) - continue; - - /* Do not free a dependency if this instruction - * also rewrites the value. See scan_instructions. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[i].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) - continue; - - if (!--t->Value[swz]->NumReaders) { - if (t->Value[swz]->Next) - decrement_dependencies(s, t->Value[swz]->Next->IP); - } - } - } -} - - -/** - * Emit all ready texture instructions in a single block. - * - * Emit as a single block to (hopefully) sample many textures in parallel, - * and to avoid hardware indirections on R300. - * - * In R500, we don't really know when the result of a texture instruction - * arrives. So allocate all destinations first, to make sure they do not - * arrive early and overwrite a texture coordinate we're going to use later - * in the block. - */ -static void emit_all_tex(struct pair_state *s) -{ - struct pair_state_instruction *readytex; - struct pair_state_instruction *pairinst; - - ASSERT(s->ReadyTEX); - - // Don't let the ready list change under us! - readytex = s->ReadyTEX; - s->ReadyTEX = 0; - - // Allocate destination hardware registers in one block to avoid conflicts. - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - if (inst->Opcode != OPCODE_KIL) - get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - } - - if (s->Debug) - _mesa_printf(" BEGIN_TEX\n"); - - if (s->Handler->BeginTexBlock) - s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); - - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - commit_instruction(s, ip); - - if (inst->Opcode != OPCODE_KIL) - inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - - if (s->Debug) { - _mesa_printf(" "); - _mesa_print_instruction(inst); - fflush(stdout); - } - s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); - } - - if (s->Debug) - _mesa_printf(" END_TEX\n"); -} - - -static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, - struct prog_src_register src, GLboolean rgb, GLboolean alpha) -{ - int candidate = -1; - int candidate_quality = -1; - int i; - - if (!rgb && !alpha) - return 0; - - GLuint constant; - GLuint index; - - if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { - constant = 0; - index = get_hw_reg(s, src.File, src.Index); - } else { - constant = 1; - s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); - } - - for(i = 0; i < 3; ++i) { - int q = 0; - if (rgb) { - if (pair->RGB.Src[i].Used) { - if (pair->RGB.Src[i].Constant != constant || - pair->RGB.Src[i].Index != index) - continue; - q++; - } - } - if (alpha) { - if (pair->Alpha.Src[i].Used) { - if (pair->Alpha.Src[i].Constant != constant || - pair->Alpha.Src[i].Index != index) - continue; - q++; - } - } - if (q > candidate_quality) { - candidate_quality = q; - candidate = i; - } - } - - if (candidate >= 0) { - if (rgb) { - pair->RGB.Src[candidate].Used = 1; - pair->RGB.Src[candidate].Constant = constant; - pair->RGB.Src[candidate].Index = index; - } - if (alpha) { - pair->Alpha.Src[candidate].Used = 1; - pair->Alpha.Src[candidate].Constant = constant; - pair->Alpha.Src[candidate].Index = index; - } - } - - return candidate; -} - -/** - * Fill the given ALU instruction's opcodes and source operands into the given pair, - * if possible. - */ -static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; - - ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); - ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); - - if (pairinst->NeedRGB) { - if (pairinst->IsTranscendent) - pair->RGB.Opcode = OPCODE_REPL_ALPHA; - else - pair->RGB.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->RGB.Saturate = 1; - } - if (pairinst->NeedAlpha) { - pair->Alpha.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->Alpha.Saturate = 1; - } - - int nargs = _mesa_num_inst_src_regs(inst->Opcode); - int i; - - /* Special case for DDX/DDY (MDH/MDV). */ - if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { - if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) - return GL_FALSE; - else - nargs++; - } - - for(i = 0; i < nargs; ++i) { - int source; - if (pairinst->NeedRGB && !pairinst->IsTranscendent) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - int j; - for(j = 0; j < 3; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - } - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; - pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); - } - if (pairinst->NeedAlpha) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = swz; - pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); - } - } - - return GL_TRUE; -} - - -/** - * Fill in the destination register information. - * - * This is split from filling in source registers because we want - * to avoid allocating hardware temporaries for destinations until - * we are absolutely certain that we're going to emit a certain - * instruction pairing. - */ -static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) -{ - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; - - if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLOR) { - pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } else { - GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - if (pairinst->NeedRGB) { - pair->RGB.DestIndex = hwindex; - pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - } - if (pairinst->NeedAlpha) { - pair->Alpha.DestIndex = hwindex; - pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } -} - - -/** - * Find a good ALU instruction or pair of ALU instruction and emit it. - * - * Prefer emitting full ALU instructions, so that when we reach a point - * where no full ALU instruction can be emitted, we have more candidates - * for RGB/Alpha pairing. - */ -static void emit_alu(struct pair_state *s) -{ - struct radeon_pair_instruction pair; - - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - int ip; - if (s->ReadyFullALU) { - ip = s->ReadyFullALU - s->Instructions; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - ip = s->ReadyRGB - s->Instructions; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - ip = s->ReadyAlpha - s->Instructions; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); - } else { - struct pair_state_instruction **prgb; - struct pair_state_instruction **palpha; - - /* Some pairings might fail because they require too - * many source slots; try all possible pairings if necessary */ - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - int rgbip = *prgb - s->Instructions; - int alphaip = *palpha - s->Instructions; - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, rgbip); - if (!fill_instruction_into_pair(s, &pair, alphaip)) - continue; - *prgb = (*prgb)->NextReady; - *palpha = (*palpha)->NextReady; - fill_dest_into_pair(s, &pair, rgbip); - fill_dest_into_pair(s, &pair, alphaip); - commit_instruction(s, rgbip); - commit_instruction(s, alphaip); - goto success; - } - } - - /* No success in pairing; just take the first RGB instruction */ - int ip = s->ReadyRGB - s->Instructions; - s->ReadyRGB = s->ReadyRGB->NextReady; - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); - success: ; - } - - if (s->Debug) - radeonPrintPairInstruction(&pair); - - s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); -} - - -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, - const struct radeon_pair_handler* handler, void *userdata) -{ - struct pair_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = _mesa_clone_program(ctx, program); - s.Handler = handler; - s.UserData = userdata; - s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; - s.Verbose = GL_FALSE && s.Debug; - - s.Instructions = (struct pair_state_instruction*)_mesa_calloc( - sizeof(struct pair_state_instruction)*s.Program->NumInstructions); - s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); - s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( - sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); - - if (s.Debug) - _mesa_printf("Emit paired program\n"); - - scan_instructions(&s); - allocate_input_registers(&s); - - while(!s.Error && - (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { - if (s.ReadyTEX) - emit_all_tex(&s); - - while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) - emit_alu(&s); - } - - if (s.Debug) - _mesa_printf(" END\n"); - - _mesa_free(s.Instructions); - _mesa_free(s.ValuePool); - _mesa_free(s.ReaderPool); - - _mesa_reference_program(ctx, &s.Program, NULL); - - return !s.Error; -} - - -static void print_pair_src(int i, struct radeon_pair_instruction_source* src) -{ - _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); -} - -static const char* opcode_string(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return "SOP"; - else - return _mesa_opcode_string(opcode); -} - -static int num_pairinst_args(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return 0; - else - return _mesa_num_inst_src_regs(opcode); -} - -static char swizzle_char(GLuint swz) -{ - switch(swz) { - case SWIZZLE_X: return 'x'; - case SWIZZLE_Y: return 'y'; - case SWIZZLE_Z: return 'z'; - case SWIZZLE_W: return 'w'; - case SWIZZLE_ZERO: return '0'; - case SWIZZLE_ONE: return '1'; - case SWIZZLE_NIL: return '_'; - default: return '?'; - } -} - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) -{ - int nargs; - int i; - - _mesa_printf(" RGB: "); - for(i = 0; i < 3; ++i) { - if (inst->RGB.Src[i].Used) - print_pair_src(i, inst->RGB.Src + i); - } - _mesa_printf("\n"); - _mesa_printf(" Alpha:"); - for(i = 0; i < 3; ++i) { - if (inst->Alpha.Src[i].Used) - print_pair_src(i, inst->Alpha.Src + i); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); - if (inst->RGB.WriteMask) - _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, - (inst->RGB.WriteMask & 1) ? "x" : "", - (inst->RGB.WriteMask & 2) ? "y" : "", - (inst->RGB.WriteMask & 4) ? "z" : ""); - if (inst->RGB.OutputWriteMask) - _mesa_printf(" COLOR.%s%s%s", - (inst->RGB.OutputWriteMask & 1) ? "x" : "", - (inst->RGB.OutputWriteMask & 2) ? "y" : "", - (inst->RGB.OutputWriteMask & 4) ? "z" : ""); - nargs = num_pairinst_args(inst->RGB.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; - const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), - abs); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); - if (inst->Alpha.WriteMask) - _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); - if (inst->Alpha.OutputWriteMask) - _mesa_printf(" COLOR.w"); - if (inst->Alpha.DepthWriteMask) - _mesa_printf(" DEPTH.w"); - nargs = num_pairinst_args(inst->Alpha.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; - const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, - swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); - } - _mesa_printf("\n"); -} diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.h b/src/mesa/drivers/dri/r300/radeon_program_pair.h deleted file mode 100644 index 4624a24629..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_PAIR_H_ -#define __RADEON_PROGRAM_PAIR_H_ - -#include "radeon_program.h" - - -/** - * Represents a paired instruction, as found in R300 and R500 - * fragment programs. - */ -struct radeon_pair_instruction_source { - GLuint Index:8; - GLuint Constant:1; - GLuint Used:1; -}; - -struct radeon_pair_instruction_rgb { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:3; - GLuint OutputWriteMask:3; - GLuint Saturate:1; - - struct radeon_pair_instruction_source Src[3]; - - struct { - GLuint Source:2; - GLuint Swizzle:9; - GLuint Abs:1; - GLuint Negate:1; - } Arg[3]; -}; - -struct radeon_pair_instruction_alpha { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:1; - GLuint OutputWriteMask:1; - GLuint DepthWriteMask:1; - GLuint Saturate:1; - - struct radeon_pair_instruction_source Src[3]; - - struct { - GLuint Source:2; - GLuint Swizzle:3; - GLuint Abs:1; - GLuint Negate:1; - } Arg[3]; -}; - -struct radeon_pair_instruction { - struct radeon_pair_instruction_rgb RGB; - struct radeon_pair_instruction_alpha Alpha; -}; - - -/** - * - */ -struct radeon_pair_handler { - /** - * Fill in the proper hardware index for the given constant register. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); - - /** - * Write a paired instruction to the hardware. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); - - /** - * Write a texture instruction to the hardware. - * Register indices have already been rewritten to the allocated - * hardware register numbers. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitTex)(void*, struct prog_instruction*); - - /** - * Called before a block of contiguous, independent texture - * instructions is emitted. - */ - GLboolean (*BeginTexBlock)(void*); - - GLuint MaxHwTemps; -}; - -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, - const struct radeon_pair_handler*, void *userdata); - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); - -#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 118e74008f..5f1af5b0da 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -57,7 +57,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_tex.h" #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) #include "r300_context.h" -#include "r300_fragprog.h" #include "r300_tex.h" #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) #include "r600_context.h" @@ -150,6 +149,9 @@ extern const struct dri_extension point_extensions[]; #elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) +#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 +#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 + /* TODO: integrate these into xmlpool.h! */ #define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \ DRI_CONF_OPT_BEGIN_V(texture_image_units,int,def, # min ":" # max ) \ -- cgit v1.2.3 From e629c50e2be03144e8aeef3c51624ae8db9957b8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 28 Jul 2009 17:59:54 -0400 Subject: r600: implement texture border color --- src/mesa/drivers/dri/r600/r600_tex.c | 14 ++++++-------- src/mesa/drivers/dri/r600/r700_render.c | 9 +++++++++ src/mesa/drivers/dri/radeon/radeon_common_context.h | 5 +++++ 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index a1e8945390..444024ee7e 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -269,14 +269,12 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4]) { -#if 0 - GLubyte c[4]; - CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); - CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); - CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); - CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); - t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); -#endif + t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3])); + t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2])); + t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1])); + t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[0])); + SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER, + BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask); } /** diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index f2fec27eff..4e0d5391d0 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -187,6 +187,15 @@ GLboolean r700SendTextureState(context_t *context) R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(2 + 4); + R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); + END_BATCH(); + COMMIT_BATCH(); } } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index e4a8da0596..0cdacb1c36 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -239,6 +239,11 @@ struct radeon_tex_obj { GLuint SQ_TEX_SAMPLER1; GLuint SQ_TEX_SAMPLER2; + GLuint TD_PS_SAMPLER0_BORDER_RED; + GLuint TD_PS_SAMPLER0_BORDER_GREEN; + GLuint TD_PS_SAMPLER0_BORDER_BLUE; + GLuint TD_PS_SAMPLER0_BORDER_ALPHA; + GLboolean border_fallback; -- cgit v1.2.3 From b116f57bacb79205a1f80c7055964c60b402a19d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 29 Jul 2009 18:06:20 -0400 Subject: r600: fix texture pitch alignment fixes texwrap --- src/mesa/drivers/dri/r600/r600_texstate.c | 6 ++++-- src/mesa/drivers/dri/radeon/radeon_common_context.c | 10 +++++++++- src/mesa/drivers/dri/radeon/radeon_common_context.h | 2 ++ src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 9 ++++++--- 4 files changed, 21 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 70dd540481..082bfd75f6 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -556,7 +556,7 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex radeonTexObj *t = radeon_tex_obj(texObj); const struct gl_texture_image *firstImage; int firstlevel = t->mt ? t->mt->firstLevel : 0; - GLuint uTexelPitch; + GLuint uTexelPitch, row_align;; firstImage = t->base.Image[0][firstlevel]; @@ -595,7 +595,9 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex return; } - uTexelPitch = (firstImage->Width + R700_TEXEL_PITCH_ALIGNMENT_MASK) + row_align = rmesa->radeon.texture_row_align - 1; + uTexelPitch = ((firstImage->Width * t->mt->bpp + row_align) & ~row_align) / t->mt->bpp; + uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; /* min pitch is 8 */ diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index a50cd056e1..4e4eba5d94 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -241,7 +241,15 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->texture_depth = ( glVisual->rgbBits > 16 ) ? DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; - radeon->texture_row_align = 32; + if (IS_R600_CLASS(radeon->radeonScreen)) { + radeon->texture_row_align = 256; + radeon->texture_rect_row_align = 256; + radeon->texture_compressed_row_align = 256; + } else { + radeon->texture_row_align = 32; + radeon->texture_rect_row_align = 64; + radeon->texture_compressed_row_align = 64; + } return GL_TRUE; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index 0cdacb1c36..cd1986e1fc 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -429,6 +429,8 @@ struct radeon_context { int texture_depth; float initialMaxAnisotropy; uint32_t texture_row_align; + uint32_t texture_rect_row_align; + uint32_t texture_compressed_row_align; struct radeon_dma dma; struct radeon_hw_state hw; diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index f04a07fecd..071a18e7d8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -90,16 +90,18 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree GLuint face, GLuint level, GLuint* curOffset) { radeon_mipmap_level *lvl = &mt->levels[level]; - uint32_t row_align = rmesa->texture_row_align - 1; + uint32_t row_align; /* Find image size in bytes */ if (mt->compressed) { /* TODO: Is this correct? Need test cases for compressed textures! */ - lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; + row_align = rmesa->texture_compressed_row_align - 1; + lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx, lvl->width, lvl->height, lvl->depth, mt->compressed); } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { - lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; + row_align = rmesa->texture_rect_row_align - 1; + lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; lvl->size = lvl->rowstride * lvl->height; } else if (mt->tilebits & RADEON_TXO_MICRO_TILE) { /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, @@ -108,6 +110,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31; lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; } else { + row_align = rmesa->texture_row_align - 1; lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; lvl->size = lvl->rowstride * lvl->height * lvl->depth; } -- cgit v1.2.3 From 1fcb321e2fa1903b815b099e59bd85aac823850a Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Fri, 31 Jul 2009 14:03:36 +0300 Subject: radeon: Remove unused variable from context. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/radeon/radeon_common_context.h | 1 - src/mesa/drivers/dri/radeon/radeon_lock.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index cd1986e1fc..d7e94a6894 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -445,7 +445,6 @@ struct radeon_context { GLuint numClipRects; /* Cliprects for the draw buffer */ drm_clip_rect_t *pClipRects; unsigned int lastStamp; - GLboolean lost_context; drm_radeon_sarea_t *sarea; /* Private SAREA data */ /* Mirrors of some DRI state */ diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 5774f7ebcf..2f0ed1cfce 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -85,8 +85,6 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) } rmesa->vtbl.get_lock(rmesa); - - rmesa->lost_context = GL_TRUE; } void radeon_lock_hardware(radeonContextPtr radeon) -- cgit v1.2.3 From 55bc8b139067f2596da654075a4fc6e5940e22dd Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Wed, 29 Jul 2009 01:28:33 +0300 Subject: radeon: Cliprects has to be updated before doing anything with clip rectangles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported to fix corruption while dragging an active window by John Bridgman. Signed-off-by: Pauli Nieminen Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/radeon/radeon_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index dde615a4d9..7f503a9ff7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -887,10 +887,11 @@ void radeonUpdatePageFlipping(radeonContextPtr radeon) void radeon_window_moved(radeonContextPtr radeon) { + /* Cliprects has to be updated before doing anything else */ + radeonSetCliprects(radeon); if (!radeon->radeonScreen->driScreen->dri2.enabled) { radeonUpdatePageFlipping(radeon); } - radeonSetCliprects(radeon); } void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height) -- cgit v1.2.3 From 2730ee75c73e79f4196d6df5540da7063a96c25e Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 31 Jul 2009 23:20:22 +0200 Subject: radeon: s/r300/radeon in shared code error message --- src/mesa/drivers/dri/radeon/radeon_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index ad501c454c..6a065f0468 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -951,7 +951,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t fprintf(stderr, " Allocate new miptree\n"); radeon_try_alloc_miptree(rmesa, t, &baseimage->base, 0, texObj->BaseLevel); if (!t->mt) { - _mesa_problem(ctx, "r300_validate_texture failed to alloc miptree"); + _mesa_problem(ctx, "radeon_validate_texture failed to alloc miptree"); return GL_FALSE; } } -- cgit v1.2.3 From 801c3fcbca69a17f0696522b91cbfc378094974b Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 31 Jul 2009 23:24:44 +0200 Subject: radeon: fix r100/r200 compressed texture stride This almost fixes compressed mipmapped textures on r200, though some small mip levels are still broken. Leave r300 compressed texture stride as is though afaik it's different to pre-radeon-rewrite too. Also do the fixup for rs600 uncompressed row stride at same place. --- src/mesa/drivers/dri/r300/r300_context.c | 5 ----- src/mesa/drivers/dri/radeon/radeon_common_context.c | 13 ++++++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 6f3aab986d..db404b3847 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -451,11 +451,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitState(r300); r300InitShaderFunctions(r300); - if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 || - screen->chip_family == CHIP_FAMILY_RS740) { - r300->radeon.texture_row_align = 64; - } - r300InitGLExtensions(ctx); return GL_TRUE; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 4e4eba5d94..2a017b59cf 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -245,9 +245,20 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->texture_row_align = 256; radeon->texture_rect_row_align = 256; radeon->texture_compressed_row_align = 256; - } else { + } else if (IS_R200_CLASS(radeon->radeonScreen) || + IS_R100_CLASS(radeon->radeonScreen)) { radeon->texture_row_align = 32; radeon->texture_rect_row_align = 64; + radeon->texture_compressed_row_align = 32; + } else { /* R300 - not sure this is all correct */ + int chip_family = radeon->radeonScreen->chip_family; + if (chip_family == CHIP_FAMILY_RS600 || + chip_family == CHIP_FAMILY_RS690 || + chip_family == CHIP_FAMILY_RS740) + radeon->texture_row_align = 64; + else + radeon->texture_row_align = 32; + radeon->texture_rect_row_align = 64; radeon->texture_compressed_row_align = 64; } -- cgit v1.2.3 From 282c0c411cc1d9c10adc75cb066e8af819073975 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Aug 2009 02:28:22 -0400 Subject: r600: fix r600SetTexOffset We need to properly set up a fake bo for the texture override, so add a new function to radeon_bo_legacy.c. This could probably be used on radeon/r200/r300 to unify the bo handling for texture override. compiz now works :) --- src/mesa/drivers/dri/r600/r600_texstate.c | 12 ++++++----- src/mesa/drivers/dri/radeon/radeon_bo_legacy.c | 29 ++++++++++++++++++++++++++ src/mesa/drivers/dri/radeon/radeon_bo_legacy.h | 3 +++ 3 files changed, 39 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 4840586858..ee9b64ee43 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -568,9 +568,6 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex } } - if (t->image_override && t->bo) - return; - switch (texObj->Target) { case GL_TEXTURE_1D: SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_1D, DIM_shift, DIM_mask); @@ -701,7 +698,7 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); radeonTexObjPtr t = radeon_tex_obj(tObj); - uint32_t pitch_val; + uint32_t pitch_val, size; if (!tObj) return; @@ -711,7 +708,12 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, if (!offset) return; - t->bo = NULL; + size = pitch;//h * w * (depth / 8); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + t->bo = radeon_legacy_bo_alloc_fake(rmesa->radeon.radeonScreen->bom, size, offset); t->override_offset = offset; pitch_val = pitch; switch (depth) { diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index 992eb4611b..6084b356a3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -904,3 +904,32 @@ unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo) return bo->size; } +/* + * Fake up a bo for things like texture image_override. + * bo->offset already includes fb_location + */ +struct bo_legacy *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, + int size, + uint32_t offset) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; + struct bo_legacy *bo; + +#ifdef RADEON_DEBUG_BO + bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0, szBufUsage); +#else + bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); +#endif /* RADEON_DEBUG_BO */ + if (bo == NULL) + return NULL; + bo->static_bo = 1; + bo->offset = offset; + bo->base.handle = bo->offset; + bo->ptr = boml->screen->driScreen->pFB + (offset - boml->fb_location); + if (bo->base.handle > boml->nhandle) { + boml->nhandle = bo->base.handle + 1; + } + radeon_bo_ref(&(bo->base)); + return bo; +} + diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h index 0db817cab0..b57d6df9aa 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h @@ -42,5 +42,8 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom); void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom); unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo); +struct bo_legacy *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, + int size, + uint32_t offset); #endif -- cgit v1.2.3 From 583ed4aae54c308009346c083a1a0d49d4361631 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Aug 2009 03:12:56 -0400 Subject: r600: fix the build when RADEON_DEBUG_BO is set --- src/mesa/drivers/dri/radeon/radeon_bo_legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index 6084b356a3..e608520a6e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -916,7 +916,7 @@ struct bo_legacy *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, struct bo_legacy *bo; #ifdef RADEON_DEBUG_BO - bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0, szBufUsage); + bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0, "fake bo"); #else bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); #endif /* RADEON_DEBUG_BO */ -- cgit v1.2.3 From f538d0275398276b8f9634a0b1857a01641ae927 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Aug 2009 11:21:10 -0400 Subject: r600: add some missing pci ids --- src/mesa/drivers/dri/radeon/radeon_chipset.h | 2 ++ src/mesa/drivers/dri/radeon/radeon_screen.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 0a6a2df35b..a9b4102cc1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -356,7 +356,9 @@ #define PCI_CHIP_RV770_947A 0x947A #define PCI_CHIP_RV770_947B 0x947B +#define PCI_CHIP_RV730_9480 0x9480 #define PCI_CHIP_RV730_9487 0x9487 +#define PCI_CHIP_RV730_9488 0x9488 #define PCI_CHIP_RV730_9489 0x9489 #define PCI_CHIP_RV730_948F 0x948F #define PCI_CHIP_RV730_9490 0x9490 diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 5f1af5b0da..f8f1a2c5cb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -878,7 +878,9 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_RV730_9480: case PCI_CHIP_RV730_9487: + case PCI_CHIP_RV730_9488: case PCI_CHIP_RV730_9489: case PCI_CHIP_RV730_948F: case PCI_CHIP_RV730_9490: -- cgit v1.2.3 From e0d61fd696b3561d575a9ee5055a1484a5ac6926 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Aug 2009 16:10:32 -0400 Subject: r600: add some new r7xx pci ids --- src/mesa/drivers/dri/radeon/radeon_chipset.h | 5 +++++ src/mesa/drivers/dri/radeon/radeon_screen.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index a9b4102cc1..a275c8fb14 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -363,6 +363,7 @@ #define PCI_CHIP_RV730_948F 0x948F #define PCI_CHIP_RV730_9490 0x9490 #define PCI_CHIP_RV730_9491 0x9491 +#define PCI_CHIP_RV730_9495 0x9495 #define PCI_CHIP_RV730_9498 0x9498 #define PCI_CHIP_RV730_949C 0x949C #define PCI_CHIP_RV730_949E 0x949E @@ -376,12 +377,16 @@ #define PCI_CHIP_RV710_9552 0x9552 #define PCI_CHIP_RV710_9553 0x9553 #define PCI_CHIP_RV710_9555 0x9555 +#define PCI_CHIP_RV710_9557 0x9557 #define PCI_CHIP_RV740_94A0 0x94A0 #define PCI_CHIP_RV740_94A1 0x94A1 +#define PCI_CHIP_RV740_94A3 0x94A3 #define PCI_CHIP_RV740_94B1 0x94B1 #define PCI_CHIP_RV740_94B3 0x94B3 +#define PCI_CHIP_RV740_94B4 0x94B4 #define PCI_CHIP_RV740_94B5 0x94B5 +#define PCI_CHIP_RV740_94B9 0x94B9 enum { CHIP_FAMILY_R100, diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index f8f1a2c5cb..507a620e68 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -885,6 +885,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_RV730_948F: case PCI_CHIP_RV730_9490: case PCI_CHIP_RV730_9491: + case PCI_CHIP_RV730_9495: case PCI_CHIP_RV730_9498: case PCI_CHIP_RV730_949C: case PCI_CHIP_RV730_949E: @@ -901,15 +902,19 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) case PCI_CHIP_RV710_9552: case PCI_CHIP_RV710_9553: case PCI_CHIP_RV710_9555: + case PCI_CHIP_RV710_9557: screen->chip_family = CHIP_FAMILY_RV710; screen->chip_flags = RADEON_CHIPSET_TCL; break; case PCI_CHIP_RV740_94A0: case PCI_CHIP_RV740_94A1: + case PCI_CHIP_RV740_94A3: case PCI_CHIP_RV740_94B1: case PCI_CHIP_RV740_94B3: + case PCI_CHIP_RV740_94B4: case PCI_CHIP_RV740_94B5: + case PCI_CHIP_RV740_94B9: screen->chip_family = CHIP_FAMILY_RV740; screen->chip_flags = RADEON_CHIPSET_TCL; break; -- cgit v1.2.3 From 4221e81b2489c4c91092ef49bba181a1bed216c8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 3 Aug 2009 14:38:16 -0700 Subject: radeon: Fix inverted test for disabling flushing of front buffer output. (corresponding fix to the intel driver one) --- src/mesa/drivers/dri/radeon/radeon_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 7f503a9ff7..39f1993332 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -1093,7 +1093,7 @@ void radeonFlush(GLcontext *ctx) * each of N places that do rendering. This has worse performances, * but it is much easier to get correct. */ - if (radeon->is_front_buffer_rendering) { + if (!radeon->is_front_buffer_rendering) { radeon->front_buffer_dirty = GL_FALSE; } } -- cgit v1.2.3 From 50c736589ee0edbedf9ac434e883483b82b3030a Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 4 Aug 2009 00:21:07 +0200 Subject: radeon: more fixes for compressed textures - fix not respecting required hardware stride with compressedTexImage - this fixes #22615. - make sure correct stride is used in various places - fix stored miptree never matching with a TexImage call with compressed texture - don't always store data with compressedtexsubimage at offset 0, and actually use the supplied pixel data... (untested) - make sure rows for compressed texture handling are rounded up not down Note that trying to access stored compressed textures in hardware miptrees from core mesa (get_compressed_teximage, swrast fallbacks) can't work correctly, since RowStride isn't really set to anything useful, plus some places (at least get_compressed_teximage) assume this data has native stride and no padding. --- src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 4 +-- src/mesa/drivers/dri/radeon/radeon_texture.c | 37 ++++++++++++++++++------ 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index 071a18e7d8..02e0dc7774 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -366,8 +366,8 @@ GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_textu mt->width0 == firstImage->Width && mt->height0 == firstImage->Height && mt->depth0 == firstImage->Depth && - mt->bpp == firstImage->TexFormat->TexelBytes && - mt->compressed == compressed); + mt->compressed == compressed && + (!mt->compressed ? (mt->bpp == firstImage->TexFormat->TexelBytes) : 1)); } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 6a065f0468..fa16f44c18 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -610,9 +610,17 @@ static void radeon_teximage( if (pixels) { radeon_teximage_map(image, GL_TRUE); - if (compressed) { - memcpy(texImage->Data, pixels, imageSize); + if (image->mt) { + uint32_t srcRowStride, bytesPerRow, rows; + srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + bytesPerRow = srcRowStride; + rows = (height + 3) / 4; + copy_rows(texImage->Data, image->mt->levels[level].rowstride, + pixels, srcRowStride, rows, bytesPerRow); + } else { + memcpy(texImage->Data, pixels, imageSize); + } } else { GLuint dstRowStride; GLuint *dstImageOffsets; @@ -756,14 +764,23 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve } if (compressed) { - uint32_t srcRowStride, bytesPerRow, rows; - dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width); + uint32_t srcRowStride, bytesPerRow, rows; + GLubyte *img_start; + if (!image->mt) { + dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width); + img_start = _mesa_compressed_image_address(xoffset, yoffset, 0, + texImage->TexFormat->MesaFormat, + texImage->Width, texImage->Data); + } + else { + uint32_t blocks_x = dstRowStride / (image->mt->bpp * 4); + img_start = texImage->Data + image->mt->bpp * 4 * (blocks_x * (yoffset / 4) + xoffset / 4); + } srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); bytesPerRow = srcRowStride; - rows = height / 4; + rows = (height + 3) / 4; - copy_rows(texImage->Data, dstRowStride, image->base.Data, srcRowStride, rows, - bytesPerRow); + copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow); } else { if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, @@ -884,8 +901,8 @@ static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_imag uint32_t height; /* need to confirm this value is correct */ if (mt->compressed) { - height = image->base.Height / 4; - srcrowstride = image->base.RowStride * mt->bpp; + height = (image->base.Height + 3) / 4; + srcrowstride = _mesa_compressed_row_stride(image->base.TexFormat->MesaFormat, image->base.Width); } else { height = image->base.Height * image->base.Depth; srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes; @@ -1000,6 +1017,8 @@ radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level, } if (compressed) { + /* FIXME: this can't work for small textures (mips) which + use different hw stride */ _mesa_get_compressed_teximage(ctx, target, level, pixels, texObj, texImage); } else { -- cgit v1.2.3 From 15f5f839b1a52a49bb60e73625b8c6b2f73a75e8 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 5 Aug 2009 01:12:16 +0200 Subject: radeon: fix miptree comparison breakage another case of image never matching miptree in case of compressed textures --- src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index 02e0dc7774..d4082bf68f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -326,7 +326,8 @@ GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel) return GL_FALSE; - if (texImage->IsCompressed != mt->compressed) + if ((!texImage->IsCompressed && mt->compressed) || + (texImage->IsCompressed && !mt->compressed)) return GL_FALSE; if (!texImage->IsCompressed && -- cgit v1.2.3 From 2dc3fb7803cd31b20a06bcad23949be5b3a1dbb2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 7 Aug 2009 16:16:08 +1000 Subject: radeon span: add r200 depth/stencil span read/writing this should only really affect DRI2 since we mostly have a surface in DRI1. I don't think this is perfect yet, but it is a better start than nothing. --- src/mesa/drivers/dri/radeon/radeon_span.c | 124 ++++++++++++++++++++++++++---- 1 file changed, 111 insertions(+), 13 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index b2a468b4fd..5e4bf00d7a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -51,6 +51,59 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); + +/* r200 depth buffer is always tiled - this is the formula + according to the docs unless I typo'ed in it +*/ +static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset; + if (rrb->has_surface) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + GLuint b; + offset = 0; + b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6))); + offset += (b >> 1) << 12; + offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; + offset += ((y >> 2) & 0x3) << 9; + offset += ((x >> 3) & 0x1) << 8; + offset += ((x >> 4) & 0x3) << 6; + offset += ((x >> 2) & 0x1) << 5; + offset += ((y >> 1) & 0x1) << 4; + offset += ((x >> 1) & 0x1) << 3; + offset += (y & 0x1) << 2; + offset += (x & 0x1) << 1; + } + return &ptr[offset]; +} + +static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset; + if (rrb->has_surface) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + GLuint b; + offset = 0; + b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5)); + offset += (b >> 1) << 12; + offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; + offset += ((y >> 2) & 0x3) << 9; + offset += ((x >> 2) & 0x1) << 8; + offset += ((x >> 3) & 0x3) << 6; + offset += ((y >> 1) & 0x1) << 5; + offset += ((x >> 1) & 0x1) << 4; + offset += (y & 0x1) << 3; + offset += (x & 0x1) << 2; + } + return &ptr[offset]; +} + /* radeon tiling on r300-r500 has 4 states, macro-linear/micro-linear macro-linear/micro-tiled @@ -61,7 +114,6 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); 4 byte surface 8/16 byte (unused) */ - static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { @@ -285,11 +337,21 @@ s8z24_to_z24s8(uint32_t val) */ #define VALUE_TYPE GLushort +#if defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d +#else #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d +#endif +#if defined(RADEON_COMMON_FOR_R200) +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) +#else #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) +#endif #define TAG(x) radeon##x##_z16 #include "depthtmp.h" @@ -301,7 +363,7 @@ s8z24_to_z24s8(uint32_t val) */ #define VALUE_TYPE GLuint -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ @@ -310,6 +372,15 @@ do { \ tmp |= ((d << 8) & 0xffffff00); \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -321,19 +392,21 @@ do { \ } while (0) #endif -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define READ_DEPTH( d, _x, _y ) \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ }while(0) +#elif defined(RADEON_COMMON_FOR_R200) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \ + }while(0) #else #define READ_DEPTH( d, _x, _y ) \ d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; #endif -/* - fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ - d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; -*/ + #define TAG(x) radeon##x##_z24 #include "depthtmp.h" @@ -345,12 +418,19 @@ do { \ */ #define VALUE_TYPE GLuint -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = z24s8_to_s8z24(d); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -360,20 +440,22 @@ do { \ } while (0) #endif -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define READ_DEPTH( d, _x, _y ) \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ }while(0) +#elif defined(RADEON_COMMON_FOR_R200) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \ + }while(0) #else #define READ_DEPTH( d, _x, _y ) do { \ d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \ } while (0) #endif -/* - fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ - d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; -*/ + #define TAG(x) radeon##x##_z24_s8 #include "depthtmp.h" @@ -392,6 +474,15 @@ do { \ tmp |= (d) & 0xff; \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ + *_ptr = tmp; \ +} while (0) #else #define WRITE_STENCIL( _x, _y, d ) \ do { \ @@ -410,6 +501,13 @@ do { \ GLuint tmp = *_ptr; \ d = tmp & 0x000000ff; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + d = (tmp & 0xff000000) >> 24; \ +} while (0) #else #define READ_STENCIL( d, _x, _y ) \ do { \ -- cgit v1.2.3 From 239c8bfb10d3cd61547ccc460f0b89062f3520bc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 7 Aug 2009 19:40:05 +1000 Subject: radeon: enable tiling fallbacks in 3D driver. Only really got good testing on r500 so far, need to enable in DDX and play some more. --- src/mesa/drivers/dri/radeon/radeon_bo_drm.h | 16 ++++++++++++++++ src/mesa/drivers/dri/radeon/radeon_bo_legacy.c | 2 ++ src/mesa/drivers/dri/radeon/radeon_common_context.c | 11 ++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h index 8eeaea1cb2..d52fb017d8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h @@ -83,6 +83,10 @@ struct radeon_bo_funcs { int (*bo_unmap)(struct radeon_bo *bo); int (*bo_wait)(struct radeon_bo *bo); int (*bo_is_static)(struct radeon_bo *bo); + int (*bo_set_tiling)(struct radeon_bo *bo, uint32_t tiling_flags, + uint32_t pitch); + int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags, + uint32_t *pitch); }; struct radeon_bo_manager { @@ -187,6 +191,18 @@ static inline int _radeon_bo_wait(struct radeon_bo *bo, return bo->bom->funcs->bo_wait(bo); } +static inline int radeon_bo_set_tiling(struct radeon_bo *bo, + uint32_t tiling_flags, uint32_t pitch) +{ + return bo->bom->funcs->bo_set_tiling(bo, tiling_flags, pitch); +} + +static inline int radeon_bo_get_tiling(struct radeon_bo *bo, + uint32_t *tiling_flags, uint32_t *pitch) +{ + return bo->bom->funcs->bo_get_tiling(bo, tiling_flags, pitch); +} + static inline int radeon_bo_is_static(struct radeon_bo *bo) { if (bo->bom->funcs->bo_is_static) diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index e608520a6e..8c19f30106 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -577,6 +577,8 @@ static struct radeon_bo_funcs bo_legacy_funcs = { bo_unmap, NULL, bo_is_static, + NULL, + NULL, }; static int bo_vram_validate(struct radeon_bo *bo, diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 2a017b59cf..5e744f9deb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -762,8 +762,10 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) bo = depth_bo; radeon_bo_ref(bo); } else { + uint32_t tiling_flags = 0, pitch = 0; + int ret; #ifdef RADEON_DEBUG_BO - bo = radeon_bo_open(radeon->radeonScreen->bom, + bo = radeon_bo_open(radeon->radeonScreen->bom, buffers[i].name, 0, 0, @@ -784,6 +786,13 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) regname, buffers[i].name); } + + ret = radeon_bo_get_tiling(bo, &tiling_flags, &pitch); + if (tiling_flags & RADEON_TILING_MACRO) + bo->flags |= RADEON_BO_FLAGS_MACRO_TILE; + if (tiling_flags & RADEON_TILING_MICRO) + bo->flags |= RADEON_BO_FLAGS_MICRO_TILE; + } if (buffers[i].attachment == __DRI_BUFFER_DEPTH) { -- cgit v1.2.3 From caf4f0ede78b4f6b1e340e33b96f21ce8a6ac344 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Aug 2009 16:20:26 -0400 Subject: radeon: fix the build with older drm headers --- src/mesa/drivers/dri/radeon/radeon_bo_drm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h index d52fb017d8..427162d98f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h @@ -42,6 +42,12 @@ #define RADEON_BO_FLAGS_MACRO_TILE 1 #define RADEON_BO_FLAGS_MICRO_TILE 2 +#define RADEON_TILING_MACRO 0x1 +#define RADEON_TILING_MICRO 0x2 +#define RADEON_TILING_SWAP 0x4 +#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface + * when mapped - i.e. front buffer */ + struct radeon_bo_manager; struct radeon_bo { -- cgit v1.2.3 From f7474a577d54765d893b78fbf0d6a969c2dc60cd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Aug 2009 17:35:12 -0400 Subject: radeon: correct fix for tiling with the legacy build --- src/mesa/drivers/dri/radeon/radeon_bo_drm.h | 6 ------ src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h index 427162d98f..d52fb017d8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h @@ -42,12 +42,6 @@ #define RADEON_BO_FLAGS_MACRO_TILE 1 #define RADEON_BO_FLAGS_MICRO_TILE 2 -#define RADEON_TILING_MACRO 0x1 -#define RADEON_TILING_MICRO 0x2 -#define RADEON_TILING_SWAP 0x4 -#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface - * when mapped - i.e. front buffer */ - struct radeon_bo_manager; struct radeon_bo { diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index e0c70dd9a1..a42870f4a9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -15,6 +15,12 @@ #define RADEON_GEM_DOMAIN_GTT 0x2 // GTT or cache flushed #define RADEON_GEM_DOMAIN_VRAM 0x4 // VRAM domain +#define RADEON_TILING_MACRO 0x1 +#define RADEON_TILING_MICRO 0x2 +#define RADEON_TILING_SWAP 0x4 +#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface + * when mapped - i.e. front buffer */ + /* to be used to build locally in mesa with no libdrm bits */ #include "../radeon/radeon_bo_drm.h" #include "../radeon/radeon_cs_drm.h" -- cgit v1.2.3 From e0c9157671e0f3868c2c53125f885fb9be1e3a62 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 10 Aug 2009 10:10:13 +1000 Subject: radeon: fix cut-n-paste in alphabits in fbo code --- src/mesa/drivers/dri/radeon/radeon_fbo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index f28efa33e9..322f510f16 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -291,7 +291,7 @@ radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv) rrb->base.RedBits = 8; rrb->base.GreenBits = 8; rrb->base.BlueBits = 8; - rrb->base.AlphaBits = 8; + rrb->base.AlphaBits = 0; rrb->base.DataType = GL_UNSIGNED_BYTE; break; case GL_RGBA8: -- cgit v1.2.3 From b4c95697150b8eb0bb60bb996f04be323bb56332 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 8 Aug 2009 20:05:11 +1000 Subject: radeon_fbo: switch short to byte for 565 --- src/mesa/drivers/dri/radeon/radeon_fbo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 322f510f16..f05b106aaf 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -407,7 +407,7 @@ restart: rrb->cpp = 2; rrb->base._ActualFormat = GL_RGB5; rrb->base._BaseFormat = GL_RGB; - rrb->base.DataType = GL_UNSIGNED_SHORT; + rrb->base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); } else if (texImage->TexFormat == &_mesa_texformat_argb1555) { -- cgit v1.2.3 From c2b29b5df506d747e9a53bbcf5a45dc7cfe65643 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Aug 2009 22:15:18 -0400 Subject: r600: use the drm ioctls for swap and texture upload NOTE: THIS REQUIRES AN UPDATED DRM! --- src/mesa/drivers/dri/radeon/radeon_bo_legacy.c | 32 ++++++++++++++++++++++---- src/mesa/drivers/dri/radeon/radeon_common.c | 26 --------------------- 2 files changed, 27 insertions(+), 31 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index 8c19f30106..6d6ae5d3b2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -624,12 +624,34 @@ static int bo_vram_validate(struct radeon_bo *bo, if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) { if (IS_R600_CLASS(boml->screen)) { - char *src = bo_legacy->ptr; - char *dst = (char *) boml->screen->driScreen->pFB + - (bo_legacy->offset - boml->fb_location); + drm_radeon_texture_t tex; + drm_radeon_tex_image_t tmp; + int ret; + + tex.offset = bo_legacy->offset; + tex.image = &tmp; + assert(!(tex.offset & 1023)); - /* FIXME: alignment, pitch, etc. */ - memcpy(dst, src, bo->size); + tmp.x = 0; + tmp.y = 0; + tmp.width = bo->size; + tmp.height = 1; + tmp.data = bo_legacy->ptr; + tex.format = RADEON_TXFORMAT_ARGB8888; + tex.width = tmp.width; + tex.height = tmp.height; + tex.pitch = bo->size; + do { + ret = drmCommandWriteRead(bo->bom->fd, + DRM_RADEON_TEXTURE, + &tex, + sizeof(drm_radeon_texture_t)); + if (ret) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n"); + usleep(1); + } + } while (ret == -EAGAIN); } else { /* Copy to VRAM using a blit. * All memory is 4K aligned. We're using 1024 pixels wide blits. diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 39f1993332..330c2c8a86 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -481,32 +481,6 @@ void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, if (!n) continue; - if (IS_R600_CLASS(rmesa->radeonScreen)) { - int cpp = rmesa->radeonScreen->cpp; - int src_pitch = rmesa->radeonScreen->backPitch * cpp; - int dst_pitch = rmesa->radeonScreen->frontPitch * cpp; - char *src = (char *)rmesa->radeonScreen->driScreen->pFB + rmesa->radeonScreen->backOffset; - char *dst = (char *)rmesa->radeonScreen->driScreen->pFB + rmesa->radeonScreen->frontOffset; - int j; - drm_clip_rect_t *pb = rmesa->sarea->boxes; - - for (j = 0; j < n; j++) { - int x = pb[j].x1; - int y = pb[j].y1; - int w = pb[j].x2 - x; - int h = pb[j].y2 - y; - - src += (y * src_pitch) + (x * cpp); - dst += (y * dst_pitch) + (x * cpp); - - while (h--) { - memcpy(dst, src, w * cpp); - src += src_pitch; - dst += dst_pitch; - } - } - } - ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); if ( ret ) { -- cgit v1.2.3 From 29173d3d5cf02d58e720b5c7fe48a0630c7d5d5f Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Thu, 30 Jul 2009 20:17:29 +0300 Subject: radeon: Add protection against recursive DRM locking. Reference counting protects DRM lock call from recursive locking that would cause hang. Code also adds optional debugging output for recursive call that is compiled only if NDEBUG is not defined. This code is not 100% thread safe because mesa doesn't include increment and test atomic operation. There is built-in gcc functions but they are only available from gcc 4.2. --- .../drivers/dri/radeon/radeon_common_context.c | 1 + .../drivers/dri/radeon/radeon_common_context.h | 1 + src/mesa/drivers/dri/radeon/radeon_lock.c | 53 +++++++++++++++++++++- src/mesa/drivers/dri/radeon/radeon_lock.h | 12 ++++- 4 files changed, 64 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 5e744f9deb..d68aa2bd62 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -211,6 +211,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->dri.screen = sPriv; radeon->dri.hwContext = driContextPriv->hHWContext; radeon->dri.hwLock = &sPriv->pSAREA->lock; + radeon->dri.hwLockCount = 0; radeon->dri.fd = sPriv->fd; radeon->dri.drmMinor = sPriv->drm_version.minor; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index d7e94a6894..f8e1a25c9f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -365,6 +365,7 @@ struct radeon_dri_mirror { drm_context_t hwContext; drm_hw_lock_t *hwLock; + int hwLockCount; int fd; int drmMinor; }; diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 2f0ed1cfce..6294b7e42b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -86,8 +86,34 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) rmesa->vtbl.get_lock(rmesa); } - -void radeon_lock_hardware(radeonContextPtr radeon) +#ifndef NDEBUG +struct lock_debug { + const char* function; + const char* file; + int line; +}; + +static struct lock_debug ldebug = {0}; +#endif + +#if 0 +/** TODO: use atomic operations for reference counting **/ +/** gcc 4.2 has builtin functios for this **/ +#define ATOMIC_INC_AND_FETCH(atomic) __sync_add_and_fetch(&atomic, 1) +#define ATOMIC_DEC_AND_FETCH(atomic) __sync_sub_and_fetch(&atomic, 1) +#else +#define ATOMIC_INC_AND_FETCH(atomic) (++atomic) +#define ATOMIC_DEC_AND_FETCH(atomic) (--atomic) +#endif + + +void radeon_lock_hardware(radeonContextPtr radeon +#ifndef NDEBUG + ,const char* function + ,const char* file + ,const int line +#endif + ) { char ret = 0; struct radeon_framebuffer *rfb = NULL; @@ -102,16 +128,39 @@ void radeon_lock_hardware(radeonContextPtr radeon) } if (!radeon->radeonScreen->driScreen->dri2.enabled) { + if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) + { +#ifndef NDEBUG + if ( RADEON_DEBUG & DEBUG_SANITY ) + fprintf(stderr, "*** %d times of recursive call to %s ***\n" + "Original call was from %s (file: %s line: %d)\n" + "Now call is coming from %s (file: %s line: %d)\n" + , radeon->dri.hwLockCount, __FUNCTION__ + , ldebug.function, ldebug.file, ldebug.line + , function, file, line + ); +#endif + return; + } DRM_CAS(radeon->dri.hwLock, radeon->dri.hwContext, (DRM_LOCK_HELD | radeon->dri.hwContext), ret ); if (ret) radeonGetLock(radeon, 0); +#ifndef NDEBUG + ldebug.function = function; + ldebug.file = file; + ldebug.line = line; +#endif } } void radeon_unlock_hardware(radeonContextPtr radeon) { if (!radeon->radeonScreen->driScreen->dri2.enabled) { + if (ATOMIC_DEC_AND_FETCH(radeon->dri.hwLockCount) > 0) + { + return; + } DRM_UNLOCK( radeon->dri.fd, radeon->dri.hwLock, radeon->dri.hwContext ); diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h index 2817709eed..da5a5b4371 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.h +++ b/src/mesa/drivers/dri/radeon/radeon_lock.h @@ -48,12 +48,22 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); -void radeon_lock_hardware(radeonContextPtr rmesa); +void radeon_lock_hardware(radeonContextPtr rmesa +#ifndef NDEBUG + ,const char* function + ,const char* file + ,const int line +#endif + ); void radeon_unlock_hardware(radeonContextPtr rmesa); /* Lock the hardware and validate our state. */ +#ifdef NDEBUG #define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa) +#else +#define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa, __FUNCTION__, __FILE__, __LINE__) +#endif #define UNLOCK_HARDWARE( rmesa ) radeon_unlock_hardware(rmesa) #endif -- cgit v1.2.3 From ef3ad412c746203727324edbd4cbe04079332d7c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Aug 2009 11:12:46 -0700 Subject: radeon: Minor warnings cleanup. --- src/mesa/drivers/dri/r600/r600_context.h | 11 +++++++++++ src/mesa/drivers/dri/r600/r700_clear.c | 1 + src/mesa/drivers/dri/r600/r700_oglprog.c | 2 +- src/mesa/drivers/dri/r600/r700_state.c | 2 -- src/mesa/drivers/dri/radeon/radeon_common_context.c | 7 +++---- 5 files changed, 16 insertions(+), 7 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index fbb8164af5..30ddce682c 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -158,6 +158,17 @@ extern GLboolean r700InitChipObject(context_t *context); extern GLboolean r700SendContextStates(context_t *context); extern GLboolean r700SendViewportState(context_t *context, int id); extern GLboolean r700SendRenderTargetState(context_t *context, int id); +extern GLboolean r700SendTextureState(context_t *context); +extern GLboolean r700SendDepthTargetState(context_t *context); +extern GLboolean r700SendUCPState(context_t *context); +extern GLboolean r700SendFSState(context_t *context); +extern void r700EmitState(GLcontext * ctx); + +extern GLboolean r700SyncSurf(context_t *context, + struct radeon_bo *pbo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t sync_type); extern int r700SetupStreams(GLcontext * ctx); extern void r700SetupVTXConstants(GLcontext * ctx, diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index e84be38622..05d4af331e 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -31,6 +31,7 @@ #include "main/imports.h" #include "main/mtypes.h" #include "main/enums.h" +#include "swrast/swrast.h" #include "radeon_lock.h" #include "r600_context.h" diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index 36de143b1a..c49b90c1cc 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -33,6 +33,7 @@ #include "tnl/tnl.h" #include "r600_context.h" +#include "r600_emit.h" #include "r700_oglprog.h" #include "r700_fragprog.h" @@ -87,7 +88,6 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { struct r700_vertex_program * vp; struct r700_fragment_program * fp; - context_t *context = R700_CONTEXT(ctx); switch (prog->Target) { diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index e95f52400a..7f54cf9f56 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -179,8 +179,6 @@ static void r700FetchStateParameter(GLcontext * ctx, const gl_state_index state[STATE_LENGTH], GLfloat * value) { - context_t *context = R700_CONTEXT(ctx); - /* TODO */ } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index d68aa2bd62..3a9468e88d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -295,11 +295,10 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) GET_CURRENT_CONTEXT(ctx); radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; - - /* +r6/r7 */ - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); - /* --------- */ +#endif if (radeon == current) { radeon_firevertices(radeon); -- cgit v1.2.3 From b6a4f5f1d3f0a79b4502d0b30d8b259e8189b70f Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Tue, 4 Aug 2009 14:42:20 +0300 Subject: r200: Prevent TexGenMatrix from leaking when destroying r200 context. Signed-off-by: Pauli Nieminen --- src/mesa/drivers/dri/r200/r200_context.c | 12 ++++++++++++ src/mesa/drivers/dri/radeon/radeon_screen.c | 10 +++++----- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 9a92a32079..8cb287de26 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -500,3 +500,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, } +void r200DestroyContext( __DRIcontextPrivate *driContextPriv ) +{ + int i; + r200ContextPtr rmesa = (r200ContextPtr)driContextPriv->driverPrivate; + if (rmesa) + { + for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) { + _math_matrix_dtr( &rmesa->TexGenMatrix[i] ); + } + } + radeonDestroyContext(driContextPriv); +} diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 507a620e68..89bb31ea18 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1596,11 +1596,6 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, return r300CreateContext(glVisual, driContextPriv, sharedContextPriv); #endif -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - if (IS_R200_CLASS(screen)) - return r200CreateContext(glVisual, driContextPriv, sharedContextPriv); -#endif - #if !RADEON_COMMON (void)screen; return r100CreateContext(glVisual, driContextPriv, sharedContextPriv); @@ -1800,8 +1795,13 @@ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) const struct __DriverAPIRec driDriverAPI = { .InitScreen = radeonInitScreen, .DestroyScreen = radeonDestroyScreen, +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + .CreateContext = r200CreateContext, + .DestroyContext = r200DestroyContext, +#else .CreateContext = radeonCreateContext, .DestroyContext = radeonDestroyContext, +#endif .CreateBuffer = radeonCreateBuffer, .DestroyBuffer = radeonDestroyBuffer, .SwapBuffers = radeonSwapBuffers, -- cgit v1.2.3 From 2f6675b8160c5fa2e6e9b5642c133fd2843a7508 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Aug 2009 14:49:07 -0400 Subject: r600: clean up Create/DestroyContext --- src/mesa/drivers/dri/r600/r600_context.c | 2 ++ src/mesa/drivers/dri/radeon/radeon_common_context.c | 11 +---------- src/mesa/drivers/dri/radeon/radeon_screen.c | 8 +++----- 3 files changed, 6 insertions(+), 15 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index eacf811282..7009374b0c 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -395,6 +395,8 @@ r600DestroyContext (__DRIcontextPrivate * driContextPriv) if (context) FREE(context->hw.pStateList); + + radeonDestroyContext(driContextPriv); } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 3a9468e88d..f71dc1cb23 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -306,16 +306,7 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) } assert(radeon); - if (radeon) - { - -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */ - if (IS_R600_CLASS(screen)) - { - r600DestroyContext(driContextPriv); - } -#endif - + if (radeon) { if (radeon->dma.current) { rcommonFlushCmdBuf( radeon, __FUNCTION__ ); } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 89bb31ea18..7b759661ca 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1586,11 +1586,6 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, { __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) - if (IS_R600_CLASS(screen)) - return r600CreateContext(glVisual, driContextPriv, sharedContextPriv); -#endif - #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) if (IS_R300_CLASS(screen)) return r300CreateContext(glVisual, driContextPriv, sharedContextPriv); @@ -1798,6 +1793,9 @@ const struct __DriverAPIRec driDriverAPI = { #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) .CreateContext = r200CreateContext, .DestroyContext = r200DestroyContext, +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) + .CreateContext = r600CreateContext, + .DestroyContext = r600DestroyContext, #else .CreateContext = radeonCreateContext, .DestroyContext = radeonDestroyContext, -- cgit v1.2.3 From a245c05dd3a1ca48204dd84252e6964aba91d4df Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Aug 2009 15:38:45 -0400 Subject: r600: fix warning --- src/mesa/drivers/dri/radeon/radeon_bo_legacy.c | 4 ++-- src/mesa/drivers/dri/radeon/radeon_bo_legacy.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index 6d6ae5d3b2..d6d22cb4c3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -932,7 +932,7 @@ unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo) * Fake up a bo for things like texture image_override. * bo->offset already includes fb_location */ -struct bo_legacy *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, +struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, int size, uint32_t offset) { @@ -954,6 +954,6 @@ struct bo_legacy *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, boml->nhandle = bo->base.handle + 1; } radeon_bo_ref(&(bo->base)); - return bo; + return &(bo->base); } diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h index b57d6df9aa..455adebc09 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h @@ -42,7 +42,7 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom); void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom); unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo); -struct bo_legacy *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, +struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, int size, uint32_t offset); -- cgit v1.2.3 From d534648d904da71e604babcf408c00eae7922d16 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sun, 26 Jul 2009 12:57:41 +0200 Subject: radeon: constify some parameters --- src/mesa/drivers/dri/radeon/radeon_dma.c | 10 +++++----- src/mesa/drivers/dri/radeon/radeon_dma.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index 48114a0012..dbab7b1000 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -52,7 +52,7 @@ do { \ } while (0) #endif -static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) +static void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count) { int i; @@ -70,7 +70,7 @@ static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) } } -void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) +void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count) { int i; @@ -89,7 +89,7 @@ void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) } } -void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) +void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count) { int i; @@ -110,7 +110,7 @@ void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) } } -static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count) +static void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count) { int i; @@ -132,7 +132,7 @@ static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count) } void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, - GLvoid * data, int size, int stride, int count) + const GLvoid * data, int size, int stride, int count) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); uint32_t *out; diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h index 06e388fc1d..581847eca3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.h +++ b/src/mesa/drivers/dri/radeon/radeon_dma.h @@ -33,11 +33,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef RADEON_DMA_H #define RADEON_DMA_H -void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count); -void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count); +void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count); +void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count); void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, - GLvoid * data, int size, int stride, int count); + const GLvoid * data, int size, int stride, int count); void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size); void radeonAllocDmaRegion(radeonContextPtr rmesa, -- cgit v1.2.3 From 2233ac61e1a690f47a7d4a9d0894c1c20c9c330f Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 1 Aug 2009 15:11:57 +0200 Subject: radeon: export emitvec* functions --- src/mesa/drivers/dri/radeon/radeon_dma.c | 4 ++-- src/mesa/drivers/dri/radeon/radeon_dma.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index dbab7b1000..7144abbe00 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -52,7 +52,7 @@ do { \ } while (0) #endif -static void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count) +void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count) { int i; @@ -110,7 +110,7 @@ void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count) } } -static void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count) +void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count) { int i; diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h index 581847eca3..55509ed00c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.h +++ b/src/mesa/drivers/dri/radeon/radeon_dma.h @@ -33,8 +33,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef RADEON_DMA_H #define RADEON_DMA_H +void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count); void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count); void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count); +void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count); void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, const GLvoid * data, int size, int stride, int count); -- cgit v1.2.3 From 6bcbeb02d61442919a2ae4dfd642547e5f7b1439 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 15:00:52 +0200 Subject: radeon: add VBO support (not enabled yet) --- src/mesa/drivers/dri/r300/Makefile | 3 +- .../drivers/dri/radeon/radeon_buffer_objects.c | 217 +++++++++++++++++++++ .../drivers/dri/radeon/radeon_buffer_objects.h | 52 +++++ 3 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c create mode 100644 src/mesa/drivers/dri/radeon/radeon_buffer_objects.h (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 95c6765bc4..2390d1896a 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -36,7 +36,8 @@ RADEON_COMMON_SOURCES = \ radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c + radeon_fbo.c \ + radeon_buffer_objects.c DRIVER_SOURCES = \ radeon_screen.c \ diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c new file mode 100644 index 0000000000..1420636863 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -0,0 +1,217 @@ +/* + * Copyright 2009 Maciej Cencora + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_buffer_objects.h" + +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/bufferobj.h" + +#include "radeon_common.h" + +struct radeon_buffer_object * +get_radeon_buffer_object(struct gl_buffer_object *obj) +{ + return (struct radeon_buffer_object *) obj; +} + +static struct gl_buffer_object * +radeonNewBufferObject(GLcontext * ctx, + GLuint name, + GLenum target) +{ + struct radeon_buffer_object *obj = CALLOC_STRUCT(radeon_buffer_object); + + _mesa_initialize_buffer_object(&obj->Base, name, target); + + obj->bo = NULL; + + return &obj->Base; +} + +/** + * Called via glDeleteBuffersARB(). + */ +static void +radeonDeleteBufferObject(GLcontext * ctx, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + if (obj->Pointer) { + radeon_bo_unmap(radeon_obj->bo); + } + + if (radeon_obj->bo) { + radeon_bo_unref(radeon_obj->bo); + } + + _mesa_free(radeon_obj); +} + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. If data is NULL, + * memory will be allocated, but no copy will occur. + * Called via glBufferDataARB(). + */ +static void +radeonBufferData(GLcontext * ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid * data, + GLenum usage, + struct gl_buffer_object *obj) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + radeon_obj->Base.Size = size; + radeon_obj->Base.Usage = usage; + + if (radeon_obj->bo != NULL) { + radeon_bo_unref(radeon_obj->bo); + radeon_obj->bo = NULL; + } + + if (size != 0) { + radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom, + 0, + size, + 32, + RADEON_GEM_DOMAIN_GTT, + 0); + + if (data != NULL) { + radeon_bo_map(radeon_obj->bo, GL_TRUE); + + _mesa_memcpy(radeon_obj->bo->ptr, data, size); + + radeon_bo_unmap(radeon_obj->bo); + } + } +} + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). + */ +static void +radeonBufferSubData(GLcontext * ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + radeon_bo_map(radeon_obj->bo, GL_TRUE); + + _mesa_memcpy(radeon_obj->bo->ptr + offset, data, size); + + radeon_bo_unmap(radeon_obj->bo); +} + +/** + * Called via glGetBufferSubDataARB() + */ +static void +radeonGetBufferSubData(GLcontext * ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + radeon_bo_map(radeon_obj->bo, GL_FALSE); + + _mesa_memcpy(data, radeon_obj->bo->ptr + offset, size); + + radeon_bo_unmap(radeon_obj->bo); +} + +/** + * Called via glMapBufferARB() + */ +static void * +radeonMapBuffer(GLcontext * ctx, + GLenum target, + GLenum access, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + if (access == GL_WRITE_ONLY_ARB) { + radeonFlush(ctx); + } + + if (radeon_obj->bo == NULL) { + obj->Pointer = NULL; + return NULL; + } + + radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB); + + return obj->Pointer = radeon_obj->bo->ptr; +} + + +/** + * Called via glUnmapBufferARB() + */ +static GLboolean +radeonUnmapBuffer(GLcontext * ctx, + GLenum target, + struct gl_buffer_object *obj) +{ + struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + + if (radeon_obj->bo != NULL) { + radeon_bo_unmap(radeon_obj->bo); + obj->Pointer = NULL; + } + + return GL_TRUE; +} + +void +radeonInitBufferObjectFuncs(struct dd_function_table *functions) +{ + functions->NewBufferObject = radeonNewBufferObject; + functions->DeleteBuffer = radeonDeleteBufferObject; + functions->BufferData = radeonBufferData; + functions->BufferSubData = radeonBufferSubData; + functions->GetBufferSubData = radeonGetBufferSubData; + functions->MapBuffer = radeonMapBuffer; + functions->UnmapBuffer = radeonUnmapBuffer; +} diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h new file mode 100644 index 0000000000..d681960825 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h @@ -0,0 +1,52 @@ +/* + * Copyright 2009 Maciej Cencora + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_BUFFER_OBJECTS_H +#define RADEON_BUFFER_OBJECTS_H + +#include "main/mtypes.h" + +struct radeon_bo; + +/** + * Radeon vertex/pixel buffer object, derived from Mesa's gl_buffer_object. + */ +struct radeon_buffer_object +{ + struct gl_buffer_object Base; + struct radeon_bo *bo; +}; + +struct radeon_buffer_object * +get_radeon_buffer_object(struct gl_buffer_object *obj); + +/** + * Hook the bufferobject implementation into mesa: + */ +void radeonInitBufferObjectFuncs(struct dd_function_table *functions); + +#endif -- cgit v1.2.3 From e029c91fd32f934161dad05ffc46a949c70c79db Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 1 Aug 2009 12:00:35 +0200 Subject: radeon: handle debug versions of radeon_bo_open --- src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index 1420636863..69556923b6 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -100,13 +100,22 @@ radeonBufferData(GLcontext * ctx, } if (size != 0) { +#ifdef RADEON_DEBUG_BO + radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom, + 0, + size, + 32, + RADEON_GEM_DOMAIN_GTT, + 0, + "Radeon Named object"); +#else radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, size, 32, RADEON_GEM_DOMAIN_GTT, 0); - +#endif if (data != NULL) { radeon_bo_map(radeon_obj->bo, GL_TRUE); -- cgit v1.2.3 From f2daded8123c0d82e4cd29710a5b2dfcc99068a1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 15 Aug 2009 21:34:17 +1000 Subject: radeon space: realign with drm space check code --- src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c b/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c index 5a8df7bb8c..89cbbb5a6b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c @@ -82,7 +82,7 @@ static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct ra if (write_domain == RADEON_GEM_DOMAIN_VRAM) { sizes->op_read -= bo->size; sizes->op_vram_write += bo->size; - } else if (write_domain == RADEON_GEM_DOMAIN_VRAM) { + } else if (write_domain == RADEON_GEM_DOMAIN_GTT) { sizes->op_read -= bo->size; sizes->op_gart_write += bo->size; } -- cgit v1.2.3 From 8d60c0b7514dad075e5d46448614e8e8c5c230a7 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Thu, 11 Jun 2009 16:00:03 +0200 Subject: r300: clear not_flushed OQ list after flush --- src/mesa/drivers/dri/r300/r300_ioctl.c | 13 ++++++++++++- src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 2 +- src/mesa/drivers/dri/radeon/radeon_common.c | 4 ++-- 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 7558f9e225..da801f42e4 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/macros.h" #include "main/context.h" +#include "main/simple_list.h" #include "swrast/swrast.h" #include "radeon_common.h" @@ -56,6 +57,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_reg.h" #include "r300_emit.h" #include "r300_context.h" +#include "r300_queryobj.h" #include "vblank.h" @@ -753,10 +755,19 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) } } +static void r300Flush(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + radeonFlush(ctx); + + make_empty_list(&r300->query.not_flushed_head); +} + void r300InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r300Clear; functions->Finish = radeonFinish; - functions->Flush = radeonFlush; + functions->Flush = r300Flush; } diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index 69556923b6..827f1d253d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -181,7 +181,7 @@ radeonMapBuffer(GLcontext * ctx, struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); if (access == GL_WRITE_ONLY_ARB) { - radeonFlush(ctx); + ctx->Driver.Flush(ctx); } if (radeon_obj->bo == NULL) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 330c2c8a86..0614c89459 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -880,7 +880,7 @@ void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei he if (!radeon->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { if (radeon->is_front_buffer_rendering) { - radeonFlush(ctx); + ctx->Driver.Flush(ctx); } radeon_update_renderbuffers(driContext, driContext->driDrawablePriv); if (driContext->driDrawablePriv != driContext->driReadablePriv) @@ -1211,7 +1211,7 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa) rmesa->cmdbuf.size = size; radeon_cs_space_set_flush(rmesa->cmdbuf.cs, - (void (*)(void *))radeonFlush, rmesa->glCtx); + (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx); if (!rmesa->radeonScreen->kernel_mm) { radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]); -- cgit v1.2.3 From d2b1b9e8d5407e87fc2a6276568088115c28029f Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 15 Aug 2009 15:10:29 +0200 Subject: radeon: add flag for drm OQ support --- src/mesa/drivers/dri/radeon/radeon_screen.c | 12 +++++++----- src/mesa/drivers/dri/radeon/radeon_screen.h | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 7b759661ca..c8d491621a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -999,6 +999,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13); screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15); screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); + screen->drmSupportsOcclusionQueries = (sPriv->drm_version.minor >= 30); } ret = radeon_set_screen_flags(screen, dri_priv->deviceID); @@ -1094,7 +1095,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) /* +r6/r7 */ if(screen->chip_family >= CHIP_FAMILY_R600) { - if (ret) + if (ret) { FREE( screen ); fprintf(stderr, "Unable to get fb location need newer drm\n"); @@ -1107,18 +1108,18 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } else { - if (ret) + if (ret) { if (screen->chip_family < CHIP_FAMILY_RS600 && !screen->kernel_mm) screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; - else + else { FREE( screen ); fprintf(stderr, "Unable to get fb location need newer drm\n"); return NULL; } - } - else + } + else { screen->fbLocation = (temp & 0xffff) << 16; } @@ -1298,6 +1299,7 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) screen->drmSupportsPointSprites = 1; screen->drmSupportsCubeMapsR100 = 1; screen->drmSupportsVertexProgram = 1; + screen->drmSupportsOcclusionQueries = 1; screen->irq = 1; ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 2a2f6b1b0b..f0dd46b0b1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -99,6 +99,7 @@ typedef struct radeon_screen { GLboolean drmSupportsPointSprites; /* need radeon kernel module >= 1.13 */ GLboolean drmSupportsCubeMapsR100; /* need radeon kernel module >= 1.15 */ GLboolean drmSupportsVertexProgram; /* need radeon kernel module >= 1.25 */ + GLboolean drmSupportsOcclusionQueries; /* need radeon kernel module >= 1.30 */ GLboolean depthHasSurface; /* Configuration cache with default values for all contexts */ -- cgit v1.2.3 From 743c4af5cdc4c8634d1a26055a8d70f933c88024 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 17 Aug 2009 19:09:15 +1000 Subject: radeon: turn off bo debugging --- src/mesa/drivers/dri/radeon/radeon_common_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index f8e1a25c9f..549395a793 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -596,7 +596,7 @@ extern int RADEON_DEBUG; #ifndef HAVE_LIBDRM_RADEON #ifndef RADEON_DEBUG_BO -#define RADEON_DEBUG_BO 1 +#define RADEON_DEBUG_BO 0 #endif #endif -- cgit v1.2.3 From c3380ded10200f2df0cfba4abbe9a9eb892f7cbb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 17 Aug 2009 15:42:19 -0400 Subject: radeon: remove RADEON_DEBUG_BO stuff This stuff was a vestige of the r600 bring up and now mostly serves to periodically break the build. --- src/mesa/drivers/dri/r200/r200_cmdbuf.c | 6 -- src/mesa/drivers/dri/r600/r600_emit.c | 23 ++---- src/mesa/drivers/dri/radeon/radeon_bo_drm.h | 26 ------ src/mesa/drivers/dri/radeon/radeon_bo_legacy.c | 61 ++------------ .../drivers/dri/radeon/radeon_buffer_objects.c | 11 +-- .../drivers/dri/radeon/radeon_common_context.c | 96 +--------------------- .../drivers/dri/radeon/radeon_common_context.h | 6 -- src/mesa/drivers/dri/radeon/radeon_dma.c | 6 -- src/mesa/drivers/dri/radeon/radeon_fbo.c | 10 --- src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 8 -- 10 files changed, 16 insertions(+), 237 deletions(-) (limited to 'src/mesa/drivers/dri/radeon') diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index 5d0f367b38..14d6bc19c9 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -240,15 +240,9 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, radeonEmitState(&rmesa->radeon); -#ifdef RADEON_DEBUG_BO - rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, - 0, R200_ELT_BUF_SZ, 4, - RADEON_GEM_DOMAIN_GTT, 0, "ELT"); -#else rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, 0, R200_ELT_BUF_SZ, 4, RADEON_GEM_DOMAIN_GTT, 0); -#endif rmesa->radeon.tcl.elt_dma_offset = 0; rmesa->tcl.elt_used = min_nr * 2; diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c index 685f7fe473..b695ed9583 100644 --- a/src/mesa/drivers/dri/r600/r600_emit.c +++ b/src/mesa/drivers/dri/r600/r600_emit.c @@ -63,26 +63,15 @@ GLboolean r600EmitShader(GLcontext * ctx, struct radeon_bo * pbo; uint32_t *out; -shader_again_alloc: -#ifdef RADEON_DEBUG_BO - pbo = radeon_bo_open(radeonctx->radeonScreen->bom, - 0, - sizeinDWORD * 4, - 256, - RADEON_GEM_DOMAIN_GTT, - 0, - szShaderUsage); -#else +shader_again_alloc: pbo = radeon_bo_open(radeonctx->radeonScreen->bom, - 0, - sizeinDWORD * 4, - 256, + 0, + sizeinDWORD * 4, + 256, RADEON_GEM_DOMAIN_GTT, - 0); -#endif /* RADEON_DEBUG_BO */ + 0); - if (!pbo) - { + if (!pbo) { rcommonFlushCmdBuf(radeonctx, __FUNCTION__); goto shader_again_alloc; } diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h index d52fb017d8..8789e3ab09 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h +++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h @@ -34,10 +34,6 @@ #include //#include "radeon_track.h" -#ifndef RADEON_DEBUG_BO -#define RADEON_DEBUG_BO 0 -#endif - /* bo object */ #define RADEON_BO_FLAGS_MACRO_TILE 1 #define RADEON_BO_FLAGS_MICRO_TILE 2 @@ -61,22 +57,12 @@ struct radeon_bo { /* bo functions */ struct radeon_bo_funcs { -#ifdef RADEON_DEBUG_BO - struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, - uint32_t handle, - uint32_t size, - uint32_t alignment, - uint32_t domains, - uint32_t flags, - char * szBufUsage); -#else struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, uint32_t handle, uint32_t size, uint32_t alignment, uint32_t domains, uint32_t flags); -#endif /* RADEON_DEBUG_BO */ void (*bo_ref)(struct radeon_bo *bo); struct radeon_bo *(*bo_unref)(struct radeon_bo *bo); int (*bo_map)(struct radeon_bo *bo, int write); @@ -114,20 +100,13 @@ static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom, uint32_t alignment, uint32_t domains, uint32_t flags, -#ifdef RADEON_DEBUG_BO - char * szBufUsage, -#endif /* RADEON_DEBUG_BO */ const char *file, const char *func, int line) { struct radeon_bo *bo; -#ifdef RADEON_DEBUG_BO - bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags, szBufUsage); -#else bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags); -#endif /* RADEON_DEBUG_BO */ #ifdef RADEON_BO_TRACK if (bo) { @@ -210,13 +189,8 @@ static inline int radeon_bo_is_static(struct radeon_bo *bo) return 0; } -#ifdef RADEON_DEBUG_BO -#define radeon_bo_open(bom, h, s, a, d, f, u)\ - _radeon_bo_open(bom, h, s, a, d, f, u, __FILE__, __FUNCTION__, __LINE__) -#else #define radeon_bo_open(bom, h, s, a, d, f)\ _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__) -#endif /* RADEON_DEBUG_BO */ #define radeon_bo_ref(bo)\ _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__) #define radeon_bo_unref(bo)\ diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c index d6d22cb4c3..5575da6971 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -69,9 +69,6 @@ struct bo_legacy { void *ptr; struct bo_legacy *next, *prev; struct bo_legacy *pnext, *pprev; -#ifdef RADEON_DEBUG_BO - char szBufUsage[16]; -#endif /* RADEON_DEBUG_BO */ }; struct bo_manager_legacy { @@ -289,12 +286,7 @@ static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml, uint32_t size, uint32_t alignment, uint32_t domains, -#ifdef RADEON_DEBUG_BO - uint32_t flags, - char * szBufUsage) -#else uint32_t flags) -#endif /* RADEON_DEBUG_BO */ { struct bo_legacy *bo_legacy; static int pgsize; @@ -327,10 +319,6 @@ static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml, bo_legacy->next->prev = bo_legacy; } -#ifdef RADEON_DEBUG_BO - sprintf(bo_legacy->szBufUsage, "%s", szBufUsage); -#endif /* RADEON_DEBUG_BO */ - return bo_legacy; } @@ -429,12 +417,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, uint32_t size, uint32_t alignment, uint32_t domains, -#ifdef RADEON_DEBUG_BO - uint32_t flags, - char * szBufUsage) -#else uint32_t flags) -#endif /* RADEON_DEBUG_BO */ { struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; struct bo_legacy *bo_legacy; @@ -451,11 +434,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, } return NULL; } -#ifdef RADEON_DEBUG_BO - bo_legacy = bo_allocate(boml, size, alignment, domains, flags, szBufUsage); -#else bo_legacy = bo_allocate(boml, size, alignment, domains, flags); -#endif /* RADEON_DEBUG_BO */ bo_legacy->static_bo = 0; r = legacy_new_handle(boml, &bo_legacy->base.handle); if (r) { @@ -713,14 +692,8 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo, int retries = 0; if (bo_legacy->map_count) { -#ifdef RADEON_DEBUG_BO - fprintf(stderr, "bo(%p, %d, %s) is mapped (%d) can't valide it.\n", - bo, bo->size, bo_legacy->szBufUsage, bo_legacy->map_count); -#else fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n", bo, bo->size, bo_legacy->map_count); -#endif /* RADEON_DEBUG_BO */ - return -EINVAL; } if (bo_legacy->static_bo || bo_legacy->validated) { @@ -792,21 +765,13 @@ void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom) } static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy *bom, - int size, -#ifdef RADEON_DEBUG_BO - uint32_t offset, - char * szBufUsage) -#else - uint32_t offset) -#endif /* RADEON_DEBUG_BO */ + int size, + uint32_t offset) { struct bo_legacy *bo; -#ifdef RADEON_DEBUG_BO - bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0, szBufUsage); -#else bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ + if (bo == NULL) return NULL; bo->static_bo = 1; @@ -867,11 +832,8 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc size = 4096*4096*4; /* allocate front */ -#ifdef RADEON_DEBUG_BO - bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset, "FRONT BUF"); -#else bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset); -#endif /* RADEON_DEBUG_BO */ + if (!bo) { radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); return NULL; @@ -881,11 +843,8 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc } /* allocate back */ -#ifdef RADEON_DEBUG_BO - bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset, "BACK BUF"); -#else bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset); -#endif /* RADEON_DEBUG_BO */ + if (!bo) { radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); return NULL; @@ -895,11 +854,8 @@ struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *sc } /* allocate depth */ -#ifdef RADEON_DEBUG_BO - bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset, "Z BUF"); -#else bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset); -#endif /* RADEON_DEBUG_BO */ + if (!bo) { radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); return NULL; @@ -939,11 +895,8 @@ struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom, struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; struct bo_legacy *bo; -#ifdef RADEON_DEBUG_BO - bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0, "fake bo"); -#else bo = bo_allocate(boml, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ + if (bo == NULL) return NULL; bo->static_bo = 1; diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index 827f1d253d..e8ae51e6ea 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -100,22 +100,13 @@ radeonBufferData(GLcontext * ctx, } if (size != 0) { -#ifdef RADEON_DEBUG_BO - radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom, - 0, - size, - 32, - RADEON_GEM_DOMAIN_GTT, - 0, - "Radeon Named object"); -#else radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, size, 32, RADEON_GEM_DOMAIN_GTT, 0); -#endif + if (data != NULL) { radeon_bo_map(radeon_obj->bo, GL_TRUE); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index f71dc1cb23..c0abcbfa21 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -377,88 +377,48 @@ radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon, if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->frontOffset, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Front Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->frontOffset, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->backOffset, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Back Buf"); -#else - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->backOffset, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->backPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->depthOffset, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Z Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->depthOffset, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Stencil Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; @@ -481,16 +441,6 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->frontOffset + - radeon->radeonScreen->fbLocation, - size, - 4096, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Front Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->frontOffset + radeon->radeonScreen->fbLocation, @@ -498,23 +448,12 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, 4096, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->backOffset + - radeon->radeonScreen->fbLocation, - size, - 4096, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Back Buf"); -#else rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->backOffset + radeon->radeonScreen->fbLocation, @@ -522,55 +461,32 @@ radeon_make_renderbuffer_current(radeonContextPtr radeon, 4096, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->backPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->depthOffset + - radeon->radeonScreen->fbLocation, - size, - 4096, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Z Buf"); -#else - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset + radeon->radeonScreen->fbLocation, size, 4096, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; } if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) { if (!rb->bo) { -#ifdef RADEON_DEBUG_BO rb->bo = radeon_bo_open(radeon->radeonScreen->bom, - radeon->radeonScreen->depthOffset + - radeon->radeonScreen->fbLocation, - size, - 4096, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Stencil Buf"); -#else - rb->bo = radeon_bo_open(radeon->radeonScreen->bom, radeon->radeonScreen->depthOffset + radeon->radeonScreen->fbLocation, size, 4096, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ } rb->cpp = radeon->radeonScreen->cpp; rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; @@ -755,22 +671,14 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) } else { uint32_t tiling_flags = 0, pitch = 0; int ret; -#ifdef RADEON_DEBUG_BO - bo = radeon_bo_open(radeon->radeonScreen->bom, - buffers[i].name, - 0, - 0, - RADEON_GEM_DOMAIN_VRAM, - buffers[i].flags, - regname); -#else + bo = radeon_bo_open(radeon->radeonScreen->bom, buffers[i].name, 0, 0, RADEON_GEM_DOMAIN_VRAM, buffers[i].flags); -#endif /* RADEON_DEBUG_BO */ + if (bo == NULL) { fprintf(stderr, "failed to attach %s %d\n", diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index 549395a793..ee46c1f81a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -594,10 +594,4 @@ extern int RADEON_DEBUG; #define RADEON_DEBUG 0 #endif -#ifndef HAVE_LIBDRM_RADEON -#ifndef RADEON_DEBUG_BO -#define RADEON_DEBUG_BO 0 -#endif -#endif - #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index 7144abbe00..5e755c51ed 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -185,15 +185,9 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) } again_alloc: -#ifdef RADEON_DEBUG_BO - rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, - 0, size, 4, RADEON_GEM_DOMAIN_GTT, - 0, "dma.current"); -#else rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, 0, size, 4, RADEON_GEM_DOMAIN_GTT, 0); -#endif /* RADEON_DEBUG_BO */ if (!rmesa->dma.current) { rcommonFlushCmdBuf(rmesa, __FUNCTION__); diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index f05b106aaf..8303917b0b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -185,22 +185,12 @@ radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rrb->pitch = pitch * cpp; rrb->cpp = cpp; -#ifdef RADEON_DEBUG_BO - rrb->bo = radeon_bo_open(radeon->radeonScreen->bom, - 0, - size, - 0, - RADEON_GEM_DOMAIN_VRAM, - 0, - "Radeon RBO"); -#else rrb->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ rb->Width = width; rb->Height = height; return GL_TRUE; diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index d4082bf68f..eba9f5857f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -215,18 +215,10 @@ radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj * else calculate_miptree_layout_r100(rmesa, mt); -#ifdef RADEON_DEBUG_BO - mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, - 0, mt->totalsize, 1024, - RADEON_GEM_DOMAIN_VRAM, - 0, - "MIPMAP TREE"); -#else mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, 0, mt->totalsize, 1024, RADEON_GEM_DOMAIN_VRAM, 0); -#endif /* RADEON_DEBUG_BO */ return mt; } -- cgit v1.2.3