diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2009-08-18 12:20:36 -0700 |
---|---|---|
committer | Ian Romanick <ian.d.romanick@intel.com> | 2009-08-18 12:20:36 -0700 |
commit | a512985fd81c1ed4ccc5e69aaa05015cf7ff844d (patch) | |
tree | 69e6e898deaeaed2b4dfb5851707c68261c464de /src/mesa/drivers/dri/r300 | |
parent | 0b5af41c6fae2809f4567a7cecbd207e5e4f3ab5 (diff) | |
parent | c80bc3abcd3939e5e2d45aea4b01ff22bfec244b (diff) |
Merge branch 'master' into asm-shader-rework-1
Conflicts:
src/mesa/shader/arbprogparse.c
Diffstat (limited to 'src/mesa/drivers/dri/r300')
82 files changed, 4595 insertions, 4016 deletions
diff --git a/src/mesa/drivers/dri/r300/.gitignore b/src/mesa/drivers/dri/r300/.gitignore deleted file mode 100644 index 2f9cd1a987..0000000000 --- a/src/mesa/drivers/dri/r300/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -radeon_bocs_wrapper.h -radeon_bo_legacy.[ch] -radeon_chipset.h -radeon_cmdbuf.h -radeon_common.[ch] -radeon_common_context.[ch] -radeon_cs_legacy.[ch] -radeon_dma.[ch] -radeon_fbo.c -radeon_lock.[ch] -radeon_mipmap_tree.[ch] -radeon_screen.[ch] -radeon_span.[ch] -radeon_texture.[ch] -server diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index a77209074a..77b3d168f3 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -36,7 +36,8 @@ RADEON_COMMON_SOURCES = \ radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c + radeon_fbo.c \ + radeon_buffer_objects.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -48,20 +49,12 @@ DRIVER_SOURCES = \ r300_render.c \ r300_tex.c \ r300_texstate.c \ - radeon_program.c \ - radeon_program_alu.c \ - radeon_program_pair.c \ - radeon_nqssadce.c \ r300_vertprog.c \ r300_fragprog_common.c \ - r300_fragprog.c \ - r300_fragprog_swizzle.c \ - r300_fragprog_emit.c \ - r500_fragprog.c \ - r500_fragprog_emit.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ + r300_queryobj.c \ $(RADEON_COMMON_SOURCES) \ $(EGL_SOURCES) \ $(CS_SOURCES) @@ -73,54 +66,18 @@ DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \ # -DRADEON_BO_TRACK \ -Wall -SYMLINKS = \ - server/radeon_dri.c \ - server/radeon_dri.h \ - server/radeon.h \ - server/radeon_macros.h \ - server/radeon_reg.h \ - server/radeon_egl.c - -COMMON_SYMLINKS = \ - radeon_chipset.h \ - radeon_screen.c \ - radeon_screen.h \ - radeon_span.h \ - radeon_span.c \ - radeon_bo_legacy.c \ - radeon_cs_legacy.c \ - radeon_bo_legacy.h \ - radeon_cs_legacy.h \ - radeon_bocs_wrapper.h \ - radeon_lock.c \ - radeon_lock.h \ - radeon_common.c \ - radeon_common.h \ - radeon_common_context.c \ - radeon_common_context.h \ - radeon_cmdbuf.h \ - radeon_dma.c \ - radeon_dma.h \ - radeon_mipmap_tree.c \ - radeon_mipmap_tree.h \ - radeon_texture.c \ - radeon_texture.h \ - radeon_fbo.c \ - $(CS_SOURCES) - DRI_LIB_DEPS += $(RADEON_LDFLAGS) +PIPE_DRIVERS = compiler/libr300compiler.a + ##### TARGETS ##### include ../Makefile.template -server: - mkdir -p server - -$(SYMLINKS): server - @[ -e $@ ] || ln -sf ../../radeon/$@ server/ +symlinks: -$(COMMON_SYMLINKS): - @[ -e $@ ] || ln -sf ../radeon/$@ ./ +# Mark the archive phony so that we always check for recompilation +.PHONY : compiler/libr300compiler.a -symlinks: $(SYMLINKS) $(COMMON_SYMLINKS) +compiler/libr300compiler.a: + cd compiler && $(MAKE) diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile new file mode 100644 index 0000000000..d973844192 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -0,0 +1,75 @@ +# src/mesa/drivers/dri/r300/compiler/Makefile + +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = r300compiler + +C_SOURCES = \ + radeon_code.c \ + radeon_compiler.c \ + radeon_nqssadce.c \ + radeon_program.c \ + radeon_program_alu.c \ + radeon_program_pair.c \ + r3xx_fragprog.c \ + r300_fragprog.c \ + r300_fragprog_swizzle.c \ + r300_fragprog_emit.c \ + r500_fragprog.c \ + r500_fragprog_emit.c \ + r3xx_vertprog.c \ + r3xx_vertprog_dump.c \ + \ + memory_pool.c + + +### Basic defines ### + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +INCLUDES = \ + -I. \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + + +##### TARGETS ##### + +default: depend lib$(LIBNAME).a + +lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current + $(MKLIB) -o $(LIBNAME) -static $(OBJECTS) + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + +# Remove .o and backup files +clean: + rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + +# Dummy target +install: + @echo -n "" + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c new file mode 100644 index 0000000000..37aa2b6579 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c @@ -0,0 +1,95 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "memory_pool.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 4 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + if (pool->head + bytes > pool->end) + refill_pool(pool); + + assert(pool->head + bytes <= pool->end); + + void * ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h new file mode 100644 index 0000000000..ce23c319ad --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h @@ -0,0 +1,49 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef MEMORY_POOL_H +#define MEMORY_POOL_H + +struct memory_block; + +/** + * Provides a pool of memory that can quickly be allocated from, at the + * cost of being unable to explicitly free one of the allocated blocks. + * Instead, the entire pool can be freed at once. + * + * The idea is to allow one to quickly allocate a flexible amount of + * memory during operations like shader compilation while avoiding + * reference counting headaches. + */ +struct memory_pool { + unsigned char * head; + unsigned char * end; + unsigned int total_allocated; + struct memory_block * blocks; +}; + + +void memory_pool_init(struct memory_pool * pool); +void memory_pool_destroy(struct memory_pool * pool); +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + +#endif /* MEMORY_POOL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c new file mode 100644 index 0000000000..6c9fba4914 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include "shader/prog_parameter.h" + +#include "../r300_reg.h" + +static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +{ + struct prog_src_register reg = { 0, }; + + reg.File = PROGRAM_STATE_VAR; + reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +GLboolean r300_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + + if (inst->I.Opcode != OPCODE_TEX && + inst->I.Opcode != OPCODE_TXB && + inst->I.Opcode != OPCODE_TXP && + inst->I.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->I.Opcode != OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + inst->I.Opcode = OPCODE_MOV; + + if (comparefunc == GL_ALWAYS) { + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + } + + return GL_TRUE; + } else { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + inst_cmp->I.DstReg = inst->I.DstReg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = rc_find_free_temporary(c); + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; + inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + else + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + + inst_cmp->I.Opcode = OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; + inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + } + } + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) { + struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->I.Opcode = OPCODE_MUL; + inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit); + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index; + } + + /* Cannot write texture to output registers or with masks */ + if (inst->I.Opcode != OPCODE_KIL && + (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg = inst->I.DstReg; + inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + } + + + /* Cannot read texture coordinate from constants file */ + if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + } + + return GL_TRUE; +} + +/* just some random things... */ +void r300FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r300_fragment_program_code *code = &c->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; + int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); + + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; + i <= tex_offset + tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].alpha_addr); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].rgb_inst, arga[0], arga[1], + arga[2], code->alu.inst[i].alpha_inst); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h new file mode 100644 index 0000000000..0ac46dbd9c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "radeon_compiler.h" +#include "radeon_program.h" + + +extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index b75656e7ee..305dc074ee 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -40,57 +40,43 @@ #include "r300_fragprog.h" +#include "../r300_reg.h" + #include "radeon_program_pair.h" #include "r300_fragprog_swizzle.h" -#include "r300_reg.h" +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + #define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r300_fragment_program_code *code = &c->code->r300 + struct r300_emit_state * emit = (struct r300_emit_state*)data; \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 #define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ } while(0) -static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == index) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R300_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = index; - } - - return GL_TRUE; -} - - /** * Mark a temporary register as used. */ static void use_temporary(struct r300_fragment_program_code *code, GLuint index) { - if (index > code->max_temp_idx) - code->max_temp_idx = index; + if (index > code->pixsize) + code->pixsize = index; } -static GLuint translate_rgb_opcode(GLuint opcode) +static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R300_ALU_OUTC_CMP; @@ -109,7 +95,7 @@ static GLuint translate_rgb_opcode(GLuint opcode) } } -static GLuint translate_alpha_opcode(GLuint opcode) +static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R300_ALU_OUTA_CMP; @@ -145,63 +131,62 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) int ip = code->alu.length++; int j; - code->node[code->cur_node].alu_end++; - code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); - code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); if (!inst->RGB.Src[j].Constant) use_temporary(code, inst->RGB.Src[j].Index); - code->alu.inst[ip].inst1 |= src << (6*j); + code->alu.inst[ip].rgb_addr |= src << (6*j); src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); if (!inst->Alpha.Src[j].Constant) use_temporary(code, inst->Alpha.Src[j].Index); - code->alu.inst[ip].inst3 |= src << (6*j); + code->alu.inst[ip].alpha_addr |= src << (6*j); GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); arg |= inst->RGB.Arg[j].Abs << 6; arg |= inst->RGB.Arg[j].Negate << 5; - code->alu.inst[ip].inst0 |= arg << (7*j); + code->alu.inst[ip].rgb_inst |= arg << (7*j); arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); arg |= inst->Alpha.Arg[j].Abs << 6; arg |= inst->Alpha.Arg[j].Negate << 5; - code->alu.inst[ip].inst2 |= arg << (7*j); + code->alu.inst[ip].alpha_inst |= arg << (7*j); } if (inst->RGB.Saturate) - code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; if (inst->Alpha.Saturate) - code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; if (inst->RGB.WriteMask) { use_temporary(code, inst->RGB.DestIndex); - code->alu.inst[ip].inst1 |= + code->alu.inst[ip].rgb_addr |= (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); - code->node[code->cur_node].flags |= R300_RGBA_OUT; + code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.WriteMask) { use_temporary(code, inst->Alpha.DestIndex); - code->alu.inst[ip].inst3 |= + code->alu.inst[ip].alpha_addr |= (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; - code->node[code->cur_node].flags |= R300_RGBA_OUT; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT; + emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.DepthWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; - code->node[code->cur_node].flags |= R300_W_OUT; - c->fp->writes_depth = GL_TRUE; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = GL_TRUE; } return GL_TRUE; @@ -211,31 +196,50 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) /** * Finish the current node without advancing to the next one. */ -static GLboolean finish_node(struct r300_fragment_program_compiler *c) +static GLboolean finish_node(struct r300_emit_state * emit) { - struct r300_fragment_program_code *code = &c->code->r300; - struct r300_fragment_program_node *node = &code->node[code->cur_node]; + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; - if (node->alu_end < 0) { + if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct radeon_pair_instruction inst; _mesa_bzero(&inst, sizeof(inst)); - if (!emit_alu(c, &inst)) + if (!emit_alu(emit, &inst)) return GL_FALSE; } - if (node->tex_end < 0) { - if (code->cur_node == 0) { - node->tex_end = 0; - } else { - error("Node %i has no TEX instructions", code->cur_node); + unsigned alu_offset = emit->node_first_alu; + unsigned alu_end = code->alu.length - alu_offset - 1; + unsigned tex_offset = emit->node_first_tex; + unsigned tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); return GL_FALSE; } + + tex_end = 0; } else { - if (code->cur_node == 0) - code->first_node_has_tex = 1; + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; } + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + (alu_offset << R300_ALU_START_SHIFT) | + (alu_end << R300_ALU_SIZE_SHIFT) | + (tex_offset << R300_TEX_START_SHIFT) | + (tex_end << R300_TEX_SIZE_SHIFT) | + emit->node_flags; + return GL_TRUE; } @@ -248,30 +252,28 @@ static GLboolean begin_tex(void* data) { PROG_CODE; - if (code->cur_node == 0) { - if (code->node[0].alu_end < 0 && - code->node[0].tex_end < 0) - return GL_TRUE; + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return GL_TRUE; } - if (code->cur_node == 3) { + if (emit->current_node == 3) { error("Too many texture indirections"); return GL_FALSE; } - if (!finish_node(c)) + if (!finish_node(emit)) return GL_FALSE; - struct r300_fragment_program_node *node = &code->node[++code->cur_node]; - node->alu_offset = code->alu.length; - node->alu_end = -1; - node->tex_offset = code->tex.length; - node->tex_end = -1; + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; return GL_TRUE; } -static GLboolean emit_tex(void* data, struct prog_instruction* inst) +static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst) { PROG_CODE; @@ -281,31 +283,30 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) } GLuint unit = inst->TexSrcUnit; - GLuint dest = inst->DstReg.Index; + GLuint dest = inst->DestIndex; GLuint opcode; switch(inst->Opcode) { - case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; default: error("Unknown texture opcode %i", inst->Opcode); return GL_FALSE; } - if (inst->Opcode == OPCODE_KIL) { + if (inst->Opcode == RADEON_OPCODE_KIL) { unit = 0; dest = 0; } else { use_temporary(code, dest); } - use_temporary(code, inst->SrcReg[0].Index); + use_temporary(code, inst->SrcIndex); - code->node[code->cur_node].tex_end++; code->tex.inst[code->tex.length++] = - (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | + (inst->SrcIndex << R300_SRC_ADDR_SHIFT) | (dest << R300_DST_ADDR_SHIFT) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT); @@ -314,7 +315,6 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) static const struct radeon_pair_handler pair_handler = { - .EmitConst = &emit_const, .EmitPaired = &emit_alu, .EmitTex = &emit_tex, .BeginTexBlock = &begin_tex, @@ -325,20 +325,36 @@ static const struct radeon_pair_handler pair_handler = { * Final compilation step: Turn the intermediate radeon_program into * machine-readable instructions. */ -GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r300_fragment_program_code *code = &compiler->code->r300; - - _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - code->node[0].alu_end = -1; - code->node[0].tex_end = -1; + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; - if (!finish_node(compiler)) - return GL_FALSE; + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - return GL_TRUE; + radeonPairProgram(compiler, &pair_handler, &emit); + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + code->code_offset = + (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | + (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = 0; i <= emit.current_node; ++i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } } - diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index fc9d855bce..1b14cc3888 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -33,8 +33,9 @@ #include "r300_fragprog_swizzle.h" -#include "r300_reg.h" +#include "../r300_reg.h" #include "radeon_nqssadce.h" +#include "radeon_compiler.h" #define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) @@ -174,18 +175,15 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, } } - struct prog_instruction *inst; - - _mesa_insert_instructions(s->Program, s->IP, 1); - inst = s->Program->Instructions + s->IP++; - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; + struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg = dst; + inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->I.SrcReg[0] = src; + inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ - dst.WriteMask &= ~inst->DstReg.WriteMask; + dst.WriteMask &= ~inst->I.DstReg.WriteMask; } } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h index 231bf4eef5..231bf4eef5 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c new file mode 100644 index 0000000000..d39b82be71 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,149 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void nqssadce_init(struct nqssadce_state* s) +{ + struct r300_fragment_program_compiler * c = s->UserData; + s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW; + s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W; +} + +static void rewrite_depth_out(struct r300_fragment_program_compiler * c) +{ + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction * inst = &rci->I; + + if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } + } +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + rewrite_depth_out(c); + + if (c->is_r500) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(&c->Base, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, c }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(&c->Base, 3, transformations); + } + + if (c->Base.Debug) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + rc_print_program(&c->Base.Program); + fflush(stdout); + } + + if (c->is_r500) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle + }; + radeonNqssaDce(&c->Base, &nqssadce, c); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle + }; + radeonNqssaDce(&c->Base, &nqssadce, c); + } + + if (c->Base.Debug) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + rc_print_program(&c->Base.Program); + fflush(stdout); + } + + if (c->is_r500) { + r500BuildFragmentProgramHwCode(c); + } else { + r300BuildFragmentProgramHwCode(c); + } + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + + if (c->Base.Debug) { + if (c->is_r500) { + r500FragmentProgramDump(c->code); + } else { + r300FragmentProgramDump(c->code); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c new file mode 100644 index 0000000000..fc9c8f805a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,615 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "../r300_reg.h" + +#include "radeon_nqssadce.h" +#include "radeon_program.h" +#include "radeon_program_alu.h" + +#include "shader/prog_print.h" + + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & WRITEMASK_XYZW; +} + +static unsigned long t_dst_class(gl_register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case PROGRAM_OUTPUT: + return PVS_DST_REG_OUT; + case PROGRAM_ADDRESS: + return PVS_DST_REG_A0; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(gl_register_file file) +{ + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case PROGRAM_INPUT: + return PVS_SRC_REG_INPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + return PVS_SRC_REG_CONSTANT; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return GL_FALSE; + if (aclass == PVS_SRC_REG_TEMPORARY) + return GL_FALSE; + + if (a.RelAddr || b.RelAddr) + return GL_TRUE; + if (a.Index != b.Index) + return GL_TRUE; + + return GL_FALSE; +} + +static INLINE unsigned long t_swizzle(GLubyte swizzle) +{ + /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + if (src->File == PROGRAM_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src->RelAddr << 4); +} + +static GLboolean valid_dst(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + GLuint hw_opcode, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + + +static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *rci; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + + compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct prog_instruction *vpi = &rci->I; + GLuint *inst = compiler->code->body.d + compiler->code->length; + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + switch (vpi->Opcode) { + case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + default: + rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + GLuint Allocated:1; + GLuint HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *inst; + GLuint num_orig_temps = 0; + GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + struct temporary_allocation * ta; + GLuint i, j; + + compiler->code->num_temporaries = 0; + memset(hwtemps, 0, sizeof(hwtemps)); + + /* Pass 1: Count original temporaries and allocate structures */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + if (inst->I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->I.SrcReg[i].Index + 1; + } + } + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + if (inst->I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) + ta[inst->I.SrcReg[i].Index].LastRead = inst; + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + for (i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.SrcReg[i].Index; + inst->I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = GL_FALSE; + } + } + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { + GLuint orig = inst->I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + if (!hwtemps[j]) + break; + } + if (j >= VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Out of hw temporaries\n"); + } else { + ta[orig].Allocated = GL_TRUE; + ta[orig].HwTemp = j; + hwtemps[j] = GL_TRUE; + + if (j >= compiler->code->num_temporaries) + compiler->code->num_temporaries = j + 1; + } + } + + inst->I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static GLboolean transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); + + if (num_operands == 3) { + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) + || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; + + reset_srcreg(&inst->I.SrcReg[2]); + inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[2].Index = tmpreg; + } + } + + if (num_operands >= 2) { + if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = tmpreg; + inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; + + reset_srcreg(&inst->I.SrcReg[1]); + inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[1].Index = tmpreg; + } + } + + return GL_TRUE; +} + +static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) +{ + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = i; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst->I.SrcReg[0].File = PROGRAM_CONSTANT; + inst->I.SrcReg[0].Index = 0; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void nqssadceInit(struct nqssadce_state* s) +{ + struct r300_vertex_program_compiler * compiler = s->UserData; + int i; + + for(i = 0; i < VERT_RESULT_MAX; ++i) { + if (compiler->RequiredOutputs & (1 << i)) + s->Outputs[i].Sourced = WRITEMASK_XYZW; + } +} + +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +{ + (void) opcode; + (void) reg; + + return GL_TRUE; +} + + + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) +{ + addArtificialOutputs(compiler); + + { + struct radeon_program_transformation transformations[] = { + { &r300_transform_vertex_alu, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stdout); + } + + { + /* Note: This pass has to be done seperately from ALU rewrite, + * otherwise non-native ALU instructions with source conflits + * will not be treated properly. + */ + struct radeon_program_transformation transformations[] = { + { &transform_source_conflicts, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after source conflict resolve:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stdout); + } + + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(&compiler->Base, &nqssadce, compiler); + + /* We need this step for reusing temporary registers */ + allocate_temporary_registers(compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stdout); + } + } + + translate_vertex_program(compiler); + + rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants); + + compiler->code->InputsRead = compiler->Base.Program.InputsRead; + compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; + + if (compiler->Base.Debug) { + printf("Final vertex program code:\n"); + r300_vertex_program_dump(compiler->code); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 0000000000..39cc6953ba --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,177 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_code.h" + +#include <stdio.h> + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static void r300_vs_op_dump(uint32_t op) +{ + printf(" dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if (op & 0x80) { + if (op & 0x1) { + printf("PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + printf(" PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + printf("%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + printf("%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +{ + unsigned instrcount = vs->length / 4; + unsigned i; + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + printf("%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + printf(" src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } +} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 4d58cf2162..7e2faed690 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -27,158 +27,141 @@ #include "r500_fragprog.h" -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} +#include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; struct prog_src_register reg = { 0, }; - fail_value_tokens[2] = tmu; reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); reg.Swizzle = SWIZZLE_WWWW; return reg; } /** * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands */ GLboolean r500_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) { struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) + + if (inst->I.Opcode != OPCODE_TEX && + inst->I.Opcode != OPCODE_TXB && + inst->I.Opcode != OPCODE_TXP && + inst->I.Opcode != OPCODE_KIL) return GL_FALSE; /* ARB_shadow & EXT_shadow_funcs */ - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + if (inst->I.Opcode != OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); + inst->I.Opcode = OPCODE_MOV; - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + inst->I.SrcReg[0].File = PROGRAM_BUILTIN; + inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); } + return GL_TRUE; + } else { + GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); + struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); + int pass, fail; + + inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + inst_cmp->I.DstReg = inst->I.DstReg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = rc_find_free_temporary(c); + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + + inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; + inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + else + inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + + inst_cmp->I.Opcode = OPCODE_CMP; + /* DstReg has been filled out above */ + inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; + inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; + inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); } + } - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { - int tempreg = radeonFindFreeTemporary(t); + /* Cannot write texture to output registers */ + if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg = inst->I.DstReg; + inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; } - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } + /* Cannot read texture coordinate from constants file */ + if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); + inst_mov->I.Opcode = OPCODE_MOV; + inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mov->I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; + reset_srcreg(&inst->I.SrcReg[0]); + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; } return GL_TRUE; @@ -249,7 +232,6 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) */ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { - struct prog_instruction *inst; GLuint negatebase[2] = { 0, 0 }; int i; @@ -260,20 +242,16 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } - _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); - inst = s->Program->Instructions + s->IP; - for(i = 0; i <= 1; ++i) { if (!negatebase[i]) continue; - inst->Opcode = OPCODE_MOV; - inst->DstReg = dst; - inst->DstReg.WriteMask = negatebase[i]; - inst->SrcReg[0] = src; - inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; - inst++; - s->IP++; + struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg = dst; + inst->I.DstReg.WriteMask = negatebase[i]; + inst->I.SrcReg[0] = src; + inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; } } @@ -375,9 +353,9 @@ static char *to_texop(int val) return NULL; } -void r500FragmentProgramDump(union rX00_fragment_program_code *c) +void r500FragmentProgramDump(struct rX00_fragment_program_code *c) { - struct r500_fragment_program_code *code = &c->r500; + struct r500_fragment_program_code *code = &c->code.r500; fprintf(stderr, "R500 Fragment Program:\n--------\n"); int n; @@ -385,15 +363,6 @@ void r500FragmentProgramDump(union rX00_fragment_program_code *c) uint32_t inst0; char *str = NULL; - if (code->const_nr) { - fprintf(stderr, "--------\nConstants:\n"); - for (n = 0; n < code->const_nr; n++) { - fprintf(stderr, "Constant %d: %i[%i]\n", n, - code->constant[n].File, code->constant[n].Index); - } - fprintf(stderr, "--------\n"); - } - for (n = 0; n < code->inst_end+1; n++) { inst0 = inst = code->inst[n].inst0; fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 1179bf6607..9091f65cd2 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -36,17 +36,20 @@ #include "shader/prog_parameter.h" #include "shader/prog_instruction.h" -#include "r300_context.h" +#include "radeon_compiler.h" #include "radeon_nqssadce.h" -extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); +extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); -extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); +extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); -extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); +extern GLboolean r500_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); #endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 30f4514897..d694725c9b 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,47 +45,22 @@ #include "r500_fragprog.h" +#include "../r300_reg.h" + #include "radeon_program_pair.h" #define PROG_CODE \ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r500_fragment_program_code *code = &c->code->r500 + struct r500_fragment_program_code *code = &c->code->code.r500 #define error(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ } while(0) -/** - * Callback to register hardware constants. - */ -static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) -{ - PROG_CODE; - - for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { - if (code->constant[*hwindex].File == file && - code->constant[*hwindex].Index == idx) - break; - } - - if (*hwindex >= code->const_nr) { - if (*hwindex >= R500_PFS_NUM_CONST_REGS) { - error("Out of hw constants!\n"); - return GL_FALSE; - } - - code->const_nr++; - code->constant[*hwindex].File = file; - code->constant[*hwindex].Index = idx; - } - - return GL_TRUE; -} - -static GLuint translate_rgb_op(GLuint opcode) +static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; @@ -106,7 +81,7 @@ static GLuint translate_rgb_op(GLuint opcode) } } -static GLuint translate_alpha_op(GLuint opcode) +static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode) { switch(opcode) { case OPCODE_CMP: return R500_ALPHA_OP_CMP; @@ -189,8 +164,8 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) int ip = ++code->inst_end; - code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); - code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) code->inst[ip].inst0 = R500_INST_TYPE_OUT; @@ -202,7 +177,7 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->fp->writes_depth = GL_TRUE; + c->code->writes_depth = GL_TRUE; } code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); @@ -234,19 +209,19 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) return GL_TRUE; } -static GLuint translate_strq_swizzle(struct prog_src_register src) +static GLuint translate_strq_swizzle(GLuint swizzle) { GLuint swiz = 0; int i; for (i = 0; i < 4; i++) - swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; return swiz; } /** * Emit a single TEX instruction */ -static GLboolean emit_tex(void *data, struct prog_instruction *inst) +static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst) { PROG_CODE; @@ -258,7 +233,7 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->DstReg.WriteMask << 11) + | (inst->WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; @@ -267,25 +242,25 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) code->inst[ip].inst1 |= R500_TEX_UNSCALED; switch (inst->Opcode) { - case OPCODE_KIL: + case RADEON_OPCODE_KIL: code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; break; - case OPCODE_TEX: + case RADEON_OPCODE_TEX: code->inst[ip].inst1 |= R500_TEX_INST_LD; break; - case OPCODE_TXB: + case RADEON_OPCODE_TXB: code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; break; - case OPCODE_TXP: + case RADEON_OPCODE_TXP: code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; default: error("emit_tex can't handle opcode %x\n", inst->Opcode); } - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) - | (translate_strq_swizzle(inst->SrcReg[0]) << 8) - | R500_TEX_DST_ADDR(inst->DstReg.Index) + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex) + | (translate_strq_swizzle(inst->SrcSwizzle) << 8) + | R500_TEX_DST_ADDR(inst->DestIndex) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; @@ -293,35 +268,32 @@ static GLboolean emit_tex(void *data, struct prog_instruction *inst) } static const struct radeon_pair_handler pair_handler = { - .EmitConst = emit_const, .EmitPaired = emit_paired, .EmitTex = emit_tex, .MaxHwTemps = 128 }; -GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r500_fragment_program_code *code = &compiler->code->r500; + struct r500_fragment_program_code *code = &compiler->code->code.r500; _mesa_bzero(code, sizeof(*code)); code->max_temp_idx = 1; - code->inst_offset = 0; code->inst_end = -1; - if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) - return GL_FALSE; + radeonPairProgram(compiler, &pair_handler, compiler); + if (compiler->Base.Error) + return; if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ if (code->inst_end >= 511) { - error("Introducing fake OUT: Too many instructions"); - return GL_FALSE; + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; } int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } - - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c new file mode 100644 index 0000000000..c7923004df --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "radeon_code.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. + */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. + */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h new file mode 100644 index 0000000000..0806fb1b5c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -0,0 +1,206 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include <stdint.h> + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. + */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + unsigned depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + unsigned texture_compare_func : 3; + } unit[16]; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ + uint32_t inst[R300_PFS_MAX_TEX_INST]; + } tex; + + struct { + int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + } inst[R300_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct rc_constant_list constants; +}; + + +#define VSF_MAX_FRAGMENT_LENGTH (255*4) +#define VSF_MAX_FRAGMENT_TEMPS (14) + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + + uint32_t InputsRead; + uint32_t OutputsWritten; +}; + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + +#endif /* RADEON_CODE_H */
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c new file mode 100644 index 0000000000..da950d5289 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -0,0 +1,262 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdarg.h> + +#include "radeon_program.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.I.Opcode = OPCODE_END; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!c->Debug) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + c->Error = GL_TRUE; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. + */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + unsigned i; + + for(i = 0; i < numsrcs; ++i) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) { + inst->I.SrcReg[i].File = new_input.File; + inst->I.SrcReg[i].Index = new_input.Index; + inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle); + if (!inst->I.SrcReg[i].Abs) { + inst->I.SrcReg[i].Negate ^= new_input.Negate; + inst->I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. + */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.Index = new_output; + inst->I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. + */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + + if (numdsts) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = output; + + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = tempreg; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->I.Opcode = OPCODE_MOV; + inst->I.DstReg.File = PROGRAM_OUTPUT; + inst->I.DstReg.Index = dup_output; + + inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[0].Index = tempreg; + inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. + */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input) +{ + unsigned tempregi = rc_find_free_temporary(c); + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->I.Opcode = OPCODE_RCP; + + inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; + inst_rcp->I.DstReg.Index = tempregi; + inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + + inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT; + inst_rcp->I.SrcReg[0].Index = new_input; + inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + + struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->I.Opcode = OPCODE_MUL; + + inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mul->I.DstReg.Index = tempregi; + inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ; + + inst_mul->I.SrcReg[0].File = PROGRAM_INPUT; + inst_mul->I.SrcReg[0].Index = new_input; + + inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY; + inst_mul->I.SrcReg[1].Index = tempregi; + inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + + /* viewport transformation */ + struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->I.Opcode = OPCODE_MAD; + + inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; + inst_mad->I.DstReg.Index = tempregi; + inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ; + + inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY; + inst_mad->I.SrcReg[0].Index = tempregi; + inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR; + inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index; + inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + struct rc_instruction * inst; + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + unsigned i; + + for(i = 0; i < numsrcs; i++) { + if (inst->I.SrcReg[i].File == PROGRAM_INPUT && + inst->I.SrcReg[i].Index == wpos) { + inst->I.SrcReg[i].File = PROGRAM_TEMPORARY; + inst->I.SrcReg[i].Index = tempregi; + } + } + } +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h new file mode 100644 index 0000000000..e63ab8840a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -0,0 +1,108 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" + +#include "memory_pool.h" +#include "radeon_code.h" + + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + struct prog_instruction I; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + unsigned Debug:1; + unsigned Error:1; + char * ErrorMsg; +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program); + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + struct r300_fragment_program_external_state state; + unsigned is_r500; + unsigned OutputDepth; + unsigned OutputColor; + + void * UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + GLbitfield RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c index 840c9733b1..aaaa50ad1f 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c @@ -36,6 +36,8 @@ #include "radeon_nqssadce.h" +#include "radeon_compiler.h" + /** * Return the @ref register_state for the given register (or 0 for untracked @@ -76,9 +78,10 @@ struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register s } -static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, - struct prog_instruction *inst, GLint src, GLuint sourced) +static void track_used_srcreg(struct nqssadce_state* s, + GLint src, GLuint sourced) { + struct prog_instruction * inst = &s->IP->I; int i; GLuint deswz_source = 0; @@ -95,12 +98,11 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { struct prog_dst_register dstreg = inst->DstReg; dstreg.File = PROGRAM_TEMPORARY; - dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + dstreg.Index = rc_find_free_temporary(s->Compiler); dstreg.WriteMask = sourced; s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - inst = s->Program->Instructions + s->IP; inst->SrcReg[src].File = PROGRAM_TEMPORARY; inst->SrcReg[src].Index = dstreg.Index; inst->SrcReg[src].Swizzle = 0; @@ -117,37 +119,36 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, struct register_state *regstate; - if (inst->SrcReg[src].RelAddr) + if (inst->SrcReg[src].RelAddr) { regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); - else + if (regstate) + regstate->Sourced |= WRITEMASK_X; + } else { regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - - return inst; + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + } } -static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) +static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex) { - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode); int i; for(i = 0; i < nsrc; ++i) - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) - inst->SrcReg[i].Index = newindex; + if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) + inst->I.SrcReg[i].Index = newindex; } static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) { - GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); - int ip; - for(ip = 0; ip < s->IP; ++ip) { - struct prog_instruction* inst = s->Program->Instructions + ip; - if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) - inst->DstReg.Index = newindex; + GLuint newindex = rc_find_free_temporary(s->Compiler); + struct rc_instruction * inst; + for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) { + if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex) + inst->I.DstReg.Index = newindex; unalias_srcregs(inst, oldindex, newindex); } - unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); + unalias_srcregs(s->IP, oldindex, newindex); } @@ -156,7 +157,8 @@ static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) */ static void process_instruction(struct nqssadce_state* s) { - struct prog_instruction *inst = s->Program->Instructions + s->IP; + struct prog_instruction *inst = &s->IP->I; + GLuint WriteMask; if (inst->Opcode == OPCODE_END) return; @@ -164,7 +166,7 @@ static void process_instruction(struct nqssadce_state* s) if (inst->Opcode != OPCODE_KIL) { struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); if (!regstate) { - _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", + rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n", inst->DstReg.File, inst->DstReg.Index); return; } @@ -173,7 +175,9 @@ static void process_instruction(struct nqssadce_state* s) regstate->Sourced &= ~inst->DstReg.WriteMask; if (inst->DstReg.WriteMask == 0) { - _mesa_delete_instructions(s->Program, s->IP, 1); + struct rc_instruction * inst_remove = s->IP; + s->IP = s->IP->Prev; + rc_remove_instruction(inst_remove); return; } @@ -181,16 +185,15 @@ static void process_instruction(struct nqssadce_state* s) unalias_temporary(s, inst->DstReg.Index); } - /* Attention: Due to swizzle emulation code, the following - * might change the instruction stream under us, so we have - * to be careful with the inst pointer. */ + WriteMask = inst->DstReg.WriteMask; + switch (inst->Opcode) { case OPCODE_ARL: case OPCODE_DDX: case OPCODE_DDY: case OPCODE_FRC: case OPCODE_MOV: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); break; case OPCODE_ADD: case OPCODE_MAX: @@ -198,14 +201,14 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_MUL: case OPCODE_SGE: case OPCODE_SLT: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); + track_used_srcreg(s, 1, WriteMask); break; case OPCODE_CMP: case OPCODE_MAD: - inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); - inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); + track_used_srcreg(s, 0, WriteMask); + track_used_srcreg(s, 1, WriteMask); + track_used_srcreg(s, 2, WriteMask); break; case OPCODE_COS: case OPCODE_EX2: @@ -213,83 +216,79 @@ static void process_instruction(struct nqssadce_state* s) case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: - inst = track_used_srcreg(s, inst, 0, 0x1); + track_used_srcreg(s, 0, 0x1); break; case OPCODE_DP3: - inst = track_used_srcreg(s, inst, 0, 0x7); - inst = track_used_srcreg(s, inst, 1, 0x7); + track_used_srcreg(s, 0, 0x7); + track_used_srcreg(s, 1, 0x7); break; case OPCODE_DP4: - inst = track_used_srcreg(s, inst, 0, 0xf); - inst = track_used_srcreg(s, inst, 1, 0xf); + track_used_srcreg(s, 0, 0xf); + track_used_srcreg(s, 1, 0xf); break; case OPCODE_KIL: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: - inst = track_used_srcreg(s, inst, 0, 0xf); + track_used_srcreg(s, 0, 0xf); break; case OPCODE_DST: - inst = track_used_srcreg(s, inst, 0, 0x6); - inst = track_used_srcreg(s, inst, 1, 0xa); + track_used_srcreg(s, 0, 0x6); + track_used_srcreg(s, 1, 0xa); break; case OPCODE_EXP: case OPCODE_LOG: case OPCODE_POW: - inst = track_used_srcreg(s, inst, 0, 0x3); + track_used_srcreg(s, 0, 0x3); break; case OPCODE_LIT: - inst = track_used_srcreg(s, inst, 0, 0xb); + track_used_srcreg(s, 0, 0xb); break; default: - _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode); return; } + + s->IP = s->IP->Prev; } -static void calculateInputsOutputs(struct gl_program *p) +void rc_calculate_inputs_outputs(struct radeon_compiler * c) { - struct prog_instruction *inst; - GLuint InputsRead, OutputsWritten; + struct rc_instruction *inst; - inst = p->Instructions; - InputsRead = 0; - OutputsWritten = 0; - while (inst->Opcode != OPCODE_END) + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - int i, num_src_regs; + int i; + int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode); - num_src_regs = _mesa_num_inst_src_regs(inst->Opcode); for (i = 0; i < num_src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) - InputsRead |= 1 << inst->SrcReg[i].Index; + if (inst->I.SrcReg[i].File == PROGRAM_INPUT) + c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; } - if (inst->DstReg.File == PROGRAM_OUTPUT) - OutputsWritten |= 1 << inst->DstReg.Index; - - ++inst; + if (_mesa_num_inst_dst_regs(inst->I.Opcode)) { + if (inst->I.DstReg.File == PROGRAM_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; + } } - - p->InputsRead = InputsRead; - p->OutputsWritten = OutputsWritten; } -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) +void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data) { struct nqssadce_state s; _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = p; + s.Compiler = c; s.Descr = descr; + s.UserData = data; s.Descr->Init(&s); - s.IP = p->NumInstructions; + s.IP = c->Program.Instructions.Prev; - while(s.IP > 0) { - s.IP--; + while(s.IP != &c->Program.Instructions && !c->Error) process_instruction(&s); - } - calculateInputsOutputs(p); + rc_calculate_inputs_outputs(c); } diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h index 8626f21c25..b3fc77a35a 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h @@ -30,7 +30,6 @@ #include "radeon_program.h" - struct register_state { /** * Bitmask indicating which components of the register are sourced @@ -44,14 +43,13 @@ struct register_state { * read from, etc. */ struct nqssadce_state { - GLcontext *Ctx; - struct gl_program *Program; + struct radeon_compiler *Compiler; struct radeon_nqssadce_descr *Descr; /** * All instructions after this instruction pointer have been dealt with. */ - int IP; + struct rc_instruction * IP; /** * Which registers are read by subsequent instructions? @@ -59,6 +57,8 @@ struct nqssadce_state { struct register_state Temps[MAX_PROGRAM_TEMPS]; struct register_state Outputs[VERT_RESULT_MAX]; struct register_state Address; + + void * UserData; }; @@ -83,11 +83,9 @@ struct radeon_nqssadce_descr { * The transformation will work recursively on the emitted instruction(s). */ void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - - void *Data; }; -void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); +void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data); struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); #endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c new file mode 100644 index 0000000000..208d3b90c8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include "radeon_compiler.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + struct radeon_compiler * c, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + + while(inst != &c->Program.Instructions) { + struct rc_instruction * current = inst; + int i; + + inst = inst->Next; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(c, current, t->userData)) + break; + } + } +} + + +GLint rc_find_free_temporary(struct radeon_compiler * c) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + memset(used, 0, sizeof(used)); + + for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { + const struct prog_instruction *inst = &rcinst->I; + const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < n; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; +} + + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +{ + struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + + inst->Prev = 0; + inst->Next = 0; + + _mesa_init_instructions(&inst->I, 1); + + return inst; +} + + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + inst->Prev = after; + inst->Next = after->Next; + + inst->Prev->Next = inst; + inst->Next->Prev = inst; + + return inst; +} + +void rc_remove_instruction(struct rc_instruction * inst) +{ + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; +} + + +void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) +{ + struct prog_instruction *source; + unsigned int i; + + for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { + struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + dest->I = *source; + } + + c->Program.ShadowSamplers = program->ShadowSamplers; + c->Program.InputsRead = program->InputsRead; + c->Program.OutputsWritten = program->OutputsWritten; + + for(i = 0; i < program->Parameters->NumParameters; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } +} + + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + GLuint indent = 0; + GLuint linenum = 1; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + /* Massive hack: We rely on the fact that the printers do not actually + * use the gl_program argument (last argument) in debug mode */ + indent = _mesa_fprint_instruction_opt( + stderr, &inst->I, + indent, PROG_PRINT_DEBUG, 0); + + linenum++; + } +} diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 88474d43a2..561958608c 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -34,12 +34,9 @@ #include "shader/program.h" #include "shader/prog_instruction.h" - -enum { - CLAUSE_MIXED = 0, - CLAUSE_ALU, - CLAUSE_TEX -}; +struct radeon_compiler; +struct rc_instruction; +struct rc_program; enum { PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ @@ -83,19 +80,12 @@ static inline GLuint combine_swizzles(GLuint src, GLuint swz) return ret; } +static INLINE void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} -/** - * Transformation context that is passed to local transformations. - * - * Care must be taken with some operations during transformation, - * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary - */ -struct radeon_transform_context { - GLcontext *Ctx; - struct gl_program *Program; - struct prog_instruction *OldInstructions; - GLuint OldNumInstructions; -}; /** * A transformation that can be passed to \ref radeonLocalTransform. @@ -109,23 +99,23 @@ struct radeon_transform_context { */ struct radeon_program_transformation { GLboolean (*function)( - struct radeon_transform_context*, - struct prog_instruction*, + struct radeon_compiler*, + struct rc_instruction*, void*); void *userData; }; void radeonLocalTransform( - GLcontext* ctx, - struct gl_program *program, + struct radeon_compiler *c, int num_transformations, struct radeon_program_transformation* transformations); -/** - * Find a usable free temporary register during program transformation - */ -GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); +GLint rc_find_free_temporary(struct radeon_compiler * c); + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_remove_instruction(struct rc_instruction * inst); -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); +void rc_print_program(const struct rc_program *prog); #endif diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 8283723bad..609e510ff2 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -35,49 +35,52 @@ #include "radeon_program_alu.h" -#include "shader/prog_parameter.h" +#include "radeon_compiler.h" -static struct prog_instruction *emit1(struct gl_program* p, +static struct rc_instruction *emit1( + struct radeon_compiler * c, struct rc_instruction * after, gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg; + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg; return fpi; } -static struct prog_instruction *emit2(struct gl_program* p, +static struct rc_instruction *emit2( + struct radeon_compiler * c, struct rc_instruction * after, gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg0; + fpi->I.SrcReg[1] = SrcReg1; return fpi; } -static struct prog_instruction *emit3(struct gl_program* p, +static struct rc_instruction *emit3( + struct radeon_compiler * c, struct rc_instruction * after, gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, struct prog_src_register SrcReg2) { - struct prog_instruction *fpi = radeonAppendInstructions(p, 1); - - fpi->Opcode = Opcode; - fpi->SaturateMode = Saturate; - fpi->DstReg = DstReg; - fpi->SrcReg[0] = SrcReg0; - fpi->SrcReg[1] = SrcReg1; - fpi->SrcReg[2] = SrcReg2; + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->I.Opcode = Opcode; + fpi->I.SaturateMode = Saturate; + fpi->I.DstReg = DstReg; + fpi->I.SrcReg[0] = SrcReg0; + fpi->I.SrcReg[1] = SrcReg1; + fpi->I.SrcReg[2] = SrcReg2; return fpi; } @@ -171,44 +174,63 @@ static struct prog_src_register scalar(struct prog_src_register reg) return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); } -static void transform_ABS(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) { - struct prog_src_register src = inst->SrcReg[0]; + struct prog_src_register src = inst->I.SrcReg[0]; src.Abs = 1; src.Negate = NEGATE_NONE; - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); + emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); + rc_remove_instruction(inst); } -static void transform_DPH(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) { - struct prog_src_register src0 = inst->SrcReg[0]; + struct prog_src_register src0 = inst->I.SrcReg[0]; + struct prog_src_register src1 = inst->I.SrcReg[1]; + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~NEGATE_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_DPH(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct prog_src_register src0 = inst->I.SrcReg[0]; src0.Negate &= ~NEGATE_W; src0.Swizzle &= ~(7 << (3 * 3)); src0.Swizzle |= SWIZZLE_ONE << (3 * 3); - emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); + emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); + rc_remove_instruction(inst); } /** * [1, src0.y*src1.y, src0.z, src1.w] * So basically MUL with lotsa swizzling. */ -static void transform_DST(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_DST(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), + swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + rc_remove_instruction(inst); } -static void transform_FLR(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_FLR(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]); + emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, + inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + rc_remove_instruction(inst); } /** @@ -229,152 +251,159 @@ static void transform_FLR(struct radeon_transform_context* t, * 5 slots, if the subsequent optimization passes are clever enough * to pair instructions correctly. */ -static void transform_LIT(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) { - static const GLfloat LitConst[4] = { -127.999999 }; - GLuint constant; GLuint constant_swizzle; GLuint temp; - int needTemporary = 0; struct prog_src_register srctemp; - constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); - if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } + if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) { + struct rc_instruction * inst_mov; - if (needTemporary) { - temp = radeonFindFreeTemporary(t); - } else { - temp = inst->DstReg.Index; + inst_mov = emit1(c, inst, + OPCODE_MOV, 0, inst->I.DstReg, + srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c))); + + inst->I.DstReg.File = PROGRAM_TEMPORARY; + inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; + inst->I.DstReg.WriteMask = WRITEMASK_XYZW; } + + temp = inst->I.DstReg.Index; srctemp = srcreg(PROGRAM_TEMPORARY, temp); // tmp.x = max(0.0, Src.x); // tmp.y = max(0.0, Src.y); // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(t->Program, OPCODE_MAX, 0, + emit2(c, inst->Prev, OPCODE_MAX, 0, dstregtmpmask(temp, WRITEMASK_XYW), - inst->SrcReg[0], + inst->I.SrcReg[0], swizzle(srcreg(PROGRAM_CONSTANT, constant), SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(t->Program, OPCODE_MIN, 0, + emit2(c, inst->Prev, OPCODE_MIN, 0, dstregtmpmask(temp, WRITEMASK_Z), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); // tmp.w = Pow(tmp.y, tmp.w) - emit1(t->Program, OPCODE_LG2, 0, + emit1(c, inst->Prev, OPCODE_LG2, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(t->Program, OPCODE_MUL, 0, + emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(t->Program, OPCODE_EX2, 0, + emit1(c, inst->Prev, OPCODE_EX2, 0, dstregtmpmask(temp, WRITEMASK_W), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, dstregtmpmask(temp, WRITEMASK_Z), negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), builtin_zero); // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, + emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, dstregtmpmask(temp, WRITEMASK_XYW), swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); - if (needTemporary) - emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); + rc_remove_instruction(inst); } -static void transform_LRP(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - inst->SrcReg[1], negate(inst->SrcReg[2])); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, - inst->DstReg, - inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); + inst->I.SrcReg[1], negate(inst->I.SrcReg[2])); + emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, + inst->I.DstReg, + inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]); + + rc_remove_instruction(inst); } -static void transform_POW(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); tempdst.WriteMask = WRITEMASK_W; tempsrc.Swizzle = SWIZZLE_WWWW; - emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); - emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); - emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); + emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); + emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); + emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); + + rc_remove_instruction(inst); } -static void transform_RSQ(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); + inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]); } -static void transform_SGE(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); + + rc_remove_instruction(inst); } -static void transform_SLT(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); + int tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); + emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); } -static void transform_SUB(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); + inst->I.Opcode = OPCODE_ADD; + inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]); } -static void transform_SWZ(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) { - emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); + inst->I.Opcode = OPCODE_MOV; } -static void transform_XPD(struct radeon_transform_context* t, - struct prog_instruction* inst) +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) { - int tempreg = radeonFindFreeTemporary(t); - - emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, - swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, + swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + + rc_remove_instruction(inst); } @@ -392,31 +421,64 @@ static void transform_XPD(struct radeon_transform_context* t, * * @note should be applicable to R300 and R500 fragment programs. */ -GLboolean radeonTransformALU(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, void* unused) { - switch(inst->Opcode) { - case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; + switch(inst->I.Opcode) { + case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; + case OPCODE_DST: transform_DST(c, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(c, inst); return GL_TRUE; + case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; default: return GL_FALSE; } } -static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->I.Opcode = OPCODE_MAX; + inst->I.SrcReg[1] = inst->I.SrcReg[0]; + inst->I.SrcReg[1].Negate ^= NEGATE_XYZW; +} + +/** + * For use with radeonLocalTransform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. + */ +GLboolean r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->I.Opcode) { + case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE; + case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + +static void sincos_constants(struct radeon_compiler* c, GLuint *constants) { static const GLfloat SinCosConsts[2][4] = { { @@ -434,11 +496,8 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan }; int i; - for(i = 0; i < 2; ++i) { - GLuint swz; - constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); - ASSERT(swz == SWIZZLE_NOOP); - } + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); } /** @@ -449,23 +508,24 @@ static void sincos_constants(struct radeon_transform_context* t, GLuint *constan * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x * MAD dest, tmp.y, weight, tmp.x */ -static void sin_approx(struct radeon_transform_context* t, +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * after, struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) { - GLuint tempreg = radeonFindFreeTemporary(t); + GLuint tempreg = rc_find_free_temporary(c); - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit2(c, after->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), srcreg(PROGRAM_CONSTANT, constants[0])); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), + emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), + emit3(c, after->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(t->Program, OPCODE_MAD, 0, dst, + emit3(c, after->Prev, OPCODE_MAD, 0, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); @@ -476,78 +536,80 @@ static void sin_approx(struct radeon_transform_context* t, * using only the basic instructions * MOV, ADD, MUL, MAD, FRC */ -GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) + if (inst->I.Opcode != OPCODE_COS && + inst->I.Opcode != OPCODE_SIN && + inst->I.Opcode != OPCODE_SCS) return GL_FALSE; GLuint constants[2]; - GLuint tempreg = radeonFindFreeTemporary(t); + GLuint tempreg = rc_find_free_temporary(c); - sincos_constants(t, constants); + sincos_constants(c, constants); - if (inst->Opcode == OPCODE_COS) { + if (inst->I.Opcode == OPCODE_COS) { // MAD tmp.x, src, 1/(2*PI), 0.75 // FRC tmp.x, tmp.x // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - sin_approx(t, inst->DstReg, + sin_approx(c, inst->Prev, inst->I.DstReg, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), constants); - } else if (inst->Opcode == OPCODE_SIN) { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + } else if (inst->I.Opcode == OPCODE_SIN) { + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - sin_approx(t, inst->DstReg, + sin_approx(c, inst->Prev, inst->I.DstReg, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), constants); } else { - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), srcreg(PROGRAM_TEMPORARY, tempreg), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - struct prog_dst_register dst = inst->DstReg; + struct prog_dst_register dst = inst->I.DstReg; - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; - sin_approx(t, dst, + dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X; + sin_approx(c, inst->Prev, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), constants); - dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; - sin_approx(t, dst, + dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y; + sin_approx(c, inst->Prev, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), constants); } + rc_remove_instruction(inst); + return GL_TRUE; } @@ -560,50 +622,52 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, * * @warning This transformation implicitly changes the semantics of SIN and COS! */ -GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_COS && - inst->Opcode != OPCODE_SIN && - inst->Opcode != OPCODE_SCS) + if (inst->I.Opcode != OPCODE_COS && + inst->I.Opcode != OPCODE_SIN && + inst->I.Opcode != OPCODE_SCS) return GL_FALSE; - static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; + static const GLfloat RCP_2PI = 0.15915494309189535; GLuint temp; GLuint constant; GLuint constant_swizzle; - temp = radeonFindFreeTemporary(t); - constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); - emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), + emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), srcreg(PROGRAM_TEMPORARY, temp)); - if (inst->Opcode == OPCODE_COS) { - emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, + if (inst->I.Opcode == OPCODE_COS) { + emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SIN) { - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, - inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->DstReg; + } else if (inst->I.Opcode == OPCODE_SIN) { + emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, + inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->I.Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->I.DstReg; - if (inst->DstReg.WriteMask & WRITEMASK_X) { + if (inst->I.DstReg.WriteMask & WRITEMASK_X) { moddst.WriteMask = WRITEMASK_X; - emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, + emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); } - if (inst->DstReg.WriteMask & WRITEMASK_Y) { + if (inst->I.DstReg.WriteMask & WRITEMASK_Y) { moddst.WriteMask = WRITEMASK_Y; - emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, + emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); } } + rc_remove_instruction(inst); + return GL_TRUE; } @@ -615,21 +679,15 @@ GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, * @warning This explicitly changes the form of DDX and DDY! */ -GLboolean radeonTransformDeriv(struct radeon_transform_context* t, - struct prog_instruction* inst, +GLboolean radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, void* unused) { - if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) + if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY) return GL_FALSE; - struct prog_src_register B = inst->SrcReg[1]; - - B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, - SWIZZLE_ONE, SWIZZLE_ONE); - B.Negate = NEGATE_XYZW; - - emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, - inst->SrcReg[0], B); + inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); + inst->I.SrcReg[1].Negate = NEGATE_XYZW; return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h index b45958115c..147efec6fc 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -31,23 +31,28 @@ #include "radeon_program.h" GLboolean radeonTransformALU( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +GLboolean r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformTrigSimple( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformTrigScale( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); GLboolean radeonTransformDeriv( - struct radeon_transform_context *t, - struct prog_instruction*, + struct radeon_compiler * c, + struct rc_instruction * inst, void*); #endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index d6fb474cf2..48616ac461 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -35,17 +35,19 @@ #include "radeon_program_pair.h" -#include "radeon_common.h" - +#include "memory_pool.h" +#include "radeon_compiler.h" #include "shader/prog_print.h" #define error(fmt, args...) do { \ - _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ + rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \ __FILE__, __FUNCTION__, ##args); \ - s->Error = GL_TRUE; \ } while(0) struct pair_state_instruction { + struct prog_instruction Instruction; + GLuint IP; /**< Position of this instruction in original program */ + GLuint IsTex:1; /**< Is a texture instruction */ GLuint NeedRGB:1; /**< Needs the RGB ALU */ GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ @@ -73,7 +75,7 @@ struct pair_state_instruction { * Used to keep track of which instructions read a value. */ struct reg_value_reader { - GLuint IP; /**< IP of the instruction that performs this access */ + struct pair_state_instruction *Reader; struct reg_value_reader *Next; }; @@ -82,7 +84,7 @@ struct reg_value_reader { * PROGRAM_TEMPORARY. */ struct reg_value { - GLuint IP; /**< IP of the instruction that writes this value */ + struct pair_state_instruction *Writer; struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ /** @@ -116,11 +118,8 @@ struct pair_register_translation { }; struct pair_state { - GLcontext *Ctx; - struct gl_program *Program; + struct r300_fragment_program_compiler * Compiler; const struct radeon_pair_handler *Handler; - GLboolean Error; - GLboolean Debug; GLboolean Verbose; void *UserData; @@ -130,11 +129,6 @@ struct pair_state { struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; - /** - * Derived information about program instructions. - */ - struct pair_state_instruction *Instructions; - struct { GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ } HwTemps[128]; @@ -147,14 +141,6 @@ struct pair_state { struct pair_state_instruction *ReadyRGB; struct pair_state_instruction *ReadyAlpha; struct pair_state_instruction *ReadyTEX; - - /** - * Pool of @ref reg_value structures for fast allocation. - */ - struct reg_value *ValuePool; - GLuint ValuePoolUsed; - struct reg_value_reader *ReaderPool; - GLuint ReaderPoolUsed; }; @@ -183,7 +169,7 @@ static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) struct pair_register_translation *t = get_register(s, file, index); if (!t) { - _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); + error("get_hw_reg: %i[%i]\n", file, index); return 0; } @@ -221,15 +207,13 @@ static void add_pairinst_to_list(struct pair_state_instruction **list, struct pa } /** - * The instruction at the given IP has become ready. Link it into the ready + * The given instruction has become ready. Link it into the ready * instructions. */ -static void instruction_ready(struct pair_state *s, int ip) +static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", ip); + _mesa_printf("instruction_ready(%i)\n", pairinst->IP); if (pairinst->IsTex) add_pairinst_to_list(&s->ReadyTEX, pairinst); @@ -296,12 +280,12 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) * Classify an instruction according to which ALUs etc. it needs */ static void classify_instruction(struct pair_state *s, - struct prog_instruction *inst, struct pair_state_instruction *pairinst) + struct pair_state_instruction *psi) { - pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; + psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; + psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; - switch(inst->Opcode) { + switch(psi->Instruction.Opcode) { case OPCODE_ADD: case OPCODE_CMP: case OPCODE_DDX: @@ -319,24 +303,24 @@ static void classify_instruction(struct pair_state *s, case OPCODE_RCP: case OPCODE_RSQ: case OPCODE_SIN: - pairinst->IsTranscendent = 1; - pairinst->NeedAlpha = 1; + psi->IsTranscendent = 1; + psi->NeedAlpha = 1; break; case OPCODE_DP4: - pairinst->NeedAlpha = 1; + psi->NeedAlpha = 1; /* fall through */ case OPCODE_DP3: - pairinst->NeedRGB = 1; + psi->NeedRGB = 1; break; case OPCODE_KIL: case OPCODE_TEX: case OPCODE_TXB: case OPCODE_TXP: case OPCODE_END: - pairinst->IsTex = 1; + psi->IsTex = 1; break; default: - error("Unknown opcode %d\n", inst->Opcode); + error("Unknown opcode %d\n", psi->Instruction.Opcode); break; } } @@ -348,30 +332,34 @@ static void classify_instruction(struct pair_state *s, */ static void scan_instructions(struct pair_state *s) { - struct prog_instruction *inst; - struct pair_state_instruction *pairinst; + struct rc_instruction *source; GLuint ip; - for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; - inst->Opcode != OPCODE_END; - ++inst, ++pairinst, ++ip) { - final_rewrite(s, inst); - classify_instruction(s, inst, pairinst); + for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0; + source != &s->Compiler->Base.Program.Instructions; + source = source->Next, ++ip) { + struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst)); + memset(pairinst, 0, sizeof(struct pair_state_instruction)); + + pairinst->Instruction = source->I; + pairinst->IP = ip; + final_rewrite(s, &pairinst->Instruction); + classify_instruction(s, pairinst); - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode); int j; for(j = 0; j < nsrc; j++) { struct pair_register_translation *t = - get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); + get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index); if (!t) continue; t->RefCount++; - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) { int i; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); + GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); if (swz >= 4) continue; /* constant or NIL swizzle */ if (!t->Value[swz]) @@ -381,36 +369,37 @@ static void scan_instructions(struct pair_state *s) * also rewrites the value. The code below adds * a dependency for the DstReg, which is a superset * of the SrcReg dependency. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[j].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) + if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY && + pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index && + GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz)) continue; - struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; + struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r)); pairinst->NumDependencies++; t->Value[swz]->NumReaders++; - r->IP = ip; + r->Reader = pairinst; r->Next = t->Value[swz]->Readers; t->Value[swz]->Readers = r; } } } - int ndst = _mesa_num_inst_dst_regs(inst->Opcode); + int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode); if (ndst) { struct pair_register_translation *t = - get_register(s, inst->DstReg.File, inst->DstReg.Index); + get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index); if (t) { t->RefCount++; - if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) { int j; for(j = 0; j < 4; ++j) { - if (!GET_BIT(inst->DstReg.WriteMask, j)) + if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j)) continue; - struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; - v->IP = ip; + struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v)); + memset(v, 0, sizeof(struct reg_value)); + v->Writer = pairinst; if (t->Value[j]) { pairinst->NumDependencies++; t->Value[j]->Next = v; @@ -426,7 +415,7 @@ static void scan_instructions(struct pair_state *s) _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); if (!pairinst->NumDependencies) - instruction_ready(s, ip); + instruction_ready(s, pairinst); } /* Clear the PROGRAM_TEMPORARY state */ @@ -438,70 +427,23 @@ static void scan_instructions(struct pair_state *s) } -/** - * Reserve hardware temporary registers for the program inputs. - * - * @note This allocation is performed explicitly, because the order of inputs - * is determined by the RS hardware. - */ -static void allocate_input_registers(struct pair_state *s) -{ - GLuint InputsRead = s->Program->InputsRead; - int i; - GLuint hwindex = 0; - - /* Primary colour */ - if (InputsRead & FRAG_BIT_COL0) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); - InputsRead &= ~FRAG_BIT_COL1; - - /* Texcoords */ - for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* Fogcoords treated as a texcoord */ - if (InputsRead & FRAG_BIT_FOGC) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); - InputsRead &= ~FRAG_BIT_FOGC; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) - alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); - InputsRead &= ~FRAG_BIT_WPOS; - - /* Anything else */ - if (InputsRead) - error("Don't know how to handle inputs 0x%x\n", InputsRead); -} - - -static void decrement_dependencies(struct pair_state *s, int ip) +static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; ASSERT(pairinst->NumDependencies > 0); if (!--pairinst->NumDependencies) - instruction_ready(s, ip); + instruction_ready(s, pairinst); } /** * Update the dependency tracking state based on what the instruction * at the given IP does. */ -static void commit_instruction(struct pair_state *s, int ip) +static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst) { - struct prog_instruction *inst = s->Program->Instructions + ip; - struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", ip); + _mesa_printf("commit_instruction(%i)\n", pairinst->IP); if (inst->DstReg.File == PROGRAM_TEMPORARY) { struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; @@ -516,11 +458,11 @@ static void commit_instruction(struct pair_state *s, int ip) if (t->Value[i]->NumReaders) { struct reg_value_reader *r; for(r = pairinst->Values[i]->Readers; r; r = r->Next) - decrement_dependencies(s, r->IP); + decrement_dependencies(s, r->Reader); } else if (t->Value[i]->Next) { /* This happens when the only reader writes * the register at the same time */ - decrement_dependencies(s, t->Value[i]->Next->IP); + decrement_dependencies(s, t->Value[i]->Next->Writer); } } } @@ -554,7 +496,7 @@ static void commit_instruction(struct pair_state *s, int ip) if (!--t->Value[swz]->NumReaders) { if (t->Value[swz]->Next) - decrement_dependencies(s, t->Value[swz]->Next->IP); + decrement_dependencies(s, t->Value[swz]->Next->Writer); } } } @@ -585,36 +527,52 @@ static void emit_all_tex(struct pair_state *s) // Allocate destination hardware registers in one block to avoid conflicts. for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (inst->Opcode != OPCODE_KIL) get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); } - if (s->Debug) + if (s->Compiler->Base.Debug) _mesa_printf(" BEGIN_TEX\n"); if (s->Handler->BeginTexBlock) - s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData); for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - int ip = pairinst - s->Instructions; - struct prog_instruction *inst = s->Program->Instructions + ip; - commit_instruction(s, ip); + struct prog_instruction *inst = &pairinst->Instruction; + commit_instruction(s, pairinst); if (inst->Opcode != OPCODE_KIL) inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - if (s->Debug) { + if (s->Compiler->Base.Debug) { _mesa_printf(" "); _mesa_print_instruction(inst); fflush(stdout); } - s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); + + struct radeon_pair_texture_instruction rpti; + + switch(inst->Opcode) { + case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; + case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; + case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; + default: + case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; + } + + rpti.DestIndex = inst->DstReg.Index; + rpti.WriteMask = inst->DstReg.WriteMask; + rpti.TexSrcUnit = inst->TexSrcUnit; + rpti.TexSrcTarget = inst->TexSrcTarget; + rpti.SrcIndex = inst->SrcReg[0].Index; + rpti.SrcSwizzle = inst->SrcReg[0].Swizzle; + + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti); } - if (s->Debug) + if (s->Compiler->Base.Debug) _mesa_printf(" END_TEX\n"); } @@ -637,7 +595,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio index = get_hw_reg(s, src.File, src.Index); } else { constant = 1; - s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); + index = src.Index; } for(i = 0; i < 3; ++i) { @@ -684,10 +642,12 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio * Fill the given ALU instruction's opcodes and source operands into the given pair, * if possible. */ -static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +static GLboolean fill_instruction_into_pair( + struct pair_state *s, + struct radeon_pair_instruction *pair, + struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); @@ -768,16 +728,18 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ * we are absolutely certain that we're going to emit a certain * instruction pairing. */ -static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +static void fill_dest_into_pair( + struct pair_state *s, + struct radeon_pair_instruction *pair, + struct pair_state_instruction *pairinst) { - struct pair_state_instruction *pairinst = s->Instructions + ip; - struct prog_instruction *inst = s->Program->Instructions + ip; + struct prog_instruction *inst = &pairinst->Instruction; if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == FRAG_RESULT_COLOR) { + if (inst->DstReg.Index == s->Compiler->OutputColor) { pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { + } else if (inst->DstReg.Index == s->Compiler->OutputDepth) { pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); } } else { @@ -804,24 +766,24 @@ static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruc static void emit_alu(struct pair_state *s) { struct radeon_pair_instruction pair; + struct pair_state_instruction *psi; if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - int ip; if (s->ReadyFullALU) { - ip = s->ReadyFullALU - s->Instructions; + psi = s->ReadyFullALU; s->ReadyFullALU = s->ReadyFullALU->NextReady; } else if (s->ReadyRGB) { - ip = s->ReadyRGB - s->Instructions; + psi = s->ReadyRGB; s->ReadyRGB = s->ReadyRGB->NextReady; } else { - ip = s->ReadyAlpha - s->Instructions; + psi = s->ReadyAlpha; s->ReadyAlpha = s->ReadyAlpha->NextReady; } _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); + fill_instruction_into_pair(s, &pair, psi); + fill_dest_into_pair(s, &pair, psi); + commit_instruction(s, psi); } else { struct pair_state_instruction **prgb; struct pair_state_instruction **palpha; @@ -830,65 +792,65 @@ static void emit_alu(struct pair_state *s) * many source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - int rgbip = *prgb - s->Instructions; - int alphaip = *palpha - s->Instructions; + struct pair_state_instruction * psirgb = *prgb; + struct pair_state_instruction * psialpha = *palpha; _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, rgbip); - if (!fill_instruction_into_pair(s, &pair, alphaip)) + fill_instruction_into_pair(s, &pair, psirgb); + if (!fill_instruction_into_pair(s, &pair, psialpha)) continue; *prgb = (*prgb)->NextReady; *palpha = (*palpha)->NextReady; - fill_dest_into_pair(s, &pair, rgbip); - fill_dest_into_pair(s, &pair, alphaip); - commit_instruction(s, rgbip); - commit_instruction(s, alphaip); + fill_dest_into_pair(s, &pair, psirgb); + fill_dest_into_pair(s, &pair, psialpha); + commit_instruction(s, psirgb); + commit_instruction(s, psialpha); goto success; } } /* No success in pairing; just take the first RGB instruction */ - int ip = s->ReadyRGB - s->Instructions; + psi = s->ReadyRGB; s->ReadyRGB = s->ReadyRGB->NextReady; + _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, ip); - fill_dest_into_pair(s, &pair, ip); - commit_instruction(s, ip); + fill_instruction_into_pair(s, &pair, psi); + fill_dest_into_pair(s, &pair, psi); + commit_instruction(s, psi); success: ; } - if (s->Debug) + if (s->Compiler->Base.Debug) radeonPrintPairInstruction(&pair); - s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); + s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair); } +/* Callback function for assigning input registers to hardware registers */ +static void alloc_helper(void * data, unsigned input, unsigned hwreg) +{ + struct pair_state * s = data; + alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg); +} -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, +void radeonPairProgram( + struct r300_fragment_program_compiler * compiler, const struct radeon_pair_handler* handler, void *userdata) { struct pair_state s; _mesa_bzero(&s, sizeof(s)); - s.Ctx = ctx; - s.Program = _mesa_clone_program(ctx, program); + s.Compiler = compiler; s.Handler = handler; s.UserData = userdata; - s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; - s.Verbose = GL_FALSE && s.Debug; + s.Verbose = GL_FALSE && s.Compiler->Base.Debug; - s.Instructions = (struct pair_state_instruction*)_mesa_calloc( - sizeof(struct pair_state_instruction)*s.Program->NumInstructions); - s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); - s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( - sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); - - if (s.Debug) + if (s.Compiler->Base.Debug) _mesa_printf("Emit paired program\n"); scan_instructions(&s); - allocate_input_registers(&s); + s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s); - while(!s.Error && + while(!s.Compiler->Base.Error && (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { if (s.ReadyTEX) emit_all_tex(&s); @@ -897,16 +859,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, emit_alu(&s); } - if (s.Debug) + if (s.Compiler->Base.Debug) _mesa_printf(" END\n"); - - _mesa_free(s.Instructions); - _mesa_free(s.ValuePool); - _mesa_free(s.ReaderPool); - - _mesa_reference_program(ctx, &s.Program, NULL); - - return !s.Error; } diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 4624a24629..ff76178551 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -30,6 +30,8 @@ #include "radeon_program.h" +struct r300_fragment_program_compiler; + /** * Represents a paired instruction, as found in R300 and R500 @@ -82,18 +84,32 @@ struct radeon_pair_instruction { }; +enum { + RADEON_OPCODE_TEX = 0, + RADEON_OPCODE_TXB, + RADEON_OPCODE_TXP, + RADEON_OPCODE_KIL +}; + +struct radeon_pair_texture_instruction { + GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */ + + GLuint DestIndex:8; + GLuint WriteMask:4; + + GLuint TexSrcUnit:5; + GLuint TexSrcTarget:3; + + GLuint SrcIndex:8; + GLuint SrcSwizzle:12; +}; + + /** * */ struct radeon_pair_handler { /** - * Fill in the proper hardware index for the given constant register. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); - - /** * Write a paired instruction to the hardware. * * @return GL_FALSE on error. @@ -107,7 +123,7 @@ struct radeon_pair_handler { * * @return GL_FALSE on error. */ - GLboolean (*EmitTex)(void*, struct prog_instruction*); + GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*); /** * Called before a block of contiguous, independent texture @@ -115,10 +131,11 @@ struct radeon_pair_handler { */ GLboolean (*BeginTexBlock)(void*); - GLuint MaxHwTemps; + unsigned MaxHwTemps; }; -GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, +void radeonPairProgram( + struct r300_fragment_program_compiler * compiler, const struct radeon_pair_handler*, void *userdata); void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index af535037d0..bd46f9acf2 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -164,47 +164,46 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) r300ContextPtr r300 = R300_CONTEXT(ctx); BATCH_LOCALS(&r300->radeon); int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); - int notexture = 0; - - if (numtmus) { - int i; - - for(i = 0; i < numtmus; ++i) { - radeonTexObj *t = r300->hw.textures[i]; - - if (!t) - notexture = 1; - } - - if (r300->radeon.radeonScreen->kernel_mm && notexture) { - return; - } - for(i = 0; i < numtmus; ++i) { - radeonTexObj *t = r300->hw.textures[i]; - if (t && !t->image_override) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - } else if (!t) { - /* Texture unit hasn't a texture bound nothings to do */ - } else { /* override cases */ - if (t->bo) { - BEGIN_BATCH_NO_AUTOSTATE(4); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - } else if (!r300->radeon.radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_BATCH(t->override_offset); - END_BATCH(); - } else { - /* Texture unit hasn't a texture bound nothings to do */ - } - } + int i; + + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (t && !t->image_override) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!t) { + /* Texture unit hasn't a texture bound. + * We assign the current color buffer as a fakery to make + * KIL work on KMS (without it, the CS checker will complain). + */ + if (r300->radeon.radeonScreen->kernel_mm) { + struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon); + if (rrb && rrb->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } + } + } else { /* override cases */ + if (t->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!r300->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH(t->override_offset); + END_BATCH(); + } else { + /* Texture unit hasn't a texture bound nothings to do */ + } } } } diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 6f3aab986d..91fa77a169 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -64,9 +64,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" -#include "r300_render.h" +#include "r300_queryobj.h" #include "r300_swtcl.h" #include "radeon_bocs_wrapper.h" +#include "radeon_buffer_objects.h" #include "vblank.h" @@ -74,6 +75,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "xmlpool.h" /* for symbolic values of enum-type options */ #define need_GL_VERSION_2_0 +#define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_equation_separate @@ -94,6 +96,7 @@ const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, {"GL_ARB_shadow", NULL}, @@ -154,7 +157,6 @@ const struct dri_extension gl_20_extension[] = { }; static const struct tnl_pipeline_stage *r300_pipeline[] = { - /* Catch any t&l fallbacks */ &_tnl_vertex_transform_stage, @@ -165,21 +167,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { &_tnl_texture_transform_stage, &_tnl_point_attenuation_stage, &_tnl_vertex_program_stage, - - /* Try again to go to tcl? - * - no good for asymmetric-twoside (do with multipass) - * - no good for asymmetric-unfilled (do with multipass) - * - good for material - * - good for texgen - * - need to manipulate a bit of state - * - * - worth it/not worth it? - */ - - /* Else do them here. - */ - &_r300_render_stage, - &_tnl_render_stage, /* FALLBACK */ + &_tnl_render_stage, 0, }; @@ -325,6 +313,11 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; } + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) + r300->num_z_pipes = 2; + else + r300->num_z_pipes = r300->radeon.radeonScreen->num_gb_pipes; } static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen) @@ -367,6 +360,11 @@ static void r300InitGLExtensions(GLcontext *ctx) } else if (r300->options.s3tc_force_disabled) { _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + + if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries || + !r300->options.hw_tcl_enabled) { + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); + } } /* Create the device specific rendering context. @@ -398,6 +396,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitStateFuncs(&functions); r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); + r300InitQueryObjFunctions(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, @@ -451,13 +451,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitState(r300); r300InitShaderFunctions(r300); - if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 || - screen->chip_family == CHIP_FAMILY_RS740) { - r300->radeon.texture_row_align = 64; - } - r300InitGLExtensions(ctx); + make_empty_list(&r300->query.not_flushed_head); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index f7af7d4e57..3ba3426608 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/mtypes.h" #include "shader/prog_instruction.h" +#include "compiler/radeon_code.h" struct r300_context; typedef struct r300_context r300ContextRec; @@ -389,46 +390,25 @@ struct r300_hw_state { /* Vertex shader state */ -/* Perhaps more if we store programs in vmem? */ -/* drm_r300_cmd_header_t->vpu->count is unsigned char */ -#define VSF_MAX_FRAGMENT_LENGTH (255*4) - -/* Can be tested with colormat currently. */ -#define VSF_MAX_FRAGMENT_TEMPS (14) - -#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) -#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" #undef TAG +struct r300_vertex_program_key { + GLbitfield FpReads; + GLuint FogAttr; + GLuint WPosAttr; +}; + struct r300_vertex_program { struct gl_vertex_program *Base; struct r300_vertex_program *next; - struct r300_vertex_program_key { - GLuint FpReads; - GLuint FogAttr; - GLuint WPosAttr; - } key; - - struct r300_vertex_shader_hw_code { - int length; - union { - GLuint d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; - } body; - } hw_code; - - GLboolean translated; - GLboolean error; + struct r300_vertex_program_key key; + struct r300_vertex_program_code code; - int pos_end; - int num_temporaries; /* Number of temp vars used by program */ - int inputs[VERT_ATTRIB_MAX]; - int outputs[VERT_RESULT_MAX]; + GLboolean error; }; struct r300_vertex_program_cont { @@ -441,131 +421,18 @@ struct r300_vertex_program_cont { struct r300_vertex_program *progs; }; -#define R300_PFS_MAX_ALU_INST 64 -#define R300_PFS_MAX_TEX_INST 32 -#define R300_PFS_MAX_TEX_INDIRECT 4 -#define R300_PFS_NUM_TEMP_REGS 32 -#define R300_PFS_NUM_CONST_REGS 32 - -#define R500_PFS_MAX_INST 512 -#define R500_PFS_NUM_TEMP_REGS 128 -#define R500_PFS_NUM_CONST_REGS 256 - -struct r300_pfs_compile_state; -struct r500_pfs_compile_state; - -/** - * Stores state that influences the compilation of a fragment program. - */ -struct r300_fragment_program_external_state { - struct { - /** - * If the sampler is used as a shadow sampler, - * this field is: - * 0 - GL_LUMINANCE - * 1 - GL_INTENSITY - * 2 - GL_ALPHA - * depending on the depth texture mode. - */ - GLuint depth_texture_mode : 2; - - /** - * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] - * - * Otherwise, this field is 0. - */ - GLuint texture_compare_func : 3; - } unit[16]; -}; - - -struct r300_fragment_program_node { - int tex_offset; /**< first tex instruction */ - int tex_end; /**< last tex instruction, relative to tex_offset */ - int alu_offset; /**< first ALU instruction */ - int alu_end; /**< last ALU instruction, relative to alu_offset */ - int flags; -}; - -/** - * Stores an R300 fragment program in its compiled-to-hardware form. - */ -struct r300_fragment_program_code { - struct { - int length; /**< total # of texture instructions used */ - GLuint inst[R300_PFS_MAX_TEX_INST]; - } tex; - - struct { - int length; /**< total # of ALU instructions used */ - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - } inst[R300_PFS_MAX_ALU_INST]; - } alu; - - struct r300_fragment_program_node node[4]; - int cur_node; - int first_node_has_tex; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; - - -struct r500_fragment_program_code { - struct { - GLuint inst0; - GLuint inst1; - GLuint inst2; - GLuint inst3; - GLuint inst4; - GLuint inst5; - } inst[R500_PFS_MAX_INST]; - - int inst_offset; - int inst_end; - - /** - * Remember which program register a given hardware constant - * belongs to. - */ - struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; - int const_nr; - - int max_temp_idx; -}; /** * Store everything about a fragment program that is needed * to render with that program. */ struct r300_fragment_program { - struct gl_program *Base; - - GLboolean translated; GLboolean error; - + struct r300_fragment_program *next; struct r300_fragment_program_external_state state; - union rX00_fragment_program_code { - struct r300_fragment_program_code r300; - struct r500_fragment_program_code r500; - } code; - GLboolean writes_depth; - GLuint optimization; - - struct r300_fragment_program *next; + struct rX00_fragment_program_code code; + GLbitfield InputsRead; /* attribute that we are sending the WPOS in */ gl_frag_attrib wpos_attr; @@ -583,12 +450,6 @@ struct r300_fragment_program_cont { struct r300_fragment_program *progs; }; -struct r300_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - union rX00_fragment_program_code *code; - struct gl_program *program; -}; #define R300_MAX_AOS_ARRAYS 16 @@ -610,8 +471,6 @@ struct r300_swtcl_info { struct r300_vtable { void (* SetupRSUnit)(GLcontext *ctx); void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); - GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler); - void (* FragmentProgramDump)(union rX00_fragment_program_code *code); void (* SetupPixelShader)(GLcontext *ctx); }; @@ -619,11 +478,12 @@ struct r300_vertex_buffer { struct vertex_attribute { /* generic */ GLubyte element; - GLvoid *data; - GLboolean free_needed; GLuint stride; GLuint dwords; GLubyte size; /* number of components */ + GLboolean is_named_bo; + struct radeon_bo *bo; + GLint bo_offset; /* hw specific */ uint32_t data_type:4; @@ -638,12 +498,23 @@ struct r300_vertex_buffer { }; struct r300_index_buffer { - GLvoid *ptr; + struct radeon_bo *bo; + int bo_offset; + GLboolean is_32bit; - GLboolean free_needed; GLuint count; }; +struct r300_query_object { + struct gl_query_object Base; + struct radeon_bo *bo; + int curr_offset; + GLboolean emitted_begin; + + /* Double linked list of not flushed query objects */ + struct r300_query_object *prev, *next; +}; + /** * \brief R300 context structure. */ @@ -669,7 +540,7 @@ struct r300_context { uint32_t s3tc_force_disabled:1; uint32_t stencil_two_side_disabled:1; } options; - + struct r300_swtcl_info swtcl; struct r300_vertex_buffer vbuf; struct r300_index_buffer ind_buf; @@ -678,6 +549,13 @@ struct r300_context { uint32_t fallback; DECLARE_RENDERINPUTS(render_inputs_bitset); + + struct { + struct r300_query_object *current; + struct r300_query_object not_flushed_head; + } query; + + int num_z_pipes; }; #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 9769ff5399..d524d60299 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -36,41 +36,67 @@ #include "r300_context.h" #include "r300_emit.h" #include "r300_render.h" +#include "r300_queryobj.h" #include "r300_state.h" #include "r300_tex.h" +#include "radeon_buffer_objects.h" + #include "tnl/tnl.h" #include "tnl/t_vp_build.h" #include "vbo/vbo_context.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" -static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf, struct gl_buffer_object **bo, GLuint *nr_bo) + +static int getTypeSize(GLenum type) +{ + switch (type) { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_index_buffer *ind_buf = &r300->ind_buf; GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; - if (!mesa_ind_buf) { - ind_buf->ptr = NULL; - return; - } - - ind_buf->count = mesa_ind_buf->count; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - bo[*nr_bo] = mesa_ind_buf->obj; - (*nr_bo)++; ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); GLubyte *in = (GLubyte *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1)); - int i; - ind_buf->ptr = out; + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -80,16 +106,16 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } - ind_buf->free_needed = GL_TRUE; - ind_buf->is_32bit = GL_FALSE; - } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) { #if MESA_BIG_ENDIAN + } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLuint size; GLushort *in = (GLushort *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * - ((mesa_ind_buf->count + 1) & ~1)); - int i; + size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offet, size, 4); - ind_buf->ptr = out; + assert(r300->ind_buf.bo->ptr != NULL) + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -98,42 +124,60 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer if (i < mesa_ind_buf->count) { *out++ = in[i]; } - - ind_buf->free_needed = GL_TRUE; -#else - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; #endif - ind_buf->is_32bit = GL_FALSE; - } else { - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; - ind_buf->is_32bit = GL_TRUE; + } + + r300->ind_buf.is_32bit = GL_FALSE; + r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); } } -static int getTypeSize(GLenum type) + +static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { - switch (type) { - case GL_DOUBLE: - return sizeof(GLdouble); - case GL_FLOAT: - return sizeof(GLfloat); - case GL_INT: - return sizeof(GLint); - case GL_UNSIGNED_INT: - return sizeof(GLuint); - case GL_SHORT: - return sizeof(GLshort); - case GL_UNSIGNED_SHORT: - return sizeof(GLushort); - case GL_BYTE: - return sizeof(GLbyte); - case GL_UNSIGNED_BYTE: - return sizeof(GLubyte); - default: - assert(0); - return 0; + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (!mesa_ind_buf) { + r300->ind_buf.bo = NULL; + return; + } + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); + _mesa_memcpy(dst_ptr, src_ptr, size); + + r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } else { + r300FixupIndexBuffer(ctx, mesa_ind_buf); } } @@ -161,26 +205,118 @@ static int getTypeSize(GLenum type) } \ } while (0) -static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input, struct gl_buffer_object **bo, GLuint *nr_bo) +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_buffer *vbuf = &r300->vbuf; - struct vertex_attribute r300_attr; - const void *src_ptr; - GLenum type; + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; GLuint stride; + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + count = 1; + if (input->BufferObj->Name) { if (!input->BufferObj->Pointer) { - bo[*nr_bo] = input->BufferObj; - (*nr_bo)++; ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); - assert(input->BufferObj->Pointer != NULL); + mapped_named_bo = GL_TRUE; } src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); - } else + } else { src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + if (RADEON_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type); + fprintf(stderr, "stride %d, components %d\n", stride, input->Size); + } + + assert(src_ptr != NULL); + + switch (input->Type) { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 32); + + if (!input->BufferObj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + struct vertex_attribute r300_attr; + GLenum type; + GLuint stride; stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; @@ -189,62 +325,57 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st getTypeSize(input->Type) != 4 || #endif stride < 4) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) { - fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type); - fprintf(stderr, "stride %d, components %d\n", stride, input->Size); - } - - GLfloat *dst_ptr, *tmp; - - /* Convert value for first element only */ - if (input->StrideB == 0) - count = 1; - - tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count); - - switch (input->Type) { - case GL_DOUBLE: - CONVERT(GLdouble, (GLfloat)); - break; - case GL_UNSIGNED_INT: - CONVERT(GLuint, UINT_TO_FLOAT); - break; - case GL_INT: - CONVERT(GLint, INT_TO_FLOAT); - break; - case GL_UNSIGNED_SHORT: - CONVERT(GLushort, USHORT_TO_FLOAT); - break; - case GL_SHORT: - CONVERT(GLshort, SHORT_TO_FLOAT); - break; - case GL_UNSIGNED_BYTE: - assert(input->Format != GL_BGRA); - CONVERT(GLubyte, UBYTE_TO_FLOAT); - break; - case GL_BYTE: - CONVERT(GLbyte, BYTE_TO_FLOAT); - break; - default: - assert(0); - break; - } type = GL_FLOAT; - r300_attr.free_needed = GL_TRUE; - r300_attr.data = tmp; + + r300ConvertAttrib(ctx, count, input, &r300_attr); if (input->StrideB == 0) { r300_attr.stride = 0; } else { r300_attr.stride = sizeof(GLfloat) * input->Size; } r300_attr.dwords = input->Size; + r300_attr.is_named_bo = GL_FALSE; } else { type = input->Type; - r300_attr.free_needed = GL_FALSE; - r300_attr.data = (GLvoid *)src_ptr; - r300_attr.stride = input->StrideB; - r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + if (input->BufferObj->Name) { + if (stride % 4 != 0) { + assert(((int) input->Ptr) % input->StrideB == 0); + r300AlignDataToDword(ctx, input, count, &r300_attr); + r300_attr.is_named_bo = GL_FALSE; + } else { + r300_attr.stride = input->StrideB; + r300_attr.bo_offset = (GLuint) input->Ptr; + r300_attr.bo = get_radeon_buffer_object(input->BufferObj)->bo; + r300_attr.is_named_bo = GL_TRUE; + } + } else { + int size; + uint32_t *dst; + + if (input->StrideB == 0) { + size = getTypeSize(input->Type) * input->Size; + count = 1; + r300_attr.stride = 0; + } else { + size = getTypeSize(input->Type) * input->Size * count; + r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3; + } + + radeonAllocDmaRegion(&r300->radeon, &r300_attr.bo, &r300_attr.bo_offset, size, 32); + assert(r300_attr.bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(r300_attr.bo->ptr, r300_attr.bo_offset); + switch (r300_attr.dwords) { + case 1: radeonEmitVec4(dst, input->Ptr, input->StrideB, count); break; + case 2: radeonEmitVec8(dst, input->Ptr, input->StrideB, count); break; + case 3: radeonEmitVec12(dst, input->Ptr, input->StrideB, count); break; + case 4: radeonEmitVec16(dst, input->Ptr, input->StrideB, count); break; + default: assert(0); break; + } + + r300_attr.is_named_bo = GL_FALSE; + } } r300_attr.size = input->Size; @@ -333,15 +464,15 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st ++vbuf->num_attribs; } -static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count, struct gl_buffer_object **bo, GLuint *nr_bo) +static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_buffer *vbuf = &r300->vbuf; - + int ret; { int i, tmp; - tmp = r300->selected_vp->Base->Base.InputsRead; + tmp = r300->selected_vp->code.InputsRead; i = 0; vbuf->num_attribs = 0; while (tmp) { @@ -351,7 +482,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar ++i; } - r300TranslateAttrib(ctx, i, count, arrays[i], bo, nr_bo); + r300TranslateAttrib(ctx, i, count, arrays[i]); tmp >>= 1; ++i; @@ -366,38 +497,47 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar int i; for (i = 0; i < vbuf->num_attribs; i++) { - rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], - vbuf->attribs[i].data, vbuf->attribs[i].dwords, - vbuf->attribs[i].stride, count); + struct radeon_aos *aos = &r300->radeon.tcl.aos[i]; + + aos->count = vbuf->attribs[i].stride == 0 ? 1 : count; + aos->stride = vbuf->attribs[i].stride / sizeof(float); + aos->offset = vbuf->attribs[i].bo_offset; + aos->components = vbuf->attribs[i].dwords; + aos->bo = vbuf->attribs[i].bo; + + if (vbuf->attribs[i].is_named_bo) { + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[i].bo, RADEON_GEM_DOMAIN_GTT, 0); + } } r300->radeon.tcl.aos_count = vbuf->num_attribs; + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, r300->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, GL_TRUE); } } -static void r300FreeData(GLcontext *ctx, struct gl_buffer_object **bo, GLuint nr_bo) +static void r300FreeData(GLcontext *ctx) { + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + r300ContextPtr r300 = R300_CONTEXT(ctx); { - struct r300_vertex_buffer *vbuf = &R300_CONTEXT(ctx)->vbuf; int i; - for (i = 0; i < vbuf->num_attribs; i++) { - if (vbuf->attribs[i].free_needed) - _mesa_free(vbuf->attribs[i].data); + for (i = 0; i < r300->vbuf.num_attribs; i++) { + if (!r300->vbuf.attribs[i].is_named_bo) { + radeon_bo_unref(r300->vbuf.attribs[i].bo); + } + r300->radeon.tcl.aos[i].bo = NULL; } } { - struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf; - if (ind_buf->free_needed) - _mesa_free(ind_buf->ptr); - } - - { - int i; - - for (i = 0; i < nr_bo; ++i) { - ctx->Driver.UnmapBuffer(ctx, 0, bo[i]); + if (r300->ind_buf.bo != NULL) { + radeon_bo_unref(r300->ind_buf.bo); } } } @@ -411,8 +551,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, GLuint max_index ) { struct r300_context *r300 = R300_CONTEXT(ctx); - struct gl_buffer_object *bo[VERT_ATTRIB_MAX+1]; - GLuint i, nr_bo = 0; + GLuint i; if (ctx->NewState) _mesa_update_state( ctx ); @@ -424,35 +563,37 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); - r300FixupIndexBuffer(ctx, ib, bo, &nr_bo); - /* ensure we have the cmd buf space in advance to cover * the state + DMA AOS pointers */ rcommonEnsureCmdBufSpace(&r300->radeon, - r300->radeon.hw.max_state_size + (50*sizeof(int)), - __FUNCTION__); + r300->radeon.hw.max_state_size + (60*sizeof(int)), + __FUNCTION__); + + r300SetupIndexBuffer(ctx, ib); - r300SetVertexFormat(ctx, arrays, max_index + 1, bo, &nr_bo); + r300SetVertexFormat(ctx, arrays, max_index + 1); if (r300->fallback) return GL_FALSE; - r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten); + r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten); r300UpdateShaderStates(r300); r300EmitCacheFlush(r300); radeonEmitState(&r300->radeon); + r300EmitQueryBegin(ctx); + for (i = 0; i < nr_prims; ++i) { r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode); } r300EmitCacheFlush(r300); - radeonReleaseArrays(ctx, ~0); + r300EmitQueryEnd(ctx); - r300FreeData(ctx, bo, nr_bo); + r300FreeData(ctx); return GL_TRUE; } @@ -462,28 +603,23 @@ static void r300DrawPrims(GLcontext *ctx, const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index) { - struct split_limits limits; GLboolean retval; - if (ib) - limits.max_verts = 0xffffffff; - else - limits.max_verts = 65535; - - limits.max_indices = 65535; - limits.max_vb_size = 1024*1024; + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } if (min_index) { vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); return; } - if ((ib && ib->count > 65535)) { - vbo_split_prims (ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims, &limits); - return; - } /* Make an attempt at drawing */ retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index feb3370f37..07e6223087 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -124,41 +124,6 @@ GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes) return ret; } -GLboolean r300EmitArrays(GLcontext * ctx) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_buffer *vbuf = &r300->vbuf; - GLuint InputsRead, OutputsWritten; - - r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); - - r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); - if (r300->fallback & R300_RASTER_FALLBACK_MASK) - return GL_FALSE; - - { - struct vertex_buffer *mesa_vb = &TNL_CONTEXT(ctx)->vb; - GLuint attr, i; - - for (i = 0; i < vbuf->num_attribs; i++) { - attr = vbuf->attribs[i].element; - rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], mesa_vb->AttribPtr[attr]->data, - mesa_vb->AttribPtr[attr]->size, mesa_vb->AttribPtr[attr]->stride, mesa_vb->Count); - } - - r300->radeon.tcl.aos_count = vbuf->num_attribs; - - /* Fill index buffer info */ - r300->ind_buf.ptr = mesa_vb->Elts; - r300->ind_buf.is_32bit = GL_TRUE; - r300->ind_buf.free_needed = GL_FALSE; - } - - r300SetupVAP(ctx, InputsRead, OutputsWritten); - - return GL_TRUE; -} - void r300EmitCacheFlush(r300ContextPtr rmesa) { BATCH_LOCALS(&rmesa->radeon); diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 3f8c60ffae..8e57e354d1 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -104,7 +104,7 @@ static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet) return cmd.u; } -static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, +static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, unsigned short count) { drm_r300_cmd_header_t cmd; @@ -216,8 +216,6 @@ void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) } } -extern GLboolean r300EmitArrays(GLcontext * ctx); - extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c deleted file mode 100644 index 55c1cfe631..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "r300_fragprog.h" - -#include "shader/prog_parameter.h" - -#include "r300_context.h" -#include "r300_fragprog_swizzle.h" - -static void reset_srcreg(struct prog_src_register* reg) -{ - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; -} - -static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) -{ - gl_state_index fail_value_tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 - }; - struct prog_src_register reg = { 0, }; - - fail_value_tokens[2] = tmu; - reg.File = PROGRAM_STATE_VAR; - reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); - reg.Swizzle = SWIZZLE_WWWW; - return reg; -} - -/** - * Transform TEX, TXP, TXB, and KIL instructions in the following way: - * - premultiply texture coordinates for RECT - * - extract operand swizzles - * - introduce a temporary register when write masks are needed - * - * \todo If/when r5xx uses the radeon_program architecture, this can probably - * be reused. - */ -GLboolean r300_transform_TEX( - struct radeon_transform_context *t, - struct prog_instruction* orig_inst, void* data) -{ - struct r300_fragment_program_compiler *compiler = - (struct r300_fragment_program_compiler*)data; - struct prog_instruction inst = *orig_inst; - struct prog_instruction* tgt; - GLboolean destredirect = GL_FALSE; - - if (inst.Opcode != OPCODE_TEX && - inst.Opcode != OPCODE_TXB && - inst.Opcode != OPCODE_TXP && - inst.Opcode != OPCODE_KIL) - return GL_FALSE; - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = inst.DstReg; - if (comparefunc == GL_ALWAYS) { - tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_1111; - } else { - tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); - } - return GL_TRUE; - } - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = radeonFindFreeTemporary(t); - inst.DstReg.WriteMask = WRITEMASK_XYZW; - } - - - /* Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - */ - if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - - int tempreg = radeonFindFreeTemporary(t); - int factor_index; - - tokens[2] = inst.TexSrcUnit; - factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); - - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MUL; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tempreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - tgt->SrcReg[1].File = PROGRAM_STATE_VAR; - tgt->SrcReg[1].Index = factor_index; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tempreg; - } - - if (inst.Opcode != OPCODE_KIL) { - if (inst.DstReg.File != PROGRAM_TEMPORARY || - inst.DstReg.WriteMask != WRITEMASK_XYZW) { - int tempreg = radeonFindFreeTemporary(t); - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } else if (inst.SaturateMode) { - destredirect = GL_TRUE; - } - } - - if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { - int tmpreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(t->Program, 1); - tgt->Opcode = OPCODE_MOV; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tmpreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tmpreg; - } - - tgt = radeonAppendInstructions(t->Program, 1); - _mesa_copy_instructions(tgt, &inst, 1); - - if (inst.Opcode != OPCODE_KIL && - t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = radeonFindFreeTemporary(t); - int pass, fail; - - tgt = radeonAppendInstructions(t->Program, 3); - - tgt[0].Opcode = OPCODE_RCP; - tgt[0].DstReg.File = PROGRAM_TEMPORARY; - tgt[0].DstReg.Index = rcptemp; - tgt[0].DstReg.WriteMask = WRITEMASK_W; - tgt[0].SrcReg[0] = inst.SrcReg[0]; - tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - - tgt[1].Opcode = OPCODE_MAD; - tgt[1].DstReg = inst.DstReg; - tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; - tgt[1].SrcReg[0] = inst.SrcReg[0]; - tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[1].Index = rcptemp; - tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; - tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; - tgt[1].SrcReg[2].Index = inst.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; - - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; - else - tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; - - tgt[2].Opcode = OPCODE_CMP; - tgt[2].DstReg = orig_inst->DstReg; - tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; - tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; - - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { - pass = 1; - fail = 2; - } else { - pass = 2; - fail = 1; - } - - tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; - tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; - tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); - } else if (destredirect) { - tgt = radeonAppendInstructions(t->Program, 1); - - tgt->Opcode = OPCODE_MOV; - tgt->DstReg = orig_inst->DstReg; - tgt->SaturateMode = inst.SaturateMode; - tgt->SrcReg[0].File = PROGRAM_TEMPORARY; - tgt->SrcReg[0].Index = inst.DstReg.Index; - } - - return GL_TRUE; -} - -/* just some random things... */ -void r300FragmentProgramDump(union rX00_fragment_program_code *c) -{ - struct r300_fragment_program_code *code = &c->r300; - int n, i, j; - static int pc = 0; - - fprintf(stderr, "pc=%d*************************************\n", pc++); - - fprintf(stderr, "Hardware program\n"); - fprintf(stderr, "----------------\n"); - - for (n = 0; n < (code->cur_node + 1); n++) { - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d, flags: %08x\n", n, - code->node[n].alu_offset, - code->node[n].tex_offset, - code->node[n].alu_end, code->node[n].tex_end, - code->node[n].flags); - - if (n > 0 || code->first_node_has_tex) { - fprintf(stderr, " TEX:\n"); - for (i = code->node[n].tex_offset; - i <= code->node[n].tex_offset + code->node[n].tex_end; - ++i) { - const char *instr; - - switch ((code->tex. - inst[i] >> R300_TEX_INST_SHIFT) & - 15) { - case R300_TEX_OP_LD: - instr = "TEX"; - break; - case R300_TEX_OP_KIL: - instr = "KIL"; - break; - case R300_TEX_OP_TXP: - instr = "TXP"; - break; - case R300_TEX_OP_TXB: - instr = "TXB"; - break; - default: - instr = "UNKNOWN"; - } - - fprintf(stderr, - " %s t%i, %c%i, texture[%i] (%08x)\n", - instr, - (code->tex. - inst[i] >> R300_DST_ADDR_SHIFT) & 31, - 't', - (code->tex. - inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (code->tex. - inst[i] & R300_TEX_ID_MASK) >> - R300_TEX_ID_SHIFT, - code->tex.inst[i]); - } - } - - for (i = code->node[n].alu_offset; - i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { - char srcc[3][10], dstc[20]; - char srca[3][10], dsta[20]; - char argc[3][20]; - char arga[3][20]; - char flags[5], tmp[10]; - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst1 >> (j * 6); - int rega = code->alu.inst[i].inst3 >> (j * 6); - - sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); - sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); - } - - dstc[0] = 0; - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - } - sprintf(flags, "%s%s%s", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(tmp, "o%i.%s", - (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, - flags); - strcat(dstc, tmp); - } - - dsta[0] = 0; - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { - sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { - sprintf(tmp, "o%i.w ", - (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); - strcat(dsta, tmp); - } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { - strcat(dsta, "Z"); - } - - fprintf(stderr, - "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" - " w: %3s %3s %3s -> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], dstc, - code->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, code->alu.inst[i].inst3); - - for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst0 >> (j * 7); - int rega = code->alu.inst[i].inst2 >> (j * 7); - int d; - char buf[20]; - - d = regc & 31; - if (d < 12) { - switch (d % 4) { - case R300_ALU_ARGC_SRC0C_XYZ: - sprintf(buf, "%s.xyz", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_XXX: - sprintf(buf, "%s.xxx", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_YYY: - sprintf(buf, "%s.yyy", - srcc[d / 4]); - break; - case R300_ALU_ARGC_SRC0C_ZZZ: - sprintf(buf, "%s.zzz", - srcc[d / 4]); - break; - } - } else if (d < 15) { - sprintf(buf, "%s.www", srca[d - 12]); - } else if (d == 20) { - sprintf(buf, "0.0"); - } else if (d == 21) { - sprintf(buf, "1.0"); - } else if (d == 22) { - sprintf(buf, "0.5"); - } else if (d >= 23 && d < 32) { - d -= 23; - switch (d / 3) { - case 0: - sprintf(buf, "%s.yzx", - srcc[d % 3]); - break; - case 1: - sprintf(buf, "%s.zxy", - srcc[d % 3]); - break; - case 2: - sprintf(buf, "%s.Wzy", - srcc[d % 3]); - break; - } - } else { - sprintf(buf, "%i", d); - } - - sprintf(argc[j], "%s%s%s%s", - (regc & 32) ? "-" : "", - (regc & 64) ? "|" : "", - buf, (regc & 64) ? "|" : ""); - - d = rega & 31; - if (d < 9) { - sprintf(buf, "%s.%c", srcc[d / 3], - 'x' + (char)(d % 3)); - } else if (d < 12) { - sprintf(buf, "%s.w", srca[d - 9]); - } else if (d == 16) { - sprintf(buf, "0.0"); - } else if (d == 17) { - sprintf(buf, "1.0"); - } else if (d == 18) { - sprintf(buf, "0.5"); - } else { - sprintf(buf, "%i", d); - } - - sprintf(arga[j], "%s%s%s%s", - (rega & 32) ? "-" : "", - (rega & 64) ? "|" : "", - buf, (rega & 64) ? "|" : ""); - } - - fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" - " w: %8s %8s %8s op: %08x\n", - argc[0], argc[1], argc[2], - code->alu.inst[i].inst0, arga[0], arga[1], - arga[2], code->alu.inst[i].inst2); - } - } -} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h deleted file mode 100644 index 5ce6f33cee..0000000000 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2005 Ben Skeggs. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/* - * Authors: - * Ben Skeggs <darktama@iinet.net.au> - * Jerome Glisse <j.glisse@gmail.com> - */ -#ifndef __R300_FRAGPROG_H_ -#define __R300_FRAGPROG_H_ - -#include "shader/program.h" -#include "shader/prog_instruction.h" - -#include "r300_context.h" -#include "radeon_program.h" - -#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 -#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 - -#if 1 - -/** - * Fragment program helper macros - */ - -/* Produce unshifted source selectors */ -#define FP_TMP(idx) (idx) -#define FP_CONST(idx) ((idx) | (1 << 5)) - -/* Produce source/dest selector dword */ -#define FP_SELC_MASK_NO 0 -#define FP_SELC_MASK_X 1 -#define FP_SELC_MASK_Y 2 -#define FP_SELC_MASK_XY 3 -#define FP_SELC_MASK_Z 4 -#define FP_SELC_MASK_XZ 5 -#define FP_SELC_MASK_YZ 6 -#define FP_SELC_MASK_XYZ 7 - -#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTC_SHIFT) | \ - (FP_SELC_MASK_##regmask << 23) | \ - (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_ALU_SRC0C_SHIFT) | \ - ((src1) << R300_ALU_SRC1C_SHIFT) | \ - ((src2) << R300_ALU_SRC2C_SHIFT)) - -#define FP_SELA_MASK_NO 0 -#define FP_SELA_MASK_W 1 - -#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_ALU_DSTA_SHIFT) | \ - (FP_SELA_MASK_##regmask << 23) | \ - (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_ALU_SRC0A_SHIFT) | \ - ((src1) << R300_ALU_SRC1A_SHIFT) | \ - ((src2) << R300_ALU_SRC2A_SHIFT)) - -/* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_ALU_ARGC_##source -#define FP_ARGA(source) R300_ALU_ARGA_##source -#define FP_ABS(arg) ((arg) | (1 << 6)) -#define FP_NEG(arg) ((arg) ^ (1 << 5)) - -/* Produce instruction dword */ -#define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTC_##opcode | \ - ((arg0) << R300_ALU_ARG0C_SHIFT) | \ - ((arg1) << R300_ALU_ARG1C_SHIFT) | \ - ((arg2) << R300_ALU_ARG2C_SHIFT)) - -#define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_ALU_OUTA_##opcode | \ - ((arg0) << R300_ALU_ARG0A_SHIFT) | \ - ((arg1) << R300_ALU_ARG1A_SHIFT) | \ - ((arg2) << R300_ALU_ARG2A_SHIFT)) - -#endif - -extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); - -extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); - -extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); - -#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index f5c4c0f4a0..6674efc5bc 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -42,21 +42,51 @@ #include "shader/prog_parameter.h" #include "shader/prog_print.h" +#include "compiler/radeon_compiler.h" + #include "r300_state.h" -#include "r300_fragprog.h" -#include "r300_fragprog_swizzle.h" -#include "r500_fragprog.h" -#include "radeon_program.h" -#include "radeon_program_alu.h" -static void nqssadce_init(struct nqssadce_state* s) +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) { - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; + return comparefunc - GL_NEVER; } /** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct gl_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + + +/** * Transform the program to support fragment.position. * * Introduce a small fragment at the start of the program that will be @@ -65,104 +95,26 @@ static void nqssadce_init(struct nqssadce_state* s) * to read from a newly allocated temporary. * */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) { - GLuint InputsRead = compiler->fp->Base->InputsRead; + int i; - if (!(InputsRead & FRAG_BIT_WPOS)) { - compiler->fp->wpos_attr = FRAG_ATTRIB_MAX; + if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) { + fp->wpos_attr = FRAG_ATTRIB_MAX; return; } - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_WPOS); - InputsRead |= 1 << i; - compiler->fp->Base->InputsRead = InputsRead; - compiler->fp->wpos_attr = i; + if (!(compiler->Base.Program.InputsRead & (1 << i))) { + fp->wpos_attr = i; break; } } - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - i = 0; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } + rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr); } - /** * Rewrite fragment.fogcoord to use a texture coordinate slot. * Note that fogcoord is forced into an X001 pattern, and this enforcement @@ -170,205 +122,117 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) * * See also the counterpart rewriting for vertex programs. */ -static void rewriteFog(struct r300_fragment_program_compiler *compiler) +static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) { - struct r300_fragment_program *fp = compiler->fp; - GLuint InputsRead; + struct prog_src_register src; int i; - InputsRead = fp->Base->InputsRead; - - if (!(InputsRead & FRAG_BIT_FOGC)) { + if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) { fp->fog_attr = FRAG_ATTRIB_MAX; return; } for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) { - if (!(InputsRead & (1 << i))) { - InputsRead &= ~(1 << FRAG_ATTRIB_FOGC); - InputsRead |= 1 << i; - fp->Base->InputsRead = InputsRead; + if (!(compiler->Base.Program.InputsRead & (1 << i))) { fp->fog_attr = i; break; } } - { - struct prog_instruction *inst; - - inst = compiler->program->Instructions; - while (inst->Opcode != OPCODE_END) { - const int src_regs = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < src_regs; ++i) { - if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) { - inst->SrcReg[i].Index = fp->fog_attr; - inst->SrcReg[i].Swizzle = combine_swizzles( - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE), - inst->SrcReg[i].Swizzle); - } - } - ++inst; - } - } -} - -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } + memset(&src, 0, sizeof(src)); + src.File = PROGRAM_INPUT; + src.Index = fp->fog_attr; + src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src); } -static GLuint build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} /** - * Collect all external state that is relevant for compiling the given - * fragment program. + * Reserve hardware temporary registers for the program inputs. + * + * @note This allocation is performed explicitly, because the order of inputs + * is determined by the RS hardware. */ -static void build_state( - r300ContextPtr r300, - struct gl_fragment_program *fp, - struct r300_fragment_program_external_state *state) +static void allocate_hw_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) { - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } + GLuint InputsRead = c->Base.Program.InputsRead; + int i; + GLuint hwindex = 0; + + /* Primary colour */ + if (InputsRead & FRAG_BIT_COL0) + allocate(mydata, FRAG_ATTRIB_COL0, hwindex++); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + allocate(mydata, FRAG_ATTRIB_COL1, hwindex++); + InputsRead &= ~FRAG_BIT_COL1; + + /* Texcoords */ + for (i = 0; i < 8; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + allocate(mydata, FRAG_ATTRIB_TEX0+i, hwindex++); } -} + InputsRead &= ~FRAG_BITS_TEX_ANY; -static void rewrite_depth_out(struct gl_program *prog) -{ - struct prog_instruction *inst; + /* Fogcoords treated as a texcoord */ + if (InputsRead & FRAG_BIT_FOGC) + allocate(mydata, FRAG_ATTRIB_FOGC, hwindex++); + InputsRead &= ~FRAG_BIT_FOGC; - for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) { - if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH) - continue; + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + allocate(mydata, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; - } else { - inst->DstReg.WriteMask = 0; - continue; - } - - switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; - } - } + /* Anything else */ + if (InputsRead) + rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", InputsRead); } -void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp) + +static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_fragment_program_compiler compiler; - compiler.r300 = r300; - compiler.fp = fp; + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; + compiler.code = &fp->code; - compiler.program = fp->Base; + compiler.state = fp->state; + compiler.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE; + compiler.OutputDepth = FRAG_RESULT_DEPTH; + compiler.OutputColor = FRAG_RESULT_COLOR; + compiler.AllocateHwInputs = &allocate_hw_inputs; - if (RADEON_DEBUG & DEBUG_PIXEL) { + if (compiler.Base.Debug) { fflush(stdout); _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); + _mesa_print_program(&cont->Base.Base); fflush(stdout); } - insert_WPOS_trailer(&compiler); - - rewriteFog(&compiler); - - rewrite_depth_out(compiler.program); - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_program_transformation transformations[] = { - { &r500_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 4, transformations); - } else { - struct radeon_program_transformation transformations[] = { - { &r300_transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform(ctx, compiler.program, 3, transformations); - } + rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base); - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle - }; - radeonNqssaDce(ctx, compiler.program, &nqssadce); - } + insert_WPOS_trailer(&compiler, fp); - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - fflush(stdout); - } + rewriteFog(&compiler, fp); - if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) - fp->error = GL_TRUE; + r3xx_compile_fragment_program(&compiler); + fp->error = compiler.Base.Error; - fp->translated = GL_TRUE; + fp->InputsRead = compiler.Base.Program.InputsRead; - if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300->vtbl.FragmentProgramDump(&fp->code); + rc_destroy(&compiler.Base); } -struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) +struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_fragment_program_cont *fp_list; @@ -389,11 +253,11 @@ struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx) fp = _mesa_calloc(sizeof(struct r300_fragment_program)); fp->state = state; - fp->translated = GL_FALSE; - fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base); fp->next = fp_list->progs; fp_list->progs = fp; + translate_fragment_program(ctx, fp_list, fp); + return r300->selected_fp = fp; } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h index 5e103ee408..3d64c08cee 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -32,8 +32,6 @@ #include "r300_context.h" -extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp); - -struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx); +struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ddabd53992..da801f42e4 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/macros.h" #include "main/context.h" +#include "main/simple_list.h" #include "swrast/swrast.h" #include "radeon_common.h" @@ -55,8 +56,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_vertprog.h" #include "radeon_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_context.h" +#include "r300_queryobj.h" #include "vblank.h" @@ -66,6 +67,66 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define CLEARBUFFER_DEPTH 0x2 #define CLEARBUFFER_STENCIL 0x4 +#if 1 + +/** + * Fragment program helper macros + */ + +/* Produce unshifted source selectors */ +#define FP_TMP(idx) (idx) +#define FP_CONST(idx) ((idx) | (1 << 5)) + +/* Produce source/dest selector dword */ +#define FP_SELC_MASK_NO 0 +#define FP_SELC_MASK_X 1 +#define FP_SELC_MASK_Y 2 +#define FP_SELC_MASK_XY 3 +#define FP_SELC_MASK_Z 4 +#define FP_SELC_MASK_XZ 5 +#define FP_SELC_MASK_YZ 6 +#define FP_SELC_MASK_XYZ 7 + +#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ + (FP_SELC_MASK_##regmask << 23) | \ + (FP_SELC_MASK_##outmask << 26) | \ + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) + +#define FP_SELA_MASK_NO 0 +#define FP_SELA_MASK_W 1 + +#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ + (FP_SELA_MASK_##regmask << 23) | \ + (FP_SELA_MASK_##outmask << 24) | \ + ((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) + +/* Produce unshifted argument selectors */ +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source +#define FP_ABS(arg) ((arg) | (1 << 6)) +#define FP_NEG(arg) ((arg) ^ (1 << 5)) + +/* Produce instruction dword */ +#define FP_INSTRC(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) + +#define FP_INSTRA(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) + +#endif + static void r300EmitClearState(GLcontext * ctx); static void r300ClearBuffer(r300ContextPtr r300, int flags, @@ -109,18 +170,21 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, } #if 1 if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { - assert(rrbd != 0); - cbpitch = (rrbd->pitch / rrbd->cpp); + uint32_t zbpitch = (rrbd->pitch / rrbd->cpp); if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ - cbpitch |= R300_DEPTHMACROTILE_ENABLE; + zbpitch |= R300_DEPTHMACROTILE_ENABLE; } if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ - cbpitch |= R300_DEPTHMICROTILE_TILED; + zbpitch |= R300_DEPTHMICROTILE_TILED; } BEGIN_BATCH_NO_AUTOSTATE(6); OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(zbpitch); + else + OUT_BATCH_RELOC(zbpitch, rrbd->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); } #endif @@ -508,12 +572,12 @@ static void r300EmitClearState(GLcontext * ctx) 0, 0xf, PVS_DST_REG_OUT); vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[4] = 0x0; vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, @@ -521,13 +585,12 @@ static void r300EmitClearState(GLcontext * ctx) vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, - - VSF_FLAG_NONE); + NEGATE_NONE); vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[8] = 0x0; r300->vap_flush_needed = GL_TRUE; @@ -546,7 +609,7 @@ static int r300KernelClear(GLcontext *ctx, GLuint flags) /* Make sure it fits there. */ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); - + if (flags & BUFFER_BIT_COLOR0) { rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, @@ -692,10 +755,19 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) } } +static void r300Flush(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + radeonFlush(ctx); + + make_empty_list(&r300->query.not_flushed_head); +} + void r300InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r300Clear; functions->Finish = radeonFinish; - functions->Flush = radeonFlush; + functions->Flush = r300Flush; } diff --git a/src/mesa/drivers/dri/r300/r300_queryobj.c b/src/mesa/drivers/dri/r300/r300_queryobj.c new file mode 100644 index 0000000000..df1fb32ee7 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_queryobj.c @@ -0,0 +1,250 @@ +/* + * Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Maciej Cencora <m.cencora@gmail.com> + * + */ + +#include "r300_queryobj.h" +#include "r300_emit.h" + +#include "main/imports.h" +#include "main/simple_list.h" + +#define DDEBUG 0 + +#define PAGE_SIZE 4096 + +static void r300QueryGetResult(GLcontext *ctx, struct gl_query_object *q) +{ + struct r300_query_object *query = (struct r300_query_object *)q; + uint32_t *result; + int i; + + if (DDEBUG) fprintf(stderr, "%s: query id %d, result %d\n", __FUNCTION__, query->Base.Id, (int) query->Base.Result); + + radeon_bo_map(query->bo, GL_FALSE); + + result = query->bo->ptr; + + query->Base.Result = 0; + for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) { + query->Base.Result += result[i]; + if (DDEBUG) fprintf(stderr, "result[%d] = %d\n", i, result[i]); + } + + radeon_bo_unmap(query->bo); +} + +static struct gl_query_object * r300NewQueryObject(GLcontext *ctx, GLuint id) +{ + struct r300_query_object *query; + + query = _mesa_calloc(sizeof(struct r300_query_object)); + + query->Base.Id = id; + query->Base.Result = 0; + query->Base.Active = GL_FALSE; + query->Base.Ready = GL_TRUE; + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, query->Base.Id); + + return &query->Base; +} + +static void r300DeleteQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct r300_query_object *query = (struct r300_query_object *)q; + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); + + if (query->bo) { + radeon_bo_unref(query->bo); + } + + _mesa_free(query); +} + +static void r300BeginQuery(GLcontext *ctx, struct gl_query_object *q) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_query_object *query = (struct r300_query_object *)q; + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); + + assert(r300->query.current == NULL); + + if (!query->bo) { + query->bo = radeon_bo_open(r300->radeon.radeonScreen->bom, 0, PAGE_SIZE, PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0); + } + query->curr_offset = 0; + + r300->query.current = query; + insert_at_tail(&r300->query.not_flushed_head, query); +} + +static void r300EndQuery(GLcontext *ctx, struct gl_query_object *q) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); + + r300EmitQueryEnd(ctx); + + r300->query.current = NULL; +} + +static void r300WaitQuery(GLcontext *ctx, struct gl_query_object *q) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_query_object *tmp, *query = (struct r300_query_object *)q; + + /* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */ + { + GLboolean found = GL_FALSE; + foreach(tmp, &r300->query.not_flushed_head) { + if (tmp == query) { + found = GL_TRUE; + break; + } + } + + if (found) + ctx->Driver.Flush(ctx); + } + + if (DDEBUG) fprintf(stderr, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset); + + r300QueryGetResult(ctx, q); + + query->Base.Ready = GL_TRUE; +} + + +/** + * TODO: + * should check if bo is idle, bo there's no interface to do it + * just wait for result now + */ +static void r300CheckQuery(GLcontext *ctx, struct gl_query_object *q) +{ + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, q->Id); + + r300WaitQuery(ctx, q); +} + +void r300EmitQueryBegin(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_query_object *query = r300->query.current; + BATCH_LOCALS(&r300->radeon); + + if (!query || query->emitted_begin) + return; + + if (DDEBUG) fprintf(stderr, "%s: query id %d\n", __FUNCTION__, query->Base.Id); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + OUT_BATCH_REGVAL(R300_ZB_ZPASS_DATA, 0); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + OUT_BATCH_REGVAL(R300_ZB_ZPASS_DATA, 0); + END_BATCH(); + } + + query->emitted_begin = GL_TRUE; +} + +void r300EmitQueryEnd(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_query_object *query = r300->query.current; + BATCH_LOCALS(&r300->radeon); + + if (!query || !query->emitted_begin) + return; + + if (DDEBUG) fprintf(stderr, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset); + + radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + query->bo, + 0, RADEON_GEM_DOMAIN_GTT); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { + BEGIN_BATCH_NO_AUTOSTATE(14); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->num_z_pipes + 2); + switch (r300->num_z_pipes) { + case 4: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 3: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 2: + if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + } else { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1); + } + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 1: + default: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + break; + } + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + END_BATCH(); + } + + query->curr_offset += r300->num_z_pipes * sizeof(uint32_t); + assert(query->curr_offset < PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +void r300InitQueryObjFunctions(struct dd_function_table *functions) +{ + functions->NewQueryObject = r300NewQueryObject; + functions->DeleteQuery = r300DeleteQuery; + functions->BeginQuery = r300BeginQuery; + functions->EndQuery = r300EndQuery; + functions->CheckQuery = r300CheckQuery; + functions->WaitQuery = r300WaitQuery; +} diff --git a/src/mesa/drivers/dri/r300/r300_queryobj.h b/src/mesa/drivers/dri/r300/r300_queryobj.h new file mode 100644 index 0000000000..f301f0b113 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_queryobj.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2008 Maciej Cencora <m.cencora@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Maciej Cencora <m.cencora@gmail.com> + * + */ + +#include "main/imports.h" +#include "r300_context.h" + +extern void r300EmitQueryBegin(GLcontext *ctx); +extern void r300EmitQueryEnd(GLcontext *ctx); + +extern void r300InitQueryObjFunctions(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 357c600af9..39b4b61a10 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1128,6 +1128,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* SU Depth Offset value */ #define R300_SU_DEPTH_OFFSET 0x42c4 +#define R300_SU_REG_DEST 0x42c8 +# define R300_RASTER_PIPE_SELECT_0 (1 << 0) +# define R300_RASTER_PIPE_SELECT_1 (1 << 1) +# define R300_RASTER_PIPE_SELECT_2 (1 << 2) +# define R300_RASTER_PIPE_SELECT_3 (1 << 3) +# define R300_RASTER_PIPE_SELECT_ALL 0xf + /* BEGIN: Rasterization / Interpolators - many guesses */ @@ -2014,6 +2021,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_FG_ALPHA_VALUE 0x4be0 # define R500_FG_ALPHA_VALUE_MASK 0x0000ffff +#define RV530_FG_ZBREG_DEST 0x4be8 +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) + /* gap */ /* Fragment program parameters in 7.16 floating point */ @@ -2667,6 +2679,24 @@ enum { PVS_SRC_ADDR_MODE_1_SHIFT = 32, }; + +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /*\}*/ /* BEGIN: Packet 3 commands */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 08d67b73ed..45330cda3c 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -64,6 +64,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "vbo/vbo.h" +#include "vbo/vbo_split.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" #include "radeon_reg.h" @@ -75,6 +76,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_tex.h" #include "r300_emit.h" #include "r300_fragprog_common.h" +#include "r300_queryobj.h" #include "r300_swtcl.h" /** @@ -172,64 +174,45 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, unsigned long n_elts) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - void *out; - GLuint size; - - size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3; - - radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, - &rmesa->radeon.tcl.elt_dma_offset, size, 4); - radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); - out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; - memcpy(out, rmesa->ind_buf.ptr, size); - radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); -} - -static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset) { BATCH_LOCALS(&rmesa->radeon); + int size; + + /* offset is in indices */ + BEGIN_BATCH(10); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (rmesa->ind_buf.is_32bit) { + /* convert to bytes */ + offset *= 4; + size = vertex_count; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + /* convert to bytes */ + offset *= 2; + size = (vertex_count + 1) >> 1; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type); + } - r300_emit_scissor(rmesa->radeon.glCtx); - if (vertex_count > 0) { - int size; - - BEGIN_BATCH(10); - OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); - if (rmesa->ind_buf.is_32bit) { - size = vertex_count; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type | - R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - size = (vertex_count + 1) >> 1; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type); - } - - if (!rmesa->radeon.radeonScreen->kernel_mm) { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset, - rmesa->radeon.tcl.elt_dma_bo, - rmesa->radeon.tcl.elt_dma_offset, - RADEON_GEM_DOMAIN_GTT, 0, 0); - OUT_BATCH(size); - } else { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); - OUT_BATCH(size); - radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, - rmesa->radeon.tcl.elt_dma_bo, - RADEON_GEM_DOMAIN_GTT, 0, 0); - } - END_BATCH(); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(size); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH(rmesa->ind_buf.bo_offset + offset); + OUT_BATCH(size); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0); } + END_BATCH(); } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) @@ -340,7 +323,7 @@ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { BATCH_LOCALS(&rmesa->radeon); - r300_emit_scissor(rmesa->radeon.glCtx); + r300_emit_scissor(rmesa->radeon.glCtx); BEGIN_BATCH(3); OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); @@ -365,8 +348,16 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) */ rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); - if (rmesa->ind_buf.ptr) { - r300EmitElts(ctx, num_verts); + if (rmesa->ind_buf.bo) { + GLuint first, incr, offset = 0; + + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65500) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0); if (rmesa->radeon.radeonScreen->kernel_mm) { BEGIN_BATCH_NO_AUTOSTATE(2); @@ -374,45 +365,56 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) OUT_BATCH(rmesa->radeon.tcl.aos[0].count); END_BATCH(); } - r300FireEB(rmesa, num_verts, type); - } else { - r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); - r300FireAOS(rmesa, num_verts, type); - } - COMMIT_BATCH(); -} - -static void r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int i; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - r300UpdateShaders(rmesa); - r300EmitArrays(ctx); + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + int align; + + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + + /* get alignment for IB correct */ + if (nr != num_verts) { + do { + align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2); + if (align % 4) + nr -= incr; + } while(align % 4); + if (nr <= 0) { + WARN_ONCE("did the impossible happen? we never aligned nr to dword\n"); + return; + } + + } + r300FireEB(rmesa, nr, type, offset); - r300UpdateShaderStates(rmesa); + num_verts -= nr; + offset += nr; + } - r300EmitCacheFlush(rmesa); - radeonEmitState(&rmesa->radeon); + } else { + GLuint first, incr, offset = 0; - for (i = 0; i < vb->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r300RunRenderPrimitive(ctx, start, end, prim); + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65500) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset); + r300FireAOS(rmesa, nr, type); + num_verts -= nr; + offset += nr; + } } - - r300EmitCacheFlush(rmesa); - - radeonReleaseArrays(ctx, ~0); + COMMIT_BATCH(); } - static const char *getFallbackString(uint32_t bit) { switch (bit) { @@ -449,7 +451,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) r300ContextPtr rmesa = R300_CONTEXT(ctx); uint32_t old_fallback = rmesa->fallback; static uint32_t fallback_warn = 0; - + if (mode) { if ((fallback_warn & bit) == 0) { if (RADEON_DEBUG & DEBUG_FALLBACKS) @@ -470,7 +472,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we change from no raster fallbacks to some raster fallbacks */ if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) && ((bit & R300_RASTER_FALLBACK_MASK) > 0)) { - + radeon_firevertices(&rmesa->radeon); rmesa->radeon.swtcl.RenderIndex = ~0; _swsetup_Wakeup( ctx ); @@ -489,7 +491,7 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) /* update only if we have disabled all raster fallbacks */ if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { _swrast_flush( ctx ); - + tnl->Driver.Render.Start = r300RenderStart; tnl->Driver.Render.Finish = r300RenderFinish; tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; @@ -497,38 +499,10 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) tnl->Driver.Render.BuildVertices = _tnl_build_vertices; tnl->Driver.Render.CopyPV = _tnl_copy_pv; tnl->Driver.Render.Interp = _tnl_interp; - + _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); } } - -} -static GLboolean r300RunNonTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->fallback & R300_RASTER_FALLBACK_MASK) - return GL_TRUE; - - if (rmesa->options.hw_tcl_enabled == GL_FALSE) - return GL_TRUE; - - r300RunRender(ctx, stage); - - return GL_FALSE; } - -const struct tnl_pipeline_stage _r300_render_stage = { - "r300 Hardware Rasterization", - NULL, - NULL, - NULL, - NULL, - r300RunNonTCLRender -}; diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 62228a3786..a4f9db13ec 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -38,7 +38,7 @@ static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont while (fp) { tmp = fp->next; - _mesa_reference_program(ctx, &fp->Base, NULL); + rc_constants_destroy(&fp->code.constants); _mesa_free(fp); fp = tmp; } @@ -50,6 +50,7 @@ static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *c while (vp) { tmp = vp->next; + rc_constants_destroy(&vp->code.constants); _mesa_reference_vertprog(ctx, &vp->Base, NULL); _mesa_free(vp); vp = tmp; @@ -122,15 +123,11 @@ static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct r300_fragment_program *fp = r300SelectFragmentShader(ctx); - if (!fp->translated) - r300TranslateFragmentShader(ctx, fp); + struct r300_fragment_program *fp = r300SelectAndTranslateFragmentShader(ctx); return !fp->error; } else { - struct r300_vertex_program *vp = r300SelectVertexShader(ctx); - if (!vp->translated) - r300TranslateVertexShader(vp); + struct r300_vertex_program *vp = r300SelectAndTranslateVertexShader(ctx); return !vp->error; } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 12fbf281d9..6081c33786 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -62,8 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_emit.h" #include "r300_tex.h" #include "r300_fragprog_common.h" -#include "r300_fragprog.h" -#include "r500_fragprog.h" #include "r300_render.h" #include "r300_vertprog.h" @@ -458,7 +456,7 @@ static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth; + return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth; } static void r300SetEarlyZState(GLcontext * ctx) @@ -473,6 +471,8 @@ static void r300SetEarlyZState(GLcontext * ctx) topZ = R300_ZTOP_DISABLE; else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill) topZ = R300_ZTOP_DISABLE; + else if (r300->query.current) + topZ = R300_ZTOP_DISABLE; if (topZ != r300->hw.zstencil_format.cmd[2]) { /* Note: This completely reemits the stencil format. @@ -1046,53 +1046,6 @@ void r300UpdateViewportOffset(GLcontext * ctx) radeonUpdateScissor(ctx); } -static void -r300FetchStateParameter(GLcontext * ctx, - const gl_state_index state[STATE_LENGTH], - GLfloat * value) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - - switch (state[0]) { - case STATE_INTERNAL: - switch (state[1]) { - case STATE_R300_WINDOW_DIMENSION: { - __DRIdrawablePrivate * drawable = radeon_get_drawable(&r300->radeon); - value[0] = drawable->w * 0.5f; /* width*0.5 */ - value[1] = drawable->h * 0.5f; /* height*0.5 */ - value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ - value[3] = 1.0F; /* not used */ - break; - } - - case STATE_R300_TEXRECT_FACTOR:{ - struct gl_texture_object *t = - ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX]; - - if (t && t->Image[0][t->BaseLevel]) { - struct gl_texture_image *image = - t->Image[0][t->BaseLevel]; - value[0] = 1.0 / image->Width2; - value[1] = 1.0 / image->Height2; - } else { - value[0] = 1.0; - value[1] = 1.0; - } - value[2] = 1.0; - value[3] = 1.0; - break; - } - - default: - break; - } - break; - - default: - break; - } -} - /** * Update R300's own internal state parameters. * For now just STATE_R300_WINDOW_DIMENSION @@ -1101,7 +1054,6 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct gl_program_parameter_list *paramList; - GLuint i; if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; @@ -1109,21 +1061,12 @@ static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) if (!ctx->FragmentProgram._Current || !rmesa->selected_fp) return; - paramList = rmesa->selected_fp->Base->Parameters; + paramList = ctx->FragmentProgram._Current->Base.Parameters; if (!paramList) return; _mesa_load_state_parameters(ctx, paramList); - - for (i = 0; i < paramList->NumParameters; i++) { - if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { - r300FetchStateParameter(ctx, - paramList->Parameters[i]. - StateIndexes, - paramList->ParameterValues[i]); - } - } } /* ============================================================= @@ -1230,7 +1173,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program_code *code = &r300->selected_fp->code.r300; + struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300; R300_STATECHANGE(r300, fpt); @@ -1272,7 +1215,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r500_fragment_program_code *code = &r300->selected_fp->code.r500; + struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -1312,6 +1255,7 @@ static GLuint translate_lod_bias(GLfloat bias) return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; } + static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -1404,6 +1348,28 @@ static void r300SetupTextures(GLcontext * ctx) } } + /* R3xx and R4xx chips require that the texture unit corresponding to + * KIL instructions is really enabled. + * + * We do some fakery here and in the state atom emit logic to enable + * the texture without tripping up the CS checker in the kernel. + */ + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { + last_hw_tmu++; + + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + + r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */ + r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */ + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0; + } + } + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1); r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = @@ -1421,16 +1387,6 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { - // The KILL operation requires the first texture unit - // to be enabled. - r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; - r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; - r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); - } - } r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); if (RADEON_DEBUG & DEBUG_STATE) @@ -1460,11 +1416,11 @@ static void r300SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = r300->selected_fp->Base->InputsRead; + InputsRead = r300->selected_fp->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1554,11 +1510,11 @@ static void r500SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - InputsRead = r300->selected_fp->Base->InputsRead; + InputsRead = r300->selected_fp->InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1997,9 +1953,7 @@ void r300UpdateShaders(r300ContextPtr rmesa) { struct r300_fragment_program *fp; - fp = r300SelectFragmentShader(ctx); - if (!fp->translated) - r300TranslateFragmentShader(ctx, fp); + fp = r300SelectAndTranslateFragmentShader(ctx); r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); } @@ -2024,9 +1978,7 @@ void r300UpdateShaders(r300ContextPtr rmesa) } } - vp = r300SelectVertexShader(ctx); - if (!vp->translated) - r300TranslateVertexShader(vp); + vp = r300SelectAndTranslateVertexShader(ctx); r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); } @@ -2035,24 +1987,61 @@ void r300UpdateShaders(r300ContextPtr rmesa) rmesa->radeon.NewGLState = 0; } -static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, - struct gl_program *program, struct prog_src_register srcreg) +static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer) { static const GLfloat dummy[4] = { 0, 0, 0, 0 }; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index]; + + switch(rcc->Type) { + case RC_CONSTANT_EXTERNAL: + return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External]; + case RC_CONSTANT_IMMEDIATE: + return rcc->u.Immediate; + case RC_CONSTANT_STATE: + switch(rcc->u.State[0]) { + case RC_STATE_SHADOW_AMBIENT: { + const int unit = (int) rcc->u.State[1]; + const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current; + if (texObj) { + buffer[0] = + buffer[1] = + buffer[2] = + buffer[3] = texObj->CompareFailValue; + } + return buffer; + } - switch(srcreg.File) { - case PROGRAM_LOCAL_PARAM: - return program->LocalParams[srcreg.Index]; - case PROGRAM_ENV_PARAM: - return ctx->FragmentProgram.Parameters[srcreg.Index]; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - return program->Parameters->ParameterValues[srcreg.Index]; - default: - _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n"); - return dummy; + case RC_STATE_R300_WINDOW_DIMENSION: { + __DRIdrawablePrivate * drawable = radeon_get_drawable(&rmesa->radeon); + buffer[0] = drawable->w * 0.5f; /* width*0.5 */ + buffer[1] = drawable->h * 0.5f; /* height*0.5 */ + buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ + buffer[3] = 1.0F; /* not used */ + return buffer; + } + + case RC_STATE_R300_TEXRECT_FACTOR: { + struct gl_texture_object *t = + ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX]; + + if (t && t->Image[0][t->BaseLevel]) { + struct gl_texture_image *image = + t->Image[0][t->BaseLevel]; + buffer[0] = 1.0 / image->Width2; + buffer[1] = 1.0 / image->Height2; + } else { + buffer[0] = 1.0; + buffer[1] = 1.0; + } + buffer[2] = 1.0; + buffer[3] = 1.0; + return buffer; + } + } } + + return dummy; } @@ -2061,9 +2050,9 @@ static void r300SetupPixelShader(GLcontext *ctx) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_fragment_program *fp = rmesa->selected_fp; struct r300_fragment_program_code *code; - int i, k; + int i; - code = &fp->code.r300; + code = &fp->code.code.r300; R300_STATECHANGE(rmesa, fpi[0]); R300_STATECHANGE(rmesa, fpi[1]); @@ -2074,38 +2063,24 @@ static void r300SetupPixelShader(GLcontext *ctx) rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length); rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length); for (i = 0; i < code->alu.length; i++) { - rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; - rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; - rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2; - rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3; + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst; + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr; + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst; + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr; } R300_STATECHANGE(rmesa, fp); - rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3); - rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx; - rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | - (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); - /* I just want to say, the way these nodes are stored.. weird.. */ - for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) { - if (i < (code->cur_node + 1)) { - rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (code->node[i].alu_offset << R300_ALU_START_SHIFT) | - (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) | - (code->node[i].tex_offset << R300_TEX_START_SHIFT) | - (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) | - code->node[i].flags; - } else { - rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; - } - } + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config; + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset; + for (i = 0; i < 4; i++) + rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i]; R300_STATECHANGE(rmesa, fpp); - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); - for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4); + for (i = 0; i < fp->code.constants.Count; i++) { + GLfloat buffer[4]; + const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); @@ -2137,19 +2112,19 @@ static void r500SetupPixelShader(GLcontext *ctx) ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; - code = &fp->code.r500; + code = &fp->code.code.r500; R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = - R500_US_CODE_START_ADDR(code->inst_offset) | + R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = - R500_US_CODE_RANGE_ADDR(code->inst_offset) | + R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = - R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ + R500_US_CODE_OFFSET_ADDR(0); R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... */ @@ -2165,14 +2140,15 @@ static void r500SetupPixelShader(GLcontext *ctx) bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6); R300_STATECHANGE(rmesa, r500fp_const); - for (i = 0; i < code->const_nr; i++) { - const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]); + for (i = 0; i < fp->code.constants.Count; i++) { + GLfloat buffer[4]; + const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); } - bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->code.constants.Count * 4); } void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) @@ -2273,6 +2249,7 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) _mesa_update_draw_buffer_bounds(ctx); R300_STATECHANGE(r300, cb); + R300_STATECHANGE(r300, zb); } r300->radeon.NewGLState |= new_state; @@ -2345,13 +2322,9 @@ void r300InitShaderFunctions(r300ContextPtr r300) r300->vtbl.SetupRSUnit = r500SetupRSUnit; r300->vtbl.SetupPixelShader = r500SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r500BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; } else { r300->vtbl.SetupRSUnit = r300SetupRSUnit; r300->vtbl.SetupPixelShader = r300SetupPixelShader; r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; - r300->vtbl.BuildFragmentProgramHwCode = r300BuildFragmentProgramHwCode; - r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; } } diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 56ed519cf4..a634cb5192 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -76,7 +76,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_ GLuint InputsRead = 0; GLuint OutputsWritten = 0; int num_attrs = 0; - GLuint fp_reads = rmesa->selected_fp->Base->InputsRead; + GLuint fp_reads = rmesa->selected_fp->InputsRead; struct vertex_attribute *attrs = rmesa->vbuf.attribs; rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index de32013032..c5edbd0052 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -40,927 +40,124 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "tnl/tnl.h" -#include "radeon_nqssadce.h" +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_nqssadce.h" #include "r300_context.h" #include "r300_state.h" -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ - t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ - (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ - t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ - -/* - * Take an already-setup and valid source then swizzle it appropriately to - * obtain a constant ZERO or ONE source. +/** + * Write parameter array for the given vertex program into dst. + * Return the total number of components written. */ -#define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_src_class(src[x].File), \ - VSF_FLAG_NONE) | (src[x].RelAddr << 4)) - -#define FREE_TEMPS() \ - do { \ - int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ - if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->error = GL_TRUE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) - -static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) +static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst) { - int pi; - float *dst_o = dst; - struct gl_program_parameter_list *paramList; + int i; - if (vp->IsNVProgram) { + if (vp->Base->IsNVProgram) { _mesa_load_tracked_matrices(ctx); - - for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { - *dst++ = ctx->VertexProgram.Parameters[pi][0]; - *dst++ = ctx->VertexProgram.Parameters[pi][1]; - *dst++ = ctx->VertexProgram.Parameters[pi][2]; - *dst++ = ctx->VertexProgram.Parameters[pi][3]; + } else { + if (vp->Base->Base.Parameters) { + _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters); } - return dst - dst_o; } - if (!vp->Base.Parameters) - return 0; - - _mesa_load_state_parameters(ctx, vp->Base.Parameters); - - if (vp->Base.Parameters->NumParameters * 4 > - VSF_MAX_FRAGMENT_LENGTH) { + if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) { + /* Should have checked this earlier... */ fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); _mesa_exit(-1); } - paramList = vp->Base.Parameters; - for (pi = 0; pi < paramList->NumParameters; pi++) { - switch (paramList->Parameters[pi].Type) { - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); - case PROGRAM_CONSTANT: - *dst++ = paramList->ParameterValues[pi][0]; - *dst++ = paramList->ParameterValues[pi][1]; - *dst++ = paramList->ParameterValues[pi][2]; - *dst++ = paramList->ParameterValues[pi][3]; - break; - default: - _mesa_problem(NULL, "Bad param type in %s", - __FUNCTION__); - } - - } - - return dst - dst_o; -} - -static unsigned long t_dst_mask(GLuint mask) -{ - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & VSF_FLAG_ALL; -} - -static unsigned long t_dst_class(gl_register_file file) -{ - - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: - return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: - return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} + for(i = 0; i < vp->code.constants.Count; ++i) { + const float * src = 0; + const struct rc_constant * constant = &vp->code.constants.Constants[i]; -static unsigned long t_dst_index(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT) - return vp->outputs[dst->Index]; + switch(constant->Type) { + case RC_CONSTANT_EXTERNAL: + if (vp->Base->IsNVProgram) { + src = ctx->VertexProgram.Parameters[constant->u.External]; + } else { + src = vp->Base->Base.Parameters->ParameterValues[constant->u.External]; + } + break; - return dst->Index; -} + case RC_CONSTANT_IMMEDIATE: + src = constant->u.Immediate; + break; + } -static unsigned long t_src_class(gl_register_file file) -{ - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: - return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: - return PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; + dst[4*i] = src[0]; + dst[4*i + 1] = src[1]; + dst[4*i + 2] = src[2]; + dst[4*i + 3] = src[3]; } -} -static INLINE unsigned long t_swizzle(GLubyte swizzle) -{ -/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; + return 4 * vp->code.constants.Count; } -#if 0 -static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) +static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads) { + GLbitfield outputs = 0; int i; - if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); - return; - } - - fprintf(stderr, "%s:<", caller); - for (i = 0; i < VERT_ATTRIB_MAX; i++) - fprintf(stderr, "%d ", vp->inputs[i]); - fprintf(stderr, ">\n"); - -} -#endif - -static unsigned long t_src_index(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - if (src->File == PROGRAM_INPUT) { - assert(vp->inputs[src->Index] != -1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return src->Index; - } -} - -/* these two functions should probably be merged... */ - -static unsigned long t_src(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->Negate) | (src->RelAddr << 4); -} +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if (fpreads & (1 << (fp_attr))) \ + outputs |= (1 << (vp_result)); \ + } while (0) -static unsigned long t_src_scalar(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src->RelAddr << 4); -} + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); + for (i = 0; i <= 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); } - return GL_TRUE; -} - -static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = 0; - - return inst; -} - -static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = - PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - PVS_SRC_SELECT_FORCE_1, - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(*u_temp_i, - PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - /* Not 100% sure about this */ - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - inst[3] = __CONST(0, SWIZZLE_ZERO); - (*u_temp_i)--; - - return inst; -} - -static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - return inst; -} - -static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = t_src(vp, &src[2]); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &src[1]); - - return inst; -} - -static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} +#undef ADD_OUTPUT -static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} + if ((fpreads & (1 << FRAG_ATTRIB_COL0)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0))) + outputs |= 1 << VERT_RESULT_BFC0; + if ((fpreads & (1 << FRAG_ATTRIB_COL1)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1))) + outputs |= 1 << VERT_RESULT_BFC1; -static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} + outputs |= 1 << VERT_RESULT_HPOS; + if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + outputs |= 1 << VERT_RESULT_PSIZ; -static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; + return outputs; } -static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - -#if 0 - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = 0; -#else - inst[0] = - PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ONE); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); -#endif - - return inst; -} - -static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} -static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - */ - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = - PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE); - - (*u_temp_i)--; - - return inst; -} - -static void t_inputs_outputs(struct r300_vertex_program *vp) +static void t_inputs_outputs(struct r300_vertex_program_compiler * c) { int i; int cur_reg; GLuint OutputsWritten, InputsRead; - OutputsWritten = vp->Base->Base.OutputsWritten; - InputsRead = vp->Base->Base.InputsRead; + OutputsWritten = c->Base.Program.OutputsWritten; + InputsRead = c->Base.Program.InputsRead; cur_reg = -1; for (i = 0; i < VERT_ATTRIB_MAX; i++) { if (InputsRead & (1 << i)) - vp->inputs[i] = ++cur_reg; + c->code->inputs[i] = ++cur_reg; else - vp->inputs[i] = -1; + c->code->inputs[i] = -1; } cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; + c->code->outputs[i] = -1; assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; } /* If we're writing back facing colors we need to send @@ -970,668 +167,99 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) * get written into appropriate output vectors. */ if (OutputsWritten & (1 << VERT_RESULT_COL0)) { - vp->outputs[VERT_RESULT_COL0] = cur_reg++; + c->code->outputs[VERT_RESULT_COL0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = cur_reg++; + c->code->outputs[VERT_RESULT_COL1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { cur_reg++; } if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { if (OutputsWritten & (1 << i)) { - vp->outputs[i] = cur_reg++; + c->code->outputs[i] = cur_reg++; } } if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; } } -void r300TranslateVertexShader(struct r300_vertex_program *vp) -{ - struct prog_instruction *vpi = vp->Base->Base.Instructions; - int i; - GLuint *inst; - unsigned long num_operands; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - - vp->pos_end = 0; /* Not supported yet */ - vp->hw_code.length = 0; - vp->translated = GL_TRUE; - vp->error = GL_FALSE; - - t_inputs_outputs(vp); - - for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; - vpi++, inst += 4) { - - FREE_TEMPS(); - - if (!valid_dst(vp, &vpi->DstReg)) { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - - num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - - /* copy the sources (src) from mesa into a local variable... is this needed? */ - for (i = 0; i < num_operands; i++) { - src[i] = vpi->SrcReg[i]; - } - - if (num_operands == 3) { /* TODO: scalars */ - if (CMP_SRCS(src[1], src[2]) - || CMP_SRCS(src[0], src[2])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[2]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); - inst[2] = __CONST(2, SWIZZLE_ZERO); - inst[3] = __CONST(2, SWIZZLE_ZERO); - inst += 4; - - src[2].File = PROGRAM_TEMPORARY; - src[2].Index = u_temp_i; - src[2].RelAddr = 0; - u_temp_i--; - } - } - - if (num_operands >= 2) { - if (CMP_SRCS(src[1], src[0])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. - RelAddr << - 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - src[0].File = PROGRAM_TEMPORARY; - src[0].Index = u_temp_i; - src[0].RelAddr = 0; - u_temp_i--; - } - } - - switch (vpi->Opcode) { - case OPCODE_ABS: - inst = r300TranslateOpcodeABS(vp, vpi, inst, src); - break; - case OPCODE_ADD: - inst = r300TranslateOpcodeADD(vp, vpi, inst, src); - break; - case OPCODE_ARL: - inst = r300TranslateOpcodeARL(vp, vpi, inst, src); - break; - case OPCODE_DP3: - inst = r300TranslateOpcodeDP3(vp, vpi, inst, src); - break; - case OPCODE_DP4: - inst = r300TranslateOpcodeDP4(vp, vpi, inst, src); - break; - case OPCODE_DPH: - inst = r300TranslateOpcodeDPH(vp, vpi, inst, src); - break; - case OPCODE_DST: - inst = r300TranslateOpcodeDST(vp, vpi, inst, src); - break; - case OPCODE_EX2: - inst = r300TranslateOpcodeEX2(vp, vpi, inst, src); - break; - case OPCODE_EXP: - inst = r300TranslateOpcodeEXP(vp, vpi, inst, src); - break; - case OPCODE_FLR: - inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - case OPCODE_FRC: - inst = r300TranslateOpcodeFRC(vp, vpi, inst, src); - break; - case OPCODE_LG2: - inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); - break; - case OPCODE_LIT: - inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); - break; - case OPCODE_LOG: - inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); - break; - case OPCODE_MAD: - inst = r300TranslateOpcodeMAD(vp, vpi, inst, src); - break; - case OPCODE_MAX: - inst = r300TranslateOpcodeMAX(vp, vpi, inst, src); - break; - case OPCODE_MIN: - inst = r300TranslateOpcodeMIN(vp, vpi, inst, src); - break; - case OPCODE_MOV: - inst = r300TranslateOpcodeMOV(vp, vpi, inst, src); - break; - case OPCODE_MUL: - inst = r300TranslateOpcodeMUL(vp, vpi, inst, src); - break; - case OPCODE_POW: - inst = r300TranslateOpcodePOW(vp, vpi, inst, src); - break; - case OPCODE_RCP: - inst = r300TranslateOpcodeRCP(vp, vpi, inst, src); - break; - case OPCODE_RSQ: - inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src); - break; - case OPCODE_SGE: - inst = r300TranslateOpcodeSGE(vp, vpi, inst, src); - break; - case OPCODE_SLT: - inst = r300TranslateOpcodeSLT(vp, vpi, inst, src); - break; - case OPCODE_SUB: - inst = r300TranslateOpcodeSUB(vp, vpi, inst, src); - break; - case OPCODE_SWZ: - inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src); - break; - case OPCODE_XPD: - inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - default: - vp->error = GL_TRUE; - break; - } - } - - vp->hw_code.length = (inst - vp->hw_code.body.d); - if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->error = GL_TRUE; - } -} - -static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) -{ - struct prog_instruction *vpi; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); - - vpi = &prog->Instructions[prog->NumInstructions - 3]; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_HPOS; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_END; -} - -static void pos_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - - prog->NumTemporaries++; - - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - } - } - - insert_wpos(prog, tempregi, tex_id); - - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -/** - * The fogcoord attribute is special in that only the first component - * is relevant, and the remaining components are always fixed (when read - * from by the fragment program) to yield an X001 pattern. - * - * We need to enforce this either in the vertex program or in the fragment - * program, and this code chooses not to enforce it in the vertex program. - * This is slightly cheaper, as long as the fragment program does not use - * weird swizzles. - * - * And it seems that usually, weird swizzles are not used, so... - * - * See also the counterpart rewriting for fragment programs. - */ -static void fog_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - - vpi = prog->Instructions; - while (vpi->Opcode != OPCODE_END) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_X; - } - - ++vpi; - } - - prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -static int translateABS(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_MAX; - inst->SrcReg[1] = inst->SrcReg[0]; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateDP3(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - return 0; -} - -static int translateDPH(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - - return 0; -} - -static int translateFLR(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - struct prog_dst_register dst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - dst = inst->DstReg; - - inst->Opcode = OPCODE_FRC; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - ++inst; - - inst->Opcode = OPCODE_ADD; - inst->DstReg = dst; - inst->SrcReg[0] = (inst-1)->SrcReg[0]; - inst->SrcReg[1].File = PROGRAM_TEMPORARY; - inst->SrcReg[1].Index = tmp_idx; - inst->SrcReg[1].Negate = NEGATE_XYZW; - - return 1; -} - -static int translateSUB(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_ADD; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateSWZ(struct gl_program *prog, int pos) -{ - prog->Instructions[pos].Opcode = OPCODE_MOV; - - return 0; -} - -static int translateXPD(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - - *(inst+1) = *inst; - - inst->Opcode = OPCODE_MUL; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - ++inst; - - inst->Opcode = OPCODE_MAD; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - inst->SrcReg[2].File = PROGRAM_TEMPORARY; - inst->SrcReg[2].Index = tmp_idx; - - return 1; -} - -static void translateInsts(struct gl_program *prog) -{ - struct prog_instruction *inst; - int i; - - for (i = 0; i < prog->NumInstructions; ++i) { - inst = &prog->Instructions[i]; - - switch (inst->Opcode) { - case OPCODE_ABS: - i += translateABS(prog, i); - break; - case OPCODE_DP3: - i += translateDP3(prog, i); - break; - case OPCODE_DPH: - i += translateDPH(prog, i); - break; - case OPCODE_FLR: - i += translateFLR(prog, i); - break; - case OPCODE_SUB: - i += translateSUB(prog, i); - break; - case OPCODE_SWZ: - i += translateSWZ(prog, i); - break; - case OPCODE_XPD: - i += translateXPD(prog, i); - break; - default: - break; - } - } -} - -#define ADD_OUTPUT(fp_attr, vp_result) \ - do { \ - if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ - OutputsAdded |= 1 << (vp_result); \ - count++; \ - } \ - } while (0) - -static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - GLuint OutputsAdded, FpReads; - int i, count; - - OutputsAdded = 0; - count = 0; - FpReads = r300->selected_fp->Base->InputsRead; - - ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); - ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); - - for (i = 0; i < 7; ++i) { - ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); - } - - /* Some outputs may be artificially added, to match the inputs of the fragment program. - * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by - * vertex program are undefined, so just use MOV [vertex_result], CONST[0] - */ - if (count > 0) { - struct prog_instruction *inst; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); - inst = &prog->Instructions[prog->NumInstructions - 1 - count]; - - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; - - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; - - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++inst; - } - } - - prog->OutputsWritten |= OutputsAdded; - } -} - -#undef ADD_OUTPUT - -static void nqssadceInit(struct nqssadce_state* s) -{ - r300ContextPtr r300 = R300_CONTEXT(s->Ctx); - GLuint fp_reads; - - fp_reads = r300->selected_fp->Base->InputsRead; - { - if (fp_reads & FRAG_BIT_COL0) { - s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; - } - - if (fp_reads & FRAG_BIT_COL1) { - s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; - } - } - - { - int i; - for (i = 0; i < 8; ++i) { - if (fp_reads & FRAG_BIT_TEX(i)) { - s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; - } - } - } - - s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; - if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) - s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; -} - -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -{ - (void) opcode; - (void) reg; - - return GL_TRUE; -} static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program_key *wanted_key, const struct gl_vertex_program *mesa_vp) { - r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - struct gl_program *prog; + struct r300_vertex_program_compiler compiler; vp = _mesa_calloc(sizeof(*vp)); vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - prog = &vp->Base->Base; + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & DEBUG_VERTS) ? GL_TRUE : GL_FALSE; + + compiler.code = &vp->code; + compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads); + compiler.SetHwInputOutput = &t_inputs_outputs; - if (RADEON_DEBUG & DEBUG_VERTS) { + if (compiler.Base.Debug) { fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(prog); + _mesa_print_program(&vp->Base->Base); fflush(stdout); } - if (vp->Base->IsPositionInvariant) { + if (mesa_vp->IsPositionInvariant) { _mesa_insert_mvp_code(ctx, vp->Base); } - if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0); - } - - if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0); - } + rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); - addArtificialOutputs(ctx, prog); + rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); - translateInsts(prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(prog); - fflush(stdout); + if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { + rc_copy_output(&compiler.Base, + VERT_RESULT_HPOS, + vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(ctx, prog, &nqssadce); - - /* We need this step for reusing temporary registers */ - _mesa_optimize_program(ctx, prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - _mesa_print_program(prog); - fflush(stdout); - } + if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { + rc_move_output(&compiler.Base, + VERT_RESULT_FOGC, + vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); } - assert(prog->NumInstructions); - { - struct prog_instruction *inst; - int max, i, tmp; - - inst = prog->Instructions; - max = -1; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if ((int) inst->SrcReg[i].Index > max) { - max = inst->SrcReg[i].Index; - } - } - } + r3xx_compile_vertex_program(&compiler); + vp->error = compiler.Base.Error; - if (_mesa_num_inst_dst_regs(inst->Opcode)) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if ((int) inst->DstReg.Index > max) { - max = inst->DstReg.Index; - } - } - } - ++inst; - } + vp->Base->Base.InputsRead = vp->code.InputsRead; + vp->Base->Base.OutputsWritten = vp->code.OutputsWritten; - /* We actually want highest index of used temporary register, - * not the number of temporaries used. - * These values aren't always the same. - */ - vp->num_temporaries = max + 1; - } + rc_destroy(&compiler.Base); return vp; } -struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program_key wanted_key = { 0 }; @@ -1639,7 +267,7 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) struct r300_vertex_program *vp; vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - wanted_key.FpReads = r300->selected_fp->Base->InputsRead; + wanted_key.FpReads = r300->selected_fp->InputsRead; wanted_key.FogAttr = r300->selected_fp->fog_attr; wanted_key.WPosAttr = r300->selected_fp->wpos_attr; @@ -1664,7 +292,7 @@ struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ } while(0) -static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) { int i; @@ -1708,15 +336,15 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; R300_STATECHANGE(rmesa, vpp); - param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); - inst_count = (prog->hw_code.length / 4) - 1; + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; - r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead), - _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries); + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 2dab11c337..ccec896be4 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -3,39 +3,9 @@ #include "r300_reg.h" -#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ - (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ - | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ - | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ - | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ - | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) - -#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ - (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ - | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ - | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ - | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ - | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ - | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) - -#if 1 - -#define VSF_FLAG_X 1 -#define VSF_FLAG_Y 2 -#define VSF_FLAG_Z 4 -#define VSF_FLAG_W 8 -#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) -#define VSF_FLAG_ALL 0xf -#define VSF_FLAG_NONE 0 - -#endif void r300SetupVertexProgram(r300ContextPtr rmesa); -struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx); - -void r300TranslateVertexShader(struct r300_vertex_program *vp); +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx); #endif diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.c b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c new file mode 120000 index 0000000000..79ad050e6b --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c @@ -0,0 +1 @@ +../radeon/radeon_bo_legacy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.h b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h new file mode 120000 index 0000000000..83b0f7ffab --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h @@ -0,0 +1 @@ +../radeon/radeon_bo_legacy.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h new file mode 120000 index 0000000000..ca894b2443 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h @@ -0,0 +1 @@ +../radeon/radeon_bocs_wrapper.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.c b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c new file mode 120000 index 0000000000..f6a5f66470 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.h b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h new file mode 120000 index 0000000000..2f134fd17b --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_chipset.h b/src/mesa/drivers/dri/r300/radeon_chipset.h new file mode 120000 index 0000000000..eba99001ff --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_chipset.h @@ -0,0 +1 @@ +../radeon/radeon_chipset.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cmdbuf.h b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h new file mode 120000 index 0000000000..a799e1dc6d --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h @@ -0,0 +1 @@ +../radeon/radeon_cmdbuf.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_common.c b/src/mesa/drivers/dri/r300/radeon_common.c new file mode 120000 index 0000000000..67b19ba940 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_common.c @@ -0,0 +1 @@ +../radeon/radeon_common.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_common.h b/src/mesa/drivers/dri/r300/radeon_common.h new file mode 120000 index 0000000000..5bcb696a9f --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_common.h @@ -0,0 +1 @@ +../radeon/radeon_common.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.c b/src/mesa/drivers/dri/r300/radeon_common_context.c new file mode 120000 index 0000000000..86800f3819 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_common_context.c @@ -0,0 +1 @@ +../radeon/radeon_common_context.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.h b/src/mesa/drivers/dri/r300/radeon_common_context.h new file mode 120000 index 0000000000..4d66312550 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_common_context.h @@ -0,0 +1 @@ +../radeon/radeon_common_context.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.c b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c new file mode 120000 index 0000000000..006720f8a4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c @@ -0,0 +1 @@ +../radeon/radeon_cs_legacy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.h b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h new file mode 120000 index 0000000000..a5f95e0a3d --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h @@ -0,0 +1 @@ +../radeon/radeon_cs_legacy.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c new file mode 120000 index 0000000000..c248ea7d1a --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c @@ -0,0 +1 @@ +../radeon/radeon_cs_space_drm.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_dma.c b/src/mesa/drivers/dri/r300/radeon_dma.c new file mode 120000 index 0000000000..43be000625 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_dma.c @@ -0,0 +1 @@ +../radeon/radeon_dma.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_dma.h b/src/mesa/drivers/dri/r300/radeon_dma.h new file mode 120000 index 0000000000..82e50634e3 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_dma.h @@ -0,0 +1 @@ +../radeon/radeon_dma.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_fbo.c b/src/mesa/drivers/dri/r300/radeon_fbo.c new file mode 120000 index 0000000000..0d738d8d78 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_fbo.c @@ -0,0 +1 @@ +../radeon/radeon_fbo.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c new file mode 120000 index 0000000000..af4108a8e3 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_lock.c @@ -0,0 +1 @@ +../radeon/radeon_lock.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h new file mode 120000 index 0000000000..64bdf94ee7 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_lock.h @@ -0,0 +1 @@ +../radeon/radeon_lock.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c new file mode 120000 index 0000000000..31c0cfbe94 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c @@ -0,0 +1 @@ +../radeon/radeon_mipmap_tree.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h new file mode 120000 index 0000000000..254d50cf8c --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h @@ -0,0 +1 @@ +../radeon/radeon_mipmap_tree.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c deleted file mode 100644 index da5e7aefce..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "radeon_program.h" - -#include "shader/prog_print.h" - - -/** - * Transform the given clause in the following way: - * 1. Replace it with an empty clause - * 2. For every instruction in the original clause, try the given - * transformations in order. - * 3. If one of the transformations returns GL_TRUE, assume that it - * has emitted the appropriate instruction(s) into the new clause; - * otherwise, copy the instruction verbatim. - * - * \note The transformation is currently not recursive; in other words, - * instructions emitted by transformations are not transformed. - * - * \note The transform is called 'local' because it can only look at - * one instruction at a time. - */ -void radeonLocalTransform( - GLcontext *Ctx, - struct gl_program *program, - int num_transformations, - struct radeon_program_transformation* transformations) -{ - struct radeon_transform_context ctx; - int ip; - - ctx.Ctx = Ctx; - ctx.Program = program; - ctx.OldInstructions = program->Instructions; - ctx.OldNumInstructions = program->NumInstructions; - - program->Instructions = 0; - program->NumInstructions = 0; - - for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { - struct prog_instruction *instr = ctx.OldInstructions + ip; - int i; - - for(i = 0; i < num_transformations; ++i) { - struct radeon_program_transformation* t = transformations + i; - - if (t->function(&ctx, instr, t->userData)) - break; - } - - if (i >= num_transformations) { - struct prog_instruction* dest = radeonAppendInstructions(program, 1); - _mesa_copy_instructions(dest, instr, 1); - } - } - - _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); -} - - -static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) -{ - GLuint i; - for (i = 0; i < count; i++) { - const struct prog_instruction *inst = insts + i; - const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < n; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; - } - } -} - -GLint radeonFindFreeTemporary(struct radeon_transform_context *t) -{ - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; - - _mesa_memset(used, 0, sizeof(used)); - scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); - scan_instructions(used, t->OldInstructions, t->OldNumInstructions); - - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { - if (!used[i]) - return i; - } - - return -1; -} - - -/** - * Append the given number of instructions to the program and return a - * pointer to the first new instruction. - */ -struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) -{ - int oldnum = program->NumInstructions; - _mesa_insert_instructions(program, oldnum, count); - return program->Instructions + oldnum; -} diff --git a/src/mesa/drivers/dri/r300/radeon_screen.c b/src/mesa/drivers/dri/r300/radeon_screen.c new file mode 120000 index 0000000000..86161118dd --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_screen.c @@ -0,0 +1 @@ +../radeon/radeon_screen.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_screen.h b/src/mesa/drivers/dri/r300/radeon_screen.h new file mode 120000 index 0000000000..23bb6bd459 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_screen.h @@ -0,0 +1 @@ +../radeon/radeon_screen.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c new file mode 120000 index 0000000000..232868c4c9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_span.c @@ -0,0 +1 @@ +../radeon/radeon_span.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_span.h b/src/mesa/drivers/dri/r300/radeon_span.h new file mode 120000 index 0000000000..f9d634508c --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_span.h @@ -0,0 +1 @@ +../radeon/radeon_span.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_texture.c b/src/mesa/drivers/dri/r300/radeon_texture.c new file mode 120000 index 0000000000..a822710915 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_texture.c @@ -0,0 +1 @@ +../radeon/radeon_texture.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_texture.h b/src/mesa/drivers/dri/r300/radeon_texture.h new file mode 120000 index 0000000000..17fac3d5ea --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_texture.h @@ -0,0 +1 @@ +../radeon/radeon_texture.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon.h b/src/mesa/drivers/dri/r300/server/radeon.h new file mode 120000 index 0000000000..81274a54f1 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon.h @@ -0,0 +1 @@ +../../radeon/server/radeon.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.c b/src/mesa/drivers/dri/r300/server/radeon_dri.c new file mode 120000 index 0000000000..d05847d650 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon_dri.c @@ -0,0 +1 @@ +../../radeon/server/radeon_dri.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.h b/src/mesa/drivers/dri/r300/server/radeon_dri.h new file mode 120000 index 0000000000..27c591d3c9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon_dri.h @@ -0,0 +1 @@ +../../radeon/server/radeon_dri.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_egl.c b/src/mesa/drivers/dri/r300/server/radeon_egl.c new file mode 120000 index 0000000000..d7735a7643 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon_egl.c @@ -0,0 +1 @@ +../../radeon/server/radeon_egl.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_macros.h b/src/mesa/drivers/dri/r300/server/radeon_macros.h new file mode 120000 index 0000000000..c56cd735b8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon_macros.h @@ -0,0 +1 @@ +../../radeon/server/radeon_macros.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_reg.h b/src/mesa/drivers/dri/r300/server/radeon_reg.h new file mode 120000 index 0000000000..e2349dcb68 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon_reg.h @@ -0,0 +1 @@ +../../radeon/server/radeon_reg.h
\ No newline at end of file |