diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
117 files changed, 23804 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/Lindent b/src/mesa/drivers/dri/r300/Lindent new file mode 100755 index 0000000000..7d8d8896e3 --- /dev/null +++ b/src/mesa/drivers/dri/r300/Lindent @@ -0,0 +1,2 @@ +#!/bin/sh +indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@" diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile new file mode 100644 index 0000000000..2245998c95 --- /dev/null +++ b/src/mesa/drivers/dri/r300/Makefile @@ -0,0 +1,77 @@ +# src/mesa/drivers/dri/r300/Makefile + +TOP = ../../../../.. +include $(TOP)/configs/current + +CFLAGS += $(RADEON_CFLAGS) + +LIBNAME = r300_dri.so + +ifeq ($(RADEON_LDFLAGS),) +CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c +endif + +COMMON_SOURCES = \ + ../../common/driverfuncs.c \ + ../common/mm.c \ + ../common/utils.c \ + ../common/texmem.c \ + ../common/vblank.c \ + ../common/xmlconfig.c \ + ../common/dri_util.c + +RADEON_COMMON_SOURCES = \ + radeon_bo_legacy.c \ + radeon_buffer_objects.c \ + radeon_common_context.c \ + radeon_common.c \ + radeon_cs_legacy.c \ + radeon_dma.c \ + radeon_debug.c \ + radeon_fbo.c \ + radeon_lock.c \ + radeon_mipmap_tree.c \ + radeon_pixel_read.c \ + radeon_queryobj.c \ + radeon_span.c \ + radeon_texture.c \ + radeon_tex_copy.c \ + radeon_tex_getimage.c \ + radeon_tile.c + +DRIVER_SOURCES = \ + radeon_screen.c \ + r300_blit.c \ + r300_context.c \ + r300_draw.c \ + r300_cmdbuf.c \ + r300_state.c \ + r300_render.c \ + r300_tex.c \ + r300_texstate.c \ + r300_vertprog.c \ + r300_fragprog_common.c \ + r300_shader.c \ + radeon_mesa_to_rc.c \ + r300_emit.c \ + r300_swtcl.c \ + $(RADEON_COMMON_SOURCES) \ + $(EGL_SOURCES) \ + $(CS_SOURCES) + +C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) + +DRIVER_DEFINES = -DRADEON_R300 +# -DRADEON_BO_TRACK \ + +DRI_LIB_DEPS += $(RADEON_LDFLAGS) + +SUBDIRS = compiler + +EXTRA_MODULES = compiler/libr300compiler.a + + +##### TARGETS ##### + +include ../Makefile.template + diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile new file mode 100644 index 0000000000..ff3801dc67 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -0,0 +1,86 @@ +# src/mesa/drivers/dri/r300/compiler/Makefile + +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = r300compiler + +C_SOURCES = \ + radeon_code.c \ + radeon_compiler.c \ + radeon_emulate_branches.c \ + radeon_emulate_loops.c \ + radeon_program.c \ + radeon_program_print.c \ + radeon_opcodes.c \ + radeon_program_alu.c \ + radeon_program_pair.c \ + radeon_program_tex.c \ + radeon_pair_translate.c \ + radeon_pair_schedule.c \ + radeon_pair_regalloc.c \ + radeon_dataflow.c \ + radeon_dataflow_deadcode.c \ + radeon_dataflow_swizzles.c \ + radeon_optimize.c \ + r3xx_fragprog.c \ + r300_fragprog.c \ + r300_fragprog_swizzle.c \ + r300_fragprog_emit.c \ + r500_fragprog.c \ + r500_fragprog_emit.c \ + r3xx_vertprog.c \ + r3xx_vertprog_dump.c \ + \ + memory_pool.c + + +### Basic defines ### + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +INCLUDES = \ + -I. \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + + +##### TARGETS ##### + +default: depend lib$(LIBNAME).a + +lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current + $(MKLIB) -o $(LIBNAME) -static $(OBJECTS) + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + +# Remove .o and backup files +clean: + rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + +# Dummy target +install: + @echo -n "" + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + + +sinclude depend diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript new file mode 100755 index 0000000000..50d9cdb7f2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -0,0 +1,41 @@ +Import('*') + +env = env.Clone() +env.Append(CPPPATH = '#/include') +env.Append(CPPPATH = '#/src/mesa') + +# temporary fix +env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '') + +r300compiler = env.ConvenienceLibrary( + target = 'r300compiler', + source = [ + 'radeon_code.c', + 'radeon_compiler.c', + 'radeon_program.c', + 'radeon_program_print.c', + 'radeon_opcodes.c', + 'radeon_program_alu.c', + 'radeon_program_pair.c', + 'radeon_program_tex.c', + 'radeon_pair_translate.c', + 'radeon_pair_schedule.c', + 'radeon_pair_regalloc.c', + 'radeon_optimize.c', + 'radeon_emulate_branches.c', + 'radeon_emulate_loops.c', + 'radeon_dataflow.c', + 'radeon_dataflow_deadcode.c', + 'radeon_dataflow_swizzles.c', + 'r3xx_fragprog.c', + 'r300_fragprog.c', + 'r300_fragprog_swizzle.c', + 'r300_fragprog_emit.c', + 'r500_fragprog.c', + 'r500_fragprog_emit.c', + 'r3xx_vertprog.c', + 'r3xx_vertprog_dump.c', + 'memory_pool.c', + ]) + +Return('r300compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c new file mode 100644 index 0000000000..76c7c60d8f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c @@ -0,0 +1,97 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "memory_pool.h" + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + + +#define POOL_LARGE_ALLOC 4096 +#define POOL_ALIGN 4 + + +struct memory_block { + struct memory_block * next; +}; + +void memory_pool_init(struct memory_pool * pool) +{ + memset(pool, 0, sizeof(struct memory_pool)); +} + + +void memory_pool_destroy(struct memory_pool * pool) +{ + while(pool->blocks) { + struct memory_block * block = pool->blocks; + pool->blocks = block->next; + free(block); + } +} + +static void refill_pool(struct memory_pool * pool) +{ + unsigned int blocksize = pool->total_allocated; + struct memory_block * newblock; + + if (!blocksize) + blocksize = 2*POOL_LARGE_ALLOC; + + newblock = (struct memory_block*)malloc(blocksize); + newblock->next = pool->blocks; + pool->blocks = newblock; + + pool->head = (unsigned char*)(newblock + 1); + pool->end = ((unsigned char*)newblock) + blocksize; + pool->total_allocated += blocksize; +} + + +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes) +{ + if (bytes < POOL_LARGE_ALLOC) { + void * ptr; + + if (pool->head + bytes > pool->end) + refill_pool(pool); + + assert(pool->head + bytes <= pool->end); + + ptr = pool->head; + + pool->head += bytes; + pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1)); + + return ptr; + } else { + struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block)); + + block->next = pool->blocks; + pool->blocks = block; + + return (block + 1); + } +} + + diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h new file mode 100644 index 0000000000..42344d0e3b --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h @@ -0,0 +1,80 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef MEMORY_POOL_H +#define MEMORY_POOL_H + +struct memory_block; + +/** + * Provides a pool of memory that can quickly be allocated from, at the + * cost of being unable to explicitly free one of the allocated blocks. + * Instead, the entire pool can be freed at once. + * + * The idea is to allow one to quickly allocate a flexible amount of + * memory during operations like shader compilation while avoiding + * reference counting headaches. + */ +struct memory_pool { + unsigned char * head; + unsigned char * end; + unsigned int total_allocated; + struct memory_block * blocks; +}; + + +void memory_pool_init(struct memory_pool * pool); +void memory_pool_destroy(struct memory_pool * pool); +void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes); + + +/** + * Generic helper for growing an array that has separate size/count + * and reserved counters to accomodate up to num new element. + * + * type * Array; + * unsigned int Size; + * unsigned int Reserved; + * + * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k); + * assert(Size + k < Reserved); + * + * \note Size is not changed by this macro. + * + * \warning Array, Size, Reserved have to be lvalues and may be evaluated + * several times. + */ +#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \ + unsigned int _num = (num); \ + if ((size) + _num > (reserved)) { \ + unsigned int newreserve = (reserved) * 2; \ + type * newarray; \ + if (newreserve < _num) \ + newreserve = 4 * _num; /* arbitrary heuristic */ \ + newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \ + memcpy(newarray, (array), (size) * sizeof(type)); \ + (array) = newarray; \ + (reserved) = newreserve; \ + } \ +} while(0) + +#endif /* MEMORY_POOL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c new file mode 100644 index 0000000000..794db8335a --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r300_fragprog.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +/* just some random things... */ +void r300FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r300_fragment_program_code *code = &c->code.r300; + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; + int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); + + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { + fprintf(stderr, " TEX:\n"); + for (i = tex_offset; + i <= tex_offset + tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + rgb_addr >> 29) & 3, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + alpha_addr >> 25) & 3); + strcat(dsta, tmp); + } + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].alpha_addr); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].rgb_inst, arga[0], arga[1], + arga[2], code->alu.inst[i].alpha_inst); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h new file mode 100644 index 0000000000..8b755703be --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + + +extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c new file mode 100644 index 0000000000..b27a683c39 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs <darktama@iinet.net.au> + * + * \author Jerome Glisse <j.glisse@gmail.com> + * + * \todo FogOption + */ + +#include "r300_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" +#include "r300_fragprog_swizzle.h" + + +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = emit->compiler; \ + struct r300_fragment_program_code *code = &c->code->code.r300 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) +{ + if (index > code->pixsize) + code->pixsize = index; +} + +static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (src.File == RC_FILE_CONSTANT) { + return src.Index | (1 << 5); + } else if (src.File == RC_FILE_TEMPORARY) { + use_temporary(code, src.Index); + return src.Index; + } + + return 0; +} + + +static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) +{ + PROG_CODE; + + if (code->alu.length >= R300_PFS_MAX_ALU_INST) { + error("Too many ALU instructions"); + return 0; + } + + int ip = code->alu.length++; + int j; + + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + unsigned int src = use_source(code, inst->RGB.Src[j]); + code->alu.inst[ip].rgb_addr |= src << (6*j); + + src = use_source(code, inst->Alpha.Src[j]); + code->alu.inst[ip].alpha_addr |= src << (6*j); + + unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].rgb_inst |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].alpha_inst |= arg << (7*j); + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + code->alu.inst[ip].rgb_addr |= + (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].rgb_addr |= + (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) | + R300_RGB_TARGET(inst->RGB.Target); + emit->node_flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + code->alu.inst[ip].alpha_addr |= + (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | + R300_ALPHA_TARGET(inst->Alpha.Target); + emit->node_flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; + c->code->writes_depth = 1; + } + + return 1; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static int finish_node(struct r300_emit_state * emit) +{ + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; + + if (code->alu.length == emit->node_first_alu) { + /* Generate a single NOP for this node */ + struct rc_pair_instruction inst; + memset(&inst, 0, sizeof(inst)); + if (!emit_alu(emit, &inst)) + return 0; + } + + unsigned alu_offset = emit->node_first_alu; + unsigned alu_end = code->alu.length - alu_offset - 1; + unsigned tex_offset = emit->node_first_tex; + unsigned tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); + return 0; + } + + tex_end = 0; + } else { + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; + } + + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + (alu_offset << R300_ALU_START_SHIFT) | + (alu_end << R300_ALU_SIZE_SHIFT) | + (tex_offset << R300_TEX_START_SHIFT) | + (tex_end << R300_TEX_SIZE_SHIFT) | + emit->node_flags; + + return 1; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static int begin_tex(struct r300_emit_state * emit) +{ + PROG_CODE; + + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return 1; + } + + if (emit->current_node == 3) { + error("Too many texture indirections"); + return 0; + } + + if (!finish_node(emit)) + return 0; + + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; + return 1; +} + + +static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) +{ + PROG_CODE; + + if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + error("Too many TEX instructions"); + return 0; + } + + unsigned int unit = inst->U.I.TexSrcUnit; + unsigned int dest = inst->U.I.DstReg.Index; + unsigned int opcode; + + switch(inst->U.I.Opcode) { + case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name); + return 0; + } + + if (inst->U.I.Opcode == RC_OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->U.I.SrcReg[0].Index); + + code->tex.inst[code->tex.length++] = + (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | + (dest << R300_DST_ADDR_SHIFT) | + (unit << R300_TEX_ID_SHIFT) | + (opcode << R300_TEX_INST_SHIFT); + return 1; +} + + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +{ + struct r300_emit_state emit; + struct r300_fragment_program_code *code = &compiler->code->code.r300; + + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; + + memset(code, 0, sizeof(struct r300_fragment_program_code)); + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + begin_tex(&emit); + continue; + } + + emit_tex(&emit, inst); + } else { + emit_alu(&emit, &inst->U.P); + } + } + + if (code->pixsize >= compiler->Base.max_temp_regs) + rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); + + if (compiler->Base.Error) + return; + + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + code->code_offset = + (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | + (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = emit.current_node; i >= 0; --i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c new file mode 100644 index 0000000000..5d5de2f1b2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r300_fragprog_swizzle.h" + +#include <stdio.h> + +#include "../r300_reg.h" +#include "radeon_compiler.h" + +#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) + +struct swizzle_data { + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + if (reg.Abs) + reg.Negate = RC_MASK_NONE; + + if (opcode == RC_OPCODE_KIL || + opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { + int j; + + if (reg.Abs || reg.Negate) + return 0; + + for(j = 0; j < 4; ++j) { + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz != j) + return 0; + } + + return 1; + } + + unsigned int relevant = 0; + int j; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) + relevant |= 1 << j; + + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + if (!lookup_native_swizzle(reg.Swizzle)) + return 0; + + return 1; +} + + +static void r300_swizzle_split( + struct rc_src_register src, unsigned int mask, + struct rc_swizzle_split * split) +{ + if (src.Abs) + src.Negate = RC_MASK_NONE; + + split->NumPhases = 0; + + while(mask) { + const struct swizzle_data *best_swizzle = 0; + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + unsigned int matchcount = 0; + unsigned int matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + if (!GET_BIT(mask, comp)) + continue; + unsigned int swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_swizzle = sd; + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (mask & RC_MASK_XYZ)) + break; + } + } + + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; + + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; + } +} + +struct rc_swizzle_caps r300_swizzle_caps = { + .IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split +}; + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd) { + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); + return 0; + } + + return sd->base + src*sd->stride; +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) +{ + if (swizzle < 3) + return swizzle + 3*src; + + switch(swizzle) { + case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h new file mode 100644 index 0000000000..118476af13 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_SWIZZLE_H_ +#define __R300_FRAGPROG_SWIZZLE_H_ + +#include "radeon_swizzle.h" + +extern struct rc_swizzle_caps r300_swizzle_caps; + +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c new file mode 100644 index 0000000000..38312658d6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -0,0 +1,209 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdio.h> + +#include "radeon_dataflow.h" +#include "radeon_emulate_branches.h" +#include "radeon_emulate_loops.h" +#include "radeon_program_alu.h" +#include "radeon_program_tex.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + + +static void dataflow_outputs_mark_use(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_fragment_program_compiler * c = userdata; + callback(data, c->OutputColor[0], RC_MASK_XYZW); + callback(data, c->OutputColor[1], RC_MASK_XYZW); + callback(data, c->OutputColor[2], RC_MASK_XYZW); + callback(data, c->OutputColor[3], RC_MASK_XYZW); + callback(data, c->OutputDepth, RC_MASK_W); +} + +static void rewrite_depth_out(struct r300_fragment_program_compiler * c) +{ + struct rc_instruction *rci; + + for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction * inst = &rci->U.I; + + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) + continue; + + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; + } else { + inst->DstReg.WriteMask = 0; + continue; + } + + switch (inst->Opcode) { + case RC_OPCODE_FRC: + case RC_OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case RC_OPCODE_ADD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case RC_OPCODE_CMP: + case RC_OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } + } +} + +static void debug_program_log(struct r300_fragment_program_compiler* c, const char * where) +{ + if (c->Base.Debug) { + fprintf(stderr, "Fragment Program: %s\n", where); + rc_print_program(&c->Base.Program); + } +} + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) +{ + rewrite_depth_out(c); + + debug_program_log(c, "before compilation"); + + /* XXX Ideally this should be done only for r3xx, but since + * we don't have branching support for r5xx, we use the emulation + * on all chipsets. */ + + if(c->Base.is_r500){ + rc_emulate_loops(&c->Base, R500_PFS_MAX_INST); + } + else{ + rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST); + } + debug_program_log(c, "after emulate loops"); + + rc_emulate_branches(&c->Base); + + debug_program_log(c, "after emulate branches"); + + if (c->Base.is_r500) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_IF, 0 }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(&c->Base, 4, transformations); + + debug_program_log(c, "after native rewrite part 1"); + + c->Base.SwizzleCaps = &r500_swizzle_caps; + } else { + struct radeon_program_transformation transformations[] = { + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(&c->Base, 2, transformations); + + debug_program_log(c, "after native rewrite part 1"); + + c->Base.SwizzleCaps = &r300_swizzle_caps; + } + + /* Run the common transformations too. + * Remember, lowering comes last! */ + struct radeon_program_transformation common_transformations[] = { + { &radeonTransformTEX, c }, + }; + radeonLocalTransform(&c->Base, 1, common_transformations); + + common_transformations[0].function = &radeonTransformALU; + radeonLocalTransform(&c->Base, 1, common_transformations); + + if (c->Base.Error) + return; + + debug_program_log(c, "after native rewrite part 2"); + + rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c); + if (c->Base.Error) + return; + + debug_program_log(c, "after deadcode"); + + rc_optimize(&c->Base); + + debug_program_log(c, "after dataflow optimize"); + + rc_dataflow_swizzles(&c->Base); + if (c->Base.Error) + return; + + debug_program_log(c, "after dataflow passes"); + + rc_pair_translate(c); + if (c->Base.Error) + return; + + debug_program_log(c, "after pair translate"); + + rc_pair_schedule(c); + if (c->Base.Error) + return; + + debug_program_log(c, "after pair scheduling"); + + rc_pair_regalloc(c, c->Base.max_temp_regs); + + if (c->Base.Error) + return; + + debug_program_log(c, "after register allocation"); + + if (c->Base.is_r500) { + r500BuildFragmentProgramHwCode(c); + } else { + r300BuildFragmentProgramHwCode(c); + } + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + + if (c->Base.Debug) { + if (c->Base.is_r500) { + r500FragmentProgramDump(c->code); + } else { + r300FragmentProgramDump(c->code); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c new file mode 100644 index 0000000000..507b2e532f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,651 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +#include "radeon_dataflow.h" +#include "radeon_program_alu.h" +#include "radeon_swizzle.h" +#include "radeon_emulate_branches.h" + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(vpi->SrcReg[x].File), \ + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + + +static unsigned long t_dst_mask(unsigned int mask) +{ + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; +} + +static unsigned long t_dst_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case RC_FILE_OUTPUT: + return PVS_DST_REG_OUT; + case RC_FILE_ADDRESS: + return PVS_DST_REG_A0; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(rc_register_file file) +{ + switch (file) { + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_NONE: + case RC_FILE_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case RC_FILE_INPUT: + return PVS_SRC_REG_INPUT; + case RC_FILE_CONSTANT: + return PVS_SRC_REG_CONSTANT; + } +} + +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) +{ + unsigned long aclass = t_src_class(a.File); + unsigned long bclass = t_src_class(b.File); + + if (aclass != bclass) + return 0; + if (aclass == PVS_SRC_REG_TEMPORARY) + return 0; + + if (a.RelAddr || b.RelAddr) + return 1; + if (a.Index != b.Index) + return 1; + + return 0; +} + +static inline unsigned long t_swizzle(unsigned int swizzle) +{ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + if (src->File == RC_FILE_INPUT) { + assert(vp->inputs[src->Index] != -1); + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct rc_src_register *src) +{ + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (src->RelAddr << 4); +} + +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) +{ + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { + assert(dst->Index == 0); + } + + return 1; +} + +static void ei_vector1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_vector2(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); +} + +static void ei_math1(struct r300_vertex_program_code *vp, + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(hw_opcode, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); +} + +static void ei_lit(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W + t_src_class(vpi->SrcReg[0].File), + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | + (vpi->SrcReg[0].RelAddr << 4); +} + +static void ei_mad(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + /* Remarks about hardware limitations of MAD + * (please preserve this comment, as this information is _NOT_ + * in the documentation provided by AMD). + * + * As described in the documentation, MAD with three unique temporary + * source registers requires the use of the macro version. + * + * However (and this is not mentioned in the documentation), apparently + * the macro version is _NOT_ a full superset of the normal version. + * In particular, the macro version does not always work when relative + * addressing is used in the source operands. + * + * This limitation caused incorrect rendering in Sauerbraten's OpenGL + * assembly shader path when using medium quality animations + * (i.e. animations with matrix blending instead of quaternion blending). + * + * Unfortunately, I (nha) have been unable to extract a Piglit regression + * test for this issue - for some reason, it is possible to have vertex + * programs whose prefix is *exactly* the same as the prefix of the + * offending program in Sauerbraten up to the offending instruction + * without causing any trouble. + * + * Bottom line: Only use the macro version only when really necessary; + * according to AMD docs, this should improve performance by one clock + * as a nice side bonus. + */ + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && + vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && + vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && + vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + 0, + 1, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } else { + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + 0, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + } + inst[1] = t_src(vp, &vpi->SrcReg[0]); + inst[2] = t_src(vp, &vpi->SrcReg[1]); + inst[3] = t_src(vp, &vpi->SrcReg[2]); +} + +static void ei_pow(struct r300_vertex_program_code *vp, + struct rc_sub_instruction *vpi, + unsigned int * inst) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + 1, + 0, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); +} + + +static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *rci; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + + compiler->SetHwInputOutput(compiler); + + for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { + struct rc_sub_instruction *vpi = &rci->U.I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; + + /* Skip instructions writing to non-existing destination */ + if (!valid_dst(compiler->code, &vpi->DstReg)) + continue; + + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + rc_error(&compiler->Base, "Vertex program has too many instructions\n"); + return; + } + + assert(compiler->Base.is_r500 || + (vpi->Opcode != RC_OPCODE_SEQ && + vpi->Opcode != RC_OPCODE_SNE)); + + switch (vpi->Opcode) { + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + default: + rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name); + return; + } + + compiler->code->length += 4; + + if (compiler->Base.Error) + return; + } +} + +struct temporary_allocation { + unsigned int Allocated:1; + unsigned int HwTemp:15; + struct rc_instruction * LastRead; +}; + +static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) +{ + struct rc_instruction *inst; + unsigned int num_orig_temps = 0; + char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + struct temporary_allocation * ta; + unsigned int i, j; + + compiler->code->num_temporaries = 0; + memset(hwtemps, 0, sizeof(hwtemps)); + + /* Pass 1: Count original temporaries and allocate structures */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (inst->U.I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->U.I.SrcReg[i].Index + 1; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->U.I.DstReg.Index + 1; + } + } + } + + ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, + sizeof(struct temporary_allocation) * num_orig_temps); + memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); + + /* Pass 2: Determine original temporary lifetimes */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) + ta[inst->U.I.SrcReg[i].Index].LastRead = inst; + } + } + + /* Pass 3: Register allocation */ + for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.DstReg.Index; + + if (!ta[orig].Allocated) { + for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { + if (!hwtemps[j]) + break; + } + if (j >= VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Out of hw temporaries\n"); + } else { + ta[orig].Allocated = 1; + ta[orig].HwTemp = j; + hwtemps[j] = 1; + + if (j >= compiler->code->num_temporaries) + compiler->code->num_temporaries = j + 1; + } + } + + inst->U.I.DstReg.Index = ta[orig].HwTemp; + } + } + } +} + + +/** + * Vertex engine cannot read two inputs or two constants at the same time. + * Introduce intermediate MOVs to temporary registers to account for this. + */ +static int transform_source_conflicts( + struct radeon_compiler *c, + struct rc_instruction* inst, + void* unused) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->NumSrcRegs == 3) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) + || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + + reset_srcreg(&inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; + } + } + + if (opcode->NumSrcRegs >= 2) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { + int tmpreg = rc_find_free_temporary(c); + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + + reset_srcreg(&inst->U.I.SrcReg[1]); + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; + } + } + + return 1; +} + +static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) +{ + int i; + + for(i = 0; i < 32; ++i) { + if ((compiler->RequiredOutputs & (1 << i)) && + !(compiler->Base.Program.OutputsWritten & (1 << i))) { + struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler->Base.Program.OutputsWritten |= 1 << i; + } + } +} + +static void dataflow_outputs_mark_used(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) +{ + struct r300_vertex_program_compiler * c = userdata; + int i; + + for(i = 0; i < 32; ++i) { + if (c->RequiredOutputs & (1 << i)) + callback(data, i, RC_MASK_XYZW); + } +} + +static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + (void) opcode; + (void) reg; + + return 1; +} + +static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where) +{ + if (c->Base.Debug) { + fprintf(stderr, "Vertex Program: %s\n", where); + rc_print_program(&c->Base.Program); + } +} + + +static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { + .IsNative = &swizzle_is_native, + .Split = 0 /* should never be called */ +}; + + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) +{ + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + + addArtificialOutputs(compiler); + + debug_program_log(compiler, "before compilation"); + + /* XXX Ideally this should be done only for r3xx, but since + * we don't have branching support for r5xx, we use the emulation + * on all chipsets. */ + rc_emulate_branches(&compiler->Base); + + debug_program_log(compiler, "after emulate branches"); + + { + struct radeon_program_transformation transformations[] = { + { &r300_transform_vertex_alu, 0 }, + { &r300_transform_trig_scale_vertex, 0 } + }; + radeonLocalTransform(&compiler->Base, 2, transformations); + } + + debug_program_log(compiler, "after native rewrite"); + + { + /* Note: This pass has to be done seperately from ALU rewrite, + * otherwise non-native ALU instructions with source conflits + * will not be treated properly. + */ + struct radeon_program_transformation transformations[] = { + { &transform_source_conflicts, 0 }, + }; + radeonLocalTransform(&compiler->Base, 1, transformations); + } + + debug_program_log(compiler, "after source conflict resolve"); + + rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler); + + debug_program_log(compiler, "after deadcode"); + + rc_dataflow_swizzles(&compiler->Base); + + allocate_temporary_registers(compiler); + + debug_program_log(compiler, "after dataflow"); + + translate_vertex_program(compiler); + + rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants); + + compiler->code->InputsRead = compiler->Base.Program.InputsRead; + compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; + + if (compiler->Base.Debug) { + fprintf(stderr, "Final vertex program code:\n"); + r300_vertex_program_dump(compiler->code); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c new file mode 100644 index 0000000000..5800f1a78e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -0,0 +1,180 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_code.h" + +#include <stdio.h> + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_COND_MUX_EQ", + " VE_COND_MUX_GT", + " VE_COND_MUX_GTE", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + + +static void r300_vs_op_dump(uint32_t op) +{ + fprintf(stderr, " dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if (op & 0x80) { + if (op & 0x1) { + fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +static void r300_vs_src_dump(uint32_t src) +{ + fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +{ + unsigned instrcount = vs->length / 4; + unsigned i; + + for(i = 0; i < instrcount; i++) { + unsigned offset = i*4; + unsigned src; + + fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]); + r300_vs_op_dump(vs->body.d[offset]); + + for(src = 0; src < 3; ++src) { + fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]); + r300_vs_src_dump(vs->body.d[offset+1+src]); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c new file mode 100644 index 0000000000..632f0bcf4f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -0,0 +1,344 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r500_fragprog.h" + +#include <stdio.h> + +#include "../r300_reg.h" + +/** + * Rewrite IF instructions to use the ALU result special register. + */ +int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + if (inst->U.I.Opcode != RC_OPCODE_IF) + return 0; + + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.WriteALUResult = RC_ALURESULT_W; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X); + + inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].Negate = 0; + + return 1; +} + +static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) +{ + unsigned int relevant; + int i; + + if (opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_KIL) { + if (reg.Abs) + return 0; + + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; + + if (reg.Negate) + reg.Negate ^= RC_MASK_XYZW; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { + reg.Negate &= ~(1 << i); + continue; + } + if (swz >= 4) + return 0; + } + + if (reg.Negate) + return 0; + + return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; + + return 0; + } else { + /* ALU instructions support almost everything */ + if (reg.Abs) + return 1; + + relevant = 0; + for(i = 0; i < 3; ++i) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) + return 0; + + return 1; + } +} + +/** + * Split source register access. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. + */ +static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, + struct rc_swizzle_split * split) +{ + unsigned int negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) + continue; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; + } + + split->NumPhases = 0; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + split->Phase[split->NumPhases++] = negatebase[i]; + } +} + +struct rc_swizzle_caps r500_swizzle_caps = { + .IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split +}; + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "H"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +void r500FragmentProgramDump(struct rX00_fragment_program_code *c) +{ + struct r500_fragment_program_code *code = &c->code.r500; + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + int n; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case 0: + case 1: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3, (inst >> 29) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 29) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case 2: + break; + case 3: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h new file mode 100644 index 0000000000..4efbae7ba6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + +extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); + +extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); + +extern struct rc_swizzle_caps r500_swizzle_caps; + +extern int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c new file mode 100644 index 0000000000..fb2d8b5a9c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -0,0 +1,495 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <darktama@iinet.net.au> + * + * \author Jerome Glisse <j.glisse@gmail.com> + * + * \author Corbin Simpson <MostAwesomeDude@gmail.com> + * + */ + +#include "r500_fragprog.h" + +#include "../r300_reg.h" + +#include "radeon_program_pair.h" + + +#define PROG_CODE \ + struct r500_fragment_program_code *code = &c->code->code.r500 + +#define error(fmt, args...) do { \ + rc_error(&c->Base, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +struct branch_info { + int If; + int Else; + int Endif; +}; + +struct emit_state { + struct radeon_compiler * C; + struct r500_fragment_program_code * Code; + + struct branch_info * Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; + + unsigned int MaxBranchDepth; +}; + +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) +{ + switch(opcode) { + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); + /* fall through */ + case RC_OPCODE_NOP: + /* fall through */ + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static unsigned int fix_hw_swizzle(unsigned int swz) +{ + switch (swz) { + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_UNUSED: + swz = 4; + break; + case RC_SWIZZLE_HALF: + swz = 5; + break; + case RC_SWIZZLE_ONE: + swz = 6; + break; + } + + return swz; +} + +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) +{ + unsigned int t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) +{ + unsigned int t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +{ + switch(func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); + return 0; + } +} + +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (src.File == RC_FILE_CONSTANT) { + return src.Index | 0x100; + } else if (src.File == RC_FILE_TEMPORARY) { + use_temporary(code, src.Index); + return src.Index; + } + + return 0; +} + +/** + * NOP the specified instruction if it is not a texture lookup. + */ +static void alu_nop(struct r300_fragment_program_compiler *c, int ip) +{ + PROG_CODE; + + if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) { + code->inst[ip].inst0 |= R500_INST_NOP; + } +} + +/** + * Emit a paired ALU instruction. + */ +static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_alu: Too many instructions"); + return; + } + + int ip = ++code->inst_end; + + /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ + if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || + inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) { + if (ip > 0) { + alu_nop(c, ip - 1); + } + } + + code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + if (inst->WriteALUResult) { + error("%s: cannot write output and ALU result at the same time"); + return; + } + } else { + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->code->writes_depth = 1; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target); + code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target); + + if (inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + + code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); + } +} + +static unsigned int translate_strq_swizzle(unsigned int swizzle) +{ + unsigned int swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_tex: Too many instructions"); + return 0; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case RC_OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case RC_OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case RC_OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case RC_OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + default: + error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); + } + + use_temporary(code, inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + return 1; +} + +static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +{ + if (s->Code->inst_end >= 511) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } + + unsigned int newip = ++s->Code->inst_end; + + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + + if (inst->U.I.Opcode == RC_OPCODE_IF) { + if (s->CurrentBranchDepth >= 32) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; + + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; + + /* actual instruction is filled in at ENDIF time */ + } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) { + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; + + /* actual instruction is filled in at ENDIF time */ + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; + + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + ; + + if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + + s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + + s->CurrentBranchDepth--; + } else { + rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); + } +} + +void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +{ + struct emit_state s; + struct r500_fragment_program_code *code = &compiler->code->code.r500; + + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + s.Code = code; + + memset(code, 0, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_end = -1; + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } + + if (code->max_temp_idx >= 128) + rc_error(&compiler->Base, "Too many hardware temporaries used"); + + if (compiler->Base.Error) + return; + + if (code->inst_end == -1 || + (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= 511) { + rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); + return; + } + + int ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + if (s.MaxBranchDepth >= 4) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; + + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c new file mode 100644 index 0000000000..0eab18c344 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_code.h" + +#include <stdlib.h> +#include <string.h> + +#include "radeon_program.h" + +void rc_constants_init(struct rc_constant_list * c) +{ + memset(c, 0, sizeof(*c)); +} + +/** + * Copy a constants structure, assuming that the destination structure + * is not initialized. + */ +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src) +{ + dst->Constants = malloc(sizeof(struct rc_constant) * src->Count); + memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count); + dst->Count = src->Count; + dst->_Reserved = src->Count; +} + +void rc_constants_destroy(struct rc_constant_list * c) +{ + free(c->Constants); + memset(c, 0, sizeof(*c)); +} + +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant) +{ + unsigned index = c->Count; + + if (c->Count >= c->_Reserved) { + struct rc_constant * newlist; + + c->_Reserved = c->_Reserved * 2; + if (!c->_Reserved) + c->_Reserved = 16; + + newlist = malloc(sizeof(struct rc_constant) * c->_Reserved); + memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count); + + free(c->Constants); + c->Constants = newlist; + } + + c->Constants[index] = *constant; + c->Count++; + + return index; +} + + +/** + * Add a state vector to the constant list, while trying to avoid duplicates. + */ +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_STATE) { + if (c->Constants[index].u.State[0] == state0 && + c->Constants[index].u.State[1] == state1) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_STATE; + constant.Size = 4; + constant.u.State[0] = state0; + constant.u.State[1] = state1; + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate vector to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data) +{ + unsigned index; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4)) + return index; + } + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + memcpy(constant.u.Immediate, data, sizeof(float) * 4); + + return rc_constants_add(c, &constant); +} + + +/** + * Add an immediate scalar to the constant list, while trying to avoid + * duplicates. + */ +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle) +{ + unsigned index; + int free_index = -1; + struct rc_constant constant; + + for(index = 0; index < c->Count; ++index) { + if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { + unsigned comp; + for(comp = 0; comp < c->Constants[index].Size; ++comp) { + if (c->Constants[index].u.Immediate[comp] == data) { + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return index; + } + } + + if (c->Constants[index].Size < 4) + free_index = index; + } + } + + if (free_index >= 0) { + unsigned comp = c->Constants[free_index].Size++; + c->Constants[free_index].u.Immediate[comp] = data; + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); + return free_index; + } + + memset(&constant, 0, sizeof(constant)); + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 1; + constant.u.Immediate[0] = data; + *swizzle = RC_SWIZZLE_XXXX; + + return rc_constants_add(c, &constant); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h new file mode 100644 index 0000000000..1979e7e4e4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -0,0 +1,265 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_CODE_H +#define RADEON_CODE_H + +#include <stdint.h> + +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 + +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 + + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) + +enum { + /** + * External constants are constants whose meaning is unknown to this + * compiler. For example, a Mesa gl_program's constants are turned + * into external constants. + */ + RC_CONSTANT_EXTERNAL = 0, + + RC_CONSTANT_IMMEDIATE, + + /** + * Constant referring to state that is known by this compiler, + * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state. + */ + RC_CONSTANT_STATE +}; + +enum { + RC_STATE_SHADOW_AMBIENT = 0, + + RC_STATE_R300_WINDOW_DIMENSION, + RC_STATE_R300_TEXRECT_FACTOR, + RC_STATE_R300_VIEWPORT_SCALE, + RC_STATE_R300_VIEWPORT_OFFSET +}; + +struct rc_constant { + unsigned Type:2; /**< RC_CONSTANT_xxx */ + unsigned Size:3; + + union { + unsigned External; + float Immediate[4]; + unsigned State[2]; + } u; +}; + +struct rc_constant_list { + struct rc_constant * Constants; + unsigned Count; + + unsigned _Reserved; +}; + +void rc_constants_init(struct rc_constant_list * c); +void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); +void rc_constants_destroy(struct rc_constant_list * c); +unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); +unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2); +unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); +unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); + +/** + * Compare functions. + * + * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you + * the correct GL compare function. + */ +typedef enum { + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS +} rc_compare_func; + +/** + * Coordinate wrapping modes. + * + * These are not quite the same as their GL counterparts yet. + */ +typedef enum { + RC_WRAP_NONE = 0, + RC_WRAP_REPEAT, + RC_WRAP_MIRRORED_REPEAT, + RC_WRAP_MIRRORED_CLAMP +} rc_wrap_mode; + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + unsigned depth_texture_swizzle:12; + + /** + * If the sampler is used as a shadow sampler, + * this field specifies the compare function. + * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). + * \sa rc_compare_func + */ + unsigned texture_compare_func : 3; + + /** + * No matter what the sampler type is, + * this field turns it into a shadow sampler. + */ + unsigned compare_mode_enabled : 1; + + /** + * If the sampler needs to fake NPOT, this field is set. + */ + unsigned fake_npot : 1; + + /** + * If the sampler will recieve non-normalized coords, + * this field is set. + */ + unsigned non_normalized_coords : 1; + + /** + * This field specifies wrapping modes for the sampler. + * + * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths + * will be performed on the coordinates. + */ + unsigned wrap_mode : 2; + } unit[16]; +}; + + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ + uint32_t inst[R300_PFS_MAX_TEX_INST]; + } tex; + + struct { + int length; /**< total # of ALU instructions used */ + struct { + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; + } inst[R300_PFS_MAX_ALU_INST]; + } alu; + + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ +}; + + +struct r500_fragment_program_code { + struct { + uint32_t inst0; + uint32_t inst1; + uint32_t inst2; + uint32_t inst3; + uint32_t inst4; + uint32_t inst5; + } inst[R500_PFS_MAX_INST]; + + int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ + + int max_temp_idx; + + uint32_t us_fc_ctrl; +}; + +struct rX00_fragment_program_code { + union { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; + + unsigned writes_depth:1; + + struct rc_constant_list constants; +}; + + +#define VSF_MAX_FRAGMENT_LENGTH (255*4) +#define VSF_MAX_FRAGMENT_TEMPS (14) + +#define VSF_MAX_INPUTS 32 +#define VSF_MAX_OUTPUTS 32 + +struct r300_vertex_program_code { + int length; + union { + uint32_t d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VSF_MAX_INPUTS]; + int outputs[VSF_MAX_OUTPUTS]; + + struct rc_constant_list constants; + + uint32_t InputsRead; + uint32_t OutputsWritten; +}; + +void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + +#endif /* RADEON_CODE_H */ + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c new file mode 100644 index 0000000000..1c8ba864a4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -0,0 +1,309 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "radeon_program.h" + + +void rc_init(struct radeon_compiler * c) +{ + memset(c, 0, sizeof(*c)); + + memory_pool_init(&c->Pool); + c->Program.Instructions.Prev = &c->Program.Instructions; + c->Program.Instructions.Next = &c->Program.Instructions; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; +} + +void rc_destroy(struct radeon_compiler * c) +{ + rc_constants_destroy(&c->Program.Constants); + memory_pool_destroy(&c->Pool); + free(c->ErrorMsg); +} + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + if (!c->Debug) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +void rc_error(struct radeon_compiler * c, const char * fmt, ...) +{ + va_list ap; + + c->Error = 1; + + if (!c->ErrorMsg) { + /* Only remember the first error */ + char buf[1024]; + int written; + + va_start(ap, fmt); + written = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (written < sizeof(buf)) { + c->ErrorMsg = strdup(buf); + } else { + c->ErrorMsg = malloc(written + 1); + + va_start(ap, fmt); + vsnprintf(c->ErrorMsg, written + 1, fmt, ap); + va_end(ap); + } + } + + if (c->Debug) { + fprintf(stderr, "r300compiler error: "); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } +} + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) +{ + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; +} + +/** + * Recompute c->Program.InputsRead and c->Program.OutputsWritten + * based on which inputs and outputs are actually referenced + * in program instructions. + */ +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int i; + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; + } + } +} + +/** + * Rewrite the program such that everything that source the given input + * register will source new_input instead. + */ +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) +{ + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << input); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { + inst->U.I.SrcReg[i].File = new_input.File; + inst->U.I.SrcReg[i].Index = new_input.Index; + inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); + if (!inst->U.I.SrcReg[i].Abs) { + inst->U.I.SrcReg[i].Negate ^= new_input.Negate; + inst->U.I.SrcReg[i].Abs = new_input.Abs; + } + + c->Program.InputsRead |= 1 << new_input.Index; + } + } + } +} + + +/** + * Rewrite the program such that everything that writes into the given + * output register will instead write to new_output. The new_output + * writemask is honoured. + */ +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) +{ + struct rc_instruction * inst; + + c->Program.OutputsWritten &= ~(1 << output); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.Index = new_output; + inst->U.I.DstReg.WriteMask &= writemask; + + c->Program.OutputsWritten |= 1 << new_output; + } + } + } +} + + +/** + * Rewrite the program such that a given output is duplicated. + */ +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) +{ + unsigned tempreg = rc_find_free_temporary(c); + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; + } + } + } + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + c->Program.OutputsWritten |= 1 << dup_output; +} + + +/** + * Introduce standard code fragment to deal with fragment.position. + */ +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction * inst_rcp; + struct rc_instruction * inst_mul; + struct rc_instruction * inst_mad; + struct rc_instruction * inst; + + c->Program.InputsRead &= ~(1 << wpos); + c->Program.InputsRead |= 1 << new_input; + + /* perspective divide */ + inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_mul = rc_insert_new_instruction(c, inst_rcp); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; + + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + + /* viewport transformation */ + inst_mad = rc_insert_new_instruction(c, inst_mul); + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; + + if (full_vtransform) { + inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); + } else { + inst_mad->U.I.SrcReg[1].Index = + inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + } + + for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h new file mode 100644 index 0000000000..f15905d79d --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -0,0 +1,117 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_COMPILER_H +#define RADEON_COMPILER_H + +#include "../../../../main/compiler.h" + +#include "memory_pool.h" +#include "radeon_code.h" +#include "radeon_program.h" + +struct rc_swizzle_caps; + +struct radeon_compiler { + struct memory_pool Pool; + struct rc_program Program; + unsigned Debug:1; + unsigned Error:1; + char * ErrorMsg; + + /* Hardware specification. */ + unsigned is_r500; + unsigned max_temp_regs; + + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + struct rc_swizzle_caps * SwizzleCaps; + /*@}*/ +}; + +void rc_init(struct radeon_compiler * c); +void rc_destroy(struct radeon_compiler * c); + +void rc_debug(struct radeon_compiler * c, const char * fmt, ...); +void rc_error(struct radeon_compiler * c, const char * fmt, ...); + +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); + +/** + * This macro acts like an if-statement that can be used to implement + * non-aborting assertions in the compiler. + * + * It checks whether \p cond is true. If not, an internal compiler error is + * flagged and the if-clause is run. + * + * A typical use-case would be: + * + * if (rc_assert(c, condition-that-must-be-true)) + * return; + */ +#define rc_assert(c, cond) \ + (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) + +void rc_calculate_inputs_outputs(struct radeon_compiler * c); + +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); +void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); +void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); +void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, + int full_vtransform); + +struct r300_fragment_program_compiler { + struct radeon_compiler Base; + struct rX00_fragment_program_code *code; + /* Optional transformations and features. */ + struct r300_fragment_program_external_state state; + unsigned enable_shadow_ambient; + /* Register corresponding to the depthbuffer. */ + unsigned OutputDepth; + /* Registers corresponding to the four colorbuffers. */ + unsigned OutputColor[4]; + + void * UserData; + void (*AllocateHwInputs)( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata); +}; + +void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + uint32_t RequiredOutputs; + + void * UserData; + void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); +}; + +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); + +#endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c new file mode 100644 index 0000000000..0e6c62541f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -0,0 +1,281 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_program.h" + + +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + + if (inst->SrcReg[src].File == RC_FILE_NONE) + return; + + for(unsigned int chan = 0; chan < 4; ++chan) + refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan); + + refmask &= RC_MASK_XYZW; + + if (refmask) + cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask); + + if (refmask && inst->SrcReg[src].RelAddr) + cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int refmasks[3] = { 0, 0, 0 }; + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + for(unsigned int chan = 0; chan < 3; ++chan) { + unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + if (swz < 4) + refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz; + } + } + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + if (inst->Alpha.Arg[arg].Swizzle < 4) + refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle; + } + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); + + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); + } +} + +/** + * Calls a callback function for all register reads. + * + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + reads_normal(inst, cb, userdata); + } else { + reads_pair(inst, cb, userdata); + } +} + + + +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); + + if (inst->Alpha.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); +} + +/** + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + writes_normal(inst, cb, userdata); + } else { + writes_pair(inst, cb, userdata); + } +} + + +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + +static void remap_normal_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + rc_register_file file = inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + cb(userdata, fullinst, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } +} + +static void remap_pair_instruction(struct rc_instruction * fullinst, + rc_remap_register_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + cb(userdata, fullinst, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + + +/** + * Remap all register accesses according to the given function. + * That is, call the function \p cb for each referenced register (both read and written) + * and update the given instruction \p inst accordingly + * if it modifies its \ref pfile and \ref pindex contents. + */ +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) + remap_normal_instruction(inst, cb, userdata); + else + remap_pair_instruction(inst, cb, userdata); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h new file mode 100644 index 0000000000..60a6e192a9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_DATAFLOW_H +#define RADEON_DATAFLOW_H + +#include "radeon_program_constants.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_swizzle_caps; + + +/** + * Help analyze and modify the register accesses of instructions. + */ +/*@{*/ +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); + +typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex); +void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata); +/*@}*/ + + +/** + * Compiler passes based on dataflow analysis. + */ +/*@{*/ +typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, + void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); +void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata); +void rc_dataflow_swizzles(struct radeon_compiler * c); +/*@}*/ + +void rc_optimize(struct radeon_compiler * c); + +#endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c new file mode 100644 index 0000000000..e3c2c83c0c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct updatemask_state { + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; + unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; +}; + +struct instruction_state { + unsigned char WriteMask:4; + unsigned char WriteALUResult:1; + unsigned char SrcReg[3]; +}; + +struct branchinfo { + unsigned int HaveElse:1; + + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; +}; + +struct deadcode_state { + struct radeon_compiler * C; + struct instruction_state * Instructions; + + struct updatemask_state R; + + struct branchinfo * BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; +}; + + +static void or_updatemasks( + struct updatemask_state * dst, + struct updatemask_state * a, + struct updatemask_state * b) +{ + for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } + + for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + dst->Special[i] = a->Special[i] | b->Special[i]; + + dst->Address = a->Address | b->Address; +} + +static void push_branch(struct deadcode_state * s) +{ + memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, + s->BranchStackSize, s->BranchStackReserved, 1); + + struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); +} + +static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); + return 0; + } + + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } else if (file == RC_FILE_SPECIAL) { + if (index >= RC_NUM_SPECIAL_REGISTERS) { + rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->R.Special[index]; + } + + return 0; +} + +static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned char * pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; +} + +static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + struct instruction_state * insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0; + + if (opcode->HasDstReg) { + unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->U.I.DstReg.WriteMask; + *pused &= ~usedmask; + } + } + + insts->WriteMask |= usedmask; + + if (inst->U.I.WriteALUResult) { + unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); + if (pused && *pused) { + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usedmask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usedmask |= RC_MASK_W; + + *pused = 0; + insts->WriteALUResult = 1; + } + } + + unsigned int srcmasks[3]; + rc_compute_sources_for_writemask(inst, usedmask, srcmasks); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; + insts->SrcReg[src] |= newsrcmask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + continue; + + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); + + if (inst->U.I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) +{ + struct deadcode_state * s = data; + + mark_used(s, RC_FILE_OUTPUT, index, mask); +} + +void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata) +{ + struct deadcode_state s; + unsigned int nr_instructions; + + memset(&s, 0, sizeof(s)); + s.C = c; + + nr_instructions = rc_recompute_ips(c); + s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + + dce(userdata, &s, &mark_output_use); + + for(struct rc_instruction * inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; + inst = inst->Prev) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + if (opcode->Opcode == RC_OPCODE_ENDIF) { + push_branch(&s); + } else { + if (s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + } + } else { + rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__); + } + } + } + + update_instruction(&s, inst); + } + + unsigned int ip = 0; + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next, ++ip) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int dead = 1; + + if (!opcode->HasDstReg) { + dead = 0; + } else { + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + if (s.Instructions[ip].WriteMask) + dead = 0; + + if (s.Instructions[ip].WriteALUResult) + dead = 0; + else + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; + } + + if (dead) { + struct rc_instruction * todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } + + unsigned int srcmasks[3]; + unsigned int usemask = s.Instructions[ip].WriteMask; + + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usemask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usemask |= RC_MASK_W; + + rc_compute_sources_for_writemask(inst, usemask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } + + rc_calculate_inputs_outputs(c); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c new file mode 100644 index 0000000000..33acbd30f4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static void rewrite_source(struct radeon_compiler * c, + struct rc_instruction * inst, unsigned src) +{ + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask; + + usemask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + usemask |= 1 << chan; + } + + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); + + for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int phase_refmask; + unsigned int masked_negate; + + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + + phase_refmask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + else + phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); + } + + phase_refmask &= RC_MASK_XYZW; + + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; + if (masked_negate == 0) + mov->U.I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + + } + + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); + } +} + +void rc_dataflow_swizzles(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + rewrite_source(c, inst, src); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c new file mode 100644 index 0000000000..863654cf68 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -0,0 +1,331 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_emulate_branches.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct proxy_info { + unsigned int Proxied:1; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct register_proxies { + struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; +}; + +struct branch_info { + struct rc_instruction * If; + struct rc_instruction * Else; +}; + +struct emulate_branch_state { + struct radeon_compiler * C; + + struct branch_info * Branches; + unsigned int BranchCount; + unsigned int BranchReserved; +}; + + +static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + memory_pool_array_reserve(&s->C->Pool, struct branch_info, + s->Branches, s->BranchCount, s->BranchReserved, 1); + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount++]; + memset(branch, 0, sizeof(struct branch_info)); + branch->If = inst; + + /* Make a safety copy of the decision register, because we will need + * it at ENDIF time and it might be overwritten in both branches. */ + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C); + inst_mov->U.I.DstReg.WriteMask = RC_MASK_X; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + inst->U.I.SrcReg[0].Swizzle = 0; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].Negate = 0; +} + +static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) { + rc_error(s->C, "Encountered ELSE outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount - 1]; + branch->Else = inst; +} + + +struct state_and_proxies { + struct emulate_branch_state * S; + struct register_proxies * Proxies; +}; + +static struct proxy_info * get_proxy_info(struct state_and_proxies * sap, + rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_TEMPORARY) { + return &sap->Proxies->Temporary[index]; + } else { + return 0; + } +} + +static void scan_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int comp) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, file, index); + + if (proxy && !proxy->Proxied) { + proxy->Proxied = 1; + proxy->Index = rc_find_free_temporary(sap->S->C); + } +} + +static void remap_proxy_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct state_and_proxies * sap = userdata; + struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex); + + if (proxy && proxy->Proxied) { + *pfile = RC_FILE_TEMPORARY; + *pindex = proxy->Index; + } +} + +/** + * Redirect all writes in the instruction range [begin, end) to proxy + * temporary registers. + */ +static void allocate_and_insert_proxies(struct emulate_branch_state * s, + struct register_proxies * proxies, + struct rc_instruction * begin, + struct rc_instruction * end) +{ + struct state_and_proxies sap; + + sap.S = s; + sap.Proxies = proxies; + + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + rc_for_all_writes_mask(inst, scan_write, &sap); + rc_remap_registers(inst, remap_proxy_function, &sap); + } + + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (proxies->Temporary[index].Proxied) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = index; + } + } +} + + +static void inject_cmp(struct emulate_branch_state * s, + struct rc_instruction * inst_if, + struct rc_instruction * inst_endif, + rc_register_file file, unsigned int index, + struct proxy_info ifproxy, + struct proxy_info elseproxy) +{ + struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg.File = file; + inst_cmp->U.I.DstReg.Index = index; + inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + inst_cmp->U.I.SrcReg[0].Abs = 1; + inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; + inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index; +} + +static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) { + rc_error(s->C, "Encountered ENDIF outside of branches"); + return; + } + + DBG("%s\n", __FUNCTION__); + + struct branch_info * branch = &s->Branches[s->BranchCount - 1]; + struct register_proxies IfProxies; + struct register_proxies ElseProxies; + + memset(&IfProxies, 0, sizeof(IfProxies)); + memset(&ElseProxies, 0, sizeof(ElseProxies)); + + allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst); + + if (branch->Else) + allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst); + + /* Insert the CMP instructions at the end. */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) { + inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index, + IfProxies.Temporary[index], ElseProxies.Temporary[index]); + } + } + + /* Remove all traces of the branch instructions */ + rc_remove_instruction(branch->If); + if (branch->Else) + rc_remove_instruction(branch->Else); + rc_remove_instruction(inst); + + s->BranchCount--; + + if (VERBOSE) { + DBG("Program after ENDIF handling:\n"); + rc_print_program(&s->C->Program); + } +} + + +struct remap_output_data { + unsigned int Output:RC_REGISTER_INDEX_BITS; + unsigned int Temporary:RC_REGISTER_INDEX_BITS; +}; + +static void remap_output_function(void * userdata, struct rc_instruction * inst, + rc_register_file * pfile, unsigned int * pindex) +{ + struct remap_output_data * data = userdata; + + if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) { + *pfile = RC_FILE_TEMPORARY; + *pindex = data->Temporary; + } +} + + +/** + * Output registers cannot be read from and so cannot be dealt with like + * temporary registers. + * + * We do the simplest thing: If an output registers is written within + * a branch, then *all* writes to this register are proxied to a + * temporary register, and a final MOV is appended to the end of + * the program. + */ +static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst) +{ + if (!s->BranchCount) + return; + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (!opcode->HasDstReg) + return; + + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) { + struct remap_output_data remap; + + remap.Output = inst->U.I.DstReg.Index; + remap.Temporary = rc_find_free_temporary(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_output_function, &remap); + } + + struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT; + inst_mov->U.I.DstReg.Index = remap.Output; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = remap.Temporary; + } +} + +/** + * Remove branch instructions; instead, execute both branches + * on different register sets and choose between their results + * using CMP instructions in place of the original ENDIF. + */ +void rc_emulate_branches(struct radeon_compiler * c) +{ + struct emulate_branch_state s; + + memset(&s, 0, sizeof(s)); + s.C = c; + + /* Untypical loop because we may remove the current instruction */ + struct rc_instruction * ptr = c->Program.Instructions.Next; + while(ptr != &c->Program.Instructions) { + struct rc_instruction * inst = ptr; + ptr = ptr->Next; + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + switch(inst->U.I.Opcode) { + case RC_OPCODE_IF: + handle_if(&s, inst); + break; + case RC_OPCODE_ELSE: + handle_else(&s, inst); + break; + case RC_OPCODE_ENDIF: + handle_endif(&s, inst); + break; + default: + fix_output_writes(&s, inst); + break; + } + } else { + rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h new file mode 100644 index 0000000000..e07279f093 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h @@ -0,0 +1,30 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef RADEON_EMULATE_BRANCHES_H +#define RADEON_EMULATE_BRANCHES_H + +struct radeon_compiler; + +void rc_emulate_branches(struct radeon_compiler * c); + +#endif /* RADEON_EMULATE_BRANCHES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c new file mode 100644 index 0000000000..4c5d29f421 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -0,0 +1,474 @@ +/* + * Copyright 2010 Tom Stellard <tstellar@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + */ + +#include "radeon_emulate_loops.h" + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct emulate_loop_state { + struct radeon_compiler * C; + struct loop_info * Loops; + unsigned int LoopCount; + unsigned int LoopReserved; +}; + +struct loop_info { + struct rc_instruction * BeginLoop; + struct rc_instruction * Cond; + struct rc_instruction * If; + struct rc_instruction * Brk; + struct rc_instruction * EndIf; + struct rc_instruction * EndLoop; +}; + +struct const_value { + + struct radeon_compiler * C; + struct rc_src_register * Src; + float Value; + int HasValue; +}; + +struct count_inst { + struct radeon_compiler * C; + int Index; + rc_swizzle Swz; + float Amount; + int Unknown; +}; + +static float get_constant_value(struct radeon_compiler * c, + struct rc_src_register * src, + int chan) +{ + float base = 1.0f; + int swz = GET_SWZ(src->Swizzle, chan); + if(swz >= 4 || src->Index >= c->Program.Constants.Count ){ + rc_error(c, "get_constant_value: Can't find a value.\n"); + return 0.0f; + } + if(GET_BIT(src->Negate, chan)){ + base = -1.0f; + } + return base * + c->Program.Constants.Constants[src->Index].u.Immediate[swz]; +} + +static int src_reg_is_immediate(struct rc_src_register * src, + struct radeon_compiler * c) +{ + return src->File == RC_FILE_CONSTANT && + c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; +} + +static unsigned int loop_count_instructions(struct loop_info * loop) +{ + unsigned int count = 0; + struct rc_instruction * inst = loop->BeginLoop->Next; + while(inst != loop->EndLoop){ + count++; + inst = inst->Next; + } + return count; +} + +static unsigned int loop_calc_iterations(struct loop_info * loop, + unsigned int loop_count, unsigned int max_instructions) +{ + unsigned int icount = loop_count_instructions(loop); + return max_instructions / (loop_count * icount); +} + +static void loop_unroll(struct emulate_loop_state * s, + struct loop_info *loop, unsigned int iterations) +{ + unsigned int i; + struct rc_instruction * ptr; + struct rc_instruction * first = loop->BeginLoop->Next; + struct rc_instruction * last = loop->EndLoop->Prev; + struct rc_instruction * append_to = last; + rc_remove_instruction(loop->BeginLoop); + rc_remove_instruction(loop->EndLoop); + for( i = 1; i < iterations; i++){ + for(ptr = first; ptr != last->Next; ptr = ptr->Next){ + struct rc_instruction *new = rc_alloc_instruction(s->C); + memcpy(new, ptr, sizeof(struct rc_instruction)); + rc_insert_instruction(append_to, new); + append_to = new; + } + } +} + + +static void update_const_value(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct const_value * value = data; + if(value->Src->File != file || + value->Src->Index != index || + !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_MOV: + if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){ + return; + } + value->HasValue = 1; + value->Value = + get_constant_value(value->C, &inst->U.I.SrcReg[0], 0); + break; + } +} + +static void get_incr_amount(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct count_inst * count_inst = data; + int amnt_src_index; + const struct rc_opcode_info * opcode; + float amount; + + if(file != RC_FILE_TEMPORARY || + count_inst->Index != index || + (1 << GET_SWZ(count_inst->Swz,0) != mask)){ + return; + } + /* Find the index of the counter register. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + if(opcode->NumSrcRegs != 2){ + count_inst->Unknown = 1; + return; + } + if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[0].Index == count_inst->Index && + inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){ + amnt_src_index = 1; + } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[1].Index == count_inst->Index && + inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){ + amnt_src_index = 0; + } + else{ + count_inst->Unknown = 1; + return; + } + if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index], + count_inst->C)){ + amount = get_constant_value(count_inst->C, + &inst->U.I.SrcReg[amnt_src_index], 0); + } + else{ + count_inst->Unknown = 1 ; + return; + } + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + count_inst->Amount += amount; + break; + case RC_OPCODE_SUB: + if(amnt_src_index == 0){ + count_inst->Unknown = 0; + return; + } + count_inst->Amount -= amount; + break; + default: + count_inst->Unknown = 1; + return; + } + +} + +static int transform_const_loop(struct emulate_loop_state * s, + struct loop_info * loop, + struct rc_instruction * cond) +{ + int end_loops = 1; + int iterations; + struct count_inst count_inst; + float limit_value; + struct rc_src_register * counter; + struct rc_src_register * limit; + struct const_value counter_value; + struct rc_instruction * inst; + + /* Find the counter and the upper limit */ + + if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){ + limit = &cond->U.I.SrcReg[0]; + counter = &cond->U.I.SrcReg[1]; + } + else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){ + limit = &cond->U.I.SrcReg[1]; + counter = &cond->U.I.SrcReg[0]; + } + else{ + DBG("No constant limit.\n"); + return 0; + } + + /* Find the initial value of the counter */ + counter_value.Src = counter; + counter_value.Value = 0.0f; + counter_value.HasValue = 0; + counter_value.C = s->C; + for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop; + inst = inst->Next){ + rc_for_all_writes_mask(inst, update_const_value, &counter_value); + } + if(!counter_value.HasValue){ + DBG("Initial counter value cannot be determined.\n"); + return 0; + } + DBG("Initial counter value is %f\n", counter_value.Value); + /* Determine how the counter is modified each loop */ + count_inst.C = s->C; + count_inst.Index = counter->Index; + count_inst.Swz = counter->Swizzle; + count_inst.Amount = 0.0f; + count_inst.Unknown = 0; + for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ + switch(inst->U.I.Opcode){ + /* XXX In the future we might want to try to unroll nested + * loops here.*/ + case RC_OPCODE_BGNLOOP: + end_loops++; + break; + case RC_OPCODE_ENDLOOP: + loop->EndLoop = inst; + end_loops--; + break; + /* XXX Check if the counter is modified within an if statement. + */ + case RC_OPCODE_IF: + break; + default: + rc_for_all_writes_mask(inst, get_incr_amount, &count_inst); + if(count_inst.Unknown){ + return 0; + } + break; + } + } + /* Infinite loop */ + if(count_inst.Amount == 0.0f){ + return 0; + } + DBG("Counter is increased by %f each iteration.\n", count_inst.Amount); + /* Calculate the number of iterations of this loop. Keeping this + * simple, since we only support increment and decrement loops. + */ + limit_value = get_constant_value(s->C, limit, 0); + iterations = (int) ((limit_value - counter_value.Value) / + count_inst.Amount); + + DBG("Loop will have %d iterations.\n", iterations); + + /* Prepare loop for unrolling */ + rc_remove_instruction(loop->Cond); + rc_remove_instruction(loop->If); + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + + loop_unroll(s, loop, iterations); + loop->EndLoop = NULL; + return 1; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * <Additional conditional code> -> <Additional conditional code> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> <Loop Body> + * <Loop Body> -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return If the loop can be unrolled, a pointer to the first instruction of + * the unrolled loop. + * Otherwise, A pointer to the ENDLOOP instruction. + * Null if there is an error. + */ +static struct rc_instruction * transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst) +{ + struct loop_info *loop; + struct rc_instruction * ptr; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + memset(loop, 0, sizeof(struct loop_info)); + if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ + rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__); + return NULL; + } + loop->BeginLoop = inst; + + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){ + switch(ptr->U.I.Opcode){ + case RC_OPCODE_BGNLOOP: + /* Nested loop */ + ptr = transform_loop(s, ptr); + if(!ptr){ + return NULL; + } + break; + case RC_OPCODE_BRK: + loop->Brk = ptr; + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){ + rc_error(s->C, + "%s: expected ENDIF\n",__FUNCTION__); + return NULL; + } + loop->EndIf = ptr->Next; + if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){ + rc_error(s->C, + "%s: expected IF\n", __FUNCTION__); + return NULL; + } + loop->If = ptr->Prev; + switch(loop->If->Prev->U.I.Opcode){ + case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLE: + case RC_OPCODE_SEQ: + case RC_OPCODE_SNE: + break; + default: + rc_error(s->C, "%s expected conditional\n", + __FUNCTION__); + return NULL; + } + loop->Cond = loop->If->Prev; + ptr = loop->EndIf; + break; + case RC_OPCODE_ENDLOOP: + loop->EndLoop = ptr; + break; + } + } + /* Reverse the conditional instruction */ + switch(loop->Cond->U.I.Opcode){ + case RC_OPCODE_SGE: + loop->Cond->U.I.Opcode = RC_OPCODE_SLT; + break; + case RC_OPCODE_SLT: + loop->Cond->U.I.Opcode = RC_OPCODE_SGE; + break; + case RC_OPCODE_SLE: + loop->Cond->U.I.Opcode = RC_OPCODE_SGT; + break; + case RC_OPCODE_SGT: + loop->Cond->U.I.Opcode = RC_OPCODE_SLE; + break; + case RC_OPCODE_SEQ: + loop->Cond->U.I.Opcode = RC_OPCODE_SNE; + break; + case RC_OPCODE_SNE: + loop->Cond->U.I.Opcode = RC_OPCODE_SEQ; + break; + default: + rc_error(s->C, "loop->Cond is not a conditional.\n"); + return NULL; + } + + /* Check if the number of loops is known at compile time. */ + if(transform_const_loop(s, loop, ptr)){ + return loop->BeginLoop->Next; + } + + /* Prepare the loop to be unrolled */ + rc_remove_instruction(loop->Brk); + rc_remove_instruction(loop->EndIf); + rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); + return loop->EndLoop; +} + +static void rc_transform_loops(struct emulate_loop_state * s) +{ + struct rc_instruction * ptr = s->C->Program.Instructions.Next; + while(ptr != &s->C->Program.Instructions) { + if(ptr->Type == RC_INSTRUCTION_NORMAL && + ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ + ptr = transform_loop(s, ptr); + if(!ptr){ + return; + } + } + ptr = ptr->Next; + } +} + +static void rc_unroll_loops(struct emulate_loop_state *s, + unsigned int max_instructions) +{ + int i; + /* Iterate backwards of the list of loops so that loops that nested + * loops are unrolled first. + */ + for( i = s->LoopCount - 1; i >= 0; i-- ){ + if(!s->Loops[i].EndLoop){ + continue; + } + unsigned int iterations = loop_calc_iterations(&s->Loops[i], + s->LoopCount, max_instructions); + loop_unroll(s, &s->Loops[i], iterations); + } +} + +void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions) +{ + struct emulate_loop_state s; + + memset(&s, 0, sizeof(struct emulate_loop_state)); + s.C = c; + + /* We may need to move these two operations to r3xx_(vert|frag)prog.c + * and run the optimization passes between them in order to increase + * the number of unrolls we can do for each loop. + */ + rc_transform_loops(&s); + + rc_unroll_loops(&s, max_instructions); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h new file mode 100644 index 0000000000..ddcf1c0fab --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -0,0 +1,12 @@ + + +#ifndef RADEON_EMULATE_LOOPS_H +#define RADEON_EMULATE_LOOPS_H + +#define MAX_ITERATIONS 8 + +struct radeon_compiler; + +void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions); + +#endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c new file mode 100644 index 0000000000..1dc16855dc --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -0,0 +1,472 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_opcodes.h" +#include "radeon_program.h" + +#include "radeon_program_constants.h" + +struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP" + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE" + }, + { + .Opcode = RC_OPCODE_ABS, + .Name = "ABS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_CEIL, + .Name = "CEIL", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DPH, + .Name = "DPH", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_EXP, + .Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FLR, + .Name = "FLR", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LRP, + .Name = "LRP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = "MUL", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SCS, + .Name = "SCS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SFL, + .Name = "SFL", + .NumSrcRegs = 0, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGT, + .Name = "SGT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SLE, + .Name = "SLE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SLT, + .Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SUB, + .Name = "SUB", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SWZ, + .Name = "SWZ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_XPD, + .Name = "XPD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsFlowControl = 1, + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BGNLOOP, + .Name = "BGNLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_BRK, + .Name = "BRK", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDLOOP, + .Name = "ENDLOOP", + .IsFlowControl = 1, + .NumSrcRegs = 0, + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_BEGIN_TEX, + .Name = "BEGIN_TEX" + } +}; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(opcode->Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP3: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + srcmasks[0] |= RC_MASK_W; + /* Fall through */ + case RC_OPCODE_TEX: + switch (inst->U.I.TexSrcTarget) { + case RC_TEXTURE_1D: + srcmasks[0] |= RC_MASK_X; + break; + case RC_TEXTURE_2D: + case RC_TEXTURE_RECT: + case RC_TEXTURE_1D_ARRAY: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_TEXTURE_3D: + case RC_TEXTURE_CUBE: + case RC_TEXTURE_2D_ARRAY: + srcmasks[0] |= RC_MASK_XYZ; + break; + } + break; + case RC_OPCODE_DST: + srcmasks[0] |= RC_MASK_Y | RC_MASK_Z; + srcmasks[1] |= RC_MASK_Y | RC_MASK_W; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W; + break; + default: + break; + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h new file mode 100644 index 0000000000..91c82ac089 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_OPCODES_H +#define RADEON_OPCODES_H + +#include <assert.h> + +/** + * Opcodes understood by the Radeon compiler. + */ +typedef enum { + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, + + /** vec4 instruction: dst.c = abs(src0.c); */ + RC_OPCODE_ABS, + + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, + + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, + + /** vec4 instruction: dst.c = ceil(src0.c) */ + RC_OPCODE_CEIL, + + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, + + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, + + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, + + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ + RC_OPCODE_DPH, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, + + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, + + /** vec4 instruction: dst.c = floor(src0.c) */ + RC_OPCODE_FLR, + + /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ + RC_OPCODE_FRC, + + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, + + /** scalar instruction: dst = log_2(src0.x) */ + RC_OPCODE_LG2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, + + /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ + RC_OPCODE_LRP, + + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, + + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, + + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, + + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, + + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, + + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, + + /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_SCS, + + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, + + /** vec4 instruction: dst.c = 0.0 */ + RC_OPCODE_SFL, + + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, + + /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGT, + + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, + + /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLE, + + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, + + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, + + /** vec4 instruction: dst.c = src0.c - src1.c */ + RC_OPCODE_SUB, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_SWZ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_XPD, + + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, + + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. + */ + RC_OPCODE_IF, + + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, + + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, + + RC_OPCODE_BGNLOOP, + + RC_OPCODE_BRK, + + RC_OPCODE_ENDLOOP, + + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, + + /** special instruction, used in R300-R500 fragment programs + * to indicate the start of a block of texture instructions that + * can run simultaneously. */ + RC_OPCODE_BEGIN_TEX, + + MAX_RC_OPCODE +} rc_opcode; + + +struct rc_opcode_info { + rc_opcode Opcode; + const char * Name; + + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. */ + unsigned int HasTexture:1; + + unsigned int NumSrcRegs:2; + unsigned int HasDstReg:1; + + /** true if this instruction affects control flow */ + unsigned int IsFlowControl:1; + + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise:1; + + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar:1; +}; + +extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; + +static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +{ + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); + + return &rc_opcodes[opcode]; +} + +struct rc_instruction; + +void rc_compute_sources_for_writemask( + const struct rc_instruction *inst, + unsigned int writemask, + unsigned int *srcmasks); + +#endif /* RADEON_OPCODES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c new file mode 100644 index 0000000000..21d7210888 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -0,0 +1,446 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(outer.Swizzle, chan); + if (swz < 4) + combine.Negate |= GET_BIT(inner.Negate, swz) << chan; + } + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +struct peephole_state { + struct radeon_compiler * C; + struct rc_instruction * Mov; + unsigned int Conflict:1; + + /** Whether Mov's source has been clobbered */ + unsigned int SourceClobbered:1; + + /** Which components of Mov's destination register are still from that Mov? */ + unsigned int MovMask:4; + + /** Which components of Mov's destination register are clearly *not* from that Mov */ + unsigned int DefinedMask:4; + + /** Which components of Mov's source register are sourced */ + unsigned int SourcedMask:4; + + /** Branch depth beyond Mov; negative value indicates we left the Mov's block */ + int BranchDepth; +}; + +static void peephole_scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) + return; + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + s->Conflict = 1; + return; + } + if ((mask & s->MovMask) == mask) { + if (s->SourceClobbered) { + s->Conflict = 1; + } + } else if ((mask & s->DefinedMask) == mask) { + /* read from something entirely written by other instruction: this is okay */ + } else { + /* read from component combination that is not well-defined without + * the MOV: cannot remove it */ + s->Conflict = 1; + } +} + +static void peephole_scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (s->BranchDepth < 0) + return; + + if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) { + s->MovMask &= ~mask; + if (s->BranchDepth == 0) + s->DefinedMask |= mask; + else + s->DefinedMask &= ~mask; + } + if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) { + if (mask & s->SourcedMask) + s->SourceClobbered = 1; + } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { + s->SourceClobbered = 1; + } +} + +static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct peephole_state s; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult) + return; + + memset(&s, 0, sizeof(s)); + s.C = c; + s.Mov = inst_mov; + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); + s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; + } + + /* 1st pass: Check whether all subsequent readers can be changed */ + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, peephole_scan_read, &s); + rc_for_all_writes_mask(inst, peephole_scan_write, &s); + if (s.Conflict) + return; + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) { + s.DefinedMask &= ~s.MovMask; + s.MovMask = 0; + } + } + } + } + + if (s.Conflict) + return; + + /* 2nd pass: We can satisfy all readers, so switch them over all at once */ + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.BranchDepth = 0; + + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { + unsigned int refmask = 0; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + refmask |= (1 << swz) & RC_MASK_XYZW; + } + + if ((refmask & s.MovMask) == refmask) + inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]); + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY && + inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) { + s.MovMask &= ~inst->U.I.DstReg.WriteMask; + } + } + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) + break; /* no more readers after this point */ + } + } + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + + struct rc_constant * constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + struct rc_src_register newsrc = inst->U.I.SrcReg[src]; + int have_real_reference = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + if (swz >= 4) + continue; + + unsigned int newswz; + float imm = constant->u.Immediate[swz]; + float baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } + + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); +} + +void rc_optimize(struct radeon_compiler * c) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + peephole(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c new file mode 100644 index 0000000000..8a912da461 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -0,0 +1,280 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct live_intervals { + int Start; + int End; + struct live_intervals * Next; +}; + +struct register_info { + struct live_intervals Live; + + unsigned int Used:1; + unsigned int Allocated:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct hardware_register { + struct live_intervals * Used; +}; + +struct regalloc_state { + struct radeon_compiler * C; + + struct register_info Input[RC_REGISTER_MAX_INDEX]; + struct register_info Temporary[RC_REGISTER_MAX_INDEX]; + + struct hardware_register * HwTemporary; + unsigned int NumHwTemporaries; +}; + +static void print_live_intervals(struct live_intervals * src) +{ + if (!src) { + DBG("(null)"); + return; + } + + while(src) { + DBG("(%i,%i)", src->Start, src->End); + src = src->Next; + } +} + +static void add_live_intervals(struct regalloc_state * s, + struct live_intervals ** dst, struct live_intervals * src) +{ + struct live_intervals ** dst_backup = dst; + + if (VERBOSE) { + DBG("add_live_intervals: "); + print_live_intervals(*dst); + DBG(" to "); + print_live_intervals(src); + DBG("\n"); + } + + while(src) { + if (*dst && (*dst)->End < src->Start) { + dst = &(*dst)->Next; + } else if (!*dst || (*dst)->Start > src->End) { + struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); + li->Start = src->Start; + li->End = src->End; + li->Next = *dst; + *dst = li; + src = src->Next; + } else { + if (src->End > (*dst)->End) + (*dst)->End = src->End; + if (src->Start < (*dst)->Start) + (*dst)->Start = src->Start; + src = src->Next; + } + } + + if (VERBOSE) { + DBG(" result: "); + print_live_intervals(*dst_backup); + DBG("\n"); + } +} + +static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src) +{ + if (VERBOSE) { + DBG("overlap_live_intervals: "); + print_live_intervals(dst); + DBG(" to "); + print_live_intervals(src); + DBG("\n"); + } + + while(src && dst) { + if (dst->End <= src->Start) { + dst = dst->Next; + } else if (dst->End <= src->End) { + DBG(" overlap\n"); + return 1; + } else if (dst->Start < src->End) { + DBG(" overlap\n"); + return 1; + } else { + src = src->Next; + } + } + + DBG(" no overlap\n"); + + return 0; +} + +static int try_add_live_intervals(struct regalloc_state * s, + struct live_intervals ** dst, struct live_intervals * src) +{ + if (overlap_live_intervals(*dst, src)) + return 0; + + add_live_intervals(s, dst, src); + return 1; +} + +static void scan_callback(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct regalloc_state * s = data; + struct register_info * reg; + + if (file == RC_FILE_TEMPORARY) + reg = &s->Temporary[index]; + else if (file == RC_FILE_INPUT) + reg = &s->Input[index]; + else + return; + + if (!reg->Used) { + reg->Used = 1; + if (file == RC_FILE_INPUT) + reg->Live.Start = -1; + else + reg->Live.Start = inst->IP; + reg->Live.End = inst->IP; + } else { + if (inst->IP > reg->Live.End) + reg->Live.End = inst->IP; + } +} + +static void compute_live_intervals(struct regalloc_state * s) +{ + rc_recompute_ips(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, scan_callback, s); + rc_for_all_writes_mask(inst, scan_callback, s); + } +} + +static void remap_register(void * data, struct rc_instruction * inst, + rc_register_file * file, unsigned int * index) +{ + struct regalloc_state * s = data; + const struct register_info * reg; + + if (*file == RC_FILE_TEMPORARY) + reg = &s->Temporary[*index]; + else if (*file == RC_FILE_INPUT) + reg = &s->Input[*index]; + else + return; + + if (reg->Allocated) { + *file = reg->File; + *index = reg->Index; + } +} + +static void do_regalloc(struct regalloc_state * s) +{ + /* Simple and stupid greedy register allocation */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + struct register_info * reg = &s->Temporary[index]; + + if (!reg->Used) + continue; + + for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { + if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, ®->Live)) { + reg->Allocated = 1; + reg->File = RC_FILE_TEMPORARY; + reg->Index = hwreg; + goto success; + } + } + + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + + success:; + } + + /* Rewrite all instructions based on the translation table we built */ + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_register, s); + } +} + +static void alloc_input(void * data, unsigned int input, unsigned int hwreg) +{ + struct regalloc_state * s = data; + + if (!s->Input[input].Used) + return; + + add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live); + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; + +} + +void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps) +{ + struct regalloc_state s; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + s.NumHwTemporaries = maxtemps; + s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register)); + memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register)); + + compute_live_intervals(&s); + + c->AllocateHwInputs(c, &alloc_input, &s); + + do_regalloc(&s); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c new file mode 100644 index 0000000000..a279549ff8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -0,0 +1,501 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct schedule_instruction { + struct rc_instruction * Instruction; + + /** Next instruction in the linked list of ready instructions. */ + struct schedule_instruction *NextReady; + + /** Values that this instruction reads and writes */ + struct reg_value * WriteValues[4]; + struct reg_value * ReadValues[12]; + unsigned int NumWriteValues:3; + unsigned int NumReadValues:4; + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + unsigned int NumDependencies:5; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + struct schedule_instruction *Reader; + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * RC_FILE_TEMPORARY. + */ +struct reg_value { + struct schedule_instruction * Writer; + + /** + * Unordered linked list of instructions that read from this value. + * When this value becomes available, we increase all readers' + * dependency count. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is decremented each time + * a reader of the value is committed. + * When the reader cound reaches zero, the dependency count + * of the instruction writing \ref Next is decremented. + */ + unsigned int NumReaders; + + struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ +}; + +struct register_state { + struct reg_value * Values[4]; +}; + +struct schedule_state { + struct radeon_compiler * C; + struct schedule_instruction * Current; + + struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + + /** + * Linked lists of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + /*@{*/ + struct schedule_instruction *ReadyFullALU; + struct schedule_instruction *ReadyRGB; + struct schedule_instruction *ReadyAlpha; + struct schedule_instruction *ReadyTEX; + /*@}*/ +}; + +static struct reg_value ** get_reg_valuep(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + if (file != RC_FILE_TEMPORARY) + return 0; + + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->Temporary[index].Values[chan]; +} + +static struct reg_value * get_reg_value(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + if (!pv) + return 0; + return *pv; +} + +static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +{ + inst->NextReady = *list; + *list = inst; +} + +static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i is now ready\n", sinst->Instruction->IP); + + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) + add_inst_to_list(&s->ReadyTEX, sinst); + else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyRGB, sinst); + else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyAlpha, sinst); + else + add_inst_to_list(&s->ReadyFullALU, sinst); +} + +static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +{ + assert(sinst->NumDependencies > 0); + sinst->NumDependencies--; + if (!sinst->NumDependencies) + instruction_ready(s, sinst); +} + +static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i: commit\n", sinst->Instruction->IP); + + for(unsigned int i = 0; i < sinst->NumReadValues; ++i) { + struct reg_value * v = sinst->ReadValues[i]; + assert(v->NumReaders > 0); + v->NumReaders--; + if (!v->NumReaders) { + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } + + for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) { + struct reg_value * v = sinst->WriteValues[i]; + if (v->NumReaders) { + for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { + decrease_dependencies(s, r->Reader); + } + } else { + /* This happens in instruction sequences of the type + * OP r.x, ...; + * OP r.x, r.x, ...; + * See also the subtlety in how instructions that both + * read and write the same register are scanned. + */ + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + */ +static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +{ + struct schedule_instruction *readytex; + + assert(s->ReadyTEX); + + /* Don't let the ready list change under us! */ + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + + /* Node marker for R300 */ + struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + + /* Link texture instructions back in */ + while(readytex) { + struct schedule_instruction * tex = readytex; + readytex = readytex->NextReady; + + rc_insert_instruction(before->Prev, tex->Instruction); + commit_instruction(s, tex); + } +} + + +static int destructive_merge_instructions( + struct rc_pair_instruction * rgb, + struct rc_pair_instruction * alpha) +{ + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); + assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + + /* Copy alpha args into rgb */ + const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; + rc_register_file file = 0; + unsigned int index = 0; + + if (alpha->Alpha.Arg[arg].Swizzle < 3) { + srcrgb = 1; + file = alpha->RGB.Src[oldsrc].File; + index = alpha->RGB.Src[oldsrc].Index; + } else if (alpha->Alpha.Arg[arg].Swizzle < 4) { + srcalpha = 1; + file = alpha->Alpha.Src[oldsrc].File; + index = alpha->Alpha.Src[oldsrc].Index; + } + + int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + if (source < 0) + return 0; + + rgb->Alpha.Arg[arg].Source = source; + rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; + rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; + rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; + } + + /* Copy alpha opcode into rgb */ + rgb->Alpha.Opcode = alpha->Alpha.Opcode; + rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; + rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; + rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; + rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; + rgb->Alpha.Saturate = alpha->Alpha.Saturate; + + /* Merge ALU result writing */ + if (alpha->WriteALUResult) { + if (rgb->WriteALUResult) + return 0; + + rgb->WriteALUResult = alpha->WriteALUResult; + rgb->ALUResultCompare = alpha->ALUResultCompare; + } + + return 1; +} + +/** + * Try to merge the given instructions into the rgb instructions. + * + * Return true on success; on failure, return false, and keep + * the instructions untouched. + */ +static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +{ + struct rc_pair_instruction backup; + + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + + if (destructive_merge_instructions(rgb, alpha)) + return 1; + + memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); + return 0; +} + + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +{ + struct schedule_instruction * sinst; + + if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + } else if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_instruction(s, sinst); + } else { + struct schedule_instruction **prgb; + struct schedule_instruction **palpha; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + struct schedule_instruction * psirgb = *prgb; + struct schedule_instruction * psialpha = *palpha; + + if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) + continue; + + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + rc_insert_instruction(before->Prev, psirgb->Instruction); + commit_instruction(s, psirgb); + commit_instruction(s, psialpha); + goto success; + } + } + + /* No success in pairing; just take the first RGB instruction */ + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_instruction(s, sinst); + success: ; + } +} + +static void scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value * v = get_reg_value(s, file, index, chan); + + if (!v) + return; + + if (v->Writer == s->Current) { + /* The instruction reads and writes to a register component. + * In this case, we only want to increment dependencies by one. + */ + return; + } + + DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader->Reader = s->Current; + reader->Next = v->Readers; + v->Readers = reader; + v->NumReaders++; + + s->Current->NumDependencies++; + + if (s->Current->NumReadValues >= 12) { + rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); + } else { + s->Current->ReadValues[s->Current->NumReadValues++] = v; + } +} + +static void scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + + if (!pv) + return; + + DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + memset(newv, 0, sizeof(*newv)); + + newv->Writer = s->Current; + + if (*pv) { + (*pv)->Next = newv; + s->Current->NumDependencies++; + } + + *pv = newv; + + if (s->Current->NumWriteValues >= 4) { + rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); + } else { + s->Current->WriteValues[s->Current->NumWriteValues++] = newv; + } +} + +static void schedule_block(struct r300_fragment_program_compiler * c, + struct rc_instruction * begin, struct rc_instruction * end) +{ + struct schedule_state s; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + + /* Scan instructions for data dependencies */ + unsigned int ip = 0; + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); + memset(s.Current, 0, sizeof(struct schedule_instruction)); + + s.Current->Instruction = inst; + inst->IP = ip++; + + DBG("%i: Scanning\n", inst->IP); + + /* The order of things here is subtle and maybe slightly + * counter-intuitive, to account for the case where an + * instruction writes to the same register as it reads + * from. */ + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); + + DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + + if (!s.Current->NumDependencies) + instruction_ready(&s, s.Current); + } + + /* Temporarily unlink all instructions */ + begin->Prev->Next = end; + end->Prev = begin->Prev; + + /* Schedule instructions back */ + while(!s.C->Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s, end); + + while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) + emit_one_alu(&s, end); + } +} + +static int is_controlflow(struct rc_instruction * inst) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + return opcode->IsFlowControl; + } + return 0; +} + +void rc_pair_schedule(struct r300_fragment_program_compiler *c) +{ + struct rc_instruction * inst = c->Base.Program.Instructions.Next; + while(inst != &c->Base.Program.Instructions) { + if (is_controlflow(inst)) { + inst = inst->Next; + continue; + } + + struct rc_instruction * first = inst; + + while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) + inst = inst->Next; + + DBG("Schedule one block\n"); + schedule_block(c, first, inst); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c new file mode 100644 index 0000000000..407a0a55ee --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler.h" + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct rc_sub_instruction *inst) +{ + struct rc_src_register tmp; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[1].Negate = RC_MASK_NONE; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case RC_OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_MUL: + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct rc_sub_instruction * inst, + int * needrgb, int * needalpha, int * istranscendent) +{ + *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; + *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; + *istranscendent = 0; + + if (inst->WriteALUResult == RC_ALURESULT_X) + *needrgb = 1; + else if (inst->WriteALUResult == RC_ALURESULT_W) + *needalpha = 1; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + case RC_OPCODE_CMP: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MAD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MOV: + case RC_OPCODE_MUL: + break; + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: + *istranscendent = 1; + *needalpha = 1; + break; + case RC_OPCODE_DP4: + *needalpha = 1; + /* fall through */ + case RC_OPCODE_DP3: + *needrgb = 1; + break; + default: + break; + } +} + + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static void set_pair_instruction(struct r300_fragment_program_compiler *c, + struct rc_pair_instruction * pair, + struct rc_sub_instruction * inst) +{ + memset(pair, 0, sizeof(struct rc_pair_instruction)); + + int needrgb, needalpha, istranscendent; + classify_instruction(inst, &needrgb, &needalpha, &istranscendent); + + if (needrgb) { + if (istranscendent) + pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (needalpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + int i; + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + int source; + if (needrgb && !istranscendent) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + int j; + for(j = 0; j < 3; ++j) { + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + } + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + } + if (needalpha) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); + } + } + + /* Destination handling */ + if (inst->DstReg.File == RC_FILE_OUTPUT) { + if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else { + for (i = 0; i < 4; i++) { + if (inst->DstReg.Index == c->OutputColor[i]) { + pair->RGB.Target = i; + pair->Alpha.Target = i; + pair->RGB.OutputWriteMask |= + inst->DstReg.WriteMask & RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= + GET_BIT(inst->DstReg.WriteMask, 3); + break; + } + } + } + } else { + if (needrgb) { + pair->RGB.DestIndex = inst->DstReg.Index; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + } + if (needalpha) { + pair->Alpha.DestIndex = inst->DstReg.Index; + pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } + + if (inst->WriteALUResult) { + pair->WriteALUResult = inst->WriteALUResult; + pair->ALUResultCompare = inst->ALUResultCompare; + } +} + + +/** + * Translate all ALU instructions into corresponding pair instructions, + * performing no other changes. + */ +void rc_pair_translate(struct r300_fragment_program_compiler *c) +{ + for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; + inst != &c->Base.Program.Instructions; + inst = inst->Next) { + if (inst->Type != RC_INSTRUCTION_NORMAL) + continue; + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) + continue; + + struct rc_sub_instruction copy = inst->U.I; + + final_rewrite(©); + inst->Type = RC_INSTRUCTION_PAIR; + set_pair_instruction(c, &inst->U.P, ©); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c new file mode 100644 index 0000000000..a3c41d7bd4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include <stdio.h> + +#include "radeon_compiler.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + struct radeon_compiler * c, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + + while(inst != &c->Program.Instructions) { + struct rc_instruction * current = inst; + int i; + + inst = inst->Next; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(c, current, t->userData)) + break; + } + } +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +unsigned int rc_find_free_temporary(struct radeon_compiler * c) +{ + char used[RC_REGISTER_MAX_INDEX]; + unsigned int i; + struct rc_instruction * rcinst; + + memset(used, 0, sizeof(used)); + + for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { + const struct rc_sub_instruction *inst = &rcinst->U.I; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); + unsigned int k; + + for (k = 0; k < opcode->NumSrcRegs; k++) { + if (inst->SrcReg[k].File == RC_FILE_TEMPORARY) + used[inst->SrcReg[k].Index] = 1; + } + + if (opcode->HasDstReg) { + if (inst->DstReg.File == RC_FILE_TEMPORARY) + used[inst->DstReg.Index] = 1; + } + } + + for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) { + if (!used[i]) + return i; + } + + rc_error(c, "Ran out of temporary registers\n"); + return 0; +} + + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) +{ + struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); + + memset(inst, 0, sizeof(struct rc_instruction)); + + inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; + + return inst; +} + +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) +{ + inst->Prev = after; + inst->Next = after->Next; + + inst->Prev->Next = inst; + inst->Next->Prev = inst; +} + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + rc_insert_instruction(after, inst); + + return inst; +} + +void rc_remove_instruction(struct rc_instruction * inst) +{ + inst->Prev->Next = inst->Next; + inst->Next->Prev = inst->Prev; +} + +/** + * Return the number of instructions in the program. + */ +unsigned int rc_recompute_ips(struct radeon_compiler * c) +{ + unsigned int ip = 0; + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + inst->IP = ip++; + } + + c->Program.Instructions.IP = 0xcafedead; + + return ip; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h new file mode 100644 index 0000000000..e318867696 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -0,0 +1,233 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include <stdint.h> +#include <string.h> + +#include "radeon_opcodes.h" +#include "radeon_code.h" +#include "radeon_program_constants.h" +#include "radeon_program_pair.h" + +struct radeon_compiler; + +struct rc_src_register { + unsigned int File:3; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int Swizzle:12; + + /** Take the component-wise absolute value */ + unsigned int Abs:1; + + /** Post-Abs negation. */ + unsigned int Negate:4; +}; + +struct rc_dst_register { + unsigned int File:3; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int WriteMask:4; +}; + +/** + * Instructions are maintained by the compiler in a doubly linked list + * of these structures. + * + * This instruction format is intended to be expanded for hardware-specific + * trickery. At different stages of compilation, a different set of + * instruction types may be valid. + */ +struct rc_sub_instruction { + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + unsigned int Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + unsigned int SaturateMode:2; + + /** + * Writing to the special register RC_SPECIAL_ALU_RESULT + */ + /*@{*/ + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; + /*@}*/ + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + unsigned int TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + /*@}*/ +}; + +typedef enum { + RC_INSTRUCTION_NORMAL = 0, + RC_INSTRUCTION_PAIR +} rc_instruction_type; + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + struct rc_pair_instruction P; + } U; + + /** + * Warning: IPs are not stable. If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips + */ + unsigned int IP; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; +}; + +enum { + OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */ +}; + + +static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +static inline unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +static inline void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + + +/** + * A transformation that can be passed to \ref radeonLocalTransform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return true, or return false if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + int (*function)( + struct radeon_compiler*, + struct rc_instruction*, + void*); + void *userData; +}; + +void radeonLocalTransform( + struct radeon_compiler *c, + int num_transformations, + struct radeon_program_transformation* transformations); + +unsigned int rc_find_free_temporary(struct radeon_compiler * c); + +struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); +void rc_remove_instruction(struct rc_instruction * inst); + +unsigned int rc_recompute_ips(struct radeon_compiler * c); + +void rc_print_program(const struct rc_program *prog); + +#endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c new file mode 100644 index 0000000000..c922d3d9a4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -0,0 +1,975 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "radeon_compiler.h" + + +static struct rc_instruction *emit1( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg; + return fpi; +} + +static struct rc_instruction *emit2( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + return fpi; +} + +static struct rc_instruction *emit3( + struct radeon_compiler * c, struct rc_instruction * after, + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, + struct rc_src_register SrcReg2) +{ + struct rc_instruction *fpi = rc_insert_new_instruction(c, after); + + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + fpi->U.I.SrcReg[2] = SrcReg2; + return fpi; +} + +static struct rc_dst_register dstreg(int file, int index) +{ + struct rc_dst_register dst; + dst.File = file; + dst.Index = index; + dst.WriteMask = RC_MASK_XYZW; + dst.RelAddr = 0; + return dst; +} + +static struct rc_dst_register dstregtmpmask(int index, int mask) +{ + struct rc_dst_register dst = {0}; + dst.File = RC_FILE_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + dst.RelAddr = 0; + return dst; +} + +static const struct rc_src_register builtin_zero = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_0000 +}; +static const struct rc_src_register builtin_one = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_1111 +}; +static const struct rc_src_register srcreg_undefined = { + .File = RC_FILE_NONE, + .Index = 0, + .Swizzle = RC_SWIZZLE_XYZW +}; + +static struct rc_src_register srcreg(int file, int index) +{ + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct rc_src_register srcregswz(int file, int index, int swz) +{ + struct rc_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct rc_src_register absolute(struct rc_src_register reg) +{ + struct rc_src_register newreg = reg; + newreg.Abs = 1; + newreg.Negate = RC_MASK_NONE; + return newreg; +} + +static struct rc_src_register negate(struct rc_src_register reg) +{ + struct rc_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; + return newreg; +} + +static struct rc_src_register swizzle(struct rc_src_register reg, + rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) +{ + struct rc_src_register swizzled = reg; + swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); + return swizzled; +} + +static struct rc_src_register swizzle_smear(struct rc_src_register reg, + rc_swizzle x) +{ + return swizzle(reg, x, x, x, x); +} + +static struct rc_src_register swizzle_xxxx(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_X); +} + +static struct rc_src_register swizzle_yyyy(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_Y); +} + +static struct rc_src_register swizzle_zzzz(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_Z); +} + +static struct rc_src_register swizzle_wwww(struct rc_src_register reg) +{ + return swizzle_smear(reg, RC_SWIZZLE_W); +} + +static void transform_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src = inst->U.I.SrcReg[0]; + src.Abs = 1; + src.Negate = RC_MASK_NONE; + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); + rc_remove_instruction(inst); +} + +static void transform_CEIL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Assuming: + * ceil(x) = -floor(-x) + * + * After inlining floor: + * ceil(x) = -(-x-frac(-x)) + * + * After simplification: + * ceil(x) = x+frac(-x) + */ + + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + rc_remove_instruction(inst); +} + +static void transform_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + +static void transform_DPH(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); + rc_remove_instruction(inst); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); + rc_remove_instruction(inst); +} + +static void transform_FLR(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + rc_remove_instruction(inst); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + unsigned int constant; + unsigned int constant_swizzle; + unsigned int temp; + struct rc_src_register srctemp; + + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); + + if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { + struct rc_instruction * inst_mov; + + inst_mov = emit1(c, inst, + RC_OPCODE_MOV, 0, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); + + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + temp = inst->U.I.DstReg.Index; + srctemp = srcreg(RC_FILE_TEMPORARY, temp); + + /* tmp.x = max(0.0, Src.x); */ + /* tmp.y = max(0.0, Src.y); */ + /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(temp, RC_MASK_XYW), + inst->U.I.SrcReg[0], + swizzle(srcreg(RC_FILE_CONSTANT, constant), + RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, + dstregtmpmask(temp, RC_MASK_Z), + swizzle_wwww(srctemp), + negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); + + /* tmp.w = Pow(tmp.y, tmp.w) */ + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_yyyy(srctemp)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp), + swizzle_zzzz(srctemp)); + emit1(c, inst->Prev, RC_OPCODE_EX2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle_wwww(srctemp)); + + /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_Z), + negate(swizzle_xxxx(srctemp)), + swizzle_wwww(srctemp), + builtin_zero); + + /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_XYW), + swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); + + rc_remove_instruction(inst); +} + +static void transform_LRP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + dstreg(RC_FILE_TEMPORARY, tempreg), + inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); + + rc_remove_instruction(inst); +} + +static void transform_POW(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + tempdst.WriteMask = RC_MASK_W; + tempsrc.Swizzle = RC_SWIZZLE_WWWW; + + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); + + rc_remove_instruction(inst); +} + +static void transform_RSQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); +} + +static void transform_SEQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SFL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); + rc_remove_instruction(inst); +} + +static void transform_SGE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SLT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SNE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SUB(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_ADD; + inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); +} + +static void transform_SWZ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + inst->U.I.Opcode = RC_OPCODE_MOV; +} + +static void transform_XPD(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + + rc_remove_instruction(inst); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_DST: transform_DST(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; + case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; + case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; + case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + + +static void transform_r300_vertex_ABS(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* Note: r500 can take absolute values, but r300 cannot. */ + inst->U.I.Opcode = RC_OPCODE_MAX; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_CMP(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* There is no decent CMP available, so let's rig one up. + * CMP is defined as dst = src0 < 0.0 ? src1 : src2 + * The following sequence consumes two temps and two extra slots + * (the second temp and the second slot is consumed by transform_LRP), + * but should be equivalent: + * + * SLT tmp0, src0, 0.0 + * LRP dst, tmp0, src1, src2 + * + * Yes, I know, I'm a mad scientist. ~ C. & M. */ + int tempreg0 = rc_find_free_temporary(c); + + /* SLT tmp0, src0, 0.0 */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstreg(RC_FILE_TEMPORARY, tempreg0), + inst->U.I.SrcReg[0], builtin_zero); + + /* LRP dst, tmp0, src1, src2 */ + transform_LRP(c, + emit3(c, inst->Prev, RC_OPCODE_LRP, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + unsigned constant_swizzle; + int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, + 0.0000000000000000001, + &constant_swizzle); + + /* MOV dst, src */ + emit1(c, inst->Prev, RC_OPCODE_MOV, 0, + dstreg(RC_FILE_TEMPORARY, tempreg), + inst->U.I.SrcReg[0]); + + /* MAX dst.z, src, 0.00...001 */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(tempreg, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, tempreg), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg); +} + +static void transform_r300_vertex_SEQ(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x = y <==> x >= y && y >= x */ + int tmp = rc_find_free_temporary(c); + + /* x <= y */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y <= x */ + emit2(c, inst->Prev, RC_OPCODE_SGE, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x && y = x * y */ + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SNE(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x != y <==> x < y || y < x */ + int tmp = rc_find_free_temporary(c); + + /* x < y */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y < x */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x || y = max(x, y) */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + +static void transform_r300_vertex_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x > y <==> -x < -y */ + inst->U.I.Opcode = RC_OPCODE_SLT; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +static void transform_r300_vertex_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* x <= y <==> -x >= -y */ + inst->U.I.Opcode = RC_OPCODE_SGE; + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; +} + +/** + * For use with radeonLocalTransform, this transforms non-native ALU + * instructions of the r300 up to r500 vertex engine. + */ +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction* inst, + void* unused) +{ + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; + case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_SEQ: + if (!c->is_r500) { + transform_r300_vertex_SEQ(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; + case RC_OPCODE_SNE: + if (!c->is_r500) { + transform_r300_vertex_SNE(c, inst); + return 1; + } + return 0; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; + default: + return 0; + } +} + +static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) +{ + static const float SinCosConsts[2][4] = { + { + 1.273239545, /* 4/PI */ + -0.405284735, /* -4/(PI*PI) */ + 3.141592654, /* PI */ + 0.2225 /* weight */ + }, + { + 0.75, + 0.5, + 0.159154943, /* 1/(2*PI) */ + 6.283185307 /* 2*PI */ + } + }; + int i; + + for(i = 0; i < 2; ++i) + constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx( + struct radeon_compiler* c, struct rc_instruction * inst, + struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) +{ + unsigned int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(src), + srcreg(RC_FILE_CONSTANT, constants[0])); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(src)), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), + negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +int radeonTransformTrigSimple(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + unsigned int constants[2]; + unsigned int tempreg = rc_find_free_temporary(c); + + sincos_constants(c, constants); + + if (inst->U.I.Opcode == RC_OPCODE_COS) { + /* MAD tmp.x, src, 1/(2*PI), 0.75 */ + /* FRC tmp.x, tmp.x */ + /* MAD tmp.z, tmp.x, 2*PI, -PI */ + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } else { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle_xxxx(inst->U.I.SrcReg[0]), + swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg), + swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), + negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); + + struct rc_dst_register dst = inst->U.I.DstReg; + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; + sin_approx(c, inst, dst, + swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; + sin_approx(c, inst, dst, + swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), + constants); + } + + rc_remove_instruction(inst); + + return 1; +} + +static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, + struct rc_instruction *inst, + unsigned srctmp) +{ + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; + + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); + } + } + + rc_remove_instruction(inst); +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +int radeonTransformTrigScale(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + static const float RCP_2PI = 0.15915494309189535; + unsigned int temp; + unsigned int constant; + unsigned int constant_swizzle; + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); + + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * so that the input to COS and SIN is always in the range [-PI, PI]. + * SCS is replaced by one COS and one SIN instruction. + */ +int r300_transform_trig_scale_vertex(struct radeon_compiler *c, + struct rc_instruction *inst, + void *unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; + + /* Repeat x in the range [-PI, PI]: + * + * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI + */ + + static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; + unsigned int temp; + unsigned int constant; + + temp = rc_find_free_temporary(c); + constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); + + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle_xxxx(inst->U.I.SrcReg[0]), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), + srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); + + r300_transform_SIN_COS_SCS(c, inst, temp); + return 1; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +int radeonTransformDeriv(struct radeon_compiler* c, + struct rc_instruction* inst, + void* unused) +{ + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; + + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + return 1; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h new file mode 100644 index 0000000000..77d444476f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +int radeonTransformALU( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_vertex_alu( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int radeonTransformTrigSimple( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int radeonTransformTrigScale( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +int r300_transform_trig_scale_vertex( + struct radeon_compiler *c, + struct rc_instruction *inst, + void*); + +int radeonTransformDeriv( + struct radeon_compiler * c, + struct rc_instruction * inst, + void*); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h new file mode 100644 index 0000000000..2ddf60b677 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_PROGRAM_CONSTANTS_H +#define RADEON_PROGRAM_CONSTANTS_H + +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT, + + /** + * Indicates a special register, see RC_SPECIAL_xxx. + */ + RC_FILE_SPECIAL +} rc_register_file; + +enum { + /** R500 fragment program ALU result "register" */ + RC_SPECIAL_ALU_RESULT = 0, + + /** Must be last */ + RC_NUM_SPECIAL_REGISTERS +}; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ + } while(0) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) +#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. + */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +typedef enum { + RC_ALURESULT_NONE = 0, + RC_ALURESULT_X, + RC_ALURESULT_W +} rc_write_aluresult; + +#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c new file mode 100644 index 0000000000..ee839596aa --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2008-2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + + +/** + * Return the source slot where we installed the given register access, + * or -1 if no slot was free anymore. + */ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index) +{ + int candidate = -1; + int candidate_quality = -1; + int i; + + if ((!rgb && !alpha) || file == RC_FILE_NONE) + return 0; + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].File != file || + pair->RGB.Src[i].Index != index) + continue; + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].File != file || + pair->Alpha.Src[i].Index != index) + continue; + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (candidate >= 0) { + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].File = file; + pair->RGB.Src[candidate].Index = index; + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].File = file; + pair->Alpha.Src[candidate].Index = index; + } + } + + return candidate; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h new file mode 100644 index 0000000000..511cc707a3 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_code.h" +#include "radeon_opcodes.h" +#include "radeon_program_constants.h" + +struct r300_fragment_program_compiler; + + +/** + * \file + * Represents a paired ALU instruction, as found in R300 and R500 + * fragment programs. + * + * Note that this representation is taking some liberties as far + * as register files are concerned, to allow separate register + * allocation. + * + * Also note that there are some subtleties in that the semantics + * of certain opcodes are implicitly changed in this representation; + * see \ref rc_pair_translate + */ + + +struct radeon_pair_instruction_source { + unsigned int Used:1; + unsigned int File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct radeon_pair_instruction_rgb { + unsigned int Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:3; + unsigned int Target:2; + unsigned int OutputWriteMask:3; + unsigned int Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + unsigned int Source:2; + unsigned int Swizzle:9; + unsigned int Abs:1; + unsigned int Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction_alpha { + unsigned int Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:1; + unsigned int Target:2; + unsigned int OutputWriteMask:1; + unsigned int DepthWriteMask:1; + unsigned int Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + unsigned int Source:2; + unsigned int Swizzle:3; + unsigned int Abs:1; + unsigned int Negate:1; + } Arg[3]; +}; + +struct rc_pair_instruction { + struct radeon_pair_instruction_rgb RGB; + struct radeon_pair_instruction_alpha Alpha; + + unsigned int WriteALUResult:2; + unsigned int ALUResultCompare:3; +}; + + +/** + * General helper functions for dealing with the paired instruction format. + */ +/*@{*/ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index); +/*@}*/ + + +/** + * Compiler passes that operate with the paired format. + */ +/*@{*/ +struct radeon_pair_handler; + +void rc_pair_translate(struct r300_fragment_program_compiler *c); +void rc_pair_schedule(struct r300_fragment_program_compiler *c); +void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps); +/*@}*/ + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c new file mode 100644 index 0000000000..28fb9eae92 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -0,0 +1,300 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_program.h" + +#include <stdio.h> + +static const char * textarget_to_string(rc_texture_target target) +{ + switch(target) { + case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; + case RC_TEXTURE_CUBE: return "CUBE"; + case RC_TEXTURE_3D: return "3D"; + case RC_TEXTURE_RECT: return "RECT"; + case RC_TEXTURE_2D: return "2D"; + case RC_TEXTURE_1D: return "1D"; + default: return "BAD_TEXTURE_TARGET"; + } +} + +static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) +{ + if (func == RC_COMPARE_FUNC_NEVER) { + fprintf(f, "false"); + } else if (func == RC_COMPARE_FUNC_ALWAYS) { + fprintf(f, "true"); + } else { + const char * op; + switch(func) { + case RC_COMPARE_FUNC_LESS: op = "<"; break; + case RC_COMPARE_FUNC_EQUAL: op = "=="; break; + case RC_COMPARE_FUNC_LEQUAL: op = "<="; break; + case RC_COMPARE_FUNC_GREATER: op = ">"; break; + case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break; + case RC_COMPARE_FUNC_GEQUAL: op = ">="; break; + default: op = "???"; break; + } + fprintf(f, "%s %s %s", lhs, op, rhs); + } +} + +static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +{ + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else if (file == RC_FILE_SPECIAL) { + switch(index) { + case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; + default: fprintf(f, "special[%i]", index); break; + } + } else { + const char * filename; + switch(file) { + case RC_FILE_TEMPORARY: filename = "temp"; break; + case RC_FILE_INPUT: filename = "input"; break; + case RC_FILE_OUTPUT: filename = "output"; break; + case RC_FILE_ADDRESS: filename = "addr"; break; + case RC_FILE_CONSTANT: filename = "const"; break; + default: filename = "BAD FILE"; break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); + } +} + +static void rc_print_mask(FILE * f, unsigned int mask) +{ + if (mask & RC_MASK_X) fprintf(f, "x"); + if (mask & RC_MASK_Y) fprintf(f, "y"); + if (mask & RC_MASK_Z) fprintf(f, "z"); + if (mask & RC_MASK_W) fprintf(f, "w"); +} + +static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +{ + rc_print_register(f, dst.File, dst.Index, dst.RelAddr); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } +} + +static char rc_swizzle_char(unsigned int swz) +{ + switch(swz) { + case RC_SWIZZLE_X: return 'x'; + case RC_SWIZZLE_Y: return 'y'; + case RC_SWIZZLE_Z: return 'z'; + case RC_SWIZZLE_W: return 'w'; + case RC_SWIZZLE_ZERO: return '0'; + case RC_SWIZZLE_ONE: return '1'; + case RC_SWIZZLE_HALF: return 'H'; + case RC_SWIZZLE_UNUSED: return '_'; + } + return '?'; +} + +static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +{ + unsigned int comp; + for(comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + fprintf(f, "%c", rc_swizzle_char(swz)); + } +} + +static void rc_print_src_register(FILE * f, struct rc_src_register src) +{ + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); + + rc_print_register(f, src.File, src.Index, src.RelAddr); + + if (src.Abs && !trivial_negate) + fprintf(f, "|"); + + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); + } + + if (src.Abs && trivial_negate) + fprintf(f, "|"); +} + +static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int reg; + + fprintf(f, "%s", opcode->Name); + + switch(inst->U.I.SaturateMode) { + case RC_SATURATE_NONE: break; + case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; + case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; + default: fprintf(f, "_BAD_SAT"); break; + } + + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->U.I.DstReg); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst->U.I.SrcReg[reg]); + } + + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]", + textarget_to_string(inst->U.I.TexSrcTarget), + inst->U.I.TexShadow ? "SHADOW" : "", + inst->U.I.TexSrcUnit); + } + + fprintf(f, ";"); + + if (inst->U.I.WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, + (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w", + inst->U.I.ALUResultCompare, "0"); + fprintf(f, ")]"); + } + + fprintf(f, "\n"); +} + +static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + int printedsrc = 0; + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.xyz = ", src); + rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); + printedsrc = 1; + } + if (inst->Alpha.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.w = ", src); + rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); + printedsrc = 1; + } + } + fprintf(f, "\n"); + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + fprintf(f, " color[%i].%s%s%s", inst->RGB.Target, + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + if (inst->WriteALUResult == RC_ALURESULT_X) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source, + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), + abs); + } + fprintf(f, "\n"); + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + fprintf(f, " color[%i].w", inst->Alpha.Target); + if (inst->Alpha.DepthWriteMask) + fprintf(f, " depth.w"); + if (inst->WriteALUResult == RC_ALURESULT_W) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source, + rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs); + } + fprintf(f, "\n"); + } + + if (inst->WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); + fprintf(f, ")]\n"); + } +} + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog) +{ + unsigned int linenum = 0; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + if (inst->Type == RC_INSTRUCTION_PAIR) + rc_print_pair_instruction(stderr, inst); + else + rc_print_normal_instruction(stderr, inst); + + linenum++; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c new file mode 100644 index 0000000000..9c4b65f4c0 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -0,0 +1,389 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_tex.h" + +/* Series of transformations to be done on textures. */ + +static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, + int tmu) +{ + struct rc_src_register reg = { 0, }; + + if (compiler->enable_shadow_ambient) { + reg.File = RC_FILE_CONSTANT; + reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_SHADOW_AMBIENT, tmu); + reg.Swizzle = RC_SWIZZLE_WWWW; + } else { + reg.File = RC_FILE_NONE; + reg.Swizzle = RC_SWIZZLE_0000; + } + return reg; +} + +static void lower_texture_rect(struct r300_fragment_program_compiler *compiler, + struct rc_instruction *inst) +{ + struct rc_instruction *inst_rect; + unsigned temp = rc_find_free_temporary(&compiler->Base); + + if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) { + inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev); + + inst_rect->U.I.Opcode = RC_OPCODE_MUL; + inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rect->U.I.DstReg.Index = temp; + inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_rect->U.I.SrcReg[1].Index = + rc_constants_add_state(&compiler->Base.Program.Constants, + RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + } +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following ways: + * - implement texture compare (shadow extensions) + * - extract non-native source / destination operands + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + */ +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || + (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; + + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + } else { + inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + } + + return 1; + } else { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + struct rc_instruction * inst_rcp = NULL; + struct rc_instruction * inst_mad; + struct rc_instruction * inst_cmp; + unsigned tmp_texsample = rc_find_free_temporary(c); + unsigned tmp_sum = rc_find_free_temporary(c); + unsigned tmp_recip_w = 0; + int pass, fail, tex; + + /* Save the output register. */ + struct rc_dst_register output_reg = inst->U.I.DstReg; + + /* Redirect TEX to a new temp. */ + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tmp_texsample; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + tmp_recip_w = rc_find_free_temporary(c); + + /* Compute 1/W. */ + inst_rcp = rc_insert_new_instruction(c, inst); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tmp_recip_w; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + } + + /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */ + inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tmp_sum; + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = tmp_recip_w; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + tex = 2; + } else { + inst_mad->U.I.Opcode = RC_OPCODE_ADD; + tex = 1; + } + inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[tex].Index = tmp_texsample; + inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle; + + /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */ + if (comparefunc == RC_COMPARE_FUNC_EQUAL) { + comparefunc = RC_COMPARE_FUNC_GEQUAL; + } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { + comparefunc = RC_COMPARE_FUNC_LESS; + } + + /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and: + * LESS: r < tex <=> -tex+r < 0 + * GEQUAL: r >= tex <=> not (-tex+r < 0) + * GREATER: r > tex <=> tex-r < 0 + * LEQUAL: r <= tex <=> not ( tex-r < 0) + * + * This negates either r or tex: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW; + else + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + + /* This negates the whole expresion: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + inst_cmp = rc_insert_new_instruction(c, inst_mad); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg = output_reg; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = tmp_sum; + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + } + } + + /* Texture wrap modes don't work on NPOT textures or texrects. + * + * The game plan is simple. We have two flags, fake_npot and + * non_normalized_coords, as well as a tex target. The RECT tex target + * will make the emitted code use non-scaled texcoords. + * + * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and + * mirroring are not. If we need to repeat, we do: + * + * MUL temp, texcoord, <scaling factor constant> + * FRC temp, temp ; Discard integer portion of coords + * + * This gives us coords in [0, 1]. + * + * Mirroring is trickier. We're going to start out like repeat: + * + * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes + * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2] + * ; so scale to [0, 1] + * FRC temp, temp ; Make the pattern repeat + * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1] + * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern. + * ; The pattern is backwards, so reverse it (1-x). + * + * This gives us coords in [0, 1]. + * + * ~ C & M. ;) + */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT || + compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot || + compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) { + rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode; + + /* R300 cannot sample from rectangles. */ + if (!c->is_r500) { + lower_texture_rect(compiler, inst); + } + + if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot && + wrapmode != RC_WRAP_NONE) { + struct rc_instruction *inst_mov; + unsigned temp = rc_find_free_temporary(c); + + /* For NPOT fallback, we need normalized coordinates anyway. */ + if (c->is_r500) { + lower_texture_rect(compiler, inst); + } + + if (wrapmode == RC_WRAP_REPEAT) { + /* Both instructions will be paired up. */ + struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) { + /* + * Function: + * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1) + * + * Code: + * MUL temp, src0, 0.5 + * FRC temp, temp + * MAD temp, temp, 2, -1 + * ADD temp, 1, -abs(temp) + */ + + struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add; + unsigned two, two_swizzle; + + inst_mul = rc_insert_new_instruction(c, inst->Prev); + + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = temp; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH; + + inst_frc = rc_insert_new_instruction(c, inst->Prev); + + inst_frc->U.I.Opcode = RC_OPCODE_FRC; + inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_frc->U.I.DstReg.Index = temp; + inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_frc->U.I.SrcReg[0].Index = temp; + inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + + two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle); + inst_mad = rc_insert_new_instruction(c, inst->Prev); + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = temp; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = temp; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = two; + inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111; + inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ; + + inst_add = rc_insert_new_instruction(c, inst->Prev); + + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = temp; + inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = temp; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; + inst_add->U.I.SrcReg[1].Abs = 1; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ; + } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) { + /* + * Mirrored clamp modes are bloody simple, we just use abs + * to mirror [0, 1] into [-1, 0]. This works for + * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER. + */ + struct rc_instruction *inst_mov; + + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Abs = 1; + } + + /* Preserve W for TXP/TXB. */ + inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = temp; + inst_mov->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = temp; + } + } + + /* Cannot write texture to output registers (all chips) or with masks (non-r500) */ + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + } + + /* Cannot read texture coordinate from constants file */ + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; + } + + return 1; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h new file mode 100644 index 0000000000..a0105051ac --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2010 Corbin Simpson + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_TEX_H_ +#define __RADEON_PROGRAM_TEX_H_ + +#include "radeon_compiler.h" +#include "radeon_program.h" + +int radeonTransformTEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); + +#endif /* __RADEON_PROGRAM_TEX_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h new file mode 100644 index 0000000000..c81d5f7a5e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_SWIZZLE_H +#define RADEON_SWIZZLE_H + +#include "radeon_program.h" + +struct rc_swizzle_split { + unsigned char NumPhases; + unsigned char Phase[4]; +}; + +/** + * Describe the swizzling capability of target hardware. + */ +struct rc_swizzle_caps { + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. + */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); +}; + +#endif /* RADEON_SWIZZLE_H */ diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c new file mode 100644 index 0000000000..d2c25fb9cd --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -0,0 +1,663 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "r300_context.h" + +#include "r300_blit.h" +#include "r300_cmdbuf.h" +#include "r300_emit.h" +#include "r300_tex.h" +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_opcodes.h" + +static void vp_ins_outs(struct r300_vertex_program_compiler *c) +{ + c->code->inputs[VERT_ATTRIB_POS] = 0; + c->code->inputs[VERT_ATTRIB_TEX0] = 1; + c->code->outputs[VERT_RESULT_HPOS] = 0; + c->code->outputs[VERT_RESULT_TEX0] = 1; +} + +static void fp_allocate_hw_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) +{ + allocate(mydata, FRAG_ATTRIB_TEX0, 0); +} + +static void create_vertex_program(struct r300_context *r300) +{ + struct r300_vertex_program_compiler compiler; + struct rc_instruction *inst; + + rc_init(&compiler.Base); + + inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = VERT_RESULT_HPOS; + inst->U.I.DstReg.RelAddr = 0; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = VERT_ATTRIB_POS; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = VERT_RESULT_TEX0; + inst->U.I.DstReg.RelAddr = 0; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = VERT_ATTRIB_TEX0; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler.Base.Program.InputsRead = (1 << VERT_ATTRIB_POS) | (1 << VERT_ATTRIB_TEX0); + compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0); + compiler.SetHwInputOutput = vp_ins_outs; + compiler.code = &r300->blit.vp_code; + + r3xx_compile_vertex_program(&compiler); +} + +static void create_fragment_program(struct r300_context *r300) +{ + struct r300_fragment_program_compiler compiler; + struct rc_instruction *inst; + + memset(&compiler, 0, sizeof(struct r300_fragment_program_compiler)); + rc_init(&compiler.Base); + + inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev); + inst->U.I.Opcode = RC_OPCODE_TEX; + inst->U.I.TexSrcTarget = RC_TEXTURE_2D; + inst->U.I.TexSrcUnit = 0; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = FRAG_RESULT_COLOR; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Abs = 0; + inst->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst->U.I.SrcReg[0].Index = FRAG_ATTRIB_TEX0; + inst->U.I.SrcReg[0].Negate = 0; + inst->U.I.SrcReg[0].RelAddr = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + + compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0); + compiler.OutputColor[0] = FRAG_RESULT_COLOR; + compiler.OutputDepth = FRAG_RESULT_DEPTH; + compiler.enable_shadow_ambient = GL_TRUE; + compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515); + compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32; + compiler.code = &r300->blit.fp_code; + compiler.AllocateHwInputs = fp_allocate_hw_inputs; + + r3xx_compile_fragment_program(&compiler); +} + +void r300_blit_init(struct r300_context *r300) +{ + if (r300->options.hw_tcl_enabled) + create_vertex_program(r300); + create_fragment_program(r300); +} + +static void r300_emit_tx_setup(struct r300_context *r300, + gl_format mesa_format, + struct radeon_bo *bo, + intptr_t offset, + unsigned width, + unsigned height, + unsigned pitch) +{ + int is_r500 = r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515; + BATCH_LOCALS(&r300->radeon); + + assert(is_r500 ? width <= 4096 : width <= 2048); + assert(is_r500 ? height <= 4096 : height <= 2048); + assert(r300TranslateTexFormat(mesa_format) >= 0); + assert(offset % 32 == 0); + + BEGIN_BATCH(17); + OUT_BATCH_REGVAL(R300_TX_FILTER0_0, + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT) | + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT) | + (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT) | + R300_TX_MIN_FILTER_MIP_NONE | + R300_TX_MIN_FILTER_NEAREST | + R300_TX_MAG_FILTER_NEAREST | + (0 << 28)); + OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0); + OUT_BATCH_REGVAL(R300_TX_SIZE_0, + (((width - 1) & 0x7ff) << R300_TX_WIDTHMASK_SHIFT) | + (((height - 1) & 0x7ff) << R300_TX_HEIGHTMASK_SHIFT) | + (0 << R300_TX_DEPTHMASK_SHIFT) | + (0 << R300_TX_MAX_MIP_LEVEL_SHIFT) | + R300_TX_SIZE_TXPITCH_EN); + + OUT_BATCH_REGVAL(R300_TX_FORMAT_0, r300TranslateTexFormat(mesa_format)); + OUT_BATCH_REGVAL(R300_TX_FORMAT2_0, + (pitch - 1) | + (is_r500 && width > 2048 ? R500_TXWIDTH_BIT11 : 0) | + (is_r500 && height > 2048 ? R500_TXHEIGHT_BIT11 : 0)); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, 1); + OUT_BATCH_RELOC(0, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + + OUT_BATCH_REGSEQ(R300_TX_INVALTAGS, 2); + OUT_BATCH(0); + OUT_BATCH(1); + + END_BATCH(); +} + +#define EASY_US_FORMAT(FMT, C0, C1, C2, C3, SIGN) \ + (FMT | R500_C0_SEL_##C0 | R500_C1_SEL_##C1 | \ + R500_C2_SEL_##C2 | R500_C3_SEL_##C3 | R500_OUT_SIGN(SIGN)) + +static uint32_t mesa_format_to_us_format(gl_format mesa_format) +{ + switch(mesa_format) + { + case MESA_FORMAT_RGBA8888: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0); + case MESA_FORMAT_RGB565: // x + case MESA_FORMAT_ARGB1555: // x + case MESA_FORMAT_RGBA8888_REV: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0); + case MESA_FORMAT_ARGB8888: // x + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, B, G, R, A, 0); + case MESA_FORMAT_ARGB8888_REV: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0); + case MESA_FORMAT_XRGB8888: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0); + + case MESA_FORMAT_RGB332: + return EASY_US_FORMAT(R500_OUT_FMT_C_3_3_2, A, R, G, B, 0); + + case MESA_FORMAT_RGBA_FLOAT32: + return EASY_US_FORMAT(R500_OUT_FMT_C4_32_FP, R, G, B, A, 0); + case MESA_FORMAT_RGBA_FLOAT16: + return EASY_US_FORMAT(R500_OUT_FMT_C4_16_FP, R, G, B, A, 0); + case MESA_FORMAT_ALPHA_FLOAT32: + return EASY_US_FORMAT(R500_OUT_FMT_C_32_FP, A, A, A, A, 0); + case MESA_FORMAT_ALPHA_FLOAT16: + return EASY_US_FORMAT(R500_OUT_FMT_C_16_FP, A, A, A, A, 0); + + case MESA_FORMAT_SIGNED_RGBA8888: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0xf); + case MESA_FORMAT_SIGNED_RGBA8888_REV: + return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0xf); + case MESA_FORMAT_SIGNED_RGBA_16: + return EASY_US_FORMAT(R500_OUT_FMT_C4_16, R, G, B, A, 0xf); + + default: + fprintf(stderr, "Unsupported format %s for US output\n", _mesa_get_format_name(mesa_format)); + assert(0); + return 0; + } +} +#undef EASY_US_FORMAT + +static void r500_emit_fp_setup(struct r300_context *r300, + struct r500_fragment_program_code *fp, + gl_format dst_format) +{ + r500_emit_fp(r300, (uint32_t *)fp->inst, (fp->inst_end + 1) * 6, 0, 0, 0); + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(10); + OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); + OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(fp->inst_end)); + OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(fp->inst_end)); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R500_US_CONFIG, 0); + OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format)); + OUT_BATCH_REGVAL(R500_US_PIXSIZE, fp->max_temp_idx); + END_BATCH(); +} + +static void r500_emit_rs_setup(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(7); + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R500_RS_INST_0, + (0 << R500_RS_INST_TEX_ID_SHIFT) | + (0 << R500_RS_INST_TEX_ADDR_SHIFT) | + R500_RS_INST_TEX_CN_WRITE | + R500_RS_INST_COL_CN_NO_WRITE); + OUT_BATCH_REGVAL(R500_RS_IP_0, + (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (3 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + END_BATCH(); +} + +static void r300_emit_fp_setup(struct r300_context *r300, + struct r300_fragment_program_code *code, + gl_format dst_format) +{ + unsigned i; + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH((code->alu.length + 1) * 4 + code->tex.length + 1 + 11); + + OUT_BATCH_REGSEQ(R300_US_ALU_RGB_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].rgb_inst); + } + OUT_BATCH_REGSEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].rgb_addr); + } + OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].alpha_inst); + } + OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + OUT_BATCH(code->alu.inst[i].alpha_addr); + } + + OUT_BATCH_REGSEQ(R300_US_TEX_INST_0, code->tex.length); + OUT_BATCH_TABLE(code->tex.inst, code->tex.length); + + OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); + OUT_BATCH(R300_PFS_CNTL_FIRST_NODE_HAS_TEX); + OUT_BATCH(code->pixsize); + OUT_BATCH(code->code_offset); + OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); + OUT_BATCH_TABLE(code->code_addr, 4); + OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format)); + END_BATCH(); +} + +static void r300_emit_rs_setup(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(7); + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0); + OUT_BATCH_REGVAL(R300_RS_INST_0, + R300_RS_INST_TEX_ID(0) | + R300_RS_INST_TEX_ADDR(0) | + R300_RS_INST_TEX_CN_WRITE); + OUT_BATCH_REGVAL(R300_RS_IP_0, + R300_RS_TEX_PTR(0) | + R300_RS_SEL_S(R300_RS_SEL_C0) | + R300_RS_SEL_T(R300_RS_SEL_C1) | + R300_RS_SEL_R(R300_RS_SEL_K0) | + R300_RS_SEL_Q(R300_RS_SEL_K1)); + END_BATCH(); +} + +static void emit_pvs_setup(struct r300_context *r300, + uint32_t *vp_code, + unsigned vp_len) +{ + BATCH_LOCALS(&r300->radeon); + + r300_emit_vpu(r300, vp_code, vp_len * 4, R300_PVS_CODE_START); + + BEGIN_BATCH(4); + OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); + OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | + ((vp_len - 1) << R300_PVS_XYZW_VALID_INST_SHIFT) | + ((vp_len - 1)<< R300_PVS_LAST_INST_SHIFT)); + OUT_BATCH(0); + OUT_BATCH((vp_len - 1) << R300_PVS_LAST_VTX_SRC_INST_SHIFT); + END_BATCH(); +} + +static void emit_vap_setup(struct r300_context *r300) +{ + int tex_offset; + BATCH_LOCALS(&r300->radeon); + + if (r300->options.hw_tcl_enabled) + tex_offset = 1; + else + tex_offset = 6; + + BEGIN_BATCH(12); + OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); + OUT_BATCH(R300_VTX_XY_FMT | R300_VTX_Z_FMT); + OUT_BATCH(4); + + OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_0, + ((R300_DATA_TYPE_FLOAT_2 | (0 << R300_DST_VEC_LOC_SHIFT)) << 0) | + (((tex_offset << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_2 | R300_LAST_VEC) << 16)); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, + ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT) ) << 0) | + (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | + (0xf << R300_WRITE_ENA_SHIFT) ) << 16) ) ); + OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS); + END_BATCH(); +} + +static GLboolean validate_buffers(struct r300_context *r300, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + + radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs); + + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Calculate texcoords for given image region. + * Output values are [minx, maxx, miny, maxy] + */ +static void calc_tex_coords(float img_width, float img_height, + float x, float y, + float reg_width, float reg_height, + unsigned flip_y, float *buf) +{ + buf[0] = x / img_width; + buf[1] = buf[0] + reg_width / img_width; + buf[2] = y / img_height; + buf[3] = buf[2] + reg_height / img_height; + if (flip_y) + { + buf[2] = 1.0 - buf[2]; + buf[3] = 1.0 - buf[3]; + } +} + +static void emit_draw_packet(struct r300_context *r300, + unsigned src_width, unsigned src_height, + unsigned src_x_offset, unsigned src_y_offset, + unsigned dst_x_offset, unsigned dst_y_offset, + unsigned reg_width, unsigned reg_height, + unsigned flip_y) +{ + float texcoords[4]; + + calc_tex_coords(src_width, src_height, + src_x_offset, src_y_offset, + reg_width, reg_height, + flip_y, texcoords); + + float verts[] = { dst_x_offset, dst_y_offset, + texcoords[0], texcoords[2], + dst_x_offset, dst_y_offset + reg_height, + texcoords[0], texcoords[3], + dst_x_offset + reg_width, dst_y_offset + reg_height, + texcoords[1], texcoords[3], + dst_x_offset + reg_width, dst_y_offset, + texcoords[1], texcoords[2] }; + + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(19); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_IMMD_2, 16); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | + (4 << 16) | R300_VAP_VF_CNTL__PRIM_QUADS); + OUT_BATCH_TABLE(verts, 16); + END_BATCH(); +} + +static void other_stuff(struct r300_context *r300) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH(13); + OUT_BATCH_REGVAL(R300_GA_POLY_MODE, + R300_GA_POLY_MODE_FRONT_PTYPE_TRI | R300_GA_POLY_MODE_BACK_PTYPE_TRI); + OUT_BATCH_REGVAL(R300_SU_CULL_MODE, R300_FRONT_FACE_CCW); + OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); + OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); + OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH_REGVAL(R300_ZB_CNTL, 0); + END_BATCH(); + if (r300->options.hw_tcl_enabled) { + BEGIN_BATCH(2); + OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + END_BATCH(); + } +} + +static void emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + intptr_t offset, + gl_format mesa_format, + unsigned pitch, + unsigned width, + unsigned height) +{ + BATCH_LOCALS(&r300->radeon); + + unsigned x1, y1, x2, y2; + x1 = 0; + y1 = 0; + x2 = width - 1; + y2 = height - 1; + + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + x1 += R300_SCISSORS_OFFSET; + y1 += R300_SCISSORS_OFFSET; + x2 += R300_SCISSORS_OFFSET; + y2 += R300_SCISSORS_OFFSET; + } + + r300_emit_cb_setup(r300, bo, offset, mesa_format, + _mesa_get_format_bytes(mesa_format), + _mesa_format_row_stride(mesa_format, pitch)); + + BEGIN_BATCH_NO_AUTOSTATE(5); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH_REGVAL(R300_RB3D_CCTL, 0); + END_BATCH(); +} + +unsigned r300_check_blit(gl_format dst_format) +{ + switch (dst_format) { + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_XRGB8888: + break; + default: + return 0; + } + + if (_mesa_get_format_bits(dst_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + +/** + * Copy a region of [@a width x @a height] pixels from source buffer + * to destination buffer. + * @param[in] r300 r300 context + * @param[in] src_bo source radeon buffer object + * @param[in] src_offset offset of the source image in the @a src_bo + * @param[in] src_mesaformat source image format + * @param[in] src_pitch aligned source image width + * @param[in] src_width source image width + * @param[in] src_height source image height + * @param[in] src_x_offset x offset in the source image + * @param[in] src_y_offset y offset in the source image + * @param[in] dst_bo destination radeon buffer object + * @param[in] dst_offset offset of the destination image in the @a dst_bo + * @param[in] dst_mesaformat destination image format + * @param[in] dst_pitch aligned destination image width + * @param[in] dst_width destination image width + * @param[in] dst_height destination image height + * @param[in] dst_x_offset x offset in the destination image + * @param[in] dst_y_offset y offset in the destination image + * @param[in] width region width + * @param[in] height region height + * @param[in] flip_y set if y coords of the source image need to be flipped + */ +unsigned r300_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (!r300_check_blit(dst_mesaformat)) + return 0; + + /* Make sure that colorbuffer has even width - hw limitation */ + if (dst_pitch % 2 > 0) + ++dst_pitch; + + /* Need to clamp the region size to make sure + * we don't read outside of the source buffer + * or write outside of the destination buffer. + */ + if (reg_width + src_x_offset > src_width) + reg_width = src_width - src_x_offset; + if (reg_height + src_y_offset > src_height) + reg_height = src_height - src_y_offset; + if (reg_width + dst_x_offset > dst_width) + reg_width = dst_width - dst_x_offset; + if (reg_height + dst_y_offset > dst_height) + reg_height = dst_height - dst_y_offset; + + if (src_bo == dst_bo) { + return 0; + } + + if (src_offset % 32 || dst_offset % 32) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: size [%d x %d], pitch %d, " + "offset [%d x %d], format %s, bo %p\n", + src_width, src_height, src_pitch, + src_x_offset, src_y_offset, + _mesa_get_format_name(src_mesaformat), + src_bo); + fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n", + dst_pitch, dst_x_offset, dst_y_offset, + _mesa_get_format_name(dst_mesaformat), dst_bo); + fprintf(stderr, "region: %d x %d\n", reg_width, reg_height); + } + + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(r300->radeon.glCtx); + + if (!validate_buffers(r300, src_bo, dst_bo)) + return 0; + + rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__); + + other_stuff(r300); + + r300_emit_tx_setup(r300, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_emit_fp_setup(r300, &r300->blit.fp_code.code.r500, dst_mesaformat); + r500_emit_rs_setup(r300); + } else { + r300_emit_fp_setup(r300, &r300->blit.fp_code.code.r300, dst_mesaformat); + r300_emit_rs_setup(r300); + } + + if (r300->options.hw_tcl_enabled) + emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2); + + emit_vap_setup(r300); + + emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height); + + emit_draw_packet(r300, src_width, src_height, + src_x_offset, src_y_offset, + dst_x_offset, dst_y_offset, + reg_width, reg_height, + flip_y); + + r300EmitCacheFlush(r300); + + radeonFlush(r300->radeon.glCtx); + + return 1; +} diff --git a/src/mesa/drivers/dri/r300/r300_blit.h b/src/mesa/drivers/dri/r300/r300_blit.h new file mode 100644 index 0000000000..39b157a57b --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_blit.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef R300_BLIT_H +#define R300_BLIT_H + +void r300_blit_init(struct r300_context *r300); + +unsigned r300_check_blit(gl_format mesa_format); + +unsigned r300_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y); + +#endif // R300_BLIT_H diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c new file mode 100644 index 0000000000..c40802aec6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -0,0 +1,907 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#include "main/glheader.h" +#include "main/state.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/simple_list.h" + +#include "drm.h" +#include "radeon_drm.h" + +#include "r300_context.h" +#include "r300_reg.h" +#include "r300_cmdbuf.h" +#include "r300_emit.h" +#include "radeon_bocs_wrapper.h" +#include "radeon_mipmap_tree.h" +#include "radeon_queryobj.h" + +/** # of dwords reserved for additional instructions that may need to be written + * during flushing. + */ +#define SPACE_FOR_FLUSHING 4 + +static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) +{ + if (r300->radeon.radeonScreen->kernel_mm) { + return ((((*pkt) >> 16) & 0x3FFF) + 1); + } else { + drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt; + return t->packet0.count; + } +} + +#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) + +static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + int extra = 1; + cnt = vpu_count(atom->cmd); + + if (r300->radeon.radeonScreen->kernel_mm) { + extra = 3; + } + + return cnt ? (cnt * 4) + extra : 0; +} + +static int check_vpp(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + int extra = 1; + + if (r300->radeon.radeonScreen->kernel_mm) { + cnt = r300->selected_vp->code.constants.Count * 4; + extra = 3; + } else { + cnt = vpu_count(atom->cmd); + extra = 1; + } + + return cnt ? (cnt * 4) + extra : 0; +} + +void r300_emit_vpu(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr) +{ + BATCH_LOCALS(&r300->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(3 + len); + OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); + OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, len-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(data, len); + END_BATCH(); +} + +static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr; + + cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + + r300_emit_vpu(r300, &atom->cmd[1], vpu_count(atom->cmd) * 4, addr); +} + +static void emit_vpp_state(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr; + + cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + + r300_emit_vpu(r300, &atom->cmd[1], r300->selected_vp->code.constants.Count * 4, addr); +} + +void r500_emit_fp(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr, + unsigned type, + unsigned clamp) +{ + BATCH_LOCALS(&r300->radeon); + + addr |= (type << 16); + addr |= (clamp << 17); + + BEGIN_BATCH_NO_AUTOSTATE(len + 3); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); + OUT_BATCH(addr); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, len-1) | RADEON_ONE_REG_WR); + OUT_BATCH_TABLE(data, len); + END_BATCH(); +} + +static void emit_r500fp_atom(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + drm_r300_cmd_header_t cmd; + uint32_t addr, count; + int type, clamp; + + cmd.u = atom->cmd[0]; + addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo; + type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); + clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); + + if (type) { + count = r500fp_count(atom->cmd) * 4; + } else { + count = r500fp_count(atom->cmd) * 6; + } + + r500_emit_fp(r300, &atom->cmd[1], count, addr, type, clamp); +} + +static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); + int dw = 0, i; + if (atom->cmd[0] == CP_PACKET2) { + return dw; + } + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (!t && !r300->radeon.radeonScreen->kernel_mm) { + dw += 0; + } else if (t && t->image_override && !t->bo) { + if (!r300->radeon.radeonScreen->kernel_mm) + dw += 2; + } else + dw += 4; + } + return dw; +} + +static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); + int i; + + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (t && !t->image_override) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t), + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!t) { + /* Texture unit hasn't a texture bound. + * We assign the current color buffer as a fakery to make + * KIL work on KMS (without it, the CS checker will complain). + */ + if (r300->radeon.radeonScreen->kernel_mm) { + struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon); + if (rrb && rrb->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } + } + } else { /* override cases */ + if (t->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!r300->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH(t->override_offset); + END_BATCH(); + } else { + /* Texture unit hasn't a texture bound nothings to do */ + } + } + } +} + +void r300_emit_scissor(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + unsigned x1, y1, x2, y2; + struct radeon_renderbuffer *rrb; + + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + return; + } + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + if (r300->radeon.state.scissor.enabled) { + x1 = r300->radeon.state.scissor.rect.x1; + y1 = r300->radeon.state.scissor.rect.y1; + x2 = r300->radeon.state.scissor.rect.x2; + y2 = r300->radeon.state.scissor.rect.y2; + } else { + x1 = 0; + y1 = 0; + x2 = rrb->base.Width - 1; + y2 = rrb->base.Height - 1; + } + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + x1 += R300_SCISSORS_OFFSET; + y1 += R300_SCISSORS_OFFSET; + x2 += R300_SCISSORS_OFFSET; + y2 += R300_SCISSORS_OFFSET; + } + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); +} +static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t dw = 6 + 3 + 16; + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + dw -= 3 + 16; + } + return dw; +} + +static void emit_scissor(struct r300_context *r300, + unsigned width, + unsigned height) +{ + int i; + BATCH_LOCALS(&r300->radeon); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH(0); + OUT_BATCH(((width - 1) << R300_SCISSORS_X_SHIFT) | + ((height - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((width - 1) << R300_CLIPRECT_X_SHIFT) | ((height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) | + (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH(((width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) | + ((height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((R300_SCISSORS_OFFSET + width - 1) << R300_CLIPRECT_X_SHIFT) | + ((R300_SCISSORS_OFFSET + height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); + END_BATCH(); + } +} + +void r300_emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + uint32_t offset, + GLuint format, + unsigned cpp, + unsigned pitch) +{ + BATCH_LOCALS(&r300->radeon); + uint32_t cbpitch = pitch / cpp; + uint32_t dw = 6; + + assert(offset % 32 == 0); + + switch (format) { + case MESA_FORMAT_SL8: + case MESA_FORMAT_A8: + case MESA_FORMAT_L8: + case MESA_FORMAT_I8: + cbpitch |= R300_COLOR_FORMAT_I8; + break; + case MESA_FORMAT_RGB565: + case MESA_FORMAT_RGB565_REV: + cbpitch |= R300_COLOR_FORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB4444_REV: + cbpitch |= R300_COLOR_FORMAT_ARGB4444; + break; + case MESA_FORMAT_RGBA5551: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB1555_REV: + cbpitch |= R300_COLOR_FORMAT_ARGB1555; + break; + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888_REV: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + break; + default: + _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()"); + break; + } + + if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R300_COLOR_TILE_ENABLE; + + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + + BEGIN_BATCH_NO_AUTOSTATE(dw); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(cbpitch); + else + OUT_BATCH_RELOC(cbpitch, bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); +} + +static void emit_cb_offset_atom(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + uint32_t offset = r300->radeon.state.color.draw_offset; + + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height); + + r300_emit_cb_setup(r300, rrb->bo, offset, rrb->base.Format, rrb->cpp, rrb->pitch); + + if (r300->radeon.radeonScreen->driScreen->dri2.enabled) { + emit_scissor(r300, rrb->base.Width, rrb->base.Height); + } +} + +static int check_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t dw; + dw = 6; + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + return dw; +} + +static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + struct radeon_renderbuffer *rrb; + uint32_t zbpitch; + uint32_t dw = atom->check(ctx, atom); + + rrb = radeon_get_depthbuffer(&r300->radeon); + if (!rrb) + return; + + zbpitch = (rrb->pitch / rrb->cpp); + if (!r300->radeon.radeonScreen->kernel_mm) { + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + zbpitch |= R300_DEPTHMACROTILE_ENABLE; + } + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ + zbpitch |= R300_DEPTHMICROTILE_TILED; + } + } + + BEGIN_BATCH_NO_AUTOSTATE(dw); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(zbpitch); + else + OUT_BATCH_RELOC(cbpitch, rrb->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); +} + +static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + struct radeon_renderbuffer *rrb; + uint32_t format = 0; + + rrb = radeon_get_depthbuffer(&r300->radeon); + if (!rrb) + format = 0; + else { + if (rrb->cpp == 2) + format = R300_DEPTHFORMAT_16BIT_INT_Z; + else if (rrb->cpp == 4) + format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + } + + BEGIN_BATCH_NO_AUTOSTATE(atom->cmd_size); + OUT_BATCH(atom->cmd[0]); + atom->cmd[1] &= ~0xf; + atom->cmd[1] |= format; + OUT_BATCH(atom->cmd[1]); + OUT_BATCH(atom->cmd[2]); + OUT_BATCH(atom->cmd[3]); + OUT_BATCH(atom->cmd[4]); + END_BATCH(); +} + +static int check_never(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return 0; +} + +static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return atom->cmd_size; +} + +static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + if (atom->cmd[0] == CP_PACKET2) { + return 0; + } + cnt = packet0_count(r300, atom->cmd); + return cnt ? cnt + 1 : 0; +} + +static int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) +{ + int cnt; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int extra = 1; + cnt = r500fp_count(atom->cmd); + if (r300->radeon.radeonScreen->kernel_mm) + extra = 3; + + return cnt ? (cnt * 6) + extra : 0; +} + +static int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) +{ + int cnt; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int extra = 1; + cnt = r500fp_count(atom->cmd); + if (r300->radeon.radeonScreen->kernel_mm) + extra = 3; + + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 4) + extra : 0; +} + +#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ + do { \ + r300->hw.ATOM.cmd_size = (SZ); \ + r300->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t)); \ + r300->hw.ATOM.name = #ATOM; \ + r300->hw.ATOM.idx = (IDX); \ + r300->hw.ATOM.check = check_##CHK; \ + r300->hw.ATOM.dirty = GL_FALSE; \ + r300->radeon.hw.max_state_size += (SZ); \ + insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM); \ + } while (0) +/** + * Allocate memory for the command buffer and initialize the state atom + * list. Note that the initial hardware state is set by r300InitState(). + */ +void r300InitCmdBuf(r300ContextPtr r300) +{ + int mtu; + int has_tcl; + int is_r500 = 0; + + has_tcl = r300->options.hw_tcl_enabled; + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + + r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ + + mtu = r300->radeon.glCtx->Const.MaxTextureUnits; + if (RADEON_DEBUG & RADEON_TEXTURE) { + fprintf(stderr, "Using %d maximum texture units..\n", mtu); + } + + /* Setup the atom linked list */ + make_empty_list(&r300->radeon.hw.atomlist); + r300->radeon.hw.atomlist.name = "atom-list"; + + /* Initialize state atoms */ + ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0); + r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6); + ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0; + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1); + if (is_r500 && !r300->radeon.radeonScreen->kernel_mm) { + ALLOC_STATE(vap_index_offset, always, 2, 0); + r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1); + r300->hw.vap_index_offset.cmd[1] = 0; + } + ALLOC_STATE(vte, always, 3, 0); + r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2); + ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0); + r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2); + ALLOC_STATE(vap_cntl_status, always, 2, 0); + r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1); + ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); + r300->hw.vir[0].cmd[R300_VIR_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1); + ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); + r300->hw.vir[1].cmd[R300_VIR_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); + ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); + r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2); + ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); + r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); + + if (has_tcl) { + ALLOC_STATE(vap_clip_cntl, always, 2, 0); + r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1); + ALLOC_STATE(vap_clip, always, 5, 0); + r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4); + ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0); + r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1); + } + + ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0); + r300->hw.vof.cmd[R300_VOF_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2); + + if (has_tcl) { + ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0); + r300->hw.pvs.cmd[R300_PVS_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3); + } + + ALLOC_STATE(gb_enable, always, 2, 0); + r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + } else { + ALLOC_STATE(gb_misc, never, R300_GB_MISC_CMDSIZE, 0); + } + r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3); + ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0); + r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); + ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); + r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1); + ALLOC_STATE(ga_point_s0, always, 5, 0); + r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4); + ALLOC_STATE(ga_triangle_stipple, always, 2, 0); + r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1); + ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0); + r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1); + ALLOC_STATE(ga_point_minmax, always, 4, 0); + r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3); + ALLOC_STATE(lcntl, always, 2, 0); + r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1); + ALLOC_STATE(ga_line_stipple, always, 4, 0); + r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + ALLOC_STATE(shade, always, 2, 0); + } else { + ALLOC_STATE(shade, never, 2, 0); + } + r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1); + ALLOC_STATE(shade2, always, 4, 0); + r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3); + ALLOC_STATE(polygon_mode, always, 4, 0); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3); + ALLOC_STATE(fogp, always, 3, 0); + r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2); + ALLOC_STATE(zbias_cntl, always, 2, 0); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1); + ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0); + r300->hw.zbs.cmd[R300_ZBS_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + ALLOC_STATE(occlusion_cntl, always, 2, 0); + r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1); + ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0); + r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1); + ALLOC_STATE(su_depth_scale, always, 3, 0); + r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2); + ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); + r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2); + if (is_r500) { + ALLOC_STATE(ri, variable, R500_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1); + } else { + ALLOC_STATE(ri, variable, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1); + } + ALLOC_STATE(sc_hyperz, always, 3, 0); + r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2); + ALLOC_STATE(sc_screendoor, always, 2, 0); + r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + ALLOC_STATE(us_out_fmt, always, 6, 0); + r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5); + + if (is_r500) { + ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; + r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3); + r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1); + r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */ + + ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = + cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.r500fp.emit = emit_r500fp_atom; + + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); + r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = + cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.r500fp_const.emit = emit_r500fp_atom; + } else { + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4); + + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0); + + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0); + } + ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); + r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1); + ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); + r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3); + ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0); + r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2); + ALLOC_STATE(fg_depth_src, always, 2, 0); + r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1); + ALLOC_STATE(rb3d_cctl, always, 2, 0); + r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1); + ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); + r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2); + ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0); + r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1); + if (is_r500) { + ALLOC_STATE(blend_color, always, 3, 0); + r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2); + } else { + ALLOC_STATE(blend_color, always, 2, 0); + r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1); + } + ALLOC_STATE(rop, always, 2, 0); + r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); + ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0); + r300->hw.cb.emit = &emit_cb_offset_atom; + ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); + r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); + ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); + r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) { + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); + } else { + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0); + } + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); + r300->hw.zs.cmd[R300_ZS_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); + if (is_r500) { + if (r300->radeon.radeonScreen->kernel_mm) + ALLOC_STATE(zsb, always, R300_ZSB_CMDSIZE, 0); + else + ALLOC_STATE(zsb, never, R300_ZSB_CMDSIZE, 0); + r300->hw.zsb.cmd[R300_ZSB_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R500_ZB_STENCILREFMASK_BF, 1); + } + + ALLOC_STATE(zstencil_format, always, 5, 0); + r300->hw.zstencil_format.cmd[0] = + cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4); + r300->hw.zstencil_format.emit = emit_zstencil_format; + + ALLOC_STATE(zb, zb_offset, R300_ZB_CMDSIZE, 0); + r300->hw.zb.emit = emit_zb_offset; + ALLOC_STATE(zb_depthclearvalue, always, 2, 0); + r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); + ALLOC_STATE(zb_zmask, always, 3, 0); + r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2); + ALLOC_STATE(zb_hiz_offset, always, 2, 0); + r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1); + ALLOC_STATE(zb_hiz_pitch, always, 2, 0); + r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1); + + /* VPU only on TCL */ + if (has_tcl) { + int i; + ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); + r300->hw.vpi.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpi.emit = emit_vpu_state; + + if (is_r500) { + ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpp.emit = emit_vpp_state; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vps.emit = emit_vpu_state; + + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R500_PVS_UCP_START + i, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpucp[i].emit = emit_vpu_state; + } + } else { + ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpp.emit = emit_vpp_state; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vps.emit = emit_vpu_state; + + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R300_PVS_UCP_START + i, 1); + if (r300->radeon.radeonScreen->kernel_mm) + r300->hw.vpucp[i].emit = emit_vpu_state; + } + } + } + + /* Textures */ + ALLOC_STATE(tex.filter, variable, mtu + 1, 0); + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0); + + ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0); + + ALLOC_STATE(tex.size, variable, mtu + 1, 0); + r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0); + + ALLOC_STATE(tex.format, variable, mtu + 1, 0); + r300->hw.tex.format.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0); + + ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0); + + ALLOC_STATE(tex.offset, tex_offsets, 1, 0); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0); + r300->hw.tex.offset.emit = &emit_tex_offsets; + + ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0); + + ALLOC_STATE(tex.border_color, variable, mtu + 1, 0); + r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0); + + radeon_init_query_stateobj(&r300->radeon, R300_QUERYOBJ_CMDSIZE); + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RV530_FG_ZBREG_DEST, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL; + } else { + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_REG_DEST, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = R300_RASTER_PIPE_SELECT_ALL; + } + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZPASS_DATA, 1); + r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_1] = 0; + + r300->radeon.hw.is_dirty = GL_TRUE; + r300->radeon.hw.all_dirty = GL_TRUE; + + rcommonInitCmdBuf(&r300->radeon); +} diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h new file mode 100644 index 0000000000..0e68da928e --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -0,0 +1,69 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __R300_CMDBUF_H__ +#define __R300_CMDBUF_H__ + +#include "r300_context.h" + +#define CACHE_FLUSH_BUFSZ (4*2) +#define PRE_EMIT_STATE_BUFSZ (2+2) +#define AOS_BUFSZ(nr) (3+(nr >>1)*3 + (nr&1)*2 + (nr*2)) +#define FIREAOS_BUFSZ (3) +#define SCISSORS_BUFSZ (3) + +void r300InitCmdBuf(r300ContextPtr r300); +void r300_emit_scissor(GLcontext *ctx); + +void r300_emit_vpu(struct r300_context *ctx, + uint32_t *data, + unsigned len, + uint32_t addr); + +void r500_emit_fp(struct r300_context *r300, + uint32_t *data, + unsigned len, + uint32_t addr, + unsigned type, + unsigned clamp); + +void r300_emit_cb_setup(struct r300_context *r300, + struct radeon_bo *bo, + uint32_t offset, + GLuint format, + unsigned cpp, + unsigned pitch); + +#endif /* __R300_CMDBUF_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c new file mode 100644 index 0000000000..6992ca59db --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -0,0 +1,570 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#include "main/glheader.h" +#include "main/api_arrayelt.h" +#include "main/context.h" +#include "main/simple_list.h" +#include "main/imports.h" +#include "main/extensions.h" +#include "main/bufferobj.h" +#include "main/texobj.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" + +#include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" + +#include "r300_context.h" +#include "radeon_span.h" +#include "r300_blit.h" +#include "r300_cmdbuf.h" +#include "r300_state.h" +#include "r300_tex.h" +#include "r300_emit.h" +#include "r300_render.h" +#include "r300_swtcl.h" +#include "radeon_bocs_wrapper.h" +#include "radeon_buffer_objects.h" +#include "radeon_queryobj.h" + +#include "utils.h" +#include "xmlpool.h" /* for symbolic values of enum-type options */ + +#define need_GL_VERSION_2_0 +#define need_GL_ARB_occlusion_query +#define need_GL_ARB_point_parameters +#define need_GL_ARB_vertex_program +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_framebuffer_blit +#define need_GL_EXT_framebuffer_object +#define need_GL_EXT_fog_coord +#define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_provoking_vertex +#define need_GL_EXT_secondary_color +#define need_GL_EXT_stencil_two_side +#define need_GL_ATI_separate_stencil +#define need_GL_NV_vertex_program + +#include "main/remap_helper.h" + +static const struct dri_extension card_extensions[] = { + /* *INDENT-OFF* */ + {"GL_ARB_depth_texture", NULL}, + {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_ARB_shadow", NULL}, + {"GL_ARB_shadow_ambient", NULL}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_crossbar", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_subtract", NULL}, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_shadow_funcs", NULL}, + {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_EXT_texture_mirror_clamp", NULL}, + {"GL_EXT_texture_rectangle", NULL}, + {"GL_EXT_texture_sRGB", NULL}, + {"GL_EXT_vertex_array_bgra", NULL}, + {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, + {"GL_ATI_texture_env_combine3", NULL}, + {"GL_ATI_texture_mirror_once", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + {"GL_MESAX_texture_float", NULL}, + {"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, + {"GL_SGIS_generate_mipmap", NULL}, + {NULL, NULL} + /* *INDENT-ON* */ +}; + + +static const struct dri_extension mm_extensions[] = { + { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions }, + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { NULL, NULL } +}; + +/** + * The GL 2.0 functions are needed to make display lists work with + * functions added by GL_ATI_separate_stencil. + */ +static const struct dri_extension gl_20_extension[] = { + {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, +}; + +static const struct tnl_pipeline_stage *r300_pipeline[] = { + /* Catch any t&l fallbacks + */ + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_point_attenuation_stage, + &_tnl_vertex_program_stage, + &_tnl_render_stage, + 0, +}; + +static void r300_get_lock(radeonContextPtr rmesa) +{ + drm_radeon_sarea_t *sarea = rmesa->sarea; + + if (sarea->ctx_owner != rmesa->dri.hwContext) { + sarea->ctx_owner = rmesa->dri.hwContext; + if (!rmesa->radeonScreen->kernel_mm) + radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); + } +} + +static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) +{ + /* please flush pipe do all pending work */ + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_SCREENDOOR, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_SCREENDOOR, 1)); + radeon_cs_write_dword(cs, 0x00FFFFFF); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_HYPERZ, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_US_CONFIG, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_ZB_CNTL, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D)); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_RB3D_DSTCACHE_CTLSTAT, 1)); + radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_ZB_ZCACHE_CTLSTAT, 1)); + radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); + radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, + R300_WAIT_3D | R300_WAIT_3D_CLEAN)); +} + +static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + + cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); + END_BATCH(); + end_3d(radeon); +} + +static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + if (mode) + r300->radeon.Fallback |= bit; + else + r300->radeon.Fallback &= ~bit; + + r300SwitchFallback(ctx, R300_FALLBACK_RADEON_COMMON, mode); +} + +static void r300_emit_query_finish(radeonContextPtr radeon) +{ + r300ContextPtr r300 = (r300ContextPtr)radeon; + struct radeon_query_object *query = radeon->query.current; + BATCH_LOCALS(radeon); + + BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->radeon.radeonScreen->num_gb_pipes + 2); + switch (r300->radeon.radeonScreen->num_gb_pipes) { + case 4: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 3: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 2: + if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + } else { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1); + } + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 1: + default: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + break; + } + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + END_BATCH(); + query->curr_offset += r300->radeon.radeonScreen->num_gb_pipes * sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +static void rv530_emit_query_finish_single_z(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(8); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + + query->curr_offset += sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +static void rv530_emit_query_finish_double_z(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(14); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + + query->curr_offset += 2 * sizeof(uint32_t); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +static void r300_init_vtbl(radeonContextPtr radeon) +{ + radeon->vtbl.get_lock = r300_get_lock; + radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset; + radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header; + radeon->vtbl.swtcl_flush = r300_swtcl_flush; + radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms; + radeon->vtbl.fallback = r300_fallback; + if (radeon->radeonScreen->chip_family == CHIP_FAMILY_RV530) { + if (radeon->radeonScreen->num_z_pipes == 2) + radeon->vtbl.emit_query_finish = rv530_emit_query_finish_double_z; + else + radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z; + } else + radeon->vtbl.emit_query_finish = r300_emit_query_finish; + + radeon->vtbl.check_blit = r300_check_blit; + radeon->vtbl.blit = r300_blit; + + if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + radeon->vtbl.is_format_renderable = r500IsFormatRenderable; + } else { + radeon->vtbl.is_format_renderable = r300IsFormatRenderable; + } +} + +static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + ctx->Const.MaxTextureImageUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); + ctx->Const.MaxTextureCoordUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits, + ctx->Const.MaxTextureCoordUnits); + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; + + + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + ctx->Const.MaxTextureLodBias = 16.0; + + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.MaxTextureLevels = 13; + ctx->Const.MaxCubeTextureLevels = 13; + ctx->Const.MaxTextureRectSize = 4096; + ctx->Const.MaxRenderbufferSize = 4096; + } + else { + ctx->Const.MaxTextureLevels = 12; + ctx->Const.MaxCubeTextureLevels = 12; + ctx->Const.MaxTextureRectSize = 2048; + ctx->Const.MaxRenderbufferSize = 2048; + } + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; + ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; + ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + + ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MaxColorAttachments = 1; + + /* currently bogus data */ + if (r300->options.hw_tcl_enabled) { + ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxNativeTemps = 32; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + } + + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS; + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ + + /* The hardware limits are higher than this, + * but the non-KMS DRM interface artificially limits us + * to this many instructions. + * + * We could of course work around it in the KMS path, + * but it would be a mess, so it seems wiser + * to leave it as is. Going forward, the Gallium driver + * will not be subject to these limitations. + */ + ctx->Const.FragmentProgram.MaxNativeParameters = 255; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 255; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 255; + ctx->Const.FragmentProgram.MaxNativeInstructions = 255; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 255; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + } else { + ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS; + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ + ctx->Const.FragmentProgram.MaxNativeParameters = R300_PFS_NUM_CONST_REGS; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = R300_PFS_MAX_ALU_INST; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = R300_PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeInstructions = R300_PFS_MAX_ALU_INST + R300_PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + } + +} + +static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen) +{ + struct r300_options options = { 0 }; + + driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r300"); + + r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, "def_max_anisotropy"); + + options.stencil_two_side_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side"); + options.s3tc_force_enabled = driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable"); + options.s3tc_force_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc"); + + if (!(screen->chip_flags & RADEON_CHIPSET_TCL) || driQueryOptioni(&r300->radeon.optionCache, "tcl_mode") == DRI_CONF_TCL_SW) + options.hw_tcl_enabled = 0; + else + options.hw_tcl_enabled = 1; + + options.conformance_mode = !driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback"); + + r300->options = options; +} + +static void r300InitGLExtensions(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + driInitExtensions(ctx, card_extensions, GL_TRUE); + if (r300->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + + if (r300->options.stencil_two_side_disabled) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + + if (r300->options.s3tc_force_disabled) { + _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } else if (ctx->Mesa_DXTn || r300->options.s3tc_force_enabled) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } + + if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) { + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); + } + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) + _mesa_enable_extension(ctx, "GL_ARB_half_float_vertex"); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + _mesa_enable_extension(ctx, "GL_EXT_packed_depth_stencil"); +} + +static void r300InitIoctlFuncs(struct dd_function_table *functions) +{ + functions->Clear = _mesa_meta_Clear; + functions->Finish = radeonFinish; + functions->Flush = radeonFlush; +} + +/* Create the device specific rendering context. + */ +GLboolean r300CreateContext(gl_api api, + const __GLcontextModes * glVisual, + __DRIcontext * driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreen *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); + struct dd_function_table functions; + r300ContextPtr r300; + GLcontext *ctx; + + assert(glVisual); + assert(driContextPriv); + assert(screen); + + r300 = (r300ContextPtr) CALLOC(sizeof(*r300)); + if (!r300) + return GL_FALSE; + + r300ParseOptions(r300, screen); + + r300->radeon.radeonScreen = screen; + r300_init_vtbl(&r300->radeon); + + _mesa_init_driver_functions(&functions); + r300InitIoctlFuncs(&functions); + r300InitStateFuncs(&r300->radeon, &functions); + r300InitTextureFuncs(&r300->radeon, &functions); + r300InitShaderFuncs(&functions); + radeonInitQueryObjFunctions(&functions); + radeonInitBufferObjectFuncs(&functions); + + if (!radeonInitContext(&r300->radeon, &functions, + glVisual, driContextPriv, + sharedContextPrivate)) { + FREE(r300); + return GL_FALSE; + } + + ctx = r300->radeon.glCtx; + + r300->fallback = 0; + if (r300->options.hw_tcl_enabled) + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + + r300InitConstValues(ctx, screen); + + _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); + + /* Initialize the software rasterizer and helper modules. + */ + _swrast_CreateContext(ctx); + _vbo_CreateContext(ctx); + _tnl_CreateContext(ctx); + _swsetup_CreateContext(ctx); + _swsetup_Wakeup(ctx); + + /* Install the customized pipeline: + */ + _tnl_destroy_pipeline(ctx); + _tnl_install_pipeline(ctx, r300_pipeline); + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + + /* Configure swrast and TNL to match hardware characteristics: + */ + _swrast_allow_pixel_fog(ctx, GL_FALSE); + _swrast_allow_vertex_fog(ctx, GL_TRUE); + _tnl_allow_pixel_fog(ctx, GL_FALSE); + _tnl_allow_vertex_fog(ctx, GL_TRUE); + + if (r300->options.hw_tcl_enabled) { + r300InitDraw(ctx); + } else { + r300InitSwtcl(ctx); + } + + r300_blit_init(r300); + radeon_fbo_init(&r300->radeon); + radeonInitSpanFuncs( ctx ); + r300InitCmdBuf(r300); + r300InitState(r300); + r300InitShaderFunctions(r300); + + r300InitGLExtensions(ctx); + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h new file mode 100644 index 0000000000..fbb609b9f6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -0,0 +1,560 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __R300_CONTEXT_H__ +#define __R300_CONTEXT_H__ + +#include "drm.h" +#include "radeon_drm.h" +#include "dri_util.h" +#include "radeon_common.h" + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" +#include "compiler/radeon_code.h" + +struct r300_context; +typedef struct r300_context r300ContextRec; +typedef struct r300_context *r300ContextPtr; + + +#include "r300_vertprog.h" + + +/* The blit width for texture uploads + */ +#define R300_BLIT_WIDTH_BYTES 1024 +#define R300_MAX_TEXTURE_UNITS 8 + + + +#define R300_VPT_CMD_0 0 +#define R300_VPT_XSCALE 1 +#define R300_VPT_XOFFSET 2 +#define R300_VPT_YSCALE 3 +#define R300_VPT_YOFFSET 4 +#define R300_VPT_ZSCALE 5 +#define R300_VPT_ZOFFSET 6 +#define R300_VPT_CMDSIZE 7 + +#define R300_VIR_CMD_0 0 /* vir is variable size (at least 1) */ +#define R300_VIR_CNTL_0 1 +#define R300_VIR_CNTL_1 2 +#define R300_VIR_CNTL_2 3 +#define R300_VIR_CNTL_3 4 +#define R300_VIR_CNTL_4 5 +#define R300_VIR_CNTL_5 6 +#define R300_VIR_CNTL_6 7 +#define R300_VIR_CNTL_7 8 +#define R300_VIR_CMDSIZE 9 + +#define R300_VIC_CMD_0 0 +#define R300_VIC_CNTL_0 1 +#define R300_VIC_CNTL_1 2 +#define R300_VIC_CMDSIZE 3 + +#define R300_VOF_CMD_0 0 +#define R300_VOF_CNTL_0 1 +#define R300_VOF_CNTL_1 2 +#define R300_VOF_CMDSIZE 3 + +#define R300_PVS_CMD_0 0 +#define R300_PVS_CNTL_1 1 +#define R300_PVS_CNTL_2 2 +#define R300_PVS_CNTL_3 3 +#define R300_PVS_CMDSIZE 4 + +#define R300_GB_MISC_CMD_0 0 +#define R300_GB_MISC_MSPOS_0 1 +#define R300_GB_MISC_MSPOS_1 2 +#define R300_GB_MISC_TILE_CONFIG 3 +#define R300_GB_MISC_CMDSIZE 4 +#define R300_GB_MISC2_CMD_0 0 +#define R300_GB_MISC2_SELECT 1 +#define R300_GB_MISC2_AA_CONFIG 2 +#define R300_GB_MISC2_CMDSIZE 3 + +#define R300_TXE_CMD_0 0 +#define R300_TXE_ENABLE 1 +#define R300_TXE_CMDSIZE 2 + +#define R300_PS_CMD_0 0 +#define R300_PS_POINTSIZE 1 +#define R300_PS_CMDSIZE 2 + +#define R300_ZBS_CMD_0 0 +#define R300_ZBS_T_FACTOR 1 +#define R300_ZBS_T_CONSTANT 2 +#define R300_ZBS_W_FACTOR 3 +#define R300_ZBS_W_CONSTANT 4 +#define R300_ZBS_CMDSIZE 5 + +#define R300_CUL_CMD_0 0 +#define R300_CUL_CULL 1 +#define R300_CUL_CMDSIZE 2 + +#define R300_RC_CMD_0 0 +#define R300_RC_CNTL_0 1 +#define R300_RC_CNTL_1 2 +#define R300_RC_CMDSIZE 3 + +#define R300_RI_CMD_0 0 +#define R300_RI_INTERP_0 1 +#define R300_RI_INTERP_1 2 +#define R300_RI_INTERP_2 3 +#define R300_RI_INTERP_3 4 +#define R300_RI_INTERP_4 5 +#define R300_RI_INTERP_5 6 +#define R300_RI_INTERP_6 7 +#define R300_RI_INTERP_7 8 +#define R300_RI_CMDSIZE 9 + +#define R500_RI_CMDSIZE 17 + +#define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */ +#define R300_RR_INST_0 1 +#define R300_RR_INST_1 2 +#define R300_RR_INST_2 3 +#define R300_RR_INST_3 4 +#define R300_RR_INST_4 5 +#define R300_RR_INST_5 6 +#define R300_RR_INST_6 7 +#define R300_RR_INST_7 8 +#define R300_RR_CMDSIZE 9 + +#define R300_FP_CMD_0 0 +#define R300_FP_CNTL0 1 +#define R300_FP_CNTL1 2 +#define R300_FP_CNTL2 3 +#define R300_FP_CMD_1 4 +#define R300_FP_NODE0 5 +#define R300_FP_NODE1 6 +#define R300_FP_NODE2 7 +#define R300_FP_NODE3 8 +#define R300_FP_CMDSIZE 9 + +#define R500_FP_CMD_0 0 +#define R500_FP_CNTL 1 +#define R500_FP_PIXSIZE 2 +#define R500_FP_CMD_1 3 +#define R500_FP_CODE_ADDR 4 +#define R500_FP_CODE_RANGE 5 +#define R500_FP_CODE_OFFSET 6 +#define R500_FP_CMD_2 7 +#define R500_FP_FC_CNTL 8 +#define R500_FP_CMDSIZE 9 + +#define R300_FPT_CMD_0 0 +#define R300_FPT_INSTR_0 1 +#define R300_FPT_CMDSIZE 65 + +#define R300_FPI_CMD_0 0 +#define R300_FPI_INSTR_0 1 +#define R300_FPI_CMDSIZE 65 +/* R500 has space for 512 instructions - 6 dwords per instruction */ +#define R500_FPI_CMDSIZE (512*6+1) + +#define R300_FPP_CMD_0 0 +#define R300_FPP_PARAM_0 1 +#define R300_FPP_CMDSIZE (32*4+1) +/* R500 has spcae for 256 constants - 4 dwords per constant */ +#define R500_FPP_CMDSIZE (256*4+1) + +#define R300_FOGS_CMD_0 0 +#define R300_FOGS_STATE 1 +#define R300_FOGS_CMDSIZE 2 + +#define R300_FOGC_CMD_0 0 +#define R300_FOGC_R 1 +#define R300_FOGC_G 2 +#define R300_FOGC_B 3 +#define R300_FOGC_CMDSIZE 4 + +#define R300_FOGP_CMD_0 0 +#define R300_FOGP_SCALE 1 +#define R300_FOGP_START 2 +#define R300_FOGP_CMDSIZE 3 + +#define R300_AT_CMD_0 0 +#define R300_AT_ALPHA_TEST 1 +#define R300_AT_UNKNOWN 2 +#define R300_AT_CMDSIZE 3 + +#define R300_BLD_CMD_0 0 +#define R300_BLD_CBLEND 1 +#define R300_BLD_ABLEND 2 +#define R300_BLD_CMDSIZE 3 + +#define R300_CMK_CMD_0 0 +#define R300_CMK_COLORMASK 1 +#define R300_CMK_CMDSIZE 2 + +#define R300_CB_CMD_0 0 +#define R300_CB_OFFSET 1 +#define R300_CB_CMD_1 2 +#define R300_CB_PITCH 3 +#define R300_CB_CMDSIZE 4 + +#define R300_ZS_CMD_0 0 +#define R300_ZS_CNTL_0 1 +#define R300_ZS_CNTL_1 2 +#define R300_ZS_CNTL_2 3 +#define R300_ZS_CMDSIZE 4 + +#define R300_ZSB_CMD_0 0 +#define R300_ZSB_CNTL_0 1 +#define R300_ZSB_CMDSIZE 2 + +#define R300_ZB_CMD_0 0 +#define R300_ZB_OFFSET 1 +#define R300_ZB_PITCH 2 +#define R300_ZB_CMDSIZE 3 + +#define R300_VAP_CNTL_FLUSH 0 +#define R300_VAP_CNTL_FLUSH_1 1 +#define R300_VAP_CNTL_CMD 2 +#define R300_VAP_CNTL_INSTR 3 +#define R300_VAP_CNTL_SIZE 4 + +#define R300_VPI_CMD_0 0 +#define R300_VPI_INSTR_0 1 +#define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */ + +#define R300_VPP_CMD_0 0 +#define R300_VPP_PARAM_0 1 +#define R300_VPP_CMDSIZE 1025 /* 256 4-component parameters */ + +#define R300_VPUCP_CMD_0 0 +#define R300_VPUCP_X 1 +#define R300_VPUCP_Y 2 +#define R300_VPUCP_Z 3 +#define R300_VPUCP_W 4 +#define R300_VPUCP_CMDSIZE 5 /* 256 4-component parameters */ + +#define R300_VPS_CMD_0 0 +#define R300_VPS_ZERO_0 1 +#define R300_VPS_ZERO_1 2 +#define R300_VPS_POINTSIZE 3 +#define R300_VPS_ZERO_3 4 +#define R300_VPS_CMDSIZE 5 + + /* the layout is common for all fields inside tex */ +#define R300_TEX_CMD_0 0 +#define R300_TEX_VALUE_0 1 +/* We don't really use this, instead specify mtu+1 dynamically +#define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1) +*/ + +#define R300_QUERYOBJ_CMD_0 0 +#define R300_QUERYOBJ_DATA_0 1 +#define R300_QUERYOBJ_CMD_1 2 +#define R300_QUERYOBJ_DATA_1 3 +#define R300_QUERYOBJ_CMDSIZE 4 + +/** + * Cache for hardware register state. + */ +struct r300_hw_state { + struct radeon_state_atom vpt; /* viewport (1D98) */ + struct radeon_state_atom vap_cntl; + struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */ + struct radeon_state_atom vof; /* VAP output format register 0x2090 */ + struct radeon_state_atom vte; /* (20B0) */ + struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ + struct radeon_state_atom vap_cntl_status; + struct radeon_state_atom vir[2]; /* vap input route (2150/21E0) */ + struct radeon_state_atom vic; /* vap input control (2180) */ + struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */ + struct radeon_state_atom vap_clip_cntl; + struct radeon_state_atom vap_clip; + struct radeon_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */ + struct radeon_state_atom pvs; /* pvs_cntl (22D0) */ + struct radeon_state_atom gb_enable; /* (4008) */ + struct radeon_state_atom gb_misc; /* Multisampling position shifts ? (4010) */ + struct radeon_state_atom gb_misc2; /* Multisampling position shifts ? (4010) */ + struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */ + struct radeon_state_atom ga_triangle_stipple; /* (4214) */ + struct radeon_state_atom ps; /* pointsize (421C) */ + struct radeon_state_atom ga_point_minmax; /* (4230) */ + struct radeon_state_atom lcntl; /* line control */ + struct radeon_state_atom ga_line_stipple; /* (4260) */ + struct radeon_state_atom shade; + struct radeon_state_atom shade2; + struct radeon_state_atom polygon_mode; + struct radeon_state_atom fogp; /* fog parameters (4294) */ + struct radeon_state_atom ga_soft_reset; /* (429C) */ + struct radeon_state_atom zbias_cntl; + struct radeon_state_atom zbs; /* zbias (42A4) */ + struct radeon_state_atom occlusion_cntl; + struct radeon_state_atom cul; /* cull cntl (42B8) */ + struct radeon_state_atom su_depth_scale; /* (42C0) */ + struct radeon_state_atom rc; /* rs control (4300) */ + struct radeon_state_atom ri; /* rs interpolators (4310) */ + struct radeon_state_atom rr; /* rs route (4330) */ + struct radeon_state_atom sc_hyperz; /* (43A4) */ + struct radeon_state_atom sc_screendoor; /* (43E8) */ + struct radeon_state_atom fp; /* fragment program cntl + nodes (4600) */ + struct radeon_state_atom fpt; /* texi - (4620) */ + struct radeon_state_atom us_out_fmt; /* (46A4) */ + struct radeon_state_atom r500fp; /* r500 fp instructions */ + struct radeon_state_atom r500fp_const; /* r500 fp constants */ + struct radeon_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ + struct radeon_state_atom fogs; /* fog state (4BC0) */ + struct radeon_state_atom fogc; /* fog color (4BC8) */ + struct radeon_state_atom at; /* alpha test (4BD4) */ + struct radeon_state_atom fg_depth_src; /* (4BD8) */ + struct radeon_state_atom fpp; /* 0x4C00 and following */ + struct radeon_state_atom rb3d_cctl; /* (4E00) */ + struct radeon_state_atom bld; /* blending (4E04) */ + struct radeon_state_atom cmk; /* colormask (4E0C) */ + struct radeon_state_atom blend_color; /* constant blend color */ + struct radeon_state_atom rop; /* ropcntl */ + struct radeon_state_atom cb; /* colorbuffer (4E28) */ + struct radeon_state_atom rb3d_dither_ctl; /* (4E50) */ + struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */ + struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ + struct radeon_state_atom zs; /* zstencil control (4F00) */ + struct radeon_state_atom zsb; /* zstencil bf */ + struct radeon_state_atom zstencil_format; + struct radeon_state_atom zb; /* z buffer (4F20) */ + struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ + struct radeon_state_atom zb_zmask; /* (4F30) */ + struct radeon_state_atom zb_hiz_offset; /* (4F44) */ + struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ + + struct radeon_state_atom vpi; /* vp instructions */ + struct radeon_state_atom vpp; /* vp parameters */ + struct radeon_state_atom vps; /* vertex point size (?) */ + struct radeon_state_atom vpucp[6]; /* vp user clip plane - 6 */ + /* 8 texture units */ + /* the state is grouped by function and not by + texture unit. This makes single unit updates + really awkward - we are much better off + updating the whole thing at once */ + struct { + struct radeon_state_atom filter; + struct radeon_state_atom filter_1; + struct radeon_state_atom size; + struct radeon_state_atom format; + struct radeon_state_atom pitch; + struct radeon_state_atom offset; + struct radeon_state_atom chroma_key; + struct radeon_state_atom border_color; + } tex; + struct radeon_state_atom txe; /* tex enable (4104) */ + radeonTexObj *textures[R300_MAX_TEXTURE_UNITS]; +}; + +/** + * State cache + */ + +/* Vertex shader state */ + +#define COLOR_IS_RGBA +#define TAG(x) r300##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +struct r300_vertex_program_key { + GLbitfield FpReads; + GLuint FogAttr; + GLuint WPosAttr; +}; + +struct r300_vertex_program { + struct gl_vertex_program *Base; + struct r300_vertex_program *next; + + struct r300_vertex_program_key key; + struct r300_vertex_program_code code; + + GLboolean error; +}; + +struct r300_vertex_program_cont { + /* This is the unmodified vertex program mesa provided us with. + * We need to keep it unchanged because we may need to create another + * hw specific vertex program based on this. + */ + struct gl_vertex_program mesa_program; + /* This is the list of hw specific vertex programs derived from mesa_program */ + struct r300_vertex_program *progs; +}; + + +/** +* Store everything about a fragment program that is needed +* to render with that program. +*/ +struct r300_fragment_program { + GLboolean error; + struct r300_fragment_program *next; + struct r300_fragment_program_external_state state; + + struct rX00_fragment_program_code code; + GLbitfield InputsRead; + + /* attribute that we are sending the WPOS in */ + gl_frag_attrib wpos_attr; + /* attribute that we are sending the fog coordinate in */ + gl_frag_attrib fog_attr; +}; + +struct r300_fragment_program_cont { + /* This is the unmodified fragment program mesa provided us with. + * We need to keep it unchanged because we may need to create another + * hw specific fragment program based on this. + */ + struct gl_fragment_program Base; + /* This is the list of hw specific fragment programs derived from Base */ + struct r300_fragment_program *progs; +}; + + +#define R300_MAX_AOS_ARRAYS 16 + + +/* r300_swtcl.c + */ +struct r300_swtcl_info { + /* + * Offset of the 4UB color data within a hardware (swtcl) vertex. + */ + GLuint coloroffset; + + /** + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; +}; + +struct r300_vtable { + void (* SetupRSUnit)(GLcontext *ctx); + void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); + void (* SetupPixelShader)(GLcontext *ctx); +}; + +struct r300_vertex_buffer { + struct vertex_attribute { + /* generic */ + GLubyte element; + GLuint stride; + GLuint dwords; + GLubyte size; /* number of components */ + GLboolean is_named_bo; + struct radeon_bo *bo; + GLint bo_offset; + + /* hw specific */ + uint32_t data_type:4; + uint32_t dst_loc:5; + uint32_t _signed:1; + uint32_t normalize:1; + uint32_t swizzle:12; + uint32_t write_mask:4; + } attribs[VERT_ATTRIB_MAX]; + + GLubyte num_attribs; +}; + +struct r300_index_buffer { + struct radeon_bo *bo; + int bo_offset; + + GLboolean is_32bit; + GLuint count; +}; + + +/** + * \brief R300 context structure. + */ +struct r300_context { + struct radeon_context radeon; /* parent class, must be first */ + + struct r300_vtable vtbl; + + struct r300_hw_state hw; + + struct r300_vertex_program *selected_vp; + struct r300_fragment_program *selected_fp; + + /* Vertex buffers + */ + GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; + GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + + struct r300_options { + uint32_t conformance_mode:1; + uint32_t hw_tcl_enabled:1; + uint32_t s3tc_force_enabled:1; + uint32_t s3tc_force_disabled:1; + uint32_t stencil_two_side_disabled:1; + } options; + + struct r300_swtcl_info swtcl; + struct r300_vertex_buffer vbuf; + struct r300_index_buffer ind_buf; + + uint32_t fallback; + + struct { + struct r300_vertex_program_code vp_code; + struct rX00_fragment_program_code fp_code; + } blit; + + DECLARE_RENDERINPUTS(render_inputs_bitset); +}; + +#define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) + +extern void r300DestroyContext(__DRIcontext * driContextPriv); +extern GLboolean r300CreateContext(gl_api api, + const __GLcontextModes * glVisual, + __DRIcontext * driContextPriv, + void *sharedContextPrivate); + +extern void r300InitShaderFuncs(struct dd_function_table *functions); + +extern void r300InitShaderFunctions(r300ContextPtr r300); + +extern void r300InitDraw(GLcontext *ctx); + +#define r300PackFloat32 radeonPackFloat32 +#define r300PackFloat24 radeonPackFloat24 + +#endif /* __R300_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c new file mode 100644 index 0000000000..282c0e18bc --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -0,0 +1,750 @@ +/************************************************************************** + * + * Copyright 2009 Maciej Cencora + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHOR(S) AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdlib.h> + +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/enums.h" +#include "main/simple_list.h" + +#include "r300_reg.h" +#include "r300_context.h" +#include "r300_emit.h" +#include "r300_render.h" +#include "r300_state.h" +#include "r300_tex.h" +#include "r300_cmdbuf.h" + +#include "radeon_buffer_objects.h" +#include "radeon_common_context.h" + +#include "tnl/tnl.h" +#include "tnl/t_vp_build.h" +#include "vbo/vbo_context.h" + + +static int getTypeSize(GLenum type) +{ + switch (type) { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_HALF_FLOAT: + return sizeof(GLhalfARB); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Fixing index buffer format. type %d\n", + __func__, mesa_ind_buf->type); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + GLubyte *in = (GLubyte *)src_ptr; + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + radeon_bo_map(r300->ind_buf.bo, 1); + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) { + *out++ = in[i]; + } + radeon_bo_unmap(r300->ind_buf.bo); +#if MESA_BIG_ENDIAN + } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLushort *in = (GLushort *)src_ptr; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, + &r300->ind_buf.bo_offset, size, 4); + + radeon_bo_map(r300->ind_buf.bo, 1); + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) { + *out++ = in[i]; + } + radeon_bo_unmap(r300->ind_buf.bo); +#endif + } + + r300->ind_buf.is_32bit = GL_FALSE; + r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } +} + + +static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (!mesa_ind_buf) { + r300->ind_buf.bo = NULL; + return; + } + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__); + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + radeon_bo_map(r300->ind_buf.bo, 1); + assert(r300->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); + memcpy(dst_ptr, src_ptr, size); + + radeon_bo_unmap(r300->ind_buf.bo); + r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } else { + r300FixupIndexBuffer(ctx, mesa_ind_buf); + } +} + +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + count = 1; + + if (input->BufferObj->Name) { + if (!input->BufferObj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } else { + src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + radeon_bo_map(attr->bo, 1); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Converting vertex attributes, attribute data format %x," + "stride %d, components %d\n" + , __FUNCTION__, input->Type + , stride, input->Size); + + assert(src_ptr != NULL); + + switch (input->Type) { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + radeon_bo_unmap(attr->bo); + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 32); + + radeon_bo_map(attr->bo, 1); + + if (!input->BufferObj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s. Vertex alignment doesn't match hw requirements.\n", __func__); + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) { + memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + radeon_bo_unmap(attr->bo); + attr->stride = dst_stride; +} + +static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + struct vertex_attribute r300_attr = { 0 }; + GLenum type; + GLuint stride; + + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__); + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) { + + type = GL_FLOAT; + + if (input->StrideB == 0) { + r300_attr.stride = 0; + } else { + r300_attr.stride = sizeof(GLfloat) * input->Size; + } + r300_attr.dwords = input->Size; + r300_attr.is_named_bo = GL_FALSE; + } else { + type = input->Type; + r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) { + + if (input->StrideB == 0) { + r300_attr.stride = 0; + } else { + r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3; + } + + r300_attr.is_named_bo = GL_FALSE; + } + } + + r300_attr.size = input->Size; + r300_attr.element = attr; + r300_attr.dst_loc = vbuf->num_attribs; + + switch (type) { + case GL_FLOAT: + switch (input->Size) { + case 1: r300_attr.data_type = R300_DATA_TYPE_FLOAT_1; break; + case 2: r300_attr.data_type = R300_DATA_TYPE_FLOAT_2; break; + case 3: r300_attr.data_type = R300_DATA_TYPE_FLOAT_3; break; + case 4: r300_attr.data_type = R300_DATA_TYPE_FLOAT_4; break; + } + r300_attr._signed = 0; + r300_attr.normalize = 0; + break; + case GL_HALF_FLOAT: + switch (input->Size) { + case 1: + case 2: + r300_attr.data_type = R300_DATA_TYPE_FLT16_2; + break; + case 3: + case 4: + r300_attr.data_type = R300_DATA_TYPE_FLT16_4; + break; + } + break; + case GL_SHORT: + r300_attr._signed = 1; + r300_attr.normalize = input->Normalized; + switch (input->Size) { + case 1: + case 2: + r300_attr.data_type = R300_DATA_TYPE_SHORT_2; + break; + case 3: + case 4: + r300_attr.data_type = R300_DATA_TYPE_SHORT_4; + break; + } + break; + case GL_BYTE: + r300_attr._signed = 1; + r300_attr.normalize = input->Normalized; + r300_attr.data_type = R300_DATA_TYPE_BYTE; + break; + case GL_UNSIGNED_SHORT: + r300_attr._signed = 0; + r300_attr.normalize = input->Normalized; + switch (input->Size) { + case 1: + case 2: + r300_attr.data_type = R300_DATA_TYPE_SHORT_2; + break; + case 3: + case 4: + r300_attr.data_type = R300_DATA_TYPE_SHORT_4; + break; + } + break; + case GL_UNSIGNED_BYTE: + r300_attr._signed = 0; + r300_attr.normalize = input->Normalized; + if (input->Format == GL_BGRA) + r300_attr.data_type = R300_DATA_TYPE_D3DCOLOR; + else + r300_attr.data_type = R300_DATA_TYPE_BYTE; + break; + + default: + case GL_DOUBLE: + case GL_INT: + case GL_UNSIGNED_INT: + assert(0); + break; + } + + switch (input->Size) { + case 4: + r300_attr.swizzle = SWIZZLE_XYZW; + break; + case 3: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + break; + case 2: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 1: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + } + + r300_attr.write_mask = MASK_XYZW; + + vbuf->attribs[vbuf->num_attribs] = r300_attr; + ++vbuf->num_attribs; +} + +static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__); + { + int i, tmp; + + tmp = r300->selected_vp->code.InputsRead; + i = 0; + vbuf->num_attribs = 0; + while (tmp) { + /* find first enabled bit */ + while (!(tmp & 1)) { + tmp >>= 1; + ++i; + } + + r300TranslateAttrib(ctx, i, count, arrays[i]); + + tmp >>= 1; + ++i; + } + } + + r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); + if (r300->fallback) + return; +} + +static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLuint stride; + int ret; + int i, index; + radeon_print(RADEON_RENDER, RADEON_VERBOSE, + "%s: count %d num_attribs %d\n", + __func__, count, vbuf->num_attribs); + + for (index = 0; index < vbuf->num_attribs; index++) { + struct radeon_aos *aos = &r300->radeon.tcl.aos[index]; + i = vbuf->attribs[index].element; + + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) { + + r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]); + } else { + if (input[i]->BufferObj->Name) { + if (stride % 4 != 0) { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]); + vbuf->attribs[index].is_named_bo = GL_FALSE; + } else { + vbuf->attribs[index].stride = input[i]->StrideB; + vbuf->attribs[index].bo_offset = (intptr_t) input[i]->Ptr; + vbuf->attribs[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + vbuf->attribs[index].is_named_bo = GL_TRUE; + } + } else { + + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } else { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32); + radeon_bo_map(vbuf->attribs[index].bo, 1); + assert(vbuf->attribs[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset); + switch (vbuf->attribs[index].dwords) { + case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break; + default: assert(0); break; + } + radeon_bo_unmap(vbuf->attribs[index].bo); + + } + } + + aos->count = vbuf->attribs[index].stride == 0 ? 1 : count; + aos->stride = vbuf->attribs[index].stride / sizeof(float); + aos->components = vbuf->attribs[index].dwords; + aos->bo = vbuf->attribs[index].bo; + aos->offset = vbuf->attribs[index].bo_offset; + + if (vbuf->attribs[index].is_named_bo) { + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[index].bo, RADEON_GEM_DOMAIN_GTT, 0); + } + } + + r300->radeon.tcl.aos_count = vbuf->num_attribs; + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, ret); + +} + +static void r300FreeData(GLcontext *ctx) +{ + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__); + r300ContextPtr r300 = R300_CONTEXT(ctx); + { + int i; + + for (i = 0; i < r300->vbuf.num_attribs; i++) { + if (!r300->vbuf.attribs[i].is_named_bo) { + radeon_bo_unref(r300->vbuf.attribs[i].bo); + } + r300->radeon.tcl.aos[i].bo = NULL; + } + } + + { + if (r300->ind_buf.bo != NULL) { + radeon_bo_unref(r300->ind_buf.bo); + } + } +} + +static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx, + GLuint nr_prims, const struct _mesa_prim *prim) +{ + struct r300_context *r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLboolean flushed; + GLuint dwords; + GLuint state_size; + int i; + GLuint extra_prims = 0; + + /* Check for primitive splitting. */ + for (i = 0; i < nr_prims; ++i) { + const GLuint num_verts = r300NumVerts(r300, prim[i].count, prim[i].mode); + extra_prims += num_verts/(65535 - 32); + } + nr_prims += extra_prims; + + dwords = 2*CACHE_FLUSH_BUFSZ; + dwords += PRE_EMIT_STATE_BUFSZ; + dwords += (AOS_BUFSZ(vbuf->num_attribs) + + SCISSORS_BUFSZ*2 + + FIREAOS_BUFSZ )*nr_prims; + + state_size = radeonCountStateEmitSize(&r300->radeon); + flushed = rcommonEnsureCmdBufSpace(&r300->radeon, + dwords + state_size, + __FUNCTION__); + if (flushed) + dwords += radeonCountStateEmitSize(&r300->radeon); + else + dwords += state_size; + + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + return dwords; +} + +static GLboolean r300TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + struct r300_context *r300 = R300_CONTEXT(ctx); + GLuint i; + + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: %u (%d-%d) cs begin at %d\n", + __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw ); + + if (ctx->NewState) + _mesa_update_state( ctx ); + + if (r300->options.hw_tcl_enabled) + _tnl_UpdateFixedFunctionProgram(ctx); + + r300UpdateShaders(r300); + + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); + + r300SetVertexFormat(ctx, arrays, max_index + 1); + + if (r300->fallback) + return GL_FALSE; + + r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten); + + r300UpdateShaderStates(r300); + + /* ensure we have the cmd buf space in advance to cover + * the state + DMA AOS pointers */ + GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims, prim) + + r300->radeon.cmdbuf.cs->cdw; + + r300SetupIndexBuffer(ctx, ib); + + r300AllocDmaRegions(ctx, arrays, max_index + 1); + + if (r300->fallback) + return GL_FALSE; + + r300EmitCacheFlush(r300); + radeonEmitState(&r300->radeon); + + for (i = 0; i < nr_prims; ++i) { + r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode); + } + + r300EmitCacheFlush(r300); + + r300FreeData(ctx); + + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: %u (%d-%d) cs ending at %d\n", + __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw ); + + if (emit_end < r300->radeon.cmdbuf.cs->cdw) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", r300->radeon.cmdbuf.cs->cdw - emit_end); + + return GL_TRUE; +} + +static void r300DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + GLboolean retval; + + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } + + if (min_index) { + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, + "%s: Rebasing primitives. %p nr_prims %d min_index %u max_index %u\n", + __func__, prim, nr_prims, min_index, max_index); + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); + return; + } + + /* Make an attempt at drawing */ + retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +void r300InitDraw(GLcontext *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + + vbo->draw_prims = r300DrawPrims; +} diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c new file mode 100644 index 0000000000..a24d431611 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -0,0 +1,135 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Maciej Cencora <m.cencora@gmail.com> + */ + +#include "main/glheader.h" +#include "main/mtypes.h" +#include "main/colormac.h" +#include "main/imports.h" +#include "main/macros.h" + +#include "swrast_setup/swrast_setup.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" + +#include "r300_context.h" +#include "r300_emit.h" + + +GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) +{ + /* No idea what this value means. I have seen other values written to + * this register... */ + return 0x5555; +} + +GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) +{ + GLuint i, vic_1 = 0; + + if (InputsRead & (1 << VERT_ATTRIB_POS)) + vic_1 |= R300_INPUT_CNTL_POS; + + if (InputsRead & (1 << VERT_ATTRIB_NORMAL)) + vic_1 |= R300_INPUT_CNTL_NORMAL; + + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + vic_1 |= R300_INPUT_CNTL_COLOR; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { + vic_1 |= R300_INPUT_CNTL_TC0 << i; + } + + return vic_1; +} + +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes) +{ + GLuint ret = 0; + + if (vp_writes & (1 << VERT_RESULT_HPOS)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + if (vp_writes & (1 << VERT_RESULT_COL0)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT; + + if (vp_writes & (1 << VERT_RESULT_COL1)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; + + /* Two sided lighting works only if all 4 colors are written */ + if (vp_writes & (1 << VERT_RESULT_BFC0) || vp_writes & (1 << VERT_RESULT_BFC1)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + + if (vp_writes & (1 << VERT_RESULT_PSIZ)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + + return ret; +} + +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes) +{ + GLuint i, ret = 0, first_free_texcoord = 0; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) { + ret |= (4 << (3 * first_free_texcoord)); + ++first_free_texcoord; + } + } + + if (first_free_texcoord > 8) { + fprintf(stderr, "\tout of free texcoords\n"); + exit(-1); + } + + return ret; +} + +void r300EmitCacheFlush(r300ContextPtr rmesa) +{ + BATCH_LOCALS(&rmesa->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + END_BATCH(); + COMMIT_BATCH(); +} diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h new file mode 100644 index 0000000000..a456d8867c --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2005 Vladimir Dergachev. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Vladimir Dergachev <volodya@mindspring.com> + * Nicolai Haehnle <prefect_@gmx.net> + * Aapo Tahkola <aet@rasterburn.org> + * Ben Skeggs <darktama@iinet.net.au> + * Jerome Glisse <j.glisse@gmail.com> + */ + +/* This files defines functions for accessing R300 hardware. + */ +#ifndef __R300_EMIT_H__ +#define __R300_EMIT_H__ + +#include "main/glheader.h" +#include "r300_context.h" +#include "r300_cmdbuf.h" + +static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn, + int reg, int count) +{ + if (!rscrn->kernel_mm) { + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.packet0.cmd_type = R300_CMD_PACKET0; + cmd.packet0.count = count; + cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8; + cmd.packet0.reglo = ((unsigned int)reg & 0x00FF); + + return cmd.u; + } + if (count) { + return CP_PACKET0(reg, count - 1); + } + return CP_PACKET2; +} + +static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.vpu.cmd_type = R300_CMD_VPU; + cmd.vpu.count = count; + cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; + cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + +static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn, + int addr, int count, int type, int clamp) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.r500fp.cmd_type = R300_CMD_R500FP; + cmd.r500fp.count = count; + cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; + cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0; + cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0; + cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + +static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.packet3.cmd_type = R300_CMD_PACKET3; + cmd.packet3.packet = packet; + + return cmd.u; +} + +static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, + unsigned short count) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + + cmd.delay.cmd_type = R300_CMD_CP_DELAY; + cmd.delay.count = count; + + return cmd.u; +} + +static INLINE uint32_t cmdwait(struct radeon_screen *rscrn, + unsigned char flags) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.wait.cmd_type = R300_CMD_WAIT; + cmd.wait.flags = flags; + + return cmd.u; +} + +static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.header.cmd_type = R300_CMD_END3D; + + return cmd.u; +} + +/** + * Write the header of a packet3 to the command buffer. + * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards. + */ +#define OUT_BATCH_PACKET3(packet, num_extra) do {\ + if (!b_l_rmesa->radeonScreen->kernel_mm) { \ + OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\ + R300_CMD_PACKET3_RAW)); \ + } else b_l_rmesa->cmdbuf.cs->section_cdw++;\ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } while(0) + +/** + * Must be sent to switch to 2d commands + */ +void static INLINE end_3d(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + + if (!radeon->radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdpacify(radeon->radeonScreen)); + END_BATCH(); + } +} + +void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count) +{ + BATCH_LOCALS(&rmesa->radeon); + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count)); + END_BATCH(); + } +} + +void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) +{ + BATCH_LOCALS(radeon); + uint32_t wait_until; + + if (!radeon->radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdwait(radeon->radeonScreen, flags)); + END_BATCH(); + } else { + switch(flags) { + case R300_WAIT_2D: + wait_until = (1 << 14); + break; + case R300_WAIT_3D: + wait_until = (1 << 15); + break; + case R300_NEW_WAIT_2D_3D: + wait_until = (1 << 14) | (1 << 15); + break; + case R300_NEW_WAIT_2D_2D_CLEAN: + wait_until = (1 << 14) | (1 << 16) | (1 << 18); + break; + case R300_NEW_WAIT_3D_3D_CLEAN: + wait_until = (1 << 15) | (1 << 17) | (1 << 18); + break; + case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN: + wait_until = (1 << 14) | (1 << 16) | (1 << 18); + wait_until |= (1 << 15) | (1 << 17) | (1 << 18); + break; + default: + return; + } + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_BATCH(wait_until); + END_BATCH(); + } +} + +extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); +extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); + +extern void r300EmitCacheFlush(r300ContextPtr rmesa); + +extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c new file mode 100644 index 0000000000..7be2f74b5b --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Fragment program compiler. Perform transformations on the intermediate + * representation until the program is in a form where we can translate + * it more or less directly into machine-readable form. + * + * \author Ben Skeggs <darktama@iinet.net.au> + * \author Jerome Glisse <j.glisse@gmail.com> + */ + +#include "r300_fragprog_common.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "compiler/radeon_compiler.h" + +#include "radeon_mesa_to_rc.h" + + +static GLuint build_dts(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return RC_SWIZZLE_XYZZ; + case GL_INTENSITY: return RC_SWIZZLE_XYZW; + case GL_ALPHA: return RC_SWIZZLE_WWWW; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct gl_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + memset(state, 0, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_swizzle = build_dts(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + */ +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) +{ + int i; + + fp->wpos_attr = FRAG_ATTRIB_MAX; + if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) { + return; + } + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(compiler->Base.Program.InputsRead & (1 << i))) { + fp->wpos_attr = i; + break; + } + } + + /* No free texcoord found, fall-back to software rendering */ + if (fp->wpos_attr == FRAG_ATTRIB_MAX) + { + compiler->Base.Error = 1; + return; + } + + rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr, GL_FALSE); +} + +/** + * Rewrite fragment.fogcoord to use a texture coordinate slot. + * Note that fogcoord is forced into an X001 pattern, and this enforcement + * is done here. + * + * See also the counterpart rewriting for vertex programs. + */ +static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp) +{ + struct rc_src_register src; + int i; + + fp->fog_attr = FRAG_ATTRIB_MAX; + if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) { + return; + } + + for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i) + { + if (!(compiler->Base.Program.InputsRead & (1 << i))) { + fp->fog_attr = i; + break; + } + } + + /* No free texcoord found, fall-back to software rendering */ + if (fp->fog_attr == FRAG_ATTRIB_MAX) + { + compiler->Base.Error = 1; + return; + } + + memset(&src, 0, sizeof(src)); + src.File = RC_FILE_INPUT; + src.Index = fp->fog_attr; + src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src); +} + + +/** + * Reserve hardware temporary registers for the program inputs. + * + * @note This allocation is performed explicitly, because the order of inputs + * is determined by the RS hardware. + */ +static void allocate_hw_inputs( + struct r300_fragment_program_compiler * c, + void (*allocate)(void * data, unsigned input, unsigned hwreg), + void * mydata) +{ + GLuint InputsRead = c->Base.Program.InputsRead; + int i; + GLuint hwindex = 0; + + /* Primary colour */ + if (InputsRead & FRAG_BIT_COL0) + allocate(mydata, FRAG_ATTRIB_COL0, hwindex++); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + allocate(mydata, FRAG_ATTRIB_COL1, hwindex++); + InputsRead &= ~FRAG_BIT_COL1; + + /* Texcoords */ + for (i = 0; i < 8; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + allocate(mydata, FRAG_ATTRIB_TEX0+i, hwindex++); + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* Fogcoords treated as a texcoord */ + if (InputsRead & FRAG_BIT_FOGC) + allocate(mydata, FRAG_ATTRIB_FOGC, hwindex++); + InputsRead &= ~FRAG_BIT_FOGC; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + allocate(mydata, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; + + /* Anything else */ + if (InputsRead) + rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", InputsRead); +} + + +static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program_compiler compiler; + + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_PIXEL) ? GL_TRUE : GL_FALSE; + + compiler.code = &fp->code; + compiler.state = fp->state; + compiler.enable_shadow_ambient = GL_TRUE; + compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE; + compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32; + compiler.OutputDepth = FRAG_RESULT_DEPTH; + memset(compiler.OutputColor, 0, 4 * sizeof(unsigned)); + compiler.OutputColor[0] = FRAG_RESULT_COLOR; + compiler.AllocateHwInputs = &allocate_hw_inputs; + + if (compiler.Base.Debug) { + fflush(stderr); + printf("Fragment Program: Initial program:\n"); + _mesa_print_program(&cont->Base.Base); + fflush(stderr); + } + + radeon_mesa_to_rc_program(&compiler.Base, &cont->Base.Base); + + insert_WPOS_trailer(&compiler, fp); + + rewriteFog(&compiler, fp); + + r3xx_compile_fragment_program(&compiler); + + if (compiler.Base.is_r500) { + /* We need to support the non-KMS DRM interface, which + * artificially limits the number of instructions and + * constants which are available to us. + * + * See also the comment in r300_context.c where we + * set the MAX_NATIVE_xxx values. + */ + if (fp->code.code.r500.inst_end >= 255 || fp->code.constants.Count > 255) + rc_error(&compiler.Base, "Program is too big (upgrade to r300g to avoid this limitation).\n"); + } + + fp->error = compiler.Base.Error; + + fp->InputsRead = compiler.Base.Program.InputsRead; + + /* Clear the fog/wpos_attr if code accessing these + * attributes has been removed during compilation + */ + if (fp->fog_attr != FRAG_ATTRIB_MAX) { + if (!(fp->InputsRead & (1 << fp->fog_attr))) + fp->fog_attr = FRAG_ATTRIB_MAX; + } + + if (fp->wpos_attr != FRAG_ATTRIB_MAX) { + if (!(fp->InputsRead & (1 << fp->wpos_attr))) + fp->wpos_attr = FRAG_ATTRIB_MAX; + } + + rc_destroy(&compiler.Base); +} + +struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program_cont *fp_list; + struct r300_fragment_program *fp; + struct r300_fragment_program_external_state state; + + fp_list = (struct r300_fragment_program_cont *)ctx->FragmentProgram._Current; + build_state(r300, ctx->FragmentProgram._Current, &state); + + fp = fp_list->progs; + while (fp) { + if (memcmp(&fp->state, &state, sizeof(state)) == 0) { + return r300->selected_fp = fp; + } + fp = fp->next; + } + + fp = calloc(1, sizeof(struct r300_fragment_program)); + + fp->state = state; + + fp->next = fp_list->progs; + fp_list->progs = fp; + + translate_fragment_program(ctx, fp_list, fp); + + return r300->selected_fp = fp; +} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h new file mode 100644 index 0000000000..3d64c08cee --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_COMMON_H_ +#define __R300_FRAGPROG_COMMON_H_ + +#include "main/mtypes.h" + +#include "r300_context.h" + +struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h new file mode 100644 index 0000000000..ac93563ed9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -0,0 +1,3313 @@ +/************************************************************************** + +Copyright (C) 2004-2005 Nicolai Haehnle et al. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* *INDENT-OFF* */ + +#ifndef _R300_REG_H +#define _R300_REG_H + +#define R300_MC_INIT_MISC_LAT_TIMER 0x180 +# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28 + + +#define R300_MC_INIT_GFX_LAT_TIMER 0x154 +# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28 + +/* + * This file contains registers and constants for the R300. They have been + * found mostly by examining command buffers captured using glxtest, as well + * as by extrapolating some known registers and constants from the R200. + * I am fairly certain that they are correct unless stated otherwise + * in comments. + */ + +#define R300_SE_VPORT_XSCALE 0x1D98 +#define R300_SE_VPORT_XOFFSET 0x1D9C +#define R300_SE_VPORT_YSCALE 0x1DA0 +#define R300_SE_VPORT_YOFFSET 0x1DA4 +#define R300_SE_VPORT_ZSCALE 0x1DA8 +#define R300_SE_VPORT_ZOFFSET 0x1DAC + +#define R300_VAP_PORT_IDX0 0x2040 +/* + * Vertex Array Processing (VAP) Control + */ +#define R300_VAP_CNTL 0x2080 +# define R300_PVS_NUM_SLOTS_SHIFT 0 +# define R300_PVS_NUM_CNTLRS_SHIFT 4 +# define R300_PVS_NUM_FPUS_SHIFT 8 +# define R300_VF_MAX_VTX_NUM_SHIFT 18 +# define R300_GL_CLIP_SPACE_DEF (0 << 22) +# define R300_DX_CLIP_SPACE_DEF (1 << 22) +# define R500_TCL_STATE_OPTIMIZATION (1 << 23) + +/* This register is written directly and also starts data section + * in many 3d CP_PACKET3's + */ +#define R300_VAP_VF_CNTL 0x2084 +# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 +# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) +# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) +# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) +# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) +# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) +# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) + +# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 + /* State based - direct writes to registers trigger vertex + generation */ +# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) + + /* I don't think I saw these three used.. */ +# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 +# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 +# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 + + /* index size - when not set the indices are assumed to be 16 bit */ +# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) + /* number of vertices */ +# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 + +#define R500_VAP_INDEX_OFFSET 0x208c + +#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 +# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) + +#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 +# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1 +# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 +# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 + +#define R300_SE_VTE_CNTL 0x20b0 +# define R300_VPORT_X_SCALE_ENA (1 << 0) +# define R300_VPORT_X_OFFSET_ENA (1 << 1) +# define R300_VPORT_Y_SCALE_ENA (1 << 2) +# define R300_VPORT_Y_OFFSET_ENA (1 << 3) +# define R300_VPORT_Z_SCALE_ENA (1 << 4) +# define R300_VPORT_Z_OFFSET_ENA (1 << 5) +# define R300_VTX_XY_FMT (1 << 8) +# define R300_VTX_Z_FMT (1 << 9) +# define R300_VTX_W0_FMT (1 << 10) +# define R300_SERIAL_PROC_ENA (1 << 11) + +/* BEGIN: Vertex data assembly - lots of uncertainties */ + +/* gap */ + +/* Maximum Vertex Indx Clamp */ +#define R300_VAP_VF_MAX_VTX_INDX 0x2134 +/* Minimum Vertex Indx Clamp */ +#define R300_VAP_VF_MIN_VTX_INDX 0x2138 + +/** Vertex assembler/processor control status */ +#define R300_VAP_CNTL_STATUS 0x2140 +/* No swap at all (default) */ +# define R300_VC_NO_SWAP (0 << 0) +/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */ +# define R300_VC_16BIT_SWAP (1 << 0) +/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */ +# define R300_VC_32BIT_SWAP (2 << 0) +/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */ +# define R300_VC_HALF_DWORD_SWAP (3 << 0) +/* The TCL engine will not be used (as it is logically or even physically removed) */ +# define R300_VAP_TCL_BYPASS (1 << 8) +/* Read only flag if TCL engine is busy. */ +# define R300_VAP_PVS_BUSY (1 << 11) +/* TODO: gap for MAX_MPS */ +/* Read only flag if the vertex store is busy. */ +# define R300_VAP_VS_BUSY (1 << 24) +/* Read only flag if the reciprocal engine is busy. */ +# define R300_VAP_RCP_BUSY (1 << 25) +/* Read only flag if the viewport transform engine is busy. */ +# define R300_VAP_VTE_BUSY (1 << 26) +/* Read only flag if the memory interface unit is busy. */ +# define R300_VAP_MUI_BUSY (1 << 27) +/* Read only flag if the vertex cache is busy. */ +# define R300_VAP_VC_BUSY (1 << 28) +/* Read only flag if the vertex fetcher is busy. */ +# define R300_VAP_VF_BUSY (1 << 29) +/* Read only flag if the register pipeline is busy. */ +# define R300_VAP_REGPIPE_BUSY (1 << 30) +/* Read only flag if the VAP engine is busy. */ +# define R300_VAP_VAP_BUSY (1 << 31) + +/* gap */ + +/* Where do we get our vertex data? + * + * Vertex data either comes either from immediate mode registers or from + * vertex arrays. + * There appears to be no mixed mode (though we can force the pitch of + * vertex arrays to 0, effectively reusing the same element over and over + * again). + * + * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure + * if these registers influence vertex array processing. + * + * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. + * + * In both cases, vertex attributes are then passed through INPUT_ROUTE. + * + * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data + * into the vertex processor's input registers. + * The first word routes the first input, the second word the second, etc. + * The corresponding input is routed into the register with the given index. + * The list is ended by a word with INPUT_ROUTE_END set. + * + * Always set COMPONENTS_4 in immediate mode. + */ + +#define R300_VAP_PROG_STREAM_CNTL_0 0x2150 +# define R300_DATA_TYPE_0_SHIFT 0 +# define R300_DATA_TYPE_FLOAT_1 0 +# define R300_DATA_TYPE_FLOAT_2 1 +# define R300_DATA_TYPE_FLOAT_3 2 +# define R300_DATA_TYPE_FLOAT_4 3 +# define R300_DATA_TYPE_BYTE 4 +# define R300_DATA_TYPE_D3DCOLOR 5 +# define R300_DATA_TYPE_SHORT_2 6 +# define R300_DATA_TYPE_SHORT_4 7 +# define R300_DATA_TYPE_VECTOR_3_TTT 8 +# define R300_DATA_TYPE_VECTOR_3_EET 9 +# define R300_DATA_TYPE_FLT16_2 11 +# define R300_DATA_TYPE_FLT16_4 12 + +# define R300_SKIP_DWORDS_SHIFT 4 +# define R300_DST_VEC_LOC_SHIFT 8 +# define R300_LAST_VEC (1 << 13) +# define R300_SIGNED (1 << 14) +# define R300_NORMALIZE (1 << 15) +# define R300_DATA_TYPE_1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_1 0x2154 +#define R300_VAP_PROG_STREAM_CNTL_2 0x2158 +#define R300_VAP_PROG_STREAM_CNTL_3 0x215C +#define R300_VAP_PROG_STREAM_CNTL_4 0x2160 +#define R300_VAP_PROG_STREAM_CNTL_5 0x2164 +#define R300_VAP_PROG_STREAM_CNTL_6 0x2168 +#define R300_VAP_PROG_STREAM_CNTL_7 0x216C +/* gap */ + +/* Notes: + * - always set up to produce at least two attributes: + * if vertex program uses only position, fglrx will set normal, too + * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. + */ +#define R300_VAP_VTX_STATE_CNTL 0x2180 +# define R300_COLOR_0_ASSEMBLY_SHIFT 0 +# define R300_SEL_COLOR 0 +# define R300_SEL_USER_COLOR_0 1 +# define R300_SEL_USER_COLOR_1 2 +# define R300_COLOR_1_ASSEMBLY_SHIFT 2 +# define R300_COLOR_2_ASSEMBLY_SHIFT 4 +# define R300_COLOR_3_ASSEMBLY_SHIFT 6 +# define R300_COLOR_4_ASSEMBLY_SHIFT 8 +# define R300_COLOR_5_ASSEMBLY_SHIFT 10 +# define R300_COLOR_6_ASSEMBLY_SHIFT 12 +# define R300_COLOR_7_ASSEMBLY_SHIFT 14 +# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16) + +/* + * Each bit in this field applies to the corresponding vector in the VSM + * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit + * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. + */ +#define R300_VAP_VSM_VTX_ASSM 0x2184 +# define R300_INPUT_CNTL_POS 0x00000001 +# define R300_INPUT_CNTL_NORMAL 0x00000002 +# define R300_INPUT_CNTL_COLOR 0x00000004 +# define R300_INPUT_CNTL_TC0 0x00000400 +# define R300_INPUT_CNTL_TC1 0x00000800 +# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */ +# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */ +# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */ +# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */ +# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ +# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ + +/* Programmable Stream Control Signed Normalize Control */ +#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc +# define SGN_NORM_ZERO 0 +# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 +# define SGN_NORM_NO_ZERO 2 + +/* gap */ + +/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 + * are set to a swizzling bit pattern, other words are 0. + * + * In immediate mode, the pattern is always set to xyzw. In vertex array + * mode, the swizzling pattern is e.g. used to set zw components in texture + * coordinates with only tweo components. + */ +#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 +# define R300_SWIZZLE0_SHIFT 0 +# define R300_SWIZZLE_SELECT_X_SHIFT 0 +# define R300_SWIZZLE_SELECT_Y_SHIFT 3 +# define R300_SWIZZLE_SELECT_Z_SHIFT 6 +# define R300_SWIZZLE_SELECT_W_SHIFT 9 + +# define R300_SWIZZLE_SELECT_X 0 +# define R300_SWIZZLE_SELECT_Y 1 +# define R300_SWIZZLE_SELECT_Z 2 +# define R300_SWIZZLE_SELECT_W 3 +# define R300_SWIZZLE_SELECT_FP_ZERO 4 +# define R300_SWIZZLE_SELECT_FP_ONE 5 +/* alternate forms for r300_emit.c */ +# define R300_INPUT_ROUTE_SELECT_X 0 +# define R300_INPUT_ROUTE_SELECT_Y 1 +# define R300_INPUT_ROUTE_SELECT_Z 2 +# define R300_INPUT_ROUTE_SELECT_W 3 +# define R300_INPUT_ROUTE_SELECT_ZERO 4 +# define R300_INPUT_ROUTE_SELECT_ONE 5 + +# define R300_WRITE_ENA_SHIFT 12 +# define R300_WRITE_ENA_X 1 +# define R300_WRITE_ENA_Y 2 +# define R300_WRITE_ENA_Z 4 +# define R300_WRITE_ENA_W 8 +# define R300_SWIZZLE1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec +#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc + +/* END: Vertex data assembly */ + +/* gap */ + +/* BEGIN: Upload vertex program and data */ + +/* + * The programmable vertex shader unit has a memory bank of unknown size + * that can be written to in 16 byte units by writing the address into + * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). + * + * Pointers into the memory bank are always in multiples of 16 bytes. + * + * The memory bank is divided into areas with fixed meaning. + * + * Starting at address UPLOAD_PROGRAM: Vertex program instructions. + * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), + * whereas the difference between known addresses suggests size 512. + * + * Starting at address UPLOAD_PARAMETERS: Vertex program parameters. + * Native reported limits and the VPI layout suggest size 256, whereas + * difference between known addresses suggests size 512. + * + * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the + * floating point pointsize. The exact purpose of this state is uncertain, + * as there is also the R300_RE_POINTSIZE register. + * + * Multiple vertex programs and parameter sets can be loaded at once, + * which could explain the size discrepancy. + */ +#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 +# define R300_PVS_CODE_START 0 +# define R300_MAX_PVS_CODE_LINES 256 +# define R500_MAX_PVS_CODE_LINES 1024 +# define R300_PVS_CONST_START 512 +# define R500_PVS_CONST_START 1024 +# define R300_MAX_PVS_CONST_VECS 256 +# define R500_MAX_PVS_CONST_VECS 1024 +# define R300_PVS_UCP_START 1024 +# define R500_PVS_UCP_START 1536 +# define R300_POINT_VPORT_SCALE_OFFSET 1030 +# define R500_POINT_VPORT_SCALE_OFFSET 1542 +# define R300_POINT_GEN_TEX_OFFSET 1031 +# define R500_POINT_GEN_TEX_OFFSET 1543 + +/* + * These are obsolete defines form r300_context.h, but they might give some + * clues when investigating the addresses further... + */ +#if 0 +#define VSF_DEST_PROGRAM 0x0 +#define VSF_DEST_MATRIX0 0x200 +#define VSF_DEST_MATRIX1 0x204 +#define VSF_DEST_MATRIX2 0x208 +#define VSF_DEST_VECTOR0 0x20c +#define VSF_DEST_VECTOR1 0x20d +#define VSF_DEST_UNKNOWN1 0x400 +#define VSF_DEST_UNKNOWN2 0x406 +#endif + +/* gap */ + +#define R300_VAP_PVS_UPLOAD_DATA 0x2208 + +/* END: Upload vertex program and data */ + +/* gap */ + +/* I do not know the purpose of this register. However, I do know that + * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL + * for normal rendering. + * + * 2007-11-05: This register is the user clip plane control register, but there + * also seems to be a rendering mode control; the NORMAL/CLEAR defines. + * + * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view + */ +#define R300_VAP_CLIP_CNTL 0x221C +# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_1 (1 << 1) +# define R300_VAP_UCP_ENABLE_2 (1 << 2) +# define R300_VAP_UCP_ENABLE_3 (1 << 3) +# define R300_VAP_UCP_ENABLE_4 (1 << 4) +# define R300_VAP_UCP_ENABLE_5 (1 << 5) +# define R300_PS_UCP_MODE_DIST_COP (0 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14) +# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14) +# define R300_CLIP_DISABLE (1 << 16) +# define R300_UCP_CULL_ONLY_ENABLE (1 << 17) +# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18) +# define R500_COLOR2_IS_TEXTURE (1 << 20) +# define R500_COLOR3_IS_TEXTURE (1 << 21) + +/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first + * plane is per-pixel and the second plane is per-vertex. + * + * This was determined by experimentation alone but I believe it is correct. + * + * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. + */ +#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220 +#define R300_VAP_GB_VERT_DISC_ADJ 0x2224 +#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 +#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c + +/* gap */ + +/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between + * rendering commands and overwriting vertex program parameters. + * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and + * avoids bugs caused by still running shaders reading bad data from memory. + */ +#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 + +/* This register is used to define the number of core clocks to wait for a + * vertex to be received by the VAP input controller (while the primitive + * path is backed up) before forcing any accumulated vertices to be submitted + * to the vertex processing path. + */ +#define VAP_PVS_VTX_TIMEOUT_REG 0x2288 +# define R300_2288_R300 0x00750000 /* -- nh */ +# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ + +/* gap */ + +/* Addresses are relative to the vertex program instruction area of the + * memory bank. PROGRAM_END points to the last instruction of the active + * program + * + * The meaning of the two UNKNOWN fields is obviously not known. However, + * experiments so far have shown that both *must* point to an instruction + * inside the vertex program, otherwise the GPU locks up. + * + * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and + * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to + * position takes place. + * + * Most likely this is used to ignore rest of the program in cases + * where group of verts arent visible. For some reason this "section" + * is sometimes accepted other instruction that have no relationship with + * position calculations. + */ +#define R300_VAP_PVS_CODE_CNTL_0 0x22D0 +# define R300_PVS_FIRST_INST_SHIFT 0 +# define R300_PVS_XYZW_VALID_INST_SHIFT 10 +# define R300_PVS_LAST_INST_SHIFT 20 +/* Addresses are relative to the vertex program parameters area. */ +#define R300_VAP_PVS_CONST_CNTL 0x22D4 +# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 +# define R300_PVS_MAX_CONST_ADDR_SHIFT 16 +#define R300_VAP_PVS_CODE_CNTL_1 0x22D8 +# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 +#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC + +/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for + * immediate vertices + */ +#define R300_VAP_VTX_COLOR_R 0x2464 +#define R300_VAP_VTX_COLOR_G 0x2468 +#define R300_VAP_VTX_COLOR_B 0x246C +#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */ +#define R300_VAP_VTX_POS_0_Y_1 0x2494 +#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */ +#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ +#define R300_VAP_VTX_POS_0_Y_2 0x24A4 +#define R300_VAP_VTX_POS_0_Z_2 0x24A8 +/* write 0 to indicate end of packet? */ +#define R300_VAP_VTX_END_OF_PKT 0x24AC + +/* gap */ + +/* These are values from r300_reg/r300_reg.h - they are known to be correct + * and are here so we can use one register file instead of several + * - Vladimir + */ +#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 +# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5) +# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16) + +#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 + +/* UNK30 seems to enables point to quad transformation on textures + * (or something closely related to that). + * This bit is rather fatal at the time being due to lackings at pixel + * shader side + * Specifies top of Raster pipe specific enable controls. + */ +#define R300_GB_ENABLE 0x4008 +# define R300_GB_POINT_STUFF_DISABLE (0 << 0) +# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */ +# define R300_GB_LINE_STUFF_DISABLE (0 << 1) +# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */ +# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2) +# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */ +# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4) +# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */ +# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */ + + /* each of the following is 2 bits wide */ +#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */ +#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */ +#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */ +# define R300_GB_TEX0_SOURCE_SHIFT 16 +# define R300_GB_TEX1_SOURCE_SHIFT 18 +# define R300_GB_TEX2_SOURCE_SHIFT 20 +# define R300_GB_TEX3_SOURCE_SHIFT 22 +# define R300_GB_TEX4_SOURCE_SHIFT 24 +# define R300_GB_TEX5_SOURCE_SHIFT 26 +# define R300_GB_TEX6_SOURCE_SHIFT 28 +# define R300_GB_TEX7_SOURCE_SHIFT 30 + +/* MSPOS - positions for multisample antialiasing (?) */ +#define R300_GB_MSPOS0 0x4010 + /* shifts - each of the fields is 4 bits */ +# define R300_GB_MSPOS0__MS_X0_SHIFT 0 +# define R300_GB_MSPOS0__MS_Y0_SHIFT 4 +# define R300_GB_MSPOS0__MS_X1_SHIFT 8 +# define R300_GB_MSPOS0__MS_Y1_SHIFT 12 +# define R300_GB_MSPOS0__MS_X2_SHIFT 16 +# define R300_GB_MSPOS0__MS_Y2_SHIFT 20 +# define R300_GB_MSPOS0__MSBD0_Y 24 +# define R300_GB_MSPOS0__MSBD0_X 28 + +#define R300_GB_MSPOS1 0x4014 +# define R300_GB_MSPOS1__MS_X3_SHIFT 0 +# define R300_GB_MSPOS1__MS_Y3_SHIFT 4 +# define R300_GB_MSPOS1__MS_X4_SHIFT 8 +# define R300_GB_MSPOS1__MS_Y4_SHIFT 12 +# define R300_GB_MSPOS1__MS_X5_SHIFT 16 +# define R300_GB_MSPOS1__MS_Y5_SHIFT 20 +# define R300_GB_MSPOS1__MSBD1 24 + +/* Specifies the graphics pipeline configuration for rasterization. */ +#define R300_GB_TILE_CONFIG 0x4018 +# define R300_GB_TILE_DISABLE (0 << 0) +# define R300_GB_TILE_ENABLE (1 << 0) +# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */ +# define R300_GB_TILE_SIZE_8 (0 << 4) +# define R300_GB_TILE_SIZE_16 (1 << 4) +# define R300_GB_TILE_SIZE_32 (2 << 4) +# define R300_GB_SUPER_SIZE_1 (0 << 6) +# define R300_GB_SUPER_SIZE_2 (1 << 6) +# define R300_GB_SUPER_SIZE_4 (2 << 6) +# define R300_GB_SUPER_SIZE_8 (3 << 6) +# define R300_GB_SUPER_SIZE_16 (4 << 6) +# define R300_GB_SUPER_SIZE_32 (5 << 6) +# define R300_GB_SUPER_SIZE_64 (6 << 6) +# define R300_GB_SUPER_SIZE_128 (7 << 6) +# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */ +# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */ +# define R300_GB_SUPER_TILE_A (0 << 15) +# define R300_GB_SUPER_TILE_B (1 << 15) +# define R300_GB_SUBPIXEL_1_12 (0 << 16) +# define R300_GB_SUBPIXEL_1_16 (1 << 16) +# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) +# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) +# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) +# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21) +# define GB_TILE_CONFIG_SUBPRECISION (0 << 22) +# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) +# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) +# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) +# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) + +/* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */ +#define R300_GB_FIFO_SIZE 0x4024 + /* each of the following is 2 bits wide */ +#define R300_GB_FIFO_SIZE_32 0 +#define R300_GB_FIFO_SIZE_64 1 +#define R300_GB_FIFO_SIZE_128 2 +#define R300_GB_FIFO_SIZE_256 3 +# define R300_SC_IFIFO_SIZE_SHIFT 0 +# define R300_SC_TZFIFO_SIZE_SHIFT 2 +# define R300_SC_BFIFO_SIZE_SHIFT 4 + +# define R300_US_OFIFO_SIZE_SHIFT 12 +# define R300_US_WFIFO_SIZE_SHIFT 14 + /* the following use the same constants as above, but meaning is + is times 2 (i.e. instead of 32 words it means 64 */ +# define R300_RS_TFIFO_SIZE_SHIFT 6 +# define R300_RS_CFIFO_SIZE_SHIFT 8 +# define R300_US_RAM_SIZE_SHIFT 10 + /* watermarks, 3 bits wide */ +# define R300_RS_HIGHWATER_COL_SHIFT 16 +# define R300_RS_HIGHWATER_TEX_SHIFT 19 +# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ +# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 + +#define GB_Z_PEQ_CONFIG 0x4028 +# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) +# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) + +/* Specifies various polygon specific selects (fog, depth, perspective). */ +#define R300_GB_SELECT 0x401c +# define R300_GB_FOG_SELECT_C0A (0 << 0) +# define R300_GB_FOG_SELECT_C1A (1 << 0) +# define R300_GB_FOG_SELECT_C2A (2 << 0) +# define R300_GB_FOG_SELECT_C3A (3 << 0) +# define R300_GB_FOG_SELECT_1_1_W (4 << 0) +# define R300_GB_FOG_SELECT_Z (5 << 0) +# define R300_GB_DEPTH_SELECT_Z (0 << 3) +# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3) +# define R300_GB_W_SELECT_1_W (0 << 4) +# define R300_GB_W_SELECT_1 (1 << 4) +# define R300_GB_FOG_STUFF_DISABLE (0 << 5) +# define R300_GB_FOG_STUFF_ENABLE (1 << 5) +# define R300_GB_FOG_STUFF_TEX_SHIFT 6 +# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0 +# define R300_GB_FOG_STUFF_COMP_SHIFT 10 +# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00 + +/* Specifies the graphics pipeline configuration for antialiasing. */ +#define GB_AA_CONFIG 0x4020 +# define GB_AA_CONFIG_AA_DISABLE (0 << 0) +# define GB_AA_CONFIG_AA_ENABLE (1 << 0) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) + +/* Selects which of 4 pipes are active. */ +#define GB_PIPE_SELECT 0x402c +# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 +# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 +# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 +# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 +# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +# define GB_PIPE_SELECT_MAX_PIPE 12 +# define GB_PIPE_SELECT_BAD_PIPES 14 +# define GB_PIPE_SELECT_CONFIG_PIPES 18 + + +/* Specifies the sizes of the various FIFO`s in the sc/rs. */ +#define GB_FIFO_SIZE1 0x4070 +/* High water mark for SC input fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f +/* High water mark for SC input fifo (B) */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 +/* High water mark for RS colors' fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 +# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 +/* High water mark for RS textures' fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 +# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 + +/* This table specifies the source location and format for up to 16 texture + * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) + */ +#define R500_RS_IP_0 0x4074 +#define R500_RS_IP_1 0x4078 +#define R500_RS_IP_2 0x407C +#define R500_RS_IP_3 0x4080 +#define R500_RS_IP_4 0x4084 +#define R500_RS_IP_5 0x4088 +#define R500_RS_IP_6 0x408C +#define R500_RS_IP_7 0x4090 +#define R500_RS_IP_8 0x4094 +#define R500_RS_IP_9 0x4098 +#define R500_RS_IP_10 0x409C +#define R500_RS_IP_11 0x40A0 +#define R500_RS_IP_12 0x40A4 +#define R500_RS_IP_13 0x40A8 +#define R500_RS_IP_14 0x40AC +#define R500_RS_IP_15 0x40B0 +#define R500_RS_IP_PTR_K0 62 +#define R500_RS_IP_PTR_K1 63 +#define R500_RS_IP_TEX_PTR_S_SHIFT 0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 +#define R500_RS_IP_COL_PTR_SHIFT 24 +#define R500_RS_IP_COL_FMT_SHIFT 27 +# define R500_RS_COL_PTR(x) ((x) << 24) +# define R500_RS_COL_FMT(x) ((x) << 27) +/* gap */ +#define R500_RS_IP_OFFSET_DIS (0 << 31) +#define R500_RS_IP_OFFSET_EN (1 << 31) + +/* gap */ + +/* Zero to flush caches. */ +#define R300_TX_INVALTAGS 0x4100 +#define R300_TX_FLUSH 0x0 + +/* The upper enable bits are guessed, based on fglrx reported limits. */ +#define R300_TX_ENABLE 0x4104 +# define R300_TX_ENABLE_0 (1 << 0) +# define R300_TX_ENABLE_1 (1 << 1) +# define R300_TX_ENABLE_2 (1 << 2) +# define R300_TX_ENABLE_3 (1 << 3) +# define R300_TX_ENABLE_4 (1 << 4) +# define R300_TX_ENABLE_5 (1 << 5) +# define R300_TX_ENABLE_6 (1 << 6) +# define R300_TX_ENABLE_7 (1 << 7) +# define R300_TX_ENABLE_8 (1 << 8) +# define R300_TX_ENABLE_9 (1 << 9) +# define R300_TX_ENABLE_10 (1 << 10) +# define R300_TX_ENABLE_11 (1 << 11) +# define R300_TX_ENABLE_12 (1 << 12) +# define R300_TX_ENABLE_13 (1 << 13) +# define R300_TX_ENABLE_14 (1 << 14) +# define R300_TX_ENABLE_15 (1 << 15) + +#define R500_TX_FILTER_4 0x4110 +# define R500_TX_WEIGHT_1_SHIFT (0) +# define R500_TX_WEIGHT_0_SHIFT (11) +# define R500_TX_WEIGHT_PAIR (1<<22) +# define R500_TX_PHASE_SHIFT (23) +# define R500_TX_DIRECTION_HORIZONTAL (0<<27) +# define R500_TX_DIRECTION_VERITCAL (1<<27) + +/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_S0 0x4200 + +/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_T0 0x4204 + +/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_S1 0x4208 + +/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_T1 0x420c + +/* Specifies amount to shift integer position of vertex (screen space) before + * converting to float for triangle stipple. + */ +#define R300_GA_TRIANGLE_STIPPLE 0x4214 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 + +/* The pointsize is given in multiples of 6. The pointsize can be enormous: + * Clear() renders a single point that fills the entire framebuffer. + * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in + * 8b precision). + */ +#define R300_GA_POINT_SIZE 0x421C +# define R300_POINTSIZE_Y_SHIFT 0 +# define R300_POINTSIZE_Y_MASK 0x0000ffff +# define R300_POINTSIZE_X_SHIFT 16 +# define R300_POINTSIZE_X_MASK 0xffff0000 +# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) + +/* Blue fill color */ +#define R500_GA_FILL_R 0x4220 + +/* Blue fill color */ +#define R500_GA_FILL_G 0x4224 + +/* Blue fill color */ +#define R500_GA_FILL_B 0x4228 + +/* Alpha fill color */ +#define R500_GA_FILL_A 0x422c + + +/* Specifies maximum and minimum point & sprite sizes for per vertex size + * specification. The lower part (15:0) is MIN and (31:16) is max. + */ +#define R300_GA_POINT_MINMAX 0x4230 +# define R300_GA_POINT_MINMAX_MIN_SHIFT 0 +# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0) +# define R300_GA_POINT_MINMAX_MAX_SHIFT 16 +# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFF << 16) + +/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b + * subprecision); (16.0) fixed format. + * + * The line width is given in multiples of 6. + * In default mode lines are classified as vertical lines. + * HO: horizontal + * VE: vertical or horizontal + * HO & VE: no classification + */ +#define R300_GA_LINE_CNTL 0x4234 +# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0 +# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff +# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */ +# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */ +# define R500_GA_LINE_CNTL_SORT_NO (0 << 18) +# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18) +/** TODO: looks wrong */ +# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6) +/** TODO: looks wrong */ +# define R300_LINE_CNT_HO (1 << 16) +/** TODO: looks wrong */ +# define R300_LINE_CNT_VE (1 << 17) + +/* Line Stipple configuration information. */ +#define R300_GA_LINE_STIPPLE_CONFIG 0x4238 +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc + +/* Used to load US instructions and constants */ +#define R500_GA_US_VECTOR_INDEX 0x4250 +# define R500_GA_US_VECTOR_INDEX_SHIFT 0 +# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff +# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) +# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) +# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) +# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) + +/* Data register for loading US instructions and constants */ +#define R500_GA_US_VECTOR_DATA 0x4254 + +/* Specifies color properties and mappings of textures. */ +#define R500_GA_COLOR_CONTROL_PS3 0x4258 +# define R500_TEX0_SHADING_PS3_SOLID (0 << 0) +# define R500_TEX0_SHADING_PS3_FLAT (1 << 0) +# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0) +# define R500_TEX1_SHADING_PS3_SOLID (0 << 2) +# define R500_TEX1_SHADING_PS3_FLAT (1 << 2) +# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2) +# define R500_TEX2_SHADING_PS3_SOLID (0 << 4) +# define R500_TEX2_SHADING_PS3_FLAT (1 << 4) +# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4) +# define R500_TEX3_SHADING_PS3_SOLID (0 << 6) +# define R500_TEX3_SHADING_PS3_FLAT (1 << 6) +# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6) +# define R500_TEX4_SHADING_PS3_SOLID (0 << 8) +# define R500_TEX4_SHADING_PS3_FLAT (1 << 8) +# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8) +# define R500_TEX5_SHADING_PS3_SOLID (0 << 10) +# define R500_TEX5_SHADING_PS3_FLAT (1 << 10) +# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10) +# define R500_TEX6_SHADING_PS3_SOLID (0 << 12) +# define R500_TEX6_SHADING_PS3_FLAT (1 << 12) +# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12) +# define R500_TEX7_SHADING_PS3_SOLID (0 << 14) +# define R500_TEX7_SHADING_PS3_FLAT (1 << 14) +# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14) +# define R500_TEX8_SHADING_PS3_SOLID (0 << 16) +# define R500_TEX8_SHADING_PS3_FLAT (1 << 16) +# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16) +# define R500_TEX9_SHADING_PS3_SOLID (0 << 18) +# define R500_TEX9_SHADING_PS3_FLAT (1 << 18) +# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18) +# define R500_TEX10_SHADING_PS3_SOLID (0 << 20) +# define R500_TEX10_SHADING_PS3_FLAT (1 << 20) +# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20) +# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) +# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) + +/* Returns idle status of various G3D block, captured when GA_IDLE written or + * when hard or soft reset asserted. + */ +#define R500_GA_IDLE 0x425c +# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0) +# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1) +# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2) +# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3) +# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4) +# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5) +# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6) +# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7) +# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8) +# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9) +# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10) +# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11) +# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12) +# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13) +# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14) +# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15) +# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16) +# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17) +# define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18) +# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19) +# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20) +# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21) +# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22) +# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23) +# define R500_GA_IDLE_SU_IDLE (0 << 24) +# define R500_GA_IDLE_GA_IDLE (0 << 25) +# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26) + +/* Current value of stipple accumulator. */ +#define R300_GA_LINE_STIPPLE_VALUE 0x4260 + +/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S0 0x4264 +/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S1 0x4268 + +/* GA Input fifo high water marks */ +#define R500_GA_FIFO_CNTL 0x4270 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 +# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 +# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 + +/* GA enhance/tweaks */ +#define R300_GA_ENHANCE 0x4274 +# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) +# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ +# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) +# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ +# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ +# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ +# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) +# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */ + +#define R300_GA_COLOR_CONTROL 0x4278 +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0) +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0) +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16) + +/** TODO: might be candidate for removal */ +# define R300_RE_SHADE_MODEL_SMOOTH ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) +/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ +# define R300_RE_SHADE_MODEL_FLAT ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD) + +/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ +#define R300_GA_SOLID_RG 0x427c +# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0 +# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff +# define GA_SOLID_RG_COLOR_RED_SHIFT 16 +# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000 +/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */ +#define R300_GA_SOLID_BA 0x4280 +# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0 +# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff +# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16 +# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000 + +/* Polygon Mode + * Dangerous + */ +#define R300_GA_POLY_MODE 0x4288 +# define R300_GA_POLY_MODE_DISABLE (0 << 0) +# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ +/* reserved */ +# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) +/* reserved */ +# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) +/* reserved */ + +/* Specifies the rouding mode for geometry & color SPFP to FP conversions. */ +#define R300_GA_ROUND_MODE 0x428c +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0) +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) +# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2) +# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2) +# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4) +# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5) +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6 +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0 + +/* Specifies x & y offsets for vertex data after conversion to FP. + * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b + * subprecision). + */ +#define R300_GA_OFFSET 0x4290 +# define R300_GA_OFFSET_X_OFFSET_SHIFT 0 +# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff +# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16 +# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000 + +/* Specifies the scale to apply to fog. */ +#define R300_GA_FOG_SCALE 0x4294 +/* Specifies the offset to apply to fog. */ +#define R300_GA_FOG_OFFSET 0x4298 +/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */ +#define R300_GA_SOFT_RESET 0x429c + +/* Not sure why there are duplicate of factor and constant values. + * My best guess so far is that there are seperate zbiases for test and write. + * Ordering might be wrong. + * Some of the tests indicate that fgl has a fallback implementation of zbias + * via pixel shaders. + */ +#define R300_SU_TEX_WRAP 0x42A0 +#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4 +#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8 +#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC +#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0 + +/* This register needs to be set to (1<<1) for RV350 to correctly + * perform depth test (see --vb-triangles in r300_demo) + * Don't know about other chips. - Vladimir + * This is set to 3 when GL_POLYGON_OFFSET_FILL is on. + * My guess is that there are two bits for each zbias primitive + * (FILL, LINE, POINT). + * One to enable depth test and one for depth write. + * Yet this doesnt explain why depth writes work ... + */ +#define R300_SU_POLY_OFFSET_ENABLE 0x42B4 +# define R300_FRONT_ENABLE (1 << 0) +# define R300_BACK_ENABLE (1 << 1) +# define R300_PARA_ENABLE (1 << 2) + +#define R300_SU_CULL_MODE 0x42B8 +# define R300_CULL_FRONT (1 << 0) +# define R300_CULL_BACK (1 << 1) +# define R300_FRONT_FACE_CCW (0 << 2) +# define R300_FRONT_FACE_CW (1 << 2) + +/* SU Depth Scale value */ +#define R300_SU_DEPTH_SCALE 0x42c0 +/* SU Depth Offset value */ +#define R300_SU_DEPTH_OFFSET 0x42c4 + +#define R300_SU_REG_DEST 0x42c8 +# define R300_RASTER_PIPE_SELECT_0 (1 << 0) +# define R300_RASTER_PIPE_SELECT_1 (1 << 1) +# define R300_RASTER_PIPE_SELECT_2 (1 << 2) +# define R300_RASTER_PIPE_SELECT_3 (1 << 3) +# define R300_RASTER_PIPE_SELECT_ALL 0xf + + +/* BEGIN: Rasterization / Interpolators - many guesses */ + +/* + * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends + * on the vertex program, *not* the fragment program) + */ +#define R300_RS_COUNT 0x4300 +# define R300_IT_COUNT_SHIFT 0 +# define R300_IT_COUNT_MASK 0x0000007f +# define R300_IC_COUNT_SHIFT 7 +# define R300_IC_COUNT_MASK 0x00000780 +# define R300_W_ADDR_SHIFT 12 +# define R300_W_ADDR_MASK 0x0003f000 +# define R300_HIRES_DIS (0 << 18) +# define R300_HIRES_EN (1 << 18) + +#define R300_RS_INST_COUNT 0x4304 +# define R300_RS_INST_COUNT_SHIFT 0 +# define R300_RS_INST_COUNT_MASK 0x0000000f +# define R300_RS_TX_OFFSET_SHIFT 5 +# define R300_RS_TX_OFFSET_MASK 0x000000e0 + +/* gap */ + +/* Only used for texture coordinates. + * Use the source field to route texture coordinate input from the + * vertex program to the desired interpolator. Note that the source + * field is relative to the outputs the vertex program *actually* + * writes. If a vertex program only writes texcoord[1], this will + * be source index 0. + * Set INTERP_USED on all interpolators that produce data used by + * the fragment program. INTERP_USED looks like a swizzling mask, + * but I haven't seen it used that way. + * + * Note: The _UNKNOWN constants are always set in their respective + * register. I don't know if this is necessary. + */ +#define R300_RS_IP_0 0x4310 +#define R300_RS_IP_1 0x4314 +#define R300_RS_IP_2 0x4318 +#define R300_RS_IP_3 0x431C +# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ +# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ +# define R300_RS_TEX_PTR(x) ((x) << 0) +# define R300_RS_COL_PTR(x) ((x) << 6) +# define R300_RS_COL_FMT(x) ((x) << 9) +# define R300_RS_COL_FMT_RGBA 0 +# define R300_RS_COL_FMT_RGB0 1 +# define R300_RS_COL_FMT_RGB1 2 +# define R300_RS_COL_FMT_000A 4 +# define R300_RS_COL_FMT_0000 5 +# define R300_RS_COL_FMT_0001 6 +# define R300_RS_COL_FMT_111A 8 +# define R300_RS_COL_FMT_1110 9 +# define R300_RS_COL_FMT_1111 10 +# define R300_RS_SEL_S(x) ((x) << 13) +# define R300_RS_SEL_T(x) ((x) << 16) +# define R300_RS_SEL_R(x) ((x) << 19) +# define R300_RS_SEL_Q(x) ((x) << 22) +# define R300_RS_SEL_C0 0 +# define R300_RS_SEL_C1 1 +# define R300_RS_SEL_C2 2 +# define R300_RS_SEL_C3 3 +# define R300_RS_SEL_K0 4 +# define R300_RS_SEL_K1 5 + + +/* */ +#define R500_RS_INST_0 0x4320 +#define R500_RS_INST_1 0x4324 +#define R500_RS_INST_2 0x4328 +#define R500_RS_INST_3 0x432c +#define R500_RS_INST_4 0x4330 +#define R500_RS_INST_5 0x4334 +#define R500_RS_INST_6 0x4338 +#define R500_RS_INST_7 0x433c +#define R500_RS_INST_8 0x4340 +#define R500_RS_INST_9 0x4344 +#define R500_RS_INST_10 0x4348 +#define R500_RS_INST_11 0x434c +#define R500_RS_INST_12 0x4350 +#define R500_RS_INST_13 0x4354 +#define R500_RS_INST_14 0x4358 +#define R500_RS_INST_15 0x435c +#define R500_RS_INST_TEX_ID_SHIFT 0 +#define R500_RS_INST_TEX_CN_WRITE (1 << 4) +#define R500_RS_INST_TEX_ADDR_SHIFT 5 +#define R500_RS_INST_COL_ID_SHIFT 12 +#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) +#define R500_RS_INST_COL_CN_WRITE (1 << 16) +#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) +#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) +#define R500_RS_INST_COL_ADDR_SHIFT 18 +#define R500_RS_INST_TEX_ADJ (1 << 25) +#define R500_RS_INST_W_CN (1 << 26) +#define R500_RS_INST_TEX_ID(x) ((x) << R500_RS_INST_TEX_ID_SHIFT) +#define R500_RS_INST_TEX_ADDR(x) ((x) << R500_RS_INST_TEX_ADDR_SHIFT) +#define R500_RS_INST_COL_ID(x) ((x) << R500_RS_INST_COL_ID_SHIFT) +#define R500_RS_INST_COL_ADDR(x) ((x) << R500_RS_INST_COL_ADDR_SHIFT) + +/* These DWORDs control how vertex data is routed into fragment program + * registers, after interpolators. + */ +#define R300_RS_INST_0 0x4330 +#define R300_RS_INST_1 0x4334 +#define R300_RS_INST_2 0x4338 +#define R300_RS_INST_3 0x433C /* GUESS */ +#define R300_RS_INST_4 0x4340 /* GUESS */ +#define R300_RS_INST_5 0x4344 /* GUESS */ +#define R300_RS_INST_6 0x4348 /* GUESS */ +#define R300_RS_INST_7 0x434C /* GUESS */ +# define R300_RS_INST_TEX_ID(x) ((x) << 0) +# define R300_RS_INST_TEX_CN_WRITE (1 << 3) +# define R300_RS_INST_TEX_ADDR_SHIFT 6 +# define R300_RS_INST_TEX_ADDR(x) ((x) << R300_RS_INST_TEX_ADDR_SHIFT) +# define R300_RS_INST_COL_ID(x) ((x) << 11) +# define R300_RS_INST_COL_CN_WRITE (1 << 14) +# define R300_RS_INST_COL_ADDR_SHIFT 17 +# define R300_RS_INST_COL_ADDR(x) ((x) << R300_RS_INST_COL_ADDR_SHIFT) +# define R300_RS_INST_TEX_ADJ (1 << 22) +# define R300_RS_COL_BIAS_UNUSED_SHIFT 23 + +/* END: Rasterization / Interpolators - many guesses */ + +/* Hierarchical Z Enable */ +#define R300_SC_HYPERZ 0x43a4 +# define R300_SC_HYPERZ_DISABLE (0 << 0) +# define R300_SC_HYPERZ_ENABLE (1 << 0) +# define R300_SC_HYPERZ_MIN (0 << 1) +# define R300_SC_HYPERZ_MAX (1 << 1) +# define R300_SC_HYPERZ_ADJ_256 (0 << 2) +# define R300_SC_HYPERZ_ADJ_128 (1 << 2) +# define R300_SC_HYPERZ_ADJ_64 (2 << 2) +# define R300_SC_HYPERZ_ADJ_32 (3 << 2) +# define R300_SC_HYPERZ_ADJ_16 (4 << 2) +# define R300_SC_HYPERZ_ADJ_8 (5 << 2) +# define R300_SC_HYPERZ_ADJ_4 (6 << 2) +# define R300_SC_HYPERZ_ADJ_2 (7 << 2) +# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5) +# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5) +# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6) +# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6) + +#define R300_SC_EDGERULE 0x43a8 + +/* BEGIN: Scissors and cliprects */ + +/* There are four clipping rectangles. Their corner coordinates are inclusive. + * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending + * on whether the pixel is inside cliprects 0-3, respectively. For example, + * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned + * the number 3 (binary 0011). + * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, + * the pixel is rasterized. + * + * In addition to this, there is a scissors rectangle. Only pixels inside the + * scissors rectangle are drawn. (coordinates are inclusive) + * + * For some reason, the top-left corner of the framebuffer is at (1440, 1440) + * for the purpose of clipping and scissors. + */ +#define R300_SC_CLIPRECT_TL_0 0x43B0 +#define R300_SC_CLIPRECT_BR_0 0x43B4 +#define R300_SC_CLIPRECT_TL_1 0x43B8 +#define R300_SC_CLIPRECT_BR_1 0x43BC +#define R300_SC_CLIPRECT_TL_2 0x43C0 +#define R300_SC_CLIPRECT_BR_2 0x43C4 +#define R300_SC_CLIPRECT_TL_3 0x43C8 +#define R300_SC_CLIPRECT_BR_3 0x43CC +# define R300_CLIPRECT_OFFSET 1440 +# define R300_CLIPRECT_MASK 0x1FFF +# define R300_CLIPRECT_X_SHIFT 0 +# define R300_CLIPRECT_X_MASK (0x1FFF << 0) +# define R300_CLIPRECT_Y_SHIFT 13 +# define R300_CLIPRECT_Y_MASK (0x1FFF << 13) +#define R300_SC_CLIP_RULE 0x43D0 +# define R300_CLIP_OUT (1 << 0) +# define R300_CLIP_0 (1 << 1) +# define R300_CLIP_1 (1 << 2) +# define R300_CLIP_10 (1 << 3) +# define R300_CLIP_2 (1 << 4) +# define R300_CLIP_20 (1 << 5) +# define R300_CLIP_21 (1 << 6) +# define R300_CLIP_210 (1 << 7) +# define R300_CLIP_3 (1 << 8) +# define R300_CLIP_30 (1 << 9) +# define R300_CLIP_31 (1 << 10) +# define R300_CLIP_310 (1 << 11) +# define R300_CLIP_32 (1 << 12) +# define R300_CLIP_320 (1 << 13) +# define R300_CLIP_321 (1 << 14) +# define R300_CLIP_3210 (1 << 15) + +/* gap */ + +#define R300_SC_SCISSORS_TL 0x43E0 +#define R300_SC_SCISSORS_BR 0x43E4 +# define R300_SCISSORS_OFFSET 1440 +# define R300_SCISSORS_X_SHIFT 0 +# define R300_SCISSORS_X_MASK (0x1FFF << 0) +# define R300_SCISSORS_Y_SHIFT 13 +# define R300_SCISSORS_Y_MASK (0x1FFF << 13) + +/* Screen door sample mask */ +#define R300_SC_SCREENDOOR 0x43e8 + +/* END: Scissors and cliprects */ + +/* BEGIN: Texture specification */ + +/* + * The texture specification dwords are grouped by meaning and not by texture + * unit. This means that e.g. the offset for texture image unit N is found in + * register TX_OFFSET_0 + (4*N) + */ +#define R300_TX_FILTER0_0 0x4400 +#define R300_TX_FILTER0_1 0x4404 +#define R300_TX_FILTER0_2 0x4408 +#define R300_TX_FILTER0_3 0x440c +#define R300_TX_FILTER0_4 0x4410 +#define R300_TX_FILTER0_5 0x4414 +#define R300_TX_FILTER0_6 0x4418 +#define R300_TX_FILTER0_7 0x441c +#define R300_TX_FILTER0_8 0x4420 +#define R300_TX_FILTER0_9 0x4424 +#define R300_TX_FILTER0_10 0x4428 +#define R300_TX_FILTER0_11 0x442c +#define R300_TX_FILTER0_12 0x4430 +#define R300_TX_FILTER0_13 0x4434 +#define R300_TX_FILTER0_14 0x4438 +#define R300_TX_FILTER0_15 0x443c +# define R300_TX_REPEAT 0 +# define R300_TX_MIRRORED 1 +# define R300_TX_CLAMP_TO_EDGE 2 +# define R300_TX_MIRROR_ONCE_TO_EDGE 3 +# define R300_TX_CLAMP 4 +# define R300_TX_MIRROR_ONCE 5 +# define R300_TX_CLAMP_TO_BORDER 6 +# define R300_TX_MIRROR_ONCE_TO_BORDER 7 +# define R300_TX_WRAP_S_SHIFT 0 +# define R300_TX_WRAP_S_MASK (7 << 0) +# define R300_TX_WRAP_T_SHIFT 3 +# define R300_TX_WRAP_T_MASK (7 << 3) +# define R300_TX_WRAP_R_SHIFT 6 +# define R300_TX_WRAP_R_MASK (7 << 6) +# define R300_TX_MAG_FILTER_4 (0 << 9) +# define R300_TX_MAG_FILTER_NEAREST (1 << 9) +# define R300_TX_MAG_FILTER_LINEAR (2 << 9) +# define R300_TX_MAG_FILTER_ANISO (3 << 9) +# define R300_TX_MAG_FILTER_MASK (3 << 9) +# define R300_TX_MIN_FILTER_NEAREST (1 << 11) +# define R300_TX_MIN_FILTER_LINEAR (2 << 11) +# define R300_TX_MIN_FILTER_ANISO (3 << 11) +# define R300_TX_MIN_FILTER_MASK (3 << 11) +# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13) +# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) +# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) +# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) +# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) +# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) +# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21) +# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21) +# define R300_TX_MAX_ANISO_MASK (7 << 21) + +#define R300_TX_FILTER1_0 0x4440 +# define R300_CHROMA_KEY_MODE_DISABLE 0 +# define R300_CHROMA_KEY_FORCE 1 +# define R300_CHROMA_KEY_BLEND 2 +# define R300_MC_ROUND_NORMAL (0<<2) +# define R300_MC_ROUND_MPEG4 (1<<2) +# define R300_LOD_BIAS_SHIFT 3 +# define R300_LOD_BIAS_MASK 0x1ff8 +# define R300_EDGE_ANISO_EDGE_DIAG (0<<13) +# define R300_EDGE_ANISO_EDGE_ONLY (1<<13) +# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14) +# define R300_MC_COORD_TRUNCATE_MPEG (1<<14) +# define R300_TX_TRI_PERF_0_8 (0<<15) +# define R300_TX_TRI_PERF_1_8 (1<<15) +# define R300_TX_TRI_PERF_1_4 (2<<15) +# define R300_TX_TRI_PERF_3_8 (3<<15) +# define R300_ANISO_THRESHOLD_MASK (7<<17) + +# define R500_MACRO_SWITCH (1<<22) +# define R500_BORDER_FIX (1<<31) + +#define R300_TX_SIZE_0 0x4480 +# define R300_TX_WIDTHMASK_SHIFT 0 +# define R300_TX_WIDTHMASK_MASK (2047 << 0) +# define R300_TX_HEIGHTMASK_SHIFT 11 +# define R300_TX_HEIGHTMASK_MASK (2047 << 11) +# define R300_TX_DEPTHMASK_SHIFT 22 +# define R300_TX_DEPTHMASK_MASK (0xf << 22) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) +# define R300_TX_SIZE_PROJECTED (1<<30) +# define R300_TX_SIZE_TXPITCH_EN (1<<31) +#define R300_TX_FORMAT_0 0x44C0 + /* The interpretation of the format word by Wladimir van der Laan */ + /* The X, Y, Z and W refer to the layout of the components. + They are given meanings as R, G, B and Alpha by the swizzle + specification */ +# define R300_TX_FORMAT_X8 0x0 +# define R500_TX_FORMAT_X1 0x0 // bit set in format 2 +# define R300_TX_FORMAT_X16 0x1 +# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2 +# define R300_TX_FORMAT_Y4X4 0x2 +# define R300_TX_FORMAT_Y8X8 0x3 +# define R300_TX_FORMAT_Y16X16 0x4 +# define R300_TX_FORMAT_Z3Y3X2 0x5 +# define R300_TX_FORMAT_Z5Y6X5 0x6 +# define R300_TX_FORMAT_Z6Y5X5 0x7 +# define R300_TX_FORMAT_Z11Y11X10 0x8 +# define R300_TX_FORMAT_Z10Y11X11 0x9 +# define R300_TX_FORMAT_W4Z4Y4X4 0xA +# define R300_TX_FORMAT_W1Z5Y5X5 0xB +# define R300_TX_FORMAT_W8Z8Y8X8 0xC +# define R300_TX_FORMAT_W2Z10Y10X10 0xD +# define R300_TX_FORMAT_W16Z16Y16X16 0xE +# define R300_TX_FORMAT_DXT1 0xF +# define R300_TX_FORMAT_DXT3 0x10 +# define R300_TX_FORMAT_DXT5 0x11 +# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */ +# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ +# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ +# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ + + /* These two values are wrong, but they're the only values that + * produce any even vaguely correct results. Can r300 only do 16-bit + * depth textures? + */ +# define R300_TX_FORMAT_X24_Y8 0x1e +# define R300_TX_FORMAT_X32 0x1e + + /* 0x16 - some 16 bit green format.. ?? */ +# define R300_TX_FORMAT_3D (1 << 25) +# define R300_TX_FORMAT_CUBIC_MAP (2 << 25) + +# define R300_TX_FORMAT_GAMMA (1 << 21) + + /* gap */ + /* Floating point formats */ + /* Note - hardware supports both 16 and 32 bit floating point */ +# define R300_TX_FORMAT_FL_I16 0x18 +# define R300_TX_FORMAT_FL_I16A16 0x19 +# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A +# define R300_TX_FORMAT_FL_I32 0x1B +# define R300_TX_FORMAT_FL_I32A32 0x1C +# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D + /* alpha modes, convenience mostly */ + /* if you have alpha, pick constant appropriate to the + number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ +# define R300_TX_FORMAT_ALPHA_1CH 0x000 +# define R300_TX_FORMAT_ALPHA_2CH 0x200 +# define R300_TX_FORMAT_ALPHA_4CH 0x600 +# define R300_TX_FORMAT_ALPHA_NONE 0xA00 + /* Swizzling */ + /* constants */ +# define R300_TX_FORMAT_X 0 +# define R300_TX_FORMAT_Y 1 +# define R300_TX_FORMAT_Z 2 +# define R300_TX_FORMAT_W 3 +# define R300_TX_FORMAT_ZERO 4 +# define R300_TX_FORMAT_ONE 5 + /* 2.0*Z, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_Z 6 + /* 2.0*W, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_W 7 + +# define R300_TX_FORMAT_B_SHIFT 18 +# define R300_TX_FORMAT_G_SHIFT 15 +# define R300_TX_FORMAT_R_SHIFT 12 +# define R300_TX_FORMAT_A_SHIFT 9 + /* Convenience macro to take care of layout and swizzling */ +# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \ + ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \ + | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \ + | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \ + | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \ + | (R300_TX_FORMAT_##FMT) \ + ) + /* These can be ORed with result of R300_EASY_TX_FORMAT() + We don't really know what they do. Take values from a + constant color ? */ +# define R300_TX_FORMAT_CONST_X (1<<5) +# define R300_TX_FORMAT_CONST_Y (2<<5) +# define R300_TX_FORMAT_CONST_Z (4<<5) +# define R300_TX_FORMAT_CONST_W (8<<5) + +# define R300_TX_FORMAT_YUV_MODE 0x00800000 + +#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */ +# define R300_TX_PITCHMASK_SHIFT 0 +# define R300_TX_PITCHMASK_MASK (2047 << 0) +# define R500_TXFORMAT_MSB (1 << 14) +# define R500_TXWIDTH_BIT11 (1 << 15) +# define R500_TXHEIGHT_BIT11 (1 << 16) +# define R500_POW2FIX2FLT (1 << 17) +# define R500_SEL_FILTER4_TC0 (0 << 18) +# define R500_SEL_FILTER4_TC1 (1 << 18) +# define R500_SEL_FILTER4_TC2 (2 << 18) +# define R500_SEL_FILTER4_TC3 (3 << 18) + +#define R300_TX_OFFSET_0 0x4540 +#define R300_TX_OFFSET_1 0x4544 +#define R300_TX_OFFSET_2 0x4548 +#define R300_TX_OFFSET_3 0x454C +#define R300_TX_OFFSET_4 0x4550 +#define R300_TX_OFFSET_5 0x4554 +#define R300_TX_OFFSET_6 0x4558 +#define R300_TX_OFFSET_7 0x455C + /* BEGIN: Guess from R200 */ +# define R300_TXO_ENDIAN_NO_SWAP (0 << 0) +# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) +# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) +# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MICRO_TILE_LINEAR (0 << 3) +# define R300_TXO_MICRO_TILE (1 << 3) +# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_OFFSET_MASK 0xffffffe0 +# define R300_TXO_OFFSET_SHIFT 5 + /* END: Guess from R200 */ + +/* 32 bit chroma key */ +#define R300_TX_CHROMA_KEY_0 0x4580 +#define R300_TX_CHROMA_KEY_1 0x4584 +#define R300_TX_CHROMA_KEY_2 0x4588 +#define R300_TX_CHROMA_KEY_3 0x458c +#define R300_TX_CHROMA_KEY_4 0x4590 +#define R300_TX_CHROMA_KEY_5 0x4594 +#define R300_TX_CHROMA_KEY_6 0x4598 +#define R300_TX_CHROMA_KEY_7 0x459c +#define R300_TX_CHROMA_KEY_8 0x45a0 +#define R300_TX_CHROMA_KEY_9 0x45a4 +#define R300_TX_CHROMA_KEY_10 0x45a8 +#define R300_TX_CHROMA_KEY_11 0x45ac +#define R300_TX_CHROMA_KEY_12 0x45b0 +#define R300_TX_CHROMA_KEY_13 0x45b4 +#define R300_TX_CHROMA_KEY_14 0x45b8 +#define R300_TX_CHROMA_KEY_15 0x45bc +/* ff00ff00 == { 0, 1.0, 0, 1.0 } */ + +/* Border Color */ +#define R300_TX_BORDER_COLOR_0 0x45c0 +#define R300_TX_BORDER_COLOR_1 0x45c4 +#define R300_TX_BORDER_COLOR_2 0x45c8 +#define R300_TX_BORDER_COLOR_3 0x45cc +#define R300_TX_BORDER_COLOR_4 0x45d0 +#define R300_TX_BORDER_COLOR_5 0x45d4 +#define R300_TX_BORDER_COLOR_6 0x45d8 +#define R300_TX_BORDER_COLOR_7 0x45dc +#define R300_TX_BORDER_COLOR_8 0x45e0 +#define R300_TX_BORDER_COLOR_9 0x45e4 +#define R300_TX_BORDER_COLOR_10 0x45e8 +#define R300_TX_BORDER_COLOR_11 0x45ec +#define R300_TX_BORDER_COLOR_12 0x45f0 +#define R300_TX_BORDER_COLOR_13 0x45f4 +#define R300_TX_BORDER_COLOR_14 0x45f8 +#define R300_TX_BORDER_COLOR_15 0x45fc + + +/* END: Texture specification */ + +/* BEGIN: Fragment program instruction set */ + +/* Fragment programs are written directly into register space. + * There are separate instruction streams for texture instructions and ALU + * instructions. + * In order to synchronize these streams, the program is divided into up + * to 4 nodes. Each node begins with a number of TEX operations, followed + * by a number of ALU operations. + * The first node can have zero TEX ops, all subsequent nodes must have at + * least + * one TEX ops. + * All nodes must have at least one ALU op. + * + * The index of the last node is stored in PFS_CNTL_0: A value of 0 means + * 1 node, a value of 3 means 4 nodes. + * The total amount of instructions is defined in PFS_CNTL_2. The offsets are + * offsets into the respective instruction streams, while *_END points to the + * last instruction relative to this offset. + */ +#define R300_US_CONFIG 0x4600 +# define R300_PFS_CNTL_LAST_NODES_SHIFT 0 +# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) +# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) +#define R300_US_PIXSIZE 0x4604 +/* There is an unshifted value here which has so far always been equal to the + * index of the highest used temporary register. + */ +#define R300_US_CODE_OFFSET 0x4608 +# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 +# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) +# define R300_PFS_CNTL_ALU_END_SHIFT 6 +# define R300_PFS_CNTL_ALU_END_MASK (63 << 6) +# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13 +# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) +# define R300_PFS_CNTL_TEX_END_SHIFT 18 +# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) + +/* gap */ + +/* Nodes are stored backwards. The last active node is always stored in + * PFS_NODE_3. + * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The + * first node is stored in NODE_2, the second node is stored in NODE_3. + * + * Offsets are relative to the master offset from PFS_CNTL_2. + */ +#define R300_US_CODE_ADDR_0 0x4610 +#define R300_US_CODE_ADDR_1 0x4614 +#define R300_US_CODE_ADDR_2 0x4618 +#define R300_US_CODE_ADDR_3 0x461C +# define R300_ALU_START_SHIFT 0 +# define R300_ALU_START_MASK (63 << 0) +# define R300_ALU_SIZE_SHIFT 6 +# define R300_ALU_SIZE_MASK (63 << 6) +# define R300_TEX_START_SHIFT 12 +# define R300_TEX_START_MASK (31 << 12) +# define R300_TEX_SIZE_SHIFT 17 +# define R300_TEX_SIZE_MASK (31 << 17) +# define R300_RGBA_OUT (1 << 22) +# define R300_W_OUT (1 << 23) + +/* TEX + * As far as I can tell, texture instructions cannot write into output + * registers directly. A subsequent ALU instruction is always necessary, + * even if it's just MAD o0, r0, 1, 0 + */ +#define R300_US_TEX_INST_0 0x4620 +# define R300_SRC_ADDR_SHIFT 0 +# define R300_SRC_ADDR_MASK (31 << 0) +# define R300_DST_ADDR_SHIFT 6 +# define R300_DST_ADDR_MASK (31 << 6) +# define R300_TEX_ID_SHIFT 11 +# define R300_TEX_ID_MASK (15 << 11) +# define R300_TEX_INST_SHIFT 15 +# define R300_TEX_OP_NOP 0 +# define R300_TEX_OP_LD 1 +# define R300_TEX_OP_KIL 2 +# define R300_TEX_OP_TXP 3 +# define R300_TEX_OP_TXB 4 +# define R300_TEX_INST_MASK (7 << 15) + +/* Output format from the unfied shader */ +#define R300_US_OUT_FMT 0x46A4 +# define R300_US_OUT_FMT_C4_8 (0 << 0) +# define R300_US_OUT_FMT_C4_10 (1 << 0) +# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R300_US_OUT_FMT_C_16 (3 << 0) +# define R300_US_OUT_FMT_C2_16 (4 << 0) +# define R300_US_OUT_FMT_C4_16 (5 << 0) +# define R300_US_OUT_FMT_C_16_MPEG (6 << 0) +# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0) +# define R300_US_OUT_FMT_C2_4 (8 << 0) +# define R300_US_OUT_FMT_C_3_3_2 (9 << 0) +# define R300_US_OUT_FMT_C_6_5_6 (10 << 0) +# define R300_US_OUT_FMT_C_11_11_10 (11 << 0) +# define R300_US_OUT_FMT_C_10_11_11 (12 << 0) +# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0) +/* reserved */ +# define R300_US_OUT_FMT_UNUSED (15 << 0) +# define R300_US_OUT_FMT_C_16_FP (16 << 0) +# define R300_US_OUT_FMT_C2_16_FP (17 << 0) +# define R300_US_OUT_FMT_C4_16_FP (18 << 0) +# define R300_US_OUT_FMT_C_32_FP (19 << 0) +# define R300_US_OUT_FMT_C2_32_FP (20 << 0) +# define R300_US_OUT_FMT_C4_32_FP (20 << 0) + +/* ALU + * The ALU instructions register blocks are enumerated according to the order + * in which fglrx. I assume there is space for 64 instructions, since + * each block has space for a maximum of 64 DWORDs, and this matches reported + * native limits. + * + * The basic functional block seems to be one MAD for each color and alpha, + * and an adder that adds all components after the MUL. + * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands + * - DP4: Use OUTC_DP4, OUTA_DP4 + * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands + * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands + * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1 + * - CMP: If ARG2 < 0, return ARG1, else return ARG0 + * - FLR: use FRC+MAD + * - XPD: use MAD+MAD + * - SGE, SLT: use MAD+CMP + * - RSQ: use ABS modifier for argument + * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation + * (e.g. RCP) into color register + * - apparently, there's no quick DST operation + * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" + * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" + * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" + * + * Operand selection + * First stage selects three sources from the available registers and + * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). + * fglrx sorts the three source fields: Registers before constants, + * lower indices before higher indices; I do not know whether this is + * necessary. + * + * fglrx fills unused sources with "read constant 0" + * According to specs, you cannot select more than two different constants. + * + * Second stage selects the operands from the sources. This is defined in + * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants + * zero and one. + * Swizzling and negation happens in this stage, as well. + * + * Important: Color and alpha seem to be mostly separate, i.e. their sources + * selection appears to be fully independent (the register storage is probably + * physically split into a color and an alpha section). + * However (because of the apparent physical split), there is some interaction + * WRT swizzling. If, for example, you want to load an R component into an + * Alpha operand, this R component is taken from a *color* source, not from + * an alpha source. The corresponding register doesn't even have to appear in + * the alpha sources list. (I hope this all makes sense to you) + * + * Destination selection + * The destination register index is in FPI1 (color) and FPI3 (alpha) + * together with enable bits. + * There are separate enable bits for writing into temporary registers + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* + * /DSTA_OUTPUT). You can write to both at once, or not write at all (the + * same index must be used for both). + * + * Note: There is a special form for LRP + * - Argument order is the same as in ARB_fragment_program. + * - Operation is MAD + * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP + * - Set FPI0/FPI2_SPECIAL_LRP + * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD + */ +#define R300_US_ALU_RGB_ADDR_0 0x46C0 +# define R300_ALU_SRC0C_SHIFT 0 +# define R300_ALU_SRC0C_MASK (31 << 0) +# define R300_ALU_SRC0C_CONST (1 << 5) +# define R300_ALU_SRC1C_SHIFT 6 +# define R300_ALU_SRC1C_MASK (31 << 6) +# define R300_ALU_SRC1C_CONST (1 << 11) +# define R300_ALU_SRC2C_SHIFT 12 +# define R300_ALU_SRC2C_MASK (31 << 12) +# define R300_ALU_SRC2C_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTC_SHIFT 18 +# define R300_ALU_DSTC_MASK (31 << 18) +# define R300_ALU_DSTC_REG_MASK_SHIFT 23 +# define R300_ALU_DSTC_REG_X (1 << 23) +# define R300_ALU_DSTC_REG_Y (1 << 24) +# define R300_ALU_DSTC_REG_Z (1 << 25) +# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26 +# define R300_ALU_DSTC_OUTPUT_X (1 << 26) +# define R300_ALU_DSTC_OUTPUT_Y (1 << 27) +# define R300_ALU_DSTC_OUTPUT_Z (1 << 28) +# define R300_RGB_TARGET(x) ((x) << 29) + +#define R300_US_ALU_ALPHA_ADDR_0 0x47C0 +# define R300_ALU_SRC0A_SHIFT 0 +# define R300_ALU_SRC0A_MASK (31 << 0) +# define R300_ALU_SRC0A_CONST (1 << 5) +# define R300_ALU_SRC1A_SHIFT 6 +# define R300_ALU_SRC1A_MASK (31 << 6) +# define R300_ALU_SRC1A_CONST (1 << 11) +# define R300_ALU_SRC2A_SHIFT 12 +# define R300_ALU_SRC2A_MASK (31 << 12) +# define R300_ALU_SRC2A_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTA_SHIFT 18 +# define R300_ALU_DSTA_MASK (31 << 18) +# define R300_ALU_DSTA_REG (1 << 23) +# define R300_ALU_DSTA_OUTPUT (1 << 24) +# define R300_ALU_DSTA_DEPTH (1 << 27) +# define R300_ALPHA_TARGET(x) ((x) << 25) + +#define R300_US_ALU_RGB_INST_0 0x48C0 +# define R300_ALU_ARGC_SRC0C_XYZ 0 +# define R300_ALU_ARGC_SRC0C_XXX 1 +# define R300_ALU_ARGC_SRC0C_YYY 2 +# define R300_ALU_ARGC_SRC0C_ZZZ 3 +# define R300_ALU_ARGC_SRC1C_XYZ 4 +# define R300_ALU_ARGC_SRC1C_XXX 5 +# define R300_ALU_ARGC_SRC1C_YYY 6 +# define R300_ALU_ARGC_SRC1C_ZZZ 7 +# define R300_ALU_ARGC_SRC2C_XYZ 8 +# define R300_ALU_ARGC_SRC2C_XXX 9 +# define R300_ALU_ARGC_SRC2C_YYY 10 +# define R300_ALU_ARGC_SRC2C_ZZZ 11 +# define R300_ALU_ARGC_SRC0A 12 +# define R300_ALU_ARGC_SRC1A 13 +# define R300_ALU_ARGC_SRC2A 14 +# define R300_ALU_ARGC_SRCP_XYZ 15 +# define R300_ALU_ARGC_SRCP_XXX 16 +# define R300_ALU_ARGC_SRCP_YYY 17 +# define R300_ALU_ARGC_SRCP_ZZZ 18 +# define R300_ALU_ARGC_SRCP_WWW 19 +# define R300_ALU_ARGC_ZERO 20 +# define R300_ALU_ARGC_ONE 21 +# define R300_ALU_ARGC_HALF 22 +# define R300_ALU_ARGC_SRC0C_YZX 23 +# define R300_ALU_ARGC_SRC1C_YZX 24 +# define R300_ALU_ARGC_SRC2C_YZX 25 +# define R300_ALU_ARGC_SRC0C_ZXY 26 +# define R300_ALU_ARGC_SRC1C_ZXY 27 +# define R300_ALU_ARGC_SRC2C_ZXY 28 +# define R300_ALU_ARGC_SRC0CA_WZY 29 +# define R300_ALU_ARGC_SRC1CA_WZY 30 +# define R300_ALU_ARGC_SRC2CA_WZY 31 + +# define R300_ALU_ARG0C_SHIFT 0 +# define R300_ALU_ARG0C_MASK (31 << 0) +# define R300_ALU_ARG0C_NOP (0 << 5) +# define R300_ALU_ARG0C_NEG (1 << 5) +# define R300_ALU_ARG0C_ABS (2 << 5) +# define R300_ALU_ARG0C_NAB (3 << 5) +# define R300_ALU_ARG1C_SHIFT 7 +# define R300_ALU_ARG1C_MASK (31 << 7) +# define R300_ALU_ARG1C_NOP (0 << 12) +# define R300_ALU_ARG1C_NEG (1 << 12) +# define R300_ALU_ARG1C_ABS (2 << 12) +# define R300_ALU_ARG1C_NAB (3 << 12) +# define R300_ALU_ARG2C_SHIFT 14 +# define R300_ALU_ARG2C_MASK (31 << 14) +# define R300_ALU_ARG2C_NOP (0 << 19) +# define R300_ALU_ARG2C_NEG (1 << 19) +# define R300_ALU_ARG2C_ABS (2 << 19) +# define R300_ALU_ARG2C_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTC_MAD (0 << 23) +# define R300_ALU_OUTC_DP3 (1 << 23) +# define R300_ALU_OUTC_DP4 (2 << 23) +# define R300_ALU_OUTC_D2A (3 << 23) +# define R300_ALU_OUTC_MIN (4 << 23) +# define R300_ALU_OUTC_MAX (5 << 23) +# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CMP (8 << 23) +# define R300_ALU_OUTC_FRC (9 << 23) +# define R300_ALU_OUTC_REPL_ALPHA (10 << 23) + +# define R300_ALU_OUTC_MOD_NOP (0 << 27) +# define R300_ALU_OUTC_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTC_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTC_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTC_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTC_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTC_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTC_CLAMP (1 << 30) +# define R300_ALU_INSERT_NOP (1 << 31) + +#define R300_US_ALU_ALPHA_INST_0 0x49C0 +# define R300_ALU_ARGA_SRC0C_X 0 +# define R300_ALU_ARGA_SRC0C_Y 1 +# define R300_ALU_ARGA_SRC0C_Z 2 +# define R300_ALU_ARGA_SRC1C_X 3 +# define R300_ALU_ARGA_SRC1C_Y 4 +# define R300_ALU_ARGA_SRC1C_Z 5 +# define R300_ALU_ARGA_SRC2C_X 6 +# define R300_ALU_ARGA_SRC2C_Y 7 +# define R300_ALU_ARGA_SRC2C_Z 8 +# define R300_ALU_ARGA_SRC0A 9 +# define R300_ALU_ARGA_SRC1A 10 +# define R300_ALU_ARGA_SRC2A 11 +# define R300_ALU_ARGA_SRCP_X 12 +# define R300_ALU_ARGA_SRCP_Y 13 +# define R300_ALU_ARGA_SRCP_Z 14 +# define R300_ALU_ARGA_SRCP_W 15 + +# define R300_ALU_ARGA_ZERO 16 +# define R300_ALU_ARGA_ONE 17 +# define R300_ALU_ARGA_HALF 18 +# define R300_ALU_ARG0A_SHIFT 0 +# define R300_ALU_ARG0A_MASK (31 << 0) +# define R300_ALU_ARG0A_NOP (0 << 5) +# define R300_ALU_ARG0A_NEG (1 << 5) +# define R300_ALU_ARG0A_ABS (2 << 5) +# define R300_ALU_ARG0A_NAB (3 << 5) +# define R300_ALU_ARG1A_SHIFT 7 +# define R300_ALU_ARG1A_MASK (31 << 7) +# define R300_ALU_ARG1A_NOP (0 << 12) +# define R300_ALU_ARG1A_NEG (1 << 12) +# define R300_ALU_ARG1A_ABS (2 << 12) +# define R300_ALU_ARG1A_NAB (3 << 12) +# define R300_ALU_ARG2A_SHIFT 14 +# define R300_ALU_ARG2A_MASK (31 << 14) +# define R300_ALU_ARG2A_NOP (0 << 19) +# define R300_ALU_ARG2A_NEG (1 << 19) +# define R300_ALU_ARG2A_ABS (2 << 19) +# define R300_ALU_ARG2A_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTA_MAD (0 << 23) +# define R300_ALU_OUTA_DP4 (1 << 23) +# define R300_ALU_OUTA_MIN (2 << 23) +# define R300_ALU_OUTA_MAX (3 << 23) +# define R300_ALU_OUTA_CND (5 << 23) +# define R300_ALU_OUTA_CMP (6 << 23) +# define R300_ALU_OUTA_FRC (7 << 23) +# define R300_ALU_OUTA_EX2 (8 << 23) +# define R300_ALU_OUTA_LG2 (9 << 23) +# define R300_ALU_OUTA_RCP (10 << 23) +# define R300_ALU_OUTA_RSQ (11 << 23) + +# define R300_ALU_OUTA_MOD_NOP (0 << 27) +# define R300_ALU_OUTA_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTA_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTA_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTA_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTA_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTA_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTA_CLAMP (1 << 30) +/* END: Fragment program instruction set */ + +/* Fog: Fog Blending Enable */ +#define R300_FG_FOG_BLEND 0x4bc0 +# define R300_FG_FOG_BLEND_DISABLE (0 << 0) +# define R300_FG_FOG_BLEND_ENABLE (1 << 0) +# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1) +# define R300_FG_FOG_BLEND_FN_EXP (1 << 1) +# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1) +# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1) +# define R300_FG_FOG_BLEND_FN_MASK (3 << 1) + +/* Fog: Red Component of Fog Color */ +#define R300_FG_FOG_COLOR_R 0x4bc8 +/* Fog: Green Component of Fog Color */ +#define R300_FG_FOG_COLOR_G 0x4bcc +/* Fog: Blue Component of Fog Color */ +#define R300_FG_FOG_COLOR_B 0x4bd0 +# define R300_FG_FOG_COLOR_MASK 0x000003ff + +/* Fog: Constant Factor for Fog Blending */ +#define R300_FG_FOG_FACTOR 0x4bc4 +# define FG_FOG_FACTOR_MASK 0x000003ff + +/* Fog: Alpha function */ +#define R300_FG_ALPHA_FUNC 0x4bd4 +# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff +# define R300_FG_ALPHA_FUNC_NEVER (0 << 8) +# define R300_FG_ALPHA_FUNC_LESS (1 << 8) +# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8) +# define R300_FG_ALPHA_FUNC_LE (3 << 8) +# define R300_FG_ALPHA_FUNC_GREATER (4 << 8) +# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8) +# define R300_FG_ALPHA_FUNC_GE (6 << 8) +# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8) +# define R300_ALPHA_TEST_OP_MASK (7 << 8) +# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11) +# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11) + +# define R500_FG_ALPHA_FUNC_10BIT (0 << 12) +# define R500_FG_ALPHA_FUNC_8BIT (1 << 12) + +# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) +# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) +# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) +# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) + +# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) +# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) + +# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) +# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) + +# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) +# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) + + +/* Fog: Where does the depth come from? */ +#define R300_FG_DEPTH_SRC 0x4bd8 +# define R300_FG_DEPTH_SRC_SCAN (0 << 0) +# define R300_FG_DEPTH_SRC_SHADER (1 << 0) + +/* Fog: Alpha Compare Value */ +#define R500_FG_ALPHA_VALUE 0x4be0 +# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff + +#define RV530_FG_ZBREG_DEST 0x4be8 +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) + +/* gap */ + +/* Fragment program parameters in 7.16 floating point */ +#define R300_PFS_PARAM_0_X 0x4C00 +#define R300_PFS_PARAM_0_Y 0x4C04 +#define R300_PFS_PARAM_0_Z 0x4C08 +#define R300_PFS_PARAM_0_W 0x4C0C +/* last consts */ +#define R300_PFS_PARAM_31_X 0x4DF0 +#define R300_PFS_PARAM_31_Y 0x4DF4 +#define R300_PFS_PARAM_31_Z 0x4DF8 +#define R300_PFS_PARAM_31_W 0x4DFC + +/* Unpipelined. */ +#define R300_RB3D_CCTL 0x4e00 +# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5) +# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7) +# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7) +# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9) +# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9) +# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10) +# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10) +/* reserved */ +# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12) +# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12) +# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13) +# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13) +# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14) +# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14) + + +/* Notes: + * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in + * the application + * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND + * are set to the same + * function (both registers are always set up completely in any case) + * - Most blend flags are simply copied from R200 and not tested yet + */ +#define R300_RB3D_CBLEND 0x4E04 +#define R300_RB3D_ABLEND 0x4E08 +/* the following only appear in CBLEND */ +# define R300_ALPHA_BLEND_ENABLE (1 << 0) +# define R300_SEPARATE_ALPHA_ENABLE (1 << 1) +# define R300_READ_ENABLE (1 << 2) +# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) + +/* the following are shared between CBLEND and ABLEND */ +# define R300_FCN_MASK (3 << 12) +# define R300_COMB_FCN_ADD_CLAMP (0 << 12) +# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12) +# define R300_COMB_FCN_SUB_CLAMP (2 << 12) +# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12) +# define R300_COMB_FCN_MIN (4 << 12) +# define R300_COMB_FCN_MAX (5 << 12) +# define R300_COMB_FCN_RSUB_CLAMP (6 << 12) +# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12) +# define R300_BLEND_GL_ZERO (32) +# define R300_BLEND_GL_ONE (33) +# define R300_BLEND_GL_SRC_COLOR (34) +# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35) +# define R300_BLEND_GL_DST_COLOR (36) +# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37) +# define R300_BLEND_GL_SRC_ALPHA (38) +# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39) +# define R300_BLEND_GL_DST_ALPHA (40) +# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41) +# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42) +# define R300_BLEND_GL_CONST_COLOR (43) +# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44) +# define R300_BLEND_GL_CONST_ALPHA (45) +# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46) +# define R300_BLEND_MASK (63) +# define R300_SRC_BLEND_SHIFT (16) +# define R300_DST_BLEND_SHIFT (24) + +/* Constant color used by the blender. Pipelined through the blender. + * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE, + * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead. + */ +#define R300_RB3D_BLEND_COLOR 0x4E10 + + +/* 3D Color Channel Mask. If all the channels used in the current color format + * are disabled, then the cb will discard all the incoming quads. Pipelined + * through the blender. + */ +#define RB3D_COLOR_CHANNEL_MASK 0x4E0C +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15) + +/* Clear color that is used when the color mask is set to 00. Unpipelined. + * Program this register with a 32-bit value in ARGB8888 or ARGB2101010 + * formats, ignoring the fields. + */ +#define RB3D_COLOR_CLEAR_VALUE 0x4e14 + +/* gap */ + +/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_CLR 0x4e20 + +/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_MSK 0x4e24 + +/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */ +#define R300_RB3D_COLOROFFSET0 0x4E28 +# define R300_COLOROFFSET_MASK 0xFFFFFFE0 +/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */ +#define R300_RB3D_COLOROFFSET1 0x4E2C +/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */ +#define R300_RB3D_COLOROFFSET2 0x4E30 +/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */ +#define R300_RB3D_COLOROFFSET3 0x4E34 + +/* Color buffer format and tiling control for all the multibuffers and the + * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any + * of the registers are changed. + * + * Bit 16: Larger tiles + * Bit 17: 4x2 tiles + * Bit 18: Extremely weird tile like, but some pixels duplicated? + */ +#define R300_RB3D_COLORPITCH0 0x4E38 +# define R300_COLORPITCH_MASK 0x00003FFE +# define R300_COLOR_TILE_DISABLE (0 << 16) +# define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_MICROTILE_DISABLE (0 << 17) +# define R300_COLOR_MICROTILE_ENABLE (1 << 17) +# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) +# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) +# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) +# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19) +# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21) +# define R500_COLOR_FORMAT_UV1010 (1 << 21) +# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ +# define R300_COLOR_FORMAT_ARGB1555 (3 << 21) +# define R300_COLOR_FORMAT_RGB565 (4 << 21) +# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21) +# define R300_COLOR_FORMAT_ARGB8888 (6 << 21) +# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21) +/* reserved */ +# define R300_COLOR_FORMAT_I8 (9 << 21) +# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21) +# define R300_COLOR_FORMAT_VYUY (11 << 21) +# define R300_COLOR_FORMAT_YVYU (12 << 21) +# define R300_COLOR_FORMAT_UV88 (13 << 21) +# define R500_COLOR_FORMAT_I10 (14 << 21) +# define R300_COLOR_FORMAT_ARGB4444 (15 << 21) +#define R300_RB3D_COLORPITCH1 0x4E3C +#define R300_RB3D_COLORPITCH2 0x4E40 +#define R300_RB3D_COLORPITCH3 0x4E44 + +/* gap */ + +/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then + * a flush or free will not occur upon a write to this register, but a sync + * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE + * are zero but DC_FINISH is one, then a sync will be sent immediately -- the + * cb will not wait for all the previous operations to complete before sending + * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to + * zero. + * + * Set to 0A before 3D operations, set to 02 afterwards. + */ +#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) + +#define R300_RB3D_DITHER_CTL 0x4E50 +# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0) +# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0) +# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0) +/* reserved */ +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2) +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2) +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2) +/* reserved */ + +/* Resolve buffer destination address. The cache must be empty before changing + * this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_OFFSET 0x4e80 +# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5 +# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before + * changing this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_PITCH 0x4e84 +# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1 +# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Control. Unpipelined */ +#define R300_RB3D_AARESOLVE_CTL 0x4e88 +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) + + +/* Discard src pixels less than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 +/* Discard src pixels greater than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 + +/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */ +#define R300_RB3D_ROPCNTL 0x4e18 +# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004 +# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8) +# define R300_RB3D_ROPCNTL_ROP_SHIFT 8 + +/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */ +#define R300_RB3D_CLRCMP_FLIPE 0x4e1c + +/* Sets the fifo sizes */ +#define R500_RB3D_FIFO_SIZE 0x4ef4 +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8 +# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 + +/* gap */ +/* There seems to be no "write only" setting, so use Z-test = ALWAYS + * for this. + * Bit (1<<8) is the "test" bit. so plain write is 6 - vd + */ +#define R300_ZB_CNTL 0x4F00 +# define R300_STENCIL_ENABLE (1 << 0) +# define R300_Z_ENABLE (1 << 1) +# define R300_Z_WRITE_ENABLE (1 << 2) +# define R300_Z_SIGNED_COMPARE (1 << 3) +# define R300_STENCIL_FRONT_BACK (1 << 4) +# define R400_ZSIGNED_MAGNITUDE (1 << 5) +# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6) + +#define R300_ZB_ZSTENCILCNTL 0x4f04 + /* functions */ +# define R300_ZS_NEVER 0 +# define R300_ZS_LESS 1 +# define R300_ZS_LEQUAL 2 +# define R300_ZS_EQUAL 3 +# define R300_ZS_GEQUAL 4 +# define R300_ZS_GREATER 5 +# define R300_ZS_NOTEQUAL 6 +# define R300_ZS_ALWAYS 7 +# define R300_ZS_MASK 7 + /* operations */ +# define R300_ZS_KEEP 0 +# define R300_ZS_ZERO 1 +# define R300_ZS_REPLACE 2 +# define R300_ZS_INCR 3 +# define R300_ZS_DECR 4 +# define R300_ZS_INVERT 5 +# define R300_ZS_INCR_WRAP 6 +# define R300_ZS_DECR_WRAP 7 +# define R300_Z_FUNC_SHIFT 0 + /* front and back refer to operations done for front + and back faces, i.e. separate stencil function support */ +# define R300_S_FRONT_FUNC_SHIFT 3 +# define R300_S_FRONT_SFAIL_OP_SHIFT 6 +# define R300_S_FRONT_ZPASS_OP_SHIFT 9 +# define R300_S_FRONT_ZFAIL_OP_SHIFT 12 +# define R300_S_BACK_FUNC_SHIFT 15 +# define R300_S_BACK_SFAIL_OP_SHIFT 18 +# define R300_S_BACK_ZPASS_OP_SHIFT 21 +# define R300_S_BACK_ZFAIL_OP_SHIFT 24 + +#define R300_ZB_STENCILREFMASK 0x4f08 +# define R300_STENCILREF_SHIFT 0 +# define R300_STENCILREF_MASK 0x000000ff +# define R300_STENCILMASK_SHIFT 8 +# define R300_STENCILMASK_MASK 0x0000ff00 +# define R300_STENCILWRITEMASK_SHIFT 16 +# define R300_STENCILWRITEMASK_MASK 0x00ff0000 + +/* gap */ + +#define R300_ZB_FORMAT 0x4f10 +# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0) +# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0) +# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0) +/* reserved up to (15 << 0) */ +# define R300_INVERT_13E3_LEADING_ONES (0 << 4) +# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) + +#define R300_ZB_ZTOP 0x4F14 +# define R300_ZTOP_DISABLE (0 << 0) +# define R300_ZTOP_ENABLE (1 << 0) + +/* gap */ + +#define R300_ZB_ZCACHE_CTLSTAT 0x4f18 +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) + +#define R300_ZB_BW_CNTL 0x4f1c +# define R300_HIZ_DISABLE (0 << 0) +# define R300_HIZ_ENABLE (1 << 0) +# define R300_HIZ_MIN (0 << 1) +# define R300_HIZ_MAX (1 << 1) +# define R300_FAST_FILL_DISABLE (0 << 2) +# define R300_FAST_FILL_ENABLE (1 << 2) +# define R300_RD_COMP_DISABLE (0 << 3) +# define R300_RD_COMP_ENABLE (1 << 3) +# define R300_WR_COMP_DISABLE (0 << 4) +# define R300_WR_COMP_ENABLE (1 << 4) +# define R300_ZB_CB_CLEAR_RMW (0 << 5) +# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) + +# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) +# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) +# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8) +# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8) + +# define R500_BMASK_ENABLE (0 << 10) +# define R500_BMASK_DISABLE (1 << 10) +# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11) +# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11) +# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12) +# define R500_HIZ_FP_EXP_BITS_1 (1 << 12) +# define R500_HIZ_FP_EXP_BITS_2 (2 << 12) +# define R500_HIZ_FP_EXP_BITS_3 (3 << 12) +# define R500_HIZ_FP_EXP_BITS_4 (4 << 12) +# define R500_HIZ_FP_EXP_BITS_5 (5 << 12) +# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15) +# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) +# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) +# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) +# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) +# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) +# define R500_PEQ_PACKING_DISABLE (0 << 18) +# define R500_PEQ_PACKING_ENABLE (1 << 18) +# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18) +# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18) + + +/* gap */ + +/* Z Buffer Address Offset. + * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. + */ +#define R300_ZB_DEPTHOFFSET 0x4f20 + +/* Z Buffer Pitch and Endian Control */ +#define R300_ZB_DEPTHPITCH 0x4f24 +# define R300_DEPTHPITCH_MASK 0x00003FFC +# define R300_DEPTHMACROTILE_DISABLE (0 << 16) +# define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMICROTILE_LINEAR (0 << 17) +# define R300_DEPTHMICROTILE_TILED (1 << 17) +# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHENDIAN_NO_SWAP (0 << 18) +# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) +# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) +# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) + +/* Z Buffer Clear Value */ +#define R300_ZB_DEPTHCLEARVALUE 0x4f28 + +#define R300_ZB_ZMASK_OFFSET 0x4f30 +#define R300_ZB_ZMASK_PITCH 0x4f34 +#define R300_ZB_ZMASK_WRINDEX 0x4f38 +#define R300_ZB_ZMASK_DWORD 0x4f3c +#define R300_ZB_ZMASK_RDINDEX 0x4f40 + +/* Hierarchical Z Memory Offset */ +#define R300_ZB_HIZ_OFFSET 0x4f44 + +/* Hierarchical Z Write Index */ +#define R300_ZB_HIZ_WRINDEX 0x4f48 + +/* Hierarchical Z Data */ +#define R300_ZB_HIZ_DWORD 0x4f4c + +/* Hierarchical Z Read Index */ +#define R300_ZB_HIZ_RDINDEX 0x4f50 + +/* Hierarchical Z Pitch */ +#define R300_ZB_HIZ_PITCH 0x4f54 + +/* Z Buffer Z Pass Counter Data */ +#define R300_ZB_ZPASS_DATA 0x4f58 + +/* Z Buffer Z Pass Counter Address */ +#define R300_ZB_ZPASS_ADDR 0x4f5c + +/* Depth buffer X and Y coordinate offset */ +#define R300_ZB_DEPTHXY_OFFSET 0x4f60 +# define R300_DEPTHX_OFFSET_SHIFT 1 +# define R300_DEPTHX_OFFSET_MASK 0x000007FE +# define R300_DEPTHY_OFFSET_SHIFT 17 +# define R300_DEPTHY_OFFSET_MASK 0x07FE0000 + +/* Sets the fifo sizes */ +#define R500_ZB_FIFO_SIZE 0x4fd0 +# define R500_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0) + +/* Stencil Reference Value and Mask for backfacing quads */ +/* R300_ZB_STENCILREFMASK handles front face */ +#define R500_ZB_STENCILREFMASK_BF 0x4fd4 +# define R500_STENCILREF_SHIFT 0 +# define R500_STENCILREF_MASK 0x000000ff +# define R500_STENCILMASK_SHIFT 8 +# define R500_STENCILMASK_MASK 0x0000ff00 +# define R500_STENCILWRITEMASK_SHIFT 16 +# define R500_STENCILWRITEMASK_MASK 0x00ff0000 + +/** + * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION + * + * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector + * Engine instruction or a Math Engine instruction. + */ + +/*\{*/ + +enum { + /* R3XX */ + VECTOR_NO_OP = 0, + VE_DOT_PRODUCT = 1, + VE_MULTIPLY = 2, + VE_ADD = 3, + VE_MULTIPLY_ADD = 4, + VE_DISTANCE_VECTOR = 5, + VE_FRACTION = 6, + VE_MAXIMUM = 7, + VE_MINIMUM = 8, + VE_SET_GREATER_THAN_EQUAL = 9, + VE_SET_LESS_THAN = 10, + VE_MULTIPLYX2_ADD = 11, + VE_MULTIPLY_CLAMP = 12, + VE_FLT2FIX_DX = 13, + VE_FLT2FIX_DX_RND = 14, + /* R5XX */ + VE_PRED_SET_EQ_PUSH = 15, + VE_PRED_SET_GT_PUSH = 16, + VE_PRED_SET_GTE_PUSH = 17, + VE_PRED_SET_NEQ_PUSH = 18, + VE_COND_WRITE_EQ = 19, + VE_COND_WRITE_GT = 20, + VE_COND_WRITE_GTE = 21, + VE_COND_WRITE_NEQ = 22, + VE_COND_MUX_EQ = 23, + VE_COND_MUX_GT = 24, + VE_COND_MUX_GTE = 25, + VE_SET_GREATER_THAN = 26, + VE_SET_EQUAL = 27, + VE_SET_NOT_EQUAL = 28, +}; + +enum { + /* R3XX */ + MATH_NO_OP = 0, + ME_EXP_BASE2_DX = 1, + ME_LOG_BASE2_DX = 2, + ME_EXP_BASEE_FF = 3, + ME_LIGHT_COEFF_DX = 4, + ME_POWER_FUNC_FF = 5, + ME_RECIP_DX = 6, + ME_RECIP_FF = 7, + ME_RECIP_SQRT_DX = 8, + ME_RECIP_SQRT_FF = 9, + ME_MULTIPLY = 10, + ME_EXP_BASE2_FULL_DX = 11, + ME_LOG_BASE2_FULL_DX = 12, + ME_POWER_FUNC_FF_CLAMP_B = 13, + ME_POWER_FUNC_FF_CLAMP_B1 = 14, + ME_POWER_FUNC_FF_CLAMP_01 = 15, + ME_SIN = 16, + ME_COS = 17, + /* R5XX */ + ME_LOG_BASE2_IEEE = 18, + ME_RECIP_IEEE = 19, + ME_RECIP_SQRT_IEEE = 20, + ME_PRED_SET_EQ = 21, + ME_PRED_SET_GT = 22, + ME_PRED_SET_GTE = 23, + ME_PRED_SET_NEQ = 24, + ME_PRED_SET_CLR = 25, + ME_PRED_SET_INV = 26, + ME_PRED_SET_POP = 27, + ME_PRED_SET_RESTORE = 28, +}; + +enum { + /* R3XX */ + PVS_MACRO_OP_2CLK_MADD = 0, + PVS_MACRO_OP_2CLK_M2X_ADD = 1, +}; + +enum { + PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ + PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ + PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ + PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */ +}; + +enum { + PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */ + PVS_DST_REG_A0 = 1, /* Address Register Storage */ + PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */ + PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ + PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ + PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */ +}; + +enum { + PVS_SRC_SELECT_X = 0, /* Select X Component */ + PVS_SRC_SELECT_Y = 1, /* Select Y Component */ + PVS_SRC_SELECT_Z = 2, /* Select Z Component */ + PVS_SRC_SELECT_W = 3, /* Select W Component */ + PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ + PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */ +}; + +/* PVS Opcode & Destination Operand Description */ + +enum { + PVS_DST_OPCODE_MASK = 0x3f, + PVS_DST_OPCODE_SHIFT = 0, + PVS_DST_MATH_INST_MASK = 0x1, + PVS_DST_MATH_INST_SHIFT = 6, + PVS_DST_MACRO_INST_MASK = 0x1, + PVS_DST_MACRO_INST_SHIFT = 7, + PVS_DST_REG_TYPE_MASK = 0xf, + PVS_DST_REG_TYPE_SHIFT = 8, + PVS_DST_ADDR_MODE_1_MASK = 0x1, + PVS_DST_ADDR_MODE_1_SHIFT = 12, + PVS_DST_OFFSET_MASK = 0x7f, + PVS_DST_OFFSET_SHIFT = 13, + PVS_DST_WE_X_MASK = 0x1, + PVS_DST_WE_X_SHIFT = 20, + PVS_DST_WE_Y_MASK = 0x1, + PVS_DST_WE_Y_SHIFT = 21, + PVS_DST_WE_Z_MASK = 0x1, + PVS_DST_WE_Z_SHIFT = 22, + PVS_DST_WE_W_MASK = 0x1, + PVS_DST_WE_W_SHIFT = 23, + PVS_DST_VE_SAT_MASK = 0x1, + PVS_DST_VE_SAT_SHIFT = 24, + PVS_DST_ME_SAT_MASK = 0x1, + PVS_DST_ME_SAT_SHIFT = 25, + PVS_DST_PRED_ENABLE_MASK = 0x1, + PVS_DST_PRED_ENABLE_SHIFT = 26, + PVS_DST_PRED_SENSE_MASK = 0x1, + PVS_DST_PRED_SENSE_SHIFT = 27, + PVS_DST_DUAL_MATH_OP_MASK = 0x3, + PVS_DST_DUAL_MATH_OP_SHIFT = 27, + PVS_DST_ADDR_SEL_MASK = 0x3, + PVS_DST_ADDR_SEL_SHIFT = 29, + PVS_DST_ADDR_MODE_0_MASK = 0x1, + PVS_DST_ADDR_MODE_0_SHIFT = 31, +}; + +/* PVS Source Operand Description */ + +enum { + PVS_SRC_REG_TYPE_MASK = 0x3, + PVS_SRC_REG_TYPE_SHIFT = 0, + SPARE_0_MASK = 0x1, + SPARE_0_SHIFT = 2, + PVS_SRC_ABS_XYZW_MASK = 0x1, + PVS_SRC_ABS_XYZW_SHIFT = 3, + PVS_SRC_ADDR_MODE_0_MASK = 0x1, + PVS_SRC_ADDR_MODE_0_SHIFT = 4, + PVS_SRC_OFFSET_MASK = 0xff, + PVS_SRC_OFFSET_SHIFT = 5, + PVS_SRC_SWIZZLE_X_MASK = 0x7, + PVS_SRC_SWIZZLE_X_SHIFT = 13, + PVS_SRC_SWIZZLE_Y_MASK = 0x7, + PVS_SRC_SWIZZLE_Y_SHIFT = 16, + PVS_SRC_SWIZZLE_Z_MASK = 0x7, + PVS_SRC_SWIZZLE_Z_SHIFT = 19, + PVS_SRC_SWIZZLE_W_MASK = 0x7, + PVS_SRC_SWIZZLE_W_SHIFT = 22, + PVS_SRC_MODIFIER_X_MASK = 0x1, + PVS_SRC_MODIFIER_X_SHIFT = 25, + PVS_SRC_MODIFIER_Y_MASK = 0x1, + PVS_SRC_MODIFIER_Y_SHIFT = 26, + PVS_SRC_MODIFIER_Z_MASK = 0x1, + PVS_SRC_MODIFIER_Z_SHIFT = 27, + PVS_SRC_MODIFIER_W_MASK = 0x1, + PVS_SRC_MODIFIER_W_SHIFT = 28, + PVS_SRC_ADDR_SEL_MASK = 0x3, + PVS_SRC_ADDR_SEL_SHIFT = 29, + PVS_SRC_ADDR_MODE_1_MASK = 0x0, + PVS_SRC_ADDR_MODE_1_SHIFT = 32, +}; + + +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + +/*\}*/ + +/* BEGIN: Packet 3 commands */ + +/* A primitive emission dword. */ +#define R300_PRIM_TYPE_NONE (0 << 0) +#define R300_PRIM_TYPE_POINT (1 << 0) +#define R300_PRIM_TYPE_LINE (2 << 0) +#define R300_PRIM_TYPE_LINE_STRIP (3 << 0) +#define R300_PRIM_TYPE_TRI_LIST (4 << 0) +#define R300_PRIM_TYPE_TRI_FAN (5 << 0) +#define R300_PRIM_TYPE_TRI_STRIP (6 << 0) +#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0) +#define R300_PRIM_TYPE_RECT_LIST (8 << 0) +#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0) +#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0) + /* GUESS (based on r200) */ +#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0) +#define R300_PRIM_TYPE_LINE_LOOP (12 << 0) +#define R300_PRIM_TYPE_QUADS (13 << 0) +#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0) +#define R300_PRIM_TYPE_POLYGON (15 << 0) +#define R300_PRIM_TYPE_MASK 0xF +#define R300_PRIM_WALK_IND (1 << 4) +#define R300_PRIM_WALK_LIST (2 << 4) +#define R300_PRIM_WALK_RING (3 << 4) +#define R300_PRIM_WALK_MASK (3 << 4) + /* GUESS (based on r200) */ +#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6) +#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6) +#define R300_PRIM_NUM_VERTICES_SHIFT 16 +#define R300_PRIM_NUM_VERTICES_MASK 0xffff + + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader. You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. + */ +#define R500_US_ALU_ALPHA_INST_0 0xa800 +# define R500_ALPHA_OP_MAD 0 +# define R500_ALPHA_OP_DP 1 +# define R500_ALPHA_OP_MIN 2 +# define R500_ALPHA_OP_MAX 3 +/* #define R500_ALPHA_OP_RESERVED 4 */ +# define R500_ALPHA_OP_CND 5 +# define R500_ALPHA_OP_CMP 6 +# define R500_ALPHA_OP_FRC 7 +# define R500_ALPHA_OP_EX2 8 +# define R500_ALPHA_OP_LN2 9 +# define R500_ALPHA_OP_RCP 10 +# define R500_ALPHA_OP_RSQ 11 +# define R500_ALPHA_OP_SIN 12 +# define R500_ALPHA_OP_COS 13 +# define R500_ALPHA_OP_MDH 14 +# define R500_ALPHA_OP_MDV 15 +# define R500_ALPHA_ADDRD(x) ((x) << 4) +# define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SHIFT 12 +# define R500_ALPHA_SEL_A_SRC0 (0 << 12) +# define R500_ALPHA_SEL_A_SRC1 (1 << 12) +# define R500_ALPHA_SEL_A_SRC2 (2 << 12) +# define R500_ALPHA_SEL_A_SRCP (3 << 12) +# define R500_ALPHA_SWIZ_A_R (0 << 14) +# define R500_ALPHA_SWIZ_A_G (1 << 14) +# define R500_ALPHA_SWIZ_A_B (2 << 14) +# define R500_ALPHA_SWIZ_A_A (3 << 14) +# define R500_ALPHA_SWIZ_A_0 (4 << 14) +# define R500_ALPHA_SWIZ_A_HALF (5 << 14) +# define R500_ALPHA_SWIZ_A_1 (6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ +# define R500_ALPHA_MOD_A_NOP (0 << 17) +# define R500_ALPHA_MOD_A_NEG (1 << 17) +# define R500_ALPHA_MOD_A_ABS (2 << 17) +# define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SHIFT 19 +# define R500_ALPHA_SEL_B_SRC0 (0 << 19) +# define R500_ALPHA_SEL_B_SRC1 (1 << 19) +# define R500_ALPHA_SEL_B_SRC2 (2 << 19) +# define R500_ALPHA_SEL_B_SRCP (3 << 19) +# define R500_ALPHA_SWIZ_B_R (0 << 21) +# define R500_ALPHA_SWIZ_B_G (1 << 21) +# define R500_ALPHA_SWIZ_B_B (2 << 21) +# define R500_ALPHA_SWIZ_B_A (3 << 21) +# define R500_ALPHA_SWIZ_B_0 (4 << 21) +# define R500_ALPHA_SWIZ_B_HALF (5 << 21) +# define R500_ALPHA_SWIZ_B_1 (6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALPHA_MOD_B_NOP (0 << 24) +# define R500_ALPHA_MOD_B_NEG (1 << 24) +# define R500_ALPHA_MOD_B_ABS (2 << 24) +# define R500_ALPHA_MOD_B_NAB (3 << 24) +# define R500_ALPHA_OMOD_IDENTITY (0 << 26) +# define R500_ALPHA_OMOD_MUL_2 (1 << 26) +# define R500_ALPHA_OMOD_MUL_4 (2 << 26) +# define R500_ALPHA_OMOD_MUL_8 (3 << 26) +# define R500_ALPHA_OMOD_DIV_2 (4 << 26) +# define R500_ALPHA_OMOD_DIV_4 (5 << 26) +# define R500_ALPHA_OMOD_DIV_8 (6 << 26) +# define R500_ALPHA_OMOD_DISABLE (7 << 26) +# define R500_ALPHA_TARGET(x) ((x) << 29) +# define R500_ALPHA_W_OMASK (1 << 31) +#define R500_US_ALU_ALPHA_ADDR_0 0x9800 +# define R500_ALPHA_ADDR0(x) ((x) << 0) +# define R500_ALPHA_ADDR0_CONST (1 << 8) +# define R500_ALPHA_ADDR0_REL (1 << 9) +# define R500_ALPHA_ADDR1(x) ((x) << 10) +# define R500_ALPHA_ADDR1_CONST (1 << 18) +# define R500_ALPHA_ADDR1_REL (1 << 19) +# define R500_ALPHA_ADDR2(x) ((x) << 20) +# define R500_ALPHA_ADDR2_CONST (1 << 28) +# define R500_ALPHA_ADDR2_REL (1 << 29) +# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) +# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) +# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) +# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30) +#define R500_US_ALU_RGBA_INST_0 0xb000 +# define R500_ALU_RGBA_OP_MAD (0 << 0) +# define R500_ALU_RGBA_OP_DP3 (1 << 0) +# define R500_ALU_RGBA_OP_DP4 (2 << 0) +# define R500_ALU_RGBA_OP_D2A (3 << 0) +# define R500_ALU_RGBA_OP_MIN (4 << 0) +# define R500_ALU_RGBA_OP_MAX (5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ +# define R500_ALU_RGBA_OP_CND (7 << 0) +# define R500_ALU_RGBA_OP_CMP (8 << 0) +# define R500_ALU_RGBA_OP_FRC (9 << 0) +# define R500_ALU_RGBA_OP_SOP (10 << 0) +# define R500_ALU_RGBA_OP_MDH (11 << 0) +# define R500_ALU_RGBA_OP_MDV (12 << 0) +# define R500_ALU_RGBA_ADDRD(x) ((x) << 4) +# define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SHIFT 12 +# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) +# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) +# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) +# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) +# define R500_ALU_RGBA_R_SWIZ_G (1 << 14) +# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) +# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) +# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) +# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) +# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ +# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) +# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) +# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) +# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) +# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) +# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) +# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ +# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) +# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) +# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) +# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) +# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) +# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) +# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ +# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) +# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) +# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) +# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25 +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) +# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) +# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) +# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) +# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) +# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) +# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) +# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ +# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30) +#define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SHIFT 0 +# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) +# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) +# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0) +# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) +# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) +# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) +# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) +# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) +# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) +# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) +# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ +# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) +# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) +# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) +# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) +# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) +# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) +# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ +# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) +# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) +# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) +# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) +# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) +# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) +# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ +# define R500_ALU_RGB_MOD_A_NOP (0 << 11) +# define R500_ALU_RGB_MOD_A_NEG (1 << 11) +# define R500_ALU_RGB_MOD_A_ABS (2 << 11) +# define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SHIFT 13 +# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) +# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) +# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) +# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) +# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) +# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) +# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) +# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) +# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) +# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) +# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ +# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) +# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) +# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) +# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18) +# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) +# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) +# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ +# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) +# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) +# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) +# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) +# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) +# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) +# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALU_RGB_MOD_B_NOP (0 << 24) +# define R500_ALU_RGB_MOD_B_NEG (1 << 24) +# define R500_ALU_RGB_MOD_B_ABS (2 << 24) +# define R500_ALU_RGB_MOD_B_NAB (3 << 24) +# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26) +# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26) +# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26) +# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26) +# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26) +# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) +# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) +# define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +# define R500_ALU_RGB_TARGET(x) ((x) << 29) +# define R500_ALU_RGB_WMASK (1 << 31) +#define R500_US_ALU_RGB_ADDR_0 0x9000 +# define R500_RGB_ADDR0(x) ((x) << 0) +# define R500_RGB_ADDR0_CONST (1 << 8) +# define R500_RGB_ADDR0_REL (1 << 9) +# define R500_RGB_ADDR1(x) ((x) << 10) +# define R500_RGB_ADDR1_CONST (1 << 18) +# define R500_RGB_ADDR1_REL (1 << 19) +# define R500_RGB_ADDR2(x) ((x) << 20) +# define R500_RGB_ADDR2_CONST (1 << 28) +# define R500_RGB_ADDR2_REL (1 << 29) +# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) +# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) +# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) +#define R500_US_CMN_INST_0 0xb800 +# define R500_INST_TYPE_MASK (3 << 0) +# define R500_INST_TYPE_ALU (0 << 0) +# define R500_INST_TYPE_OUT (1 << 0) +# define R500_INST_TYPE_FC (2 << 0) +# define R500_INST_TYPE_TEX (3 << 0) +# define R500_INST_TEX_SEM_WAIT (1 << 2) +# define R500_INST_RGB_PRED_SEL_NONE (0 << 3) +# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) +# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) +# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) +# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) +# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) +# define R500_INST_RGB_PRED_INV (1 << 6) +# define R500_INST_WRITE_INACTIVE (1 << 7) +# define R500_INST_LAST (1 << 8) +# define R500_INST_NOP (1 << 9) +# define R500_INST_ALU_WAIT (1 << 10) +# define R500_INST_RGB_WMASK_R (1 << 11) +# define R500_INST_RGB_WMASK_G (1 << 12) +# define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_ALPHA_WMASK (1 << 14) +# define R500_INST_RGB_OMASK_R (1 << 15) +# define R500_INST_RGB_OMASK_G (1 << 16) +# define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_ALPHA_OMASK (1 << 18) +# define R500_INST_RGB_CLAMP (1 << 19) +# define R500_INST_ALPHA_CLAMP (1 << 20) +# define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALU_RESULT_SEL_RED (0 << 21) +# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21) +# define R500_INST_ALPHA_PRED_INV (1 << 22) +# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) +# define R500_INST_ALU_RESULT_OP_LT (1 << 23) +# define R500_INST_ALU_RESULT_OP_GE (2 << 23) +# define R500_INST_ALU_RESULT_OP_NE (3 << 23) +# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) +# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) +# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) +# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) +# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) +# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) +/* XXX next four are kind of guessed */ +# define R500_INST_STAT_WE_R (1 << 28) +# define R500_INST_STAT_WE_G (1 << 29) +# define R500_INST_STAT_WE_B (1 << 30) +# define R500_INST_STAT_WE_A (1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR 0x4630 +# define R500_US_CODE_START_ADDR(x) ((x) << 0) +# define R500_US_CODE_END_ADDR(x) ((x) << 16) +#define R500_US_CODE_OFFSET 0x4638 +# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0) +#define R500_US_CODE_RANGE 0x4634 +# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0) +# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16) +#define R500_US_CONFIG 0x4600 +# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) +#define R500_US_FC_ADDR_0 0xa000 +# define R500_FC_BOOL_ADDR(x) ((x) << 0) +# define R500_FC_INT_ADDR(x) ((x) << 8) +# define R500_FC_JUMP_ADDR(x) ((x) << 16) +# define R500_FC_JUMP_GLOBAL (1 << 31) +#define R500_US_FC_BOOL_CONST 0x4620 +# define R500_FC_KBOOL(x) (x) +#define R500_US_FC_CTRL 0x4624 +# define R500_FC_TEST_EN (1 << 30) +# define R500_FC_FULL_FC_EN (1 << 31) +#define R500_US_FC_INST_0 0x9800 +# define R500_FC_OP_JUMP (0 << 0) +# define R500_FC_OP_LOOP (1 << 0) +# define R500_FC_OP_ENDLOOP (2 << 0) +# define R500_FC_OP_REP (3 << 0) +# define R500_FC_OP_ENDREP (4 << 0) +# define R500_FC_OP_BREAKLOOP (5 << 0) +# define R500_FC_OP_BREAKREP (6 << 0) +# define R500_FC_OP_CONTINUE (7 << 0) +# define R500_FC_B_ELSE (1 << 4) +# define R500_FC_JUMP_ANY (1 << 5) +# define R500_FC_A_OP_NONE (0 << 6) +# define R500_FC_A_OP_POP (1 << 6) +# define R500_FC_A_OP_PUSH (2 << 6) +# define R500_FC_JUMP_FUNC(x) ((x) << 8) +# define R500_FC_B_POP_CNT(x) ((x) << 16) +# define R500_FC_B_OP0_NONE (0 << 24) +# define R500_FC_B_OP0_DECR (1 << 24) +# define R500_FC_B_OP0_INCR (2 << 24) +# define R500_FC_B_OP1_DECR (0 << 26) +# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_INCR (2 << 26) +# define R500_FC_IGNORE_UNCOVERED (1 << 28) +#define R500_US_FC_INT_CONST_0 0x4c00 +# define R500_FC_INT_CONST_KR(x) ((x) << 0) +# define R500_FC_INT_CONST_KG(x) ((x) << 8) +# define R500_FC_INT_CONST_KB(x) ((x) << 16) +/* _0 through _15 */ +#define R500_US_FORMAT0_0 0x4640 +# define R500_FORMAT_TXWIDTH(x) ((x) << 0) +# define R500_FORMAT_TXHEIGHT(x) ((x) << 11) +# define R500_FORMAT_TXDEPTH(x) ((x) << 22) +/* _0 through _3 */ +#define R500_US_OUT_FMT_0 0x46a4 +# define R500_OUT_FMT_C4_8 (0 << 0) +# define R500_OUT_FMT_C4_10 (1 << 0) +# define R500_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R500_OUT_FMT_C_16 (3 << 0) +# define R500_OUT_FMT_C2_16 (4 << 0) +# define R500_OUT_FMT_C4_16 (5 << 0) +# define R500_OUT_FMT_C_16_MPEG (6 << 0) +# define R500_OUT_FMT_C2_16_MPEG (7 << 0) +# define R500_OUT_FMT_C2_4 (8 << 0) +# define R500_OUT_FMT_C_3_3_2 (9 << 0) +# define R500_OUT_FMT_C_6_5_6 (10 << 0) +# define R500_OUT_FMT_C_11_11_10 (11 << 0) +# define R500_OUT_FMT_C_10_11_11 (12 << 0) +# define R500_OUT_FMT_C_2_10_10_10 (13 << 0) +/* #define R500_OUT_FMT_RESERVED (14 << 0) */ +# define R500_OUT_FMT_UNUSED (15 << 0) +# define R500_OUT_FMT_C_16_FP (16 << 0) +# define R500_OUT_FMT_C2_16_FP (17 << 0) +# define R500_OUT_FMT_C4_16_FP (18 << 0) +# define R500_OUT_FMT_C_32_FP (19 << 0) +# define R500_OUT_FMT_C2_32_FP (20 << 0) +# define R500_OUT_FMT_C4_32_FP (21 << 0) +# define R500_C0_SEL_A (0 << 8) +# define R500_C0_SEL_R (1 << 8) +# define R500_C0_SEL_G (2 << 8) +# define R500_C0_SEL_B (3 << 8) +# define R500_C1_SEL_A (0 << 10) +# define R500_C1_SEL_R (1 << 10) +# define R500_C1_SEL_G (2 << 10) +# define R500_C1_SEL_B (3 << 10) +# define R500_C2_SEL_A (0 << 12) +# define R500_C2_SEL_R (1 << 12) +# define R500_C2_SEL_G (2 << 12) +# define R500_C2_SEL_B (3 << 12) +# define R500_C3_SEL_A (0 << 14) +# define R500_C3_SEL_R (1 << 14) +# define R500_C3_SEL_G (2 << 14) +# define R500_C3_SEL_B (3 << 14) +# define R500_OUT_SIGN(x) ((x) << 16) +# define R500_ROUND_ADJ (1 << 20) +#define R500_US_PIXSIZE 0x4604 +# define R500_PIX_SIZE(x) (x) +#define R500_US_TEX_ADDR_0 0x9800 +# define R500_TEX_SRC_ADDR(x) ((x) << 0) +# define R500_TEX_SRC_ADDR_REL (1 << 7) +# define R500_TEX_SRC_S_SWIZ_R (0 << 8) +# define R500_TEX_SRC_S_SWIZ_G (1 << 8) +# define R500_TEX_SRC_S_SWIZ_B (2 << 8) +# define R500_TEX_SRC_S_SWIZ_A (3 << 8) +# define R500_TEX_SRC_T_SWIZ_R (0 << 10) +# define R500_TEX_SRC_T_SWIZ_G (1 << 10) +# define R500_TEX_SRC_T_SWIZ_B (2 << 10) +# define R500_TEX_SRC_T_SWIZ_A (3 << 10) +# define R500_TEX_SRC_R_SWIZ_R (0 << 12) +# define R500_TEX_SRC_R_SWIZ_G (1 << 12) +# define R500_TEX_SRC_R_SWIZ_B (2 << 12) +# define R500_TEX_SRC_R_SWIZ_A (3 << 12) +# define R500_TEX_SRC_Q_SWIZ_R (0 << 14) +# define R500_TEX_SRC_Q_SWIZ_G (1 << 14) +# define R500_TEX_SRC_Q_SWIZ_B (2 << 14) +# define R500_TEX_SRC_Q_SWIZ_A (3 << 14) +# define R500_TEX_DST_ADDR(x) ((x) << 16) +# define R500_TEX_DST_ADDR_REL (1 << 23) +# define R500_TEX_DST_R_SWIZ_R (0 << 24) +# define R500_TEX_DST_R_SWIZ_G (1 << 24) +# define R500_TEX_DST_R_SWIZ_B (2 << 24) +# define R500_TEX_DST_R_SWIZ_A (3 << 24) +# define R500_TEX_DST_G_SWIZ_R (0 << 26) +# define R500_TEX_DST_G_SWIZ_G (1 << 26) +# define R500_TEX_DST_G_SWIZ_B (2 << 26) +# define R500_TEX_DST_G_SWIZ_A (3 << 26) +# define R500_TEX_DST_B_SWIZ_R (0 << 28) +# define R500_TEX_DST_B_SWIZ_G (1 << 28) +# define R500_TEX_DST_B_SWIZ_B (2 << 28) +# define R500_TEX_DST_B_SWIZ_A (3 << 28) +# define R500_TEX_DST_A_SWIZ_R (0 << 30) +# define R500_TEX_DST_A_SWIZ_G (1 << 30) +# define R500_TEX_DST_A_SWIZ_B (2 << 30) +# define R500_TEX_DST_A_SWIZ_A (3 << 30) +#define R500_US_TEX_ADDR_DXDY_0 0xa000 +# define R500_DX_ADDR(x) ((x) << 0) +# define R500_DX_ADDR_REL (1 << 7) +# define R500_DX_S_SWIZ_R (0 << 8) +# define R500_DX_S_SWIZ_G (1 << 8) +# define R500_DX_S_SWIZ_B (2 << 8) +# define R500_DX_S_SWIZ_A (3 << 8) +# define R500_DX_T_SWIZ_R (0 << 10) +# define R500_DX_T_SWIZ_G (1 << 10) +# define R500_DX_T_SWIZ_B (2 << 10) +# define R500_DX_T_SWIZ_A (3 << 10) +# define R500_DX_R_SWIZ_R (0 << 12) +# define R500_DX_R_SWIZ_G (1 << 12) +# define R500_DX_R_SWIZ_B (2 << 12) +# define R500_DX_R_SWIZ_A (3 << 12) +# define R500_DX_Q_SWIZ_R (0 << 14) +# define R500_DX_Q_SWIZ_G (1 << 14) +# define R500_DX_Q_SWIZ_B (2 << 14) +# define R500_DX_Q_SWIZ_A (3 << 14) +# define R500_DY_ADDR(x) ((x) << 16) +# define R500_DY_ADDR_REL (1 << 17) +# define R500_DY_S_SWIZ_R (0 << 24) +# define R500_DY_S_SWIZ_G (1 << 24) +# define R500_DY_S_SWIZ_B (2 << 24) +# define R500_DY_S_SWIZ_A (3 << 24) +# define R500_DY_T_SWIZ_R (0 << 26) +# define R500_DY_T_SWIZ_G (1 << 26) +# define R500_DY_T_SWIZ_B (2 << 26) +# define R500_DY_T_SWIZ_A (3 << 26) +# define R500_DY_R_SWIZ_R (0 << 28) +# define R500_DY_R_SWIZ_G (1 << 28) +# define R500_DY_R_SWIZ_B (2 << 28) +# define R500_DY_R_SWIZ_A (3 << 28) +# define R500_DY_Q_SWIZ_R (0 << 30) +# define R500_DY_Q_SWIZ_G (1 << 30) +# define R500_DY_Q_SWIZ_B (2 << 30) +# define R500_DY_Q_SWIZ_A (3 << 30) +#define R500_US_TEX_INST_0 0x9000 +# define R500_TEX_ID(x) ((x) << 16) +# define R500_TEX_INST_NOP (0 << 22) +# define R500_TEX_INST_LD (1 << 22) +# define R500_TEX_INST_TEXKILL (2 << 22) +# define R500_TEX_INST_PROJ (3 << 22) +# define R500_TEX_INST_LODBIAS (4 << 22) +# define R500_TEX_INST_LOD (5 << 22) +# define R500_TEX_INST_DXDY (6 << 22) +# define R500_TEX_SEM_ACQUIRE (1 << 25) +# define R500_TEX_IGNORE_UNCOVERED (1 << 26) +# define R500_TEX_UNSCALED (1 << 27) +#define R300_US_W_FMT 0x46b4 +# define R300_W_FMT_W0 (0 << 0) +# define R300_W_FMT_W24 (1 << 0) +# define R300_W_FMT_W24FP (2 << 0) +# define R300_W_SRC_US (0 << 2) +# define R300_W_SRC_RAS (1 << 2) + + +/* Packet0 field ordering to write all values to the same reg */ +#define RADEON_ONE_REG_WR (1 << 15) + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. + * Two parameter dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + */ +#define R300_PACKET3_3D_DRAW_VBUF 0x00002800 + +/* Draw a primitive from immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_IMMD 0x00002900 + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and + * immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_INDX 0x00002A00 + + +/* Specify the full set of vertex arrays as (address, stride). + * The first parameter is the number of vertex arrays specified. + * The rest of the command is a variable length list of blocks, where + * each block is three dwords long and specifies two arrays. + * The first dword of a block is split into two words, the lower significant + * word refers to the first array, the more significant word to the second + * array in the block. + * The low byte of each word contains the size of an array entry in dwords, + * the high byte contains the stride of the array. + * The second dword of a block contains the pointer to the first array, + * the third dword of a block contains the pointer to the second array. + * Note that if the total number of arrays is odd, the third dword of + * the last block is omitted. + */ +#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 + +#define R300_PACKET3_INDX_BUFFER 0x00003300 +# define R300_INDX_BUFFER_DST_SHIFT 0 +# define R300_INDX_BUFFER_SKIP_SHIFT 16 +# define R300_INDX_BUFFER_ONE_REG_WR (1<<31) + +/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 +/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500 +/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 + +/* Clears a portion of hierachical Z RAM + * 3 dword parameters + * 0. START + * 1. COUNT: 13:0 (max is 0x3FFF) + * 2. CLEAR_VALUE: Value to write into HIZ RAM. + */ +#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700 + +/* Draws a set of primitives using vertex buffers pointed by the state data. + * At least 2 Parameters: + * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword. + * 2 to end: Data or indices (see other 3D_DRAW_* packets for details) + */ +#define R300_PACKET3_3D_DRAW_128 0x00003900 + +/* END: Packet 3 commands */ + + +/* Color formats for 2d packets + */ +#define R300_CP_COLOR_FORMAT_CI8 2 +#define R300_CP_COLOR_FORMAT_ARGB1555 3 +#define R300_CP_COLOR_FORMAT_RGB565 4 +#define R300_CP_COLOR_FORMAT_ARGB8888 6 +#define R300_CP_COLOR_FORMAT_RGB332 7 +#define R300_CP_COLOR_FORMAT_RGB8 9 +#define R300_CP_COLOR_FORMAT_ARGB4444 15 + +/* + * CP type-3 packets + */ +#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00 + +#endif /* _R300_REG_H */ + +/* *INDENT-ON* */ + +/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c new file mode 100644 index 0000000000..bb8f91491f --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -0,0 +1,505 @@ +/************************************************************************** + +Copyright (C) 2004 Nicolai Haehnle. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \brief R300 Render (Vertex Buffer Implementation) + * + * The immediate implementation has been removed from CVS in favor of the vertex + * buffer implementation. + * + * The render functions are called by the pipeline manager to render a batch of + * primitives. They return TRUE to pass on to the next stage (i.e. software + * rasterization) or FALSE to indicate that the pipeline has finished after + * rendering something. + * + * When falling back to software TCL still attempt to use hardware + * rasterization. + * + * I am not sure that the cache related registers are setup correctly, but + * obviously this does work... Further investigation is needed. + * + * \author Nicolai Haehnle <prefect_@gmx.net> + * + * \todo Add immediate implementation back? Perhaps this is useful if there are + * no bugs... + */ + +#include "r300_render.h" + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/enums.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/dd.h" +#include "main/simple_list.h" +#include "main/api_arrayelt.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" +#include "vbo/vbo_split.h" +#include "r300_context.h" +#include "r300_state.h" +#include "r300_reg.h" +#include "r300_emit.h" +#include "r300_swtcl.h" + +/** + * \brief Convert a OpenGL primitive type into a R300 primitive type. + */ +int r300PrimitiveType(r300ContextPtr rmesa, int prim) +{ + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + return R300_VAP_VF_CNTL__PRIM_POINTS; + break; + case GL_LINES: + return R300_VAP_VF_CNTL__PRIM_LINES; + break; + case GL_LINE_STRIP: + return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + break; + case GL_LINE_LOOP: + return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + break; + case GL_TRIANGLES: + return R300_VAP_VF_CNTL__PRIM_TRIANGLES; + break; + case GL_TRIANGLE_STRIP: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + break; + case GL_TRIANGLE_FAN: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + break; + case GL_QUADS: + return R300_VAP_VF_CNTL__PRIM_QUADS; + break; + case GL_QUAD_STRIP: + return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + break; + case GL_POLYGON: + return R300_VAP_VF_CNTL__PRIM_POLYGON; + break; + default: + assert(0); + return -1; + break; + } +} + +int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) +{ + int verts_off = 0; + + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + verts_off = 0; + break; + case GL_LINES: + verts_off = num_verts % 2; + break; + case GL_LINE_STRIP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_LINE_LOOP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_TRIANGLES: + verts_off = num_verts % 3; + break; + case GL_TRIANGLE_STRIP: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_TRIANGLE_FAN: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_QUADS: + verts_off = num_verts % 4; + break; + case GL_QUAD_STRIP: + if (num_verts < 4) + verts_off = num_verts; + else + verts_off = num_verts % 2; + break; + case GL_POLYGON: + if (num_verts < 3) + verts_off = num_verts; + break; + default: + assert(0); + return -1; + break; + } + + return num_verts - verts_off; +} + +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset) +{ + BATCH_LOCALS(&rmesa->radeon); + int size; + + /* offset is in indices */ + BEGIN_BATCH(10); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (rmesa->ind_buf.is_32bit) { + /* convert to bytes */ + offset *= 4; + size = vertex_count; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + /* convert to bytes */ + offset *= 2; + size = (vertex_count + 1) >> 1; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type); + } + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(size); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH(rmesa->ind_buf.bo_offset + offset); + OUT_BATCH(size); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0); + } + END_BATCH(); +} + +static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) +{ + BATCH_LOCALS(&rmesa->radeon); + uint32_t voffset; + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; + + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i+1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[nr - 1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + END_BATCH(); + } else { + + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH(voffset); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH(voffset); + } + + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH(voffset); + } + for (i = 0; i + 1 < nr; i += 2) { + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+0].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + if (nr & 1) { + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[nr-1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + END_BATCH(); + } + +} + +static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) +{ + BATCH_LOCALS(&rmesa->radeon); + + r300_emit_scissor(rmesa->radeon.glCtx); + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); + END_BATCH(); +} + +void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + BATCH_LOCALS(&rmesa->radeon); + int type, num_verts; + + type = r300PrimitiveType(rmesa, prim); + num_verts = r300NumVerts(rmesa, end - start, prim); + + if (type < 0 || num_verts <= 0) + return; + + if (rmesa->ind_buf.bo) { + GLuint first, incr, offset = 0; + + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65500) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + + + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0); + if (rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1); + OUT_BATCH(rmesa->radeon.tcl.aos[0].count); + END_BATCH(); + } + + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + int align; + + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + + /* get alignment for IB correct */ + if (nr != num_verts) { + do { + align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2); + if (align % 4) + nr -= incr; + } while(align % 4); + if (nr <= 0) { + WARN_ONCE("did the impossible happen? we never aligned nr to dword\n"); + return; + } + + } + r300FireEB(rmesa, nr, type, offset); + + num_verts -= nr; + offset += nr; + } + + } else { + GLuint first, incr, offset = 0; + + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65535) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + + if (rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1); + OUT_BATCH(rmesa->radeon.tcl.aos[0].count); + END_BATCH(); + } + + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset); + r300FireAOS(rmesa, nr, type); + num_verts -= nr; + offset += nr; + } + } + COMMIT_BATCH(); +} + +static const char *getFallbackString(r300ContextPtr rmesa, uint32_t bit) +{ + static char common_fallback_str[32]; + switch (bit) { + case R300_FALLBACK_VERTEX_PROGRAM : + return "vertex program"; + case R300_FALLBACK_LINE_SMOOTH: + return "smooth lines"; + case R300_FALLBACK_POINT_SMOOTH: + return "smooth points"; + case R300_FALLBACK_POLYGON_SMOOTH: + return "smooth polygons"; + case R300_FALLBACK_LINE_STIPPLE: + return "line stipple"; + case R300_FALLBACK_POLYGON_STIPPLE: + return "polygon stipple"; + case R300_FALLBACK_STENCIL_TWOSIDE: + return "two-sided stencil"; + case R300_FALLBACK_RENDER_MODE: + return "render mode != GL_RENDER"; + case R300_FALLBACK_FRAGMENT_PROGRAM: + return "fragment program"; + case R300_FALLBACK_RADEON_COMMON: + snprintf(common_fallback_str, 32, "radeon common 0x%08x", rmesa->radeon.Fallback); + return common_fallback_str; + case R300_FALLBACK_AOS_LIMIT: + return "aos limit"; + case R300_FALLBACK_INVALID_BUFFERS: + return "invalid buffers"; + default: + return "unknown"; + } +} + +void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + uint32_t old_fallback = rmesa->fallback; + static uint32_t fallback_warn = 0; + + if (mode) { + if ((fallback_warn & bit) == 0) { + if (RADEON_DEBUG & RADEON_FALLBACKS) + fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(rmesa, bit)); + fallback_warn |= bit; + } + rmesa->fallback |= bit; + + /* update only if we change from no tcl fallbacks to some tcl fallbacks */ + if (rmesa->options.hw_tcl_enabled) { + if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) && + ((bit & R300_TCL_FALLBACK_MASK) > 0)) { + R300_STATECHANGE(rmesa, vap_cntl_status); + rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS; + } + } + + /* update only if we change from no raster fallbacks to some raster fallbacks */ + if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) && + ((bit & R300_RASTER_FALLBACK_MASK) > 0)) { + + radeon_firevertices(&rmesa->radeon); + rmesa->radeon.swtcl.RenderIndex = ~0; + _swsetup_Wakeup( ctx ); + } + } else { + rmesa->fallback &= ~bit; + + /* update only if we have disabled all tcl fallbacks */ + if (rmesa->options.hw_tcl_enabled) { + if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) { + R300_STATECHANGE(rmesa, vap_cntl_status); + rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS; + } + } + + /* update only if we have disabled all raster fallbacks */ + if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + _swrast_flush( ctx ); + + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + } + } + +} diff --git a/src/mesa/drivers/dri/r300/r300_render.h b/src/mesa/drivers/dri/r300/r300_render.h new file mode 100644 index 0000000000..581e9fa0cc --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_render.h @@ -0,0 +1,70 @@ +/* + * Copyright 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_RENDER_H__ +#define __R300_RENDER_H__ + +#include "main/mtypes.h" + +#define R300_FALLBACK_VERTEX_PROGRAM (1 << 0) +#define R300_TCL_FALLBACK_MASK 0x0000ffff + +#define R300_FALLBACK_LINE_SMOOTH (1 << 16) +#define R300_FALLBACK_POINT_SMOOTH (1 << 17) +#define R300_FALLBACK_POLYGON_SMOOTH (1 << 18) +#define R300_FALLBACK_LINE_STIPPLE (1 << 19) +#define R300_FALLBACK_POLYGON_STIPPLE (1 << 20) +#define R300_FALLBACK_STENCIL_TWOSIDE (1 << 21) +#define R300_FALLBACK_RENDER_MODE (1 << 22) +#define R300_FALLBACK_FRAGMENT_PROGRAM (1 << 23) +#define R300_FALLBACK_RADEON_COMMON (1 << 29) +#define R300_FALLBACK_AOS_LIMIT (1 << 30) +#define R300_FALLBACK_INVALID_BUFFERS (1 << 31) +#define R300_RASTER_FALLBACK_MASK 0xffff0000 + +#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) +#define MASK_X R300_WRITE_ENA_X +#define MASK_Y R300_WRITE_ENA_Y +#define MASK_Z R300_WRITE_ENA_Z +#define MASK_W R300_WRITE_ENA_W + +#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ + SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ + SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \ + SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \ + SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \ + SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE +#error Cannot change these! +#endif + +extern const struct tnl_pipeline_stage _r300_render_stage; + +extern void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode); + +extern void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c new file mode 100644 index 0000000000..9c24166ec5 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -0,0 +1,145 @@ +/* + * Copyright 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "main/glheader.h" + +#include "shader/program.h" +#include "tnl/tnl.h" +#include "r300_context.h" +#include "r300_fragprog_common.h" + +static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache) +{ + struct r300_fragment_program *tmp, *fp = cache->progs; + + while (fp) { + tmp = fp->next; + rc_constants_destroy(&fp->code.constants); + free(fp); + fp = tmp; + } +} + +static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache) +{ + struct r300_vertex_program *tmp, *vp = cache->progs; + + while (vp) { + tmp = vp->next; + rc_constants_destroy(&vp->code.constants); + _mesa_reference_vertprog(ctx, &vp->Base, NULL); + free(vp); + vp = tmp; + } +} + +static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, + GLuint id) +{ + struct r300_vertex_program_cont *vp; + struct r300_fragment_program_cont *fp; + + switch (target) { + case GL_VERTEX_STATE_PROGRAM_NV: + case GL_VERTEX_PROGRAM_ARB: + vp = CALLOC_STRUCT(r300_vertex_program_cont); + return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); + + case GL_FRAGMENT_PROGRAM_NV: + case GL_FRAGMENT_PROGRAM_ARB: + fp = CALLOC_STRUCT(r300_fragment_program_cont); + return _mesa_init_fragment_program(ctx, &fp->Base, target, id); + + default: + _mesa_problem(ctx, "Bad target in r300NewProgram"); + } + + return NULL; +} + +static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) +{ + struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog; + struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog; + + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vp); + break; + case GL_FRAGMENT_PROGRAM_ARB: + freeFragProgCache(ctx, fp); + break; + } + + _mesa_delete_program(ctx, prog); +} + +static GLboolean +r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog; + struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog; + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vp); + vp->progs = NULL; + break; + case GL_FRAGMENT_PROGRAM_ARB: + freeFragProgCache(ctx, fp); + fp->progs = NULL; + break; + } + + /* need this for tcl fallbacks */ + (void) _tnl_program_string(ctx, target, prog); + + /* XXX check if program is legal, within limits */ + return GL_TRUE; +} + +static GLboolean +r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + if (target == GL_FRAGMENT_PROGRAM_ARB) { + struct r300_fragment_program *fp = r300SelectAndTranslateFragmentShader(ctx); + + return !fp->error; + } else { + struct r300_vertex_program *vp = r300SelectAndTranslateVertexShader(ctx); + + return !vp->error; + } +} + +void r300InitShaderFuncs(struct dd_function_table *functions) +{ + functions->NewProgram = r300NewProgram; + functions->DeleteProgram = r300DeleteProgram; + functions->ProgramStringNotify = r300ProgramStringNotify; + functions->IsProgramNative = r300IsProgramNative; +} diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c new file mode 100644 index 0000000000..fa33be4998 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -0,0 +1,2416 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. +Copyright (C) 2004 Nicolai Haehnle. +All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <prefect_@gmx.net> + */ + +#include "main/glheader.h" +#include "main/state.h" +#include "main/imports.h" +#include "main/enums.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/dd.h" +#include "main/framebuffer.h" +#include "main/simple_list.h" +#include "main/api_arrayelt.h" + +#include "drivers/common/meta.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" + +#include "r300_context.h" +#include "r300_state.h" +#include "r300_reg.h" +#include "r300_emit.h" +#include "r300_fragprog_common.h" +#include "r300_render.h" +#include "r300_vertprog.h" + +static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, blend_color); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + GLuint r = IROUND(cf[0]*1023.0f); + GLuint g = IROUND(cf[1]*1023.0f); + GLuint b = IROUND(cf[2]*1023.0f); + GLuint a = IROUND(cf[3]*1023.0f); + + rmesa->hw.blend_color.cmd[1] = r | (a << 16); + rmesa->hw.blend_color.cmd[2] = b | (g << 16); + } else { + GLubyte color[4]; + CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); + CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); + CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); + CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); + + rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0], + color[1], color[2]); + } +} + +/** + * Calculate the hardware blend factor setting. This same function is used + * for source and destination of both alpha and RGB. + * + * \returns + * The hardware register value for the specified blend factor. This value + * will need to be shifted into the correct position for either source or + * destination factor. + * + * \todo + * Since the two cases where source and destination are handled differently + * are essentially error cases, they should never happen. Determine if these + * cases can be removed. + */ +static int blend_factor(GLenum factor, GLboolean is_src) +{ + switch (factor) { + case GL_ZERO: + return R300_BLEND_GL_ZERO; + break; + case GL_ONE: + return R300_BLEND_GL_ONE; + break; + case GL_DST_COLOR: + return R300_BLEND_GL_DST_COLOR; + break; + case GL_ONE_MINUS_DST_COLOR: + return R300_BLEND_GL_ONE_MINUS_DST_COLOR; + break; + case GL_SRC_COLOR: + return R300_BLEND_GL_SRC_COLOR; + break; + case GL_ONE_MINUS_SRC_COLOR: + return R300_BLEND_GL_ONE_MINUS_SRC_COLOR; + break; + case GL_SRC_ALPHA: + return R300_BLEND_GL_SRC_ALPHA; + break; + case GL_ONE_MINUS_SRC_ALPHA: + return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA; + break; + case GL_DST_ALPHA: + return R300_BLEND_GL_DST_ALPHA; + break; + case GL_ONE_MINUS_DST_ALPHA: + return R300_BLEND_GL_ONE_MINUS_DST_ALPHA; + break; + case GL_SRC_ALPHA_SATURATE: + return (is_src) ? R300_BLEND_GL_SRC_ALPHA_SATURATE : + R300_BLEND_GL_ZERO; + break; + case GL_CONSTANT_COLOR: + return R300_BLEND_GL_CONST_COLOR; + break; + case GL_ONE_MINUS_CONSTANT_COLOR: + return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; + break; + case GL_CONSTANT_ALPHA: + return R300_BLEND_GL_CONST_ALPHA; + break; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; + break; + default: + fprintf(stderr, "unknown blend factor %x\n", factor); + return (is_src) ? R300_BLEND_GL_ONE : R300_BLEND_GL_ZERO; + break; + } +} + +/** + * Sets both the blend equation and the blend function. + * This is done in a single + * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX) + * change the interpretation of the blend function. + * Also, make sure that blend function and blend equation are set to their + * default value if color blending is not enabled, since at least blend + * equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will cause wrong results + * otherwise for unknown reasons. + */ + +/* helper function */ +static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn, + int cbits, int funcA, int eqnA) +{ + GLuint new_ablend, new_cblend; + +#if 0 + fprintf(stderr, + "eqnA=%08x funcA=%08x eqn=%08x func=%08x cbits=%08x\n", + eqnA, funcA, eqn, func, cbits); +#endif + new_ablend = eqnA | funcA; + new_cblend = eqn | func; + + /* Some blend factor combinations don't seem to work when the + * BLEND_NO_SEPARATE bit is set. + * + * Especially problematic candidates are the ONE_MINUS_* flags, + * but I can't see a real pattern. + */ +#if 0 + if (new_ablend == new_cblend) { + new_cblend |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + } +#endif + new_cblend |= cbits; + + if ((new_ablend != r300->hw.bld.cmd[R300_BLD_ABLEND]) || + (new_cblend != r300->hw.bld.cmd[R300_BLD_CBLEND])) { + R300_STATECHANGE(r300, bld); + r300->hw.bld.cmd[R300_BLD_ABLEND] = new_ablend; + r300->hw.bld.cmd[R300_BLD_CBLEND] = new_cblend; + } +} + +static void r300SetBlendState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + int eqn = R300_COMB_FCN_ADD_CLAMP; + int funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + int eqnA = R300_COMB_FCN_ADD_CLAMP; + + if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { + r300SetBlendCntl(r300, func, eqn, 0, func, eqn); + return; + } + + func = + (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB, + GL_FALSE) << + R300_DST_BLEND_SHIFT); + + switch (ctx->Color.BlendEquationRGB) { + case GL_FUNC_ADD: + eqn = R300_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + eqn = R300_COMB_FCN_SUB_CLAMP; + break; + + case GL_FUNC_REVERSE_SUBTRACT: + eqn = R300_COMB_FCN_RSUB_CLAMP; + break; + + case GL_MIN: + eqn = R300_COMB_FCN_MIN; + func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + case GL_MAX: + eqn = R300_COMB_FCN_MAX; + func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + default: + fprintf(stderr, + "[%s:%u] Invalid RGB blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + return; + } + + funcA = + (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA, + GL_FALSE) << + R300_DST_BLEND_SHIFT); + + switch (ctx->Color.BlendEquationA) { + case GL_FUNC_ADD: + eqnA = R300_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + eqnA = R300_COMB_FCN_SUB_CLAMP; + break; + + case GL_FUNC_REVERSE_SUBTRACT: + eqnA = R300_COMB_FCN_RSUB_CLAMP; + break; + + case GL_MIN: + eqnA = R300_COMB_FCN_MIN; + funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + case GL_MAX: + eqnA = R300_COMB_FCN_MAX; + funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + default: + fprintf(stderr, + "[%s:%u] Invalid A blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + return; + } + + r300SetBlendCntl(r300, + func, eqn, + (R300_SEPARATE_ALPHA_ENABLE | + R300_READ_ENABLE | + R300_ALPHA_BLEND_ENABLE), funcA, eqnA); +} + +static void r300BlendEquationSeparate(GLcontext * ctx, + GLenum modeRGB, GLenum modeA) +{ + r300SetBlendState(ctx); +} + +static void r300BlendFuncSeparate(GLcontext * ctx, + GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA) +{ + r300SetBlendState(ctx); +} + +/** + * Translate LogicOp enums into hardware representation. + * Both use a very logical bit-wise layout, but unfortunately the order + * of bits is reversed. + */ +static GLuint translate_logicop(GLenum logicop) +{ + GLuint bits = logicop - GL_CLEAR; + bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3); + return bits << R300_RB3D_ROPCNTL_ROP_SHIFT; +} + +/** + * Used internally to update the r300->hw hardware state to match the + * current OpenGL state. + */ +static void r300SetLogicOpState(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + R300_STATECHANGE(r300, rop); + if (RGBA_LOGICOP_ENABLED(ctx)) { + r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE | + translate_logicop(ctx->Color.LogicOp); + } else { + r300->hw.rop.cmd[1] = 0; + } +} + +/** + * Called by Mesa when an application program changes the LogicOp state + * via glLogicOp. + */ +static void r300LogicOpcode(GLcontext *ctx, GLenum logicop) +{ + if (RGBA_LOGICOP_ENABLED(ctx)) + r300SetLogicOpState(ctx); +} + +static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLint p; + GLint *ip; + + /* no VAP UCP on non-TCL chipsets */ + if (!rmesa->options.hw_tcl_enabled) + return; + + p = (GLint) plane - (GLint) GL_CLIP_PLANE0; + ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + R300_STATECHANGE( rmesa, vpucp[p] ); + rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; +} + +static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint p; + + /* no VAP UCP on non-TCL chipsets */ + if (!r300->options.hw_tcl_enabled) + return; + + p = cap - GL_CLIP_PLANE0; + R300_STATECHANGE(r300, vap_clip_cntl); + if (state) { + r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0 << p); + r300ClipPlane(ctx, cap, NULL); + } else { + r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0 << p); + } +} + +/** + * Update our tracked culling state based on Mesa's state. + */ +static void r300UpdateCulling(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t val = 0; + + if (ctx->Polygon.CullFlag) { + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: + val = R300_CULL_FRONT; + break; + case GL_BACK: + val = R300_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + val = R300_CULL_FRONT | R300_CULL_BACK; + break; + default: + break; + } + } + + switch (ctx->Polygon.FrontFace) { + case GL_CW: + val |= R300_FRONT_FACE_CW; + break; + case GL_CCW: + val |= R300_FRONT_FACE_CCW; + break; + default: + break; + } + + /* Winding is inverted when rendering to FBO */ + if (ctx->DrawBuffer && ctx->DrawBuffer->Name) + val ^= R300_FRONT_FACE_CW; + + R300_STATECHANGE(r300, cul); + r300->hw.cul.cmd[R300_CUL_CULL] = val; +} + +static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, occlusion_cntl); + if (state) { + r300->hw.occlusion_cntl.cmd[1] |= (3 << 0); + } else { + r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0); + } +} + +static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth; +} + +static void r300SetEarlyZState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint topZ = R300_ZTOP_ENABLE; + GLuint w_fmt, fgdepthsrc; + + if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) + topZ = R300_ZTOP_DISABLE; + else if (current_fragment_program_writes_depth(ctx)) + topZ = R300_ZTOP_DISABLE; + else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill) + topZ = R300_ZTOP_DISABLE; + else if (r300->radeon.query.current) + topZ = R300_ZTOP_DISABLE; + + if (topZ != r300->hw.zstencil_format.cmd[2]) { + /* Note: This completely reemits the stencil format. + * I have not tested whether this is strictly necessary, + * or if emitting a write to ZB_ZTOP is enough. + */ + R300_STATECHANGE(r300, zstencil_format); + r300->hw.zstencil_format.cmd[2] = topZ; + } + + /* w_fmt value is set to get best performance + * see p.130 R5xx 3D acceleration guide v1.3 */ + if (current_fragment_program_writes_depth(ctx)) { + fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; + w_fmt = R300_W_FMT_W24 | R300_W_SRC_US; + } else { + fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; + w_fmt = R300_W_FMT_W0 | R300_W_SRC_US; + } + + if (w_fmt != r300->hw.us_out_fmt.cmd[5]) { + R300_STATECHANGE(r300, us_out_fmt); + r300->hw.us_out_fmt.cmd[5] = w_fmt; + } + + if (fgdepthsrc != r300->hw.fg_depth_src.cmd[1]) { + R300_STATECHANGE(r300, fg_depth_src); + r300->hw.fg_depth_src.cmd[1] = fgdepthsrc; + } +} + +static void r300SetAlphaState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLubyte refByte; + uint32_t pp_misc = 0x0; + GLboolean really_enabled = ctx->Color.AlphaEnabled; + + CLAMPED_FLOAT_TO_UBYTE(refByte, ctx->Color.AlphaRef); + + switch (ctx->Color.AlphaFunc) { + case GL_NEVER: + pp_misc |= R300_FG_ALPHA_FUNC_NEVER; + break; + case GL_LESS: + pp_misc |= R300_FG_ALPHA_FUNC_LESS; + break; + case GL_EQUAL: + pp_misc |= R300_FG_ALPHA_FUNC_EQUAL; + break; + case GL_LEQUAL: + pp_misc |= R300_FG_ALPHA_FUNC_LE; + break; + case GL_GREATER: + pp_misc |= R300_FG_ALPHA_FUNC_GREATER; + break; + case GL_NOTEQUAL: + pp_misc |= R300_FG_ALPHA_FUNC_NOTEQUAL; + break; + case GL_GEQUAL: + pp_misc |= R300_FG_ALPHA_FUNC_GE; + break; + case GL_ALWAYS: + /*pp_misc |= FG_ALPHA_FUNC_ALWAYS; */ + really_enabled = GL_FALSE; + break; + } + + if (really_enabled) { + pp_misc |= R300_FG_ALPHA_FUNC_ENABLE; + pp_misc |= R500_FG_ALPHA_FUNC_8BIT; + pp_misc |= (refByte & R300_FG_ALPHA_FUNC_VAL_MASK); + } else { + pp_misc = 0x0; + } + + R300_STATECHANGE(r300, at); + r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; +} + +static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) +{ + (void)func; + (void)ref; + r300SetAlphaState(ctx); +} + +static int translate_func(int func) +{ + switch (func) { + case GL_NEVER: + return R300_ZS_NEVER; + case GL_LESS: + return R300_ZS_LESS; + case GL_EQUAL: + return R300_ZS_EQUAL; + case GL_LEQUAL: + return R300_ZS_LEQUAL; + case GL_GREATER: + return R300_ZS_GREATER; + case GL_NOTEQUAL: + return R300_ZS_NOTEQUAL; + case GL_GEQUAL: + return R300_ZS_GEQUAL; + case GL_ALWAYS: + return R300_ZS_ALWAYS; + } + return 0; +} + +static void r300SetDepthState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, zs); + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= (R300_STENCIL_ENABLE | + R300_STENCIL_FRONT_BACK | + R500_STENCIL_REFMASK_FRONT_BACK); + r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); + + if (ctx->Depth.Test && ctx->DrawBuffer->_DepthBuffer) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE; + if (ctx->Depth.Mask) + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE; + r300->hw.zs.cmd[R300_ZS_CNTL_1] |= + translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT; + } +} + +static void r300CatchStencilFallback(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + const unsigned back = ctx->Stencil._BackFace; + + if (rmesa->radeon.radeonScreen->kernel_mm && + (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)) { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); + } else if (ctx->Stencil._Enabled && + (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] + || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] + || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE); + } else { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); + } +} + +static void r300SetStencilState(GLcontext * ctx, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLboolean hw_stencil = GL_FALSE; + + r300CatchStencilFallback(ctx); + + if (ctx->DrawBuffer) { + struct radeon_renderbuffer *rrbStencil + = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + hw_stencil = (rrbStencil && rrbStencil->bo); + } + + if (hw_stencil) { + R300_STATECHANGE(r300, zs); + if (state) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= + R300_STENCIL_ENABLE; + } else { + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= + ~R300_STENCIL_ENABLE; + } + } +} + +static void r300UpdatePolygonMode(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t hw_mode = R300_GA_POLY_MODE_DISABLE; + + /* Only do something if a polygon mode is wanted, default is GL_FILL */ + if (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL) { + GLenum f, b; + + /* Handle GL_CW (clock wise and GL_CCW (counter clock wise) + * correctly by selecting the correct front and back face + */ + if (ctx->Polygon.FrontFace == GL_CCW) { + f = ctx->Polygon.FrontMode; + b = ctx->Polygon.BackMode; + } else { + f = ctx->Polygon.BackMode; + b = ctx->Polygon.FrontMode; + } + + /* Enable polygon mode */ + hw_mode |= R300_GA_POLY_MODE_DUAL; + + switch (f) { + case GL_LINE: + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE; + break; + case GL_POINT: + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT; + break; + case GL_FILL: + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI; + break; + } + + switch (b) { + case GL_LINE: + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_LINE; + break; + case GL_POINT: + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_POINT; + break; + case GL_FILL: + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_TRI; + break; + } + } + + if (r300->hw.polygon_mode.cmd[1] != hw_mode) { + R300_STATECHANGE(r300, polygon_mode); + r300->hw.polygon_mode.cmd[1] = hw_mode; + } + + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; +} + +/** + * Change the culling mode. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300CullFace(GLcontext * ctx, GLenum mode) +{ + (void)mode; + + r300UpdateCulling(ctx); +} + +/** + * Change the polygon orientation. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300FrontFace(GLcontext * ctx, GLenum mode) +{ + (void)mode; + + r300UpdateCulling(ctx); + r300UpdatePolygonMode(ctx); +} + +/** + * Change the depth testing function. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300DepthFunc(GLcontext * ctx, GLenum func) +{ + (void)func; + r300SetDepthState(ctx); +} + +/** + * Enable/Disable depth writing. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300DepthMask(GLcontext * ctx, GLboolean mask) +{ + (void)mask; + r300SetDepthState(ctx); +} + +/** + * Handle glColorMask() + */ +static void r300ColorMask(GLcontext * ctx, + GLboolean r, GLboolean g, GLboolean b, GLboolean a) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int mask = (r ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | + (g ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | + (b ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | + (a ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0); + + if (mask != r300->hw.cmk.cmd[R300_CMK_COLORMASK]) { + R300_STATECHANGE(r300, cmk); + r300->hw.cmk.cmd[R300_CMK_COLORMASK] = mask; + } +} + +/* ============================================================= + * Point state + */ +static void r300PointSize(GLcontext * ctx, GLfloat size) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + /* We need to clamp to user defined range here, because + * the HW clamping happens only for per vertex point size. */ + size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize); + + /* same size limits for AA, non-AA points */ + size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); + + R300_STATECHANGE(r300, ps); + r300->hw.ps.cmd[R300_PS_POINTSIZE] = + ((int)(size * 6) << R300_POINTSIZE_X_SHIFT) | + ((int)(size * 6) << R300_POINTSIZE_Y_SHIFT); +} + +static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + switch (pname) { + case GL_POINT_SIZE_MIN: + R300_STATECHANGE(r300, ga_point_minmax); + r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK; + r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0); + r300PointSize(ctx, ctx->Point.Size); + break; + case GL_POINT_SIZE_MAX: + R300_STATECHANGE(r300, ga_point_minmax); + r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK; + r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0) + << R300_GA_POINT_MINMAX_MAX_SHIFT; + r300PointSize(ctx, ctx->Point.Size); + break; + case GL_POINT_DISTANCE_ATTENUATION: + break; + case GL_POINT_FADE_THRESHOLD_SIZE: + break; + default: + break; + } +} + +/* ============================================================= + * Line state + */ +static void r300LineWidth(GLcontext * ctx, GLfloat widthf) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + widthf = CLAMP(widthf, + ctx->Const.MinPointSize, + ctx->Const.MaxPointSize); + R300_STATECHANGE(r300, lcntl); + r300->hw.lcntl.cmd[1] = + R300_LINE_CNT_HO | R300_LINE_CNT_VE | (int)(widthf * 6.0); +} + +static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) +{ + (void)face; + (void)mode; + + r300UpdatePolygonMode(ctx); +} + +/* ============================================================= + * Stencil + */ + +static int translate_stencil_op(int op) +{ + switch (op) { + case GL_KEEP: + return R300_ZS_KEEP; + case GL_ZERO: + return R300_ZS_ZERO; + case GL_REPLACE: + return R300_ZS_REPLACE; + case GL_INCR: + return R300_ZS_INCR; + case GL_DECR: + return R300_ZS_DECR; + case GL_INCR_WRAP_EXT: + return R300_ZS_INCR_WRAP; + case GL_DECR_WRAP_EXT: + return R300_ZS_DECR_WRAP; + case GL_INVERT: + return R300_ZS_INVERT; + default: + WARN_ONCE("Do not know how to translate stencil op"); + return R300_ZS_KEEP; + } + return 0; +} + +static void r300ShadeModel(GLcontext * ctx, GLenum mode) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, shade); + rmesa->hw.shade.cmd[1] = 0x00000002; + R300_STATECHANGE(rmesa, shade2); + switch (mode) { + case GL_FLAT: + rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_FLAT; + break; + case GL_SMOOTH: + rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_SMOOTH; + break; + default: + return; + } + rmesa->hw.shade2.cmd[2] = 0x00000000; + rmesa->hw.shade2.cmd[3] = 0x00000000; +} + +static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, + GLenum func, GLint ref, GLuint mask) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint refmask; + GLuint flag; + const unsigned back = ctx->Stencil._BackFace; + + r300CatchStencilFallback(ctx); + + refmask = ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT) + | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT); + + R300_STATECHANGE(rmesa, zs); + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK << + R300_S_FRONT_FUNC_SHIFT) + | (R300_ZS_MASK << + R300_S_BACK_FUNC_SHIFT)); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= + ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); + + flag = translate_func(ctx->Stencil.Function[0]); + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (flag << R300_S_FRONT_FUNC_SHIFT); + + flag = translate_func(ctx->Stencil.Function[back]); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (flag << R300_S_BACK_FUNC_SHIFT); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R500_STENCIL_REFMASK_FRONT_BACK; + R300_STATECHANGE(rmesa, zsb); + refmask = ((ctx->Stencil.Ref[back] & 0xff) << R300_STENCILREF_SHIFT) + | ((ctx->Stencil.ValueMask[back] & 0xff) << R300_STENCILMASK_SHIFT); + + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &= + ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= refmask; + } +} + +static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + const unsigned back = ctx->Stencil._BackFace; + + r300CatchStencilFallback(ctx); + + R300_STATECHANGE(rmesa, zs); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= + ~(R300_STENCILREF_MASK << + R300_STENCILWRITEMASK_SHIFT); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= + (ctx->Stencil. + WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + R300_STATECHANGE(rmesa, zsb); + rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= + (ctx->Stencil. + WriteMask[back] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; + } +} + +static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, + GLenum fail, GLenum zfail, GLenum zpass) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + const unsigned back = ctx->Stencil._BackFace; + + r300CatchStencilFallback(ctx); + + R300_STATECHANGE(rmesa, zs); + /* It is easier to mask what's left.. */ + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= + (R300_ZS_MASK << R300_Z_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (translate_stencil_op(ctx->Stencil.FailFunc[0]) << + R300_S_FRONT_SFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << + R300_S_FRONT_ZFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << + R300_S_FRONT_ZPASS_OP_SHIFT); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (translate_stencil_op(ctx->Stencil.FailFunc[back]) << + R300_S_BACK_SFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZFailFunc[back]) << + R300_S_BACK_ZFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZPassFunc[back]) << + R300_S_BACK_ZPASS_OP_SHIFT); +} + +/* ============================================================= + * Window position and viewport transformation + */ + +static void r300UpdateWindow(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); + GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; + GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; + const GLfloat *v = ctx->Viewport._WindowMap.m; + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + GLfloat y_scale, y_bias; + + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = yoffset; + } + + GLfloat sx = v[MAT_SX]; + GLfloat tx = v[MAT_TX] + xoffset; + GLfloat sy = v[MAT_SY] * y_scale; + GLfloat ty = (v[MAT_TY] * y_scale) + y_bias; + GLfloat sz = v[MAT_SZ] * depthScale; + GLfloat tz = v[MAT_TZ] * depthScale; + + R300_STATECHANGE(rmesa, vpt); + + rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx); + rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx); + rmesa->hw.vpt.cmd[R300_VPT_YSCALE] = r300PackFloat32(sy); + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty); + rmesa->hw.vpt.cmd[R300_VPT_ZSCALE] = r300PackFloat32(sz); + rmesa->hw.vpt.cmd[R300_VPT_ZOFFSET] = r300PackFloat32(tz); +} + +static void r300Viewport(GLcontext * ctx, GLint x, GLint y, + GLsizei width, GLsizei height) +{ + /* Don't pipeline viewport changes, conflict with window offset + * setting below. Could apply deltas to rescue pipelined viewport + * values, or keep the originals hanging around. + */ + r300UpdateWindow(ctx); + + radeon_viewport(ctx, x, y, width, height); +} + +static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) +{ + r300UpdateWindow(ctx); +} + +void r300UpdateViewportOffset(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); + GLfloat xoffset = (GLfloat) dPriv->x; + GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + GLfloat tx = v[MAT_TX] + xoffset; + GLfloat ty = (-v[MAT_TY]) + yoffset; + + if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) || + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) { + /* Note: this should also modify whatever data the context reset + * code uses... + */ + R300_STATECHANGE(rmesa, vpt); + rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx); + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty); + + } + + radeonUpdateScissor(ctx); +} + +/** + * Update R300's own internal state parameters. + * For now just STATE_R300_WINDOW_DIMENSION + */ +static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct gl_program_parameter_list *paramList; + + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) + return; + + if (!ctx->FragmentProgram._Current || !rmesa->selected_fp) + return; + + paramList = ctx->FragmentProgram._Current->Base.Parameters; + + if (!paramList) + return; + + _mesa_load_state_parameters(ctx, paramList); +} + +/* ============================================================= + * Polygon state + */ +static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLfloat constant = units; + + switch (ctx->Visual.depthBits) { + case 16: + constant *= 4.0; + break; + case 24: + constant *= 2.0; + break; + } + + factor *= 12.0; + +/* fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */ + + R300_STATECHANGE(rmesa, zbs); + rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = r300PackFloat32(factor); + rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = r300PackFloat32(constant); + rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = r300PackFloat32(factor); + rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = r300PackFloat32(constant); +} + +/* Routing and texture-related */ + +/* r300 doesnt handle GL_CLAMP and GL_MIRROR_CLAMP_EXT correctly when filter is NEAREST. + * Since texwrap produces same results for GL_CLAMP and GL_CLAMP_TO_EDGE we use them instead. + * We need to recalculate wrap modes whenever filter mode is changed because someone might do: + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + * Since r300 completely ignores R300_TX_CLAMP when either min or mag is nearest it cant handle + * combinations where only one of them is nearest. + */ +static unsigned long gen_fixed_filter(unsigned long f) +{ + unsigned long mag, min, needs_fixing = 0; + //return f; + + /* We ignore MIRROR bit so we dont have to do everything twice */ + if ((f & ((7 - 1) << R300_TX_WRAP_S_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_S_SHIFT)) { + needs_fixing |= 1; + } + if ((f & ((7 - 1) << R300_TX_WRAP_T_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_T_SHIFT)) { + needs_fixing |= 2; + } + if ((f & ((7 - 1) << R300_TX_WRAP_R_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_R_SHIFT)) { + needs_fixing |= 4; + } + + if (!needs_fixing) + return f; + + mag = f & R300_TX_MAG_FILTER_MASK; + min = f & (R300_TX_MIN_FILTER_MASK|R300_TX_MIN_FILTER_MIP_MASK); + + /* TODO: Check for anisto filters too */ + if ((mag != R300_TX_MAG_FILTER_NEAREST) + && (min != R300_TX_MIN_FILTER_NEAREST)) + return f; + + /* r300 cant handle these modes hence we force nearest to linear */ + if ((mag == R300_TX_MAG_FILTER_NEAREST) + && (min != R300_TX_MIN_FILTER_NEAREST)) { + f &= ~R300_TX_MAG_FILTER_NEAREST; + f |= R300_TX_MAG_FILTER_LINEAR; + return f; + } + + if ((min == R300_TX_MIN_FILTER_NEAREST) + && (mag != R300_TX_MAG_FILTER_NEAREST)) { + f &= ~R300_TX_MIN_FILTER_NEAREST; + f |= R300_TX_MIN_FILTER_LINEAR; + return f; + } + + /* Both are nearest */ + if (needs_fixing & 1) { + f &= ~((7 - 1) << R300_TX_WRAP_S_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT; + } + if (needs_fixing & 2) { + f &= ~((7 - 1) << R300_TX_WRAP_T_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT; + } + if (needs_fixing & 4) { + f &= ~((7 - 1) << R300_TX_WRAP_R_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT; + } + return f; +} + +static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i; + struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300; + + R300_STATECHANGE(r300, fpt); + + for (i = 0; i < code->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = code->tex.inst[i] >> R300_TEX_ID_SHIFT; + unit &= 15; + + val = code->tex.inst[i]; + val &= ~R300_TEX_ID_MASK; + + opcode = + (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT; + if (opcode == R300_TEX_OP_KIL) { + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_TEX_ID_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } + } + } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, + R300_US_TEX_INST_0, code->tex.length); +} + +static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i; + struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500; + + /* find all the texture instructions and relocate the texture units */ + for (i = 0; i < code->inst_end + 1; i++) { + if ((code->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { + uint32_t val; + int unit, opcode, new_unit; + + val = code->inst[i].inst1; + + unit = (val >> 16) & 0xf; + + val &= ~(0xf << 16); + + opcode = val & (0x7 << 22); + if (opcode == R500_TEX_INST_TEXKILL) { + new_unit = 0; + } else { + if (tmu_mappings[unit] >= 0) { + new_unit = tmu_mappings[unit]; + } else { + new_unit = 0; + } + } + val |= R500_TEX_ID(new_unit); + code->inst[i].inst1 = val; + } + } +} + +static GLuint translate_lod_bias(GLfloat bias) +{ + GLint b = (int)(bias*32); + if (b >= (1 << 9)) + b = (1 << 9)-1; + else if (b < -(1 << 9)) + b = -(1 << 9); + return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; +} + + +static void r300SetupTextures(GLcontext * ctx) +{ + int i, mtu; + struct radeon_tex_obj *t; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int hw_tmu = 0; + int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */ + int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, }; + + R300_STATECHANGE(r300, txe); + R300_STATECHANGE(r300, tex.filter); + R300_STATECHANGE(r300, tex.filter_1); + R300_STATECHANGE(r300, tex.size); + R300_STATECHANGE(r300, tex.format); + R300_STATECHANGE(r300, tex.pitch); + R300_STATECHANGE(r300, tex.offset); + R300_STATECHANGE(r300, tex.chroma_key); + R300_STATECHANGE(r300, tex.border_color); + + r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0; + + mtu = r300->radeon.glCtx->Const.MaxTextureUnits; + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr, "mtu=%d\n", mtu); + + if (mtu > R300_MAX_TEXTURE_UNITS) { + fprintf(stderr, + "Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n", + mtu, R300_MAX_TEXTURE_UNITS); + exit(-1); + } + + /* We cannot let disabled tmu offsets pass DRM */ + for (i = 0; i < mtu; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + tmu_mappings[i] = hw_tmu; + + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (!t) + continue; + + if ((t->pp_txformat & 0xffffff00) == 0xffffff00) { + WARN_ONCE + ("unknown texture format (entry %x) encountered. Help me !\n", + t->pp_txformat & 0xff); + } + + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr, + "Activating texture unit %d\n", i); + + r300->hw.txe.cmd[R300_TXE_ENABLE] |= (1 << hw_tmu); + + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + + hw_tmu] = + gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28); + /* Note: There is a LOD bias per texture unit and a LOD bias + * per texture object. We add them here to get the correct behaviour. + * (The per-texture object LOD bias was introduced in OpenGL 1.4 + * and is not present in the EXT_texture_object extension). + */ + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->pp_txfilter_1 | + translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias); + r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->pp_txsize; + r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + + hw_tmu] = t->pp_txformat; + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->pp_txpitch; + r300->hw.textures[hw_tmu] = t; + + if (t->tile_bits & R300_TXO_MACRO_TILE) { + WARN_ONCE("macro tiling enabled!\n"); + } + + if (t->tile_bits & R300_TXO_MICRO_TILE) { + WARN_ONCE("micro tiling enabled!\n"); + } + + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + + hw_tmu] = 0x0; + r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 + + hw_tmu] = + t->pp_border_color; + + last_hw_tmu = hw_tmu; + + hw_tmu++; + } + } + + /* R3xx and R4xx chips require that the texture unit corresponding to + * KIL instructions is really enabled. + * + * We do some fakery here and in the state atom emit logic to enable + * the texture without tripping up the CS checker in the kernel. + */ + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) { + last_hw_tmu++; + + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + + r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */ + r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */ + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0; + } + } + + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1); + r300->hw.tex.size.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1); + r300->hw.tex.format.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); + r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); + + r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); + + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", + r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu); +} + +union r300_outputs_written { + GLuint vp_outputs; /* hw_tcl_on */ + DECLARE_RENDERINPUTS(index_bitset); /* !hw_tcl_on */ +}; + +#define R300_OUTPUTS_WRITTEN_TEST(ow, vp_result, tnl_attrib) \ + ((hw_tcl_on) ? (ow).vp_outputs & (1 << (vp_result)) : \ + RENDERINPUTS_TEST( (ow.index_bitset), (tnl_attrib) )) + +static void r300SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + union r300_outputs_written OutputsWritten; + GLuint InputsRead; + int fp_reg, high_rr; + int col_ip, tex_ip; + int rs_tex_count = 0; + int i, col_fmt, hw_tcl_on; + + hw_tcl_on = r300->options.hw_tcl_enabled; + + if (hw_tcl_on) + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); + + InputsRead = r300->selected_fp->InputsRead; + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = col_ip = tex_ip = col_fmt = 0; + + r300->hw.rc.cmd[1] = 0; + r300->hw.rc.cmd[2] = 0; + for (i=0; i<R300_RR_CMDSIZE-1; ++i) + r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0; + + for (i=0; i<R300_RI_CMDSIZE-1; ++i) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0; + + + if (InputsRead & FRAG_BIT_COL0) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL0; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL1; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } + } + + /* We always route 4 texcoord components */ + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; + + if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + continue; + } + + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0); + r300->hw.ri.cmd[R300_RI_INTERP_0] = R300_RS_COL_PTR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + ++col_ip; + } + + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] |= high_rr - 1; + + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, high_rr); + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} + +static void r500SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + union r300_outputs_written OutputsWritten; + GLuint InputsRead; + int fp_reg, high_rr; + int col_ip, tex_ip; + int rs_tex_count = 0; + int i, col_fmt, hw_tcl_on; + + hw_tcl_on = r300->options.hw_tcl_enabled; + + if (hw_tcl_on) + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); + + InputsRead = r300->selected_fp->InputsRead; + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = col_ip = tex_ip = col_fmt = 0; + + r300->hw.rc.cmd[1] = 0; + r300->hw.rc.cmd[2] = 0; + for (i=0; i<R300_RR_CMDSIZE-1; ++i) + r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0; + + for (i=0; i<R500_RI_CMDSIZE-1; ++i) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0; + + + if (InputsRead & FRAG_BIT_COL0) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL0; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL1; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } + } + + /* We always route 4 texcoord components */ + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; + + if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + continue; + } + + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0); + r300->hw.ri.cmd[R300_RI_INTERP_0] = R500_RS_COL_PTR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + ++col_ip; + } + + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; + r300->hw.rc.cmd[1] = (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] = 0xC0 | (high_rr - 1); + + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, high_rr); + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} + +#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c)) + +void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, + GLuint output_count, GLuint temp_count) +{ + int vtx_mem_size; + int pvs_num_slots; + int pvs_num_cntrls; + + /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS. + * See r500 docs 6.5.2 - done in emit */ + + /* avoid division by zero */ + if (input_count == 0) input_count = 1; + if (output_count == 0) output_count = 1; + if (temp_count == 0) temp_count = 1; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vtx_mem_size = 128; + else + vtx_mem_size = 72; + + pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count); + pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count); + + R300_STATECHANGE(rmesa, vap_cntl); + if (rmesa->options.hw_tcl_enabled) { + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = + (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | + (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION; + } else + /* not sure about non-tcl */ + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT); + else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + +} + +/** + * Enable/Disable states. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(cap), + state ? "GL_TRUE" : "GL_FALSE"); + + switch (cap) { + case GL_ALPHA_TEST: + r300SetAlphaState(ctx); + break; + case GL_COLOR_LOGIC_OP: + r300SetLogicOpState(ctx); + /* fall-through, because logic op overrides blending */ + case GL_BLEND: + r300SetBlendState(ctx); + break; + case GL_CLIP_PLANE0: + case GL_CLIP_PLANE1: + case GL_CLIP_PLANE2: + case GL_CLIP_PLANE3: + case GL_CLIP_PLANE4: + case GL_CLIP_PLANE5: + r300SetClipPlaneState(ctx, cap, state); + break; + case GL_CULL_FACE: + r300UpdateCulling(ctx); + break; + case GL_DEPTH_TEST: + r300SetDepthState(ctx); + break; + case GL_LINE_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_LINE_SMOOTH, ctx->Line.SmoothFlag); + break; + case GL_LINE_STIPPLE: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_LINE_STIPPLE, ctx->Line.StippleFlag); + break; + case GL_POINT_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POINT_SMOOTH, ctx->Point.SmoothFlag); + break; + case GL_POLYGON_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_SMOOTH, ctx->Polygon.SmoothFlag); + break; + case GL_POLYGON_STIPPLE: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_STIPPLE, ctx->Polygon.StippleFlag); + break; + case GL_POLYGON_OFFSET_POINT: + case GL_POLYGON_OFFSET_LINE: + case GL_POLYGON_OFFSET_FILL: + r300SetPolygonOffsetState(ctx, state); + break; + case GL_SCISSOR_TEST: + radeon_firevertices(&rmesa->radeon); + rmesa->radeon.state.scissor.enabled = state; + radeonUpdateScissor( ctx ); + break; + case GL_STENCIL_TEST: + r300SetStencilState(ctx, state); + break; + default: + break; + } +} + +/** + * Completely recalculates hardware state based on the Mesa state. + */ +static void r300ResetHwState(r300ContextPtr r300) +{ + GLcontext *ctx = r300->radeon.glCtx; + int has_tcl; + + has_tcl = r300->options.hw_tcl_enabled; + + if (RADEON_DEBUG & RADEON_STATE) + fprintf(stderr, "%s\n", __FUNCTION__); + + r300ColorMask(ctx, + ctx->Color.ColorMask[0][RCOMP], + ctx->Color.ColorMask[0][GCOMP], + ctx->Color.ColorMask[0][BCOMP], + ctx->Color.ColorMask[0][ACOMP]); + + r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); + r300DepthMask(ctx, ctx->Depth.Mask); + r300DepthFunc(ctx, ctx->Depth.Func); + + /* stencil */ + r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled); + r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]); + r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0], + ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]); + r300StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0], + ctx->Stencil.ZFailFunc[0], + ctx->Stencil.ZPassFunc[0]); + + r300UpdateCulling(ctx); + + r300SetBlendState(ctx); + r300SetLogicOpState(ctx); + + r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); + r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); + + r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA + | R300_VPORT_X_OFFSET_ENA + | R300_VPORT_Y_SCALE_ENA + | R300_VPORT_Y_OFFSET_ENA + | R300_VPORT_Z_SCALE_ENA + | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT; + r300->hw.vte.cmd[2] = 0x00000008; + + r300->hw.vap_vf_max_vtx_indx.cmd[1] = 0x00FFFFFF; + r300->hw.vap_vf_max_vtx_indx.cmd[2] = 0x00000000; + +#ifdef MESA_LITTLE_ENDIAN + r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP; +#else + r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP; +#endif + + /* disable VAP/TCL on non-TCL capable chips */ + if (!has_tcl) + r300->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS; + + r300->hw.vap_psc_sgn_norm_cntl.cmd[1] = 0xAAAAAAAA; + + /* XXX: Other families? */ + if (has_tcl) { + r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP; + + r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */ + r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */ + r300->hw.vap_clip.cmd[3] = r300PackFloat32(1.0); /* Y */ + r300->hw.vap_clip.cmd[4] = r300PackFloat32(1.0); /* Y */ + + switch (r300->radeon.radeonScreen->chip_family) { + case CHIP_FAMILY_R300: + r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_R300; + break; + default: + r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_RV350; + break; + } + } + + r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE + | R300_GB_LINE_STUFF_ENABLE + | R300_GB_TRIANGLE_STUFF_ENABLE; + + r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; + r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; + + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = + R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/; + switch (r300->radeon.radeonScreen->num_gb_pipes) { + case 1: + default: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_RV300; + break; + case 2: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R300; + break; + case 3: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R420_3P; + break; + case 4: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R420; + break; + } + + /* XXX: Enable anti-aliasing? */ + r300->hw.gb_misc2.cmd[R300_GB_MISC2_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; + r300->hw.gb_misc2.cmd[R300_GB_MISC2_SELECT] = 0; + + r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0); + r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0); + r300->hw.ga_point_s0.cmd[3] = r300PackFloat32(1.0); + r300->hw.ga_point_s0.cmd[4] = r300PackFloat32(1.0); + + r300->hw.ga_triangle_stipple.cmd[1] = 0x00050005; + + r300PointSize(ctx, 1.0); + + r300->hw.ga_point_minmax.cmd[1] = 0x18000006; + r300->hw.ga_point_minmax.cmd[2] = 0x00020006; + r300->hw.ga_point_minmax.cmd[3] = r300PackFloat32(1.0 / 192.0); + + r300LineWidth(ctx, 1.0); + + r300->hw.ga_line_stipple.cmd[1] = 0; + r300->hw.ga_line_stipple.cmd[2] = r300PackFloat32(0.0); + r300->hw.ga_line_stipple.cmd[3] = r300PackFloat32(1.0); + + r300ShadeModel(ctx, ctx->Light.ShadeModel); + + r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); + r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); + r300->hw.zbias_cntl.cmd[1] = 0x00000000; + + r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, + ctx->Polygon.OffsetUnits); + r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint); + r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine); + r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill); + + r300->hw.su_depth_scale.cmd[1] = 0x4B7FFFFF; + r300->hw.su_depth_scale.cmd[2] = 0x00000000; + + r300->hw.sc_hyperz.cmd[1] = 0x0000001C; + r300->hw.sc_hyperz.cmd[2] = 0x2DA49525; + + r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF; + + r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8 | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W0 | R300_W_SRC_US; + + /* disable fog unit */ + r300->hw.fogs.cmd[R300_FOGS_STATE] = 0; + r300->hw.fg_depth_src.cmd[1] = R300_FG_DEPTH_SRC_SCAN; + + r300->hw.rb3d_cctl.cmd[1] = 0; + + r300BlendColor(ctx, ctx->Color.BlendColor); + + r300->hw.rb3d_dither_ctl.cmd[1] = 0; + r300->hw.rb3d_dither_ctl.cmd[2] = 0; + r300->hw.rb3d_dither_ctl.cmd[3] = 0; + r300->hw.rb3d_dither_ctl.cmd[4] = 0; + r300->hw.rb3d_dither_ctl.cmd[5] = 0; + r300->hw.rb3d_dither_ctl.cmd[6] = 0; + r300->hw.rb3d_dither_ctl.cmd[7] = 0; + r300->hw.rb3d_dither_ctl.cmd[8] = 0; + r300->hw.rb3d_dither_ctl.cmd[9] = 0; + + r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0; + + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; + + r300->hw.zb_depthclearvalue.cmd[1] = 0; + + r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; + r300SetEarlyZState(ctx); + + r300->hw.zb_zmask.cmd[1] = 0; + r300->hw.zb_zmask.cmd[2] = 0; + + r300->hw.zb_hiz_offset.cmd[1] = 0; + + r300->hw.zb_hiz_pitch.cmd[1] = 0; + + r300VapCntl(r300, 0, 0, 0); + if (has_tcl) { + r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0; + r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0; + r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0); + r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0; + } + + r300->radeon.hw.all_dirty = GL_TRUE; +} + +void r300UpdateShaders(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!ctx->FragmentProgram._Current) { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } + + { + struct r300_fragment_program *fp; + + fp = r300SelectAndTranslateFragmentShader(ctx); + + r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); + } + + if (rmesa->options.hw_tcl_enabled) { + struct r300_vertex_program *vp; + + vp = r300SelectAndTranslateVertexShader(ctx); + + r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error); + } + + r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + rmesa->radeon.NewGLState = 0; +} + +static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer) +{ + static const GLfloat dummy[4] = { 0, 0, 0, 0 }; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index]; + + switch(rcc->Type) { + case RC_CONSTANT_EXTERNAL: + return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External]; + case RC_CONSTANT_IMMEDIATE: + return rcc->u.Immediate; + case RC_CONSTANT_STATE: + switch(rcc->u.State[0]) { + case RC_STATE_SHADOW_AMBIENT: { + const int unit = (int) rcc->u.State[1]; + const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current; + if (texObj) { + buffer[0] = + buffer[1] = + buffer[2] = + buffer[3] = texObj->CompareFailValue; + } + return buffer; + } + + case RC_STATE_R300_WINDOW_DIMENSION: { + __DRIdrawable * drawable = radeon_get_drawable(&rmesa->radeon); + buffer[0] = drawable->w * 0.5f; /* width*0.5 */ + buffer[1] = drawable->h * 0.5f; /* height*0.5 */ + buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ + buffer[3] = 1.0F; /* not used */ + return buffer; + } + + case RC_STATE_R300_TEXRECT_FACTOR: { + struct gl_texture_object *t = + ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX]; + + if (t && t->Image[0][t->BaseLevel]) { + struct gl_texture_image *image = + t->Image[0][t->BaseLevel]; + buffer[0] = 1.0 / image->Width2; + buffer[1] = 1.0 / image->Height2; + } else { + buffer[0] = 1.0; + buffer[1] = 1.0; + } + buffer[2] = 1.0; + buffer[3] = 1.0; + return buffer; + } + } + } + + return dummy; +} + + +static void r300SetupPixelShader(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = rmesa->selected_fp; + struct r300_fragment_program_code *code; + int i; + + code = &fp->code.code.r300; + + R300_STATECHANGE(rmesa, fpi[0]); + R300_STATECHANGE(rmesa, fpi[1]); + R300_STATECHANGE(rmesa, fpi[2]); + R300_STATECHANGE(rmesa, fpi[3]); + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length); + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length); + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length); + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst; + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr; + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst; + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr; + } + + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config; + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset; + for (i = 0; i < 4; i++) + rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i]; + + R300_STATECHANGE(rmesa, fpp); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4); + for (i = 0; i < fp->code.constants.Count; i++) { + GLfloat buffer[4]; + const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]); + } +} + +#define bump_r500fp_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/6; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +#define bump_r500fp_const_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +static void r500SetupPixelShader(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = rmesa->selected_fp; + int i; + struct r500_fragment_program_code *code; + + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; + + code = &fp->code.code.r500; + + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; + + rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = + R500_US_CODE_START_ADDR(0) | + R500_US_CODE_END_ADDR(code->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = + R500_US_CODE_RANGE_ADDR(0) | + R500_US_CODE_RANGE_SIZE(code->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = + R500_US_CODE_OFFSET_ADDR(0); + + R300_STATECHANGE(rmesa, r500fp); + /* Emit our shader... */ + for (i = 0; i < code->inst_end+1; i++) { + rmesa->hw.r500fp.cmd[i*6+1] = code->inst[i].inst0; + rmesa->hw.r500fp.cmd[i*6+2] = code->inst[i].inst1; + rmesa->hw.r500fp.cmd[i*6+3] = code->inst[i].inst2; + rmesa->hw.r500fp.cmd[i*6+4] = code->inst[i].inst3; + rmesa->hw.r500fp.cmd[i*6+5] = code->inst[i].inst4; + rmesa->hw.r500fp.cmd[i*6+6] = code->inst[i].inst5; + } + + bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6); + + R300_STATECHANGE(rmesa, r500fp_const); + for (i = 0; i < fp->code.constants.Count; i++) { + GLfloat buffer[4]; + const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); + } + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->code.constants.Count * 4); +} + +void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + struct vertex_attribute *attrs = rmesa->vbuf.attribs; + int i, j, reg_count; + uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1]; + uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1]; + + for (i = 0; i < R300_VIR_CMDSIZE-1; ++i) + vir0[i] = vir1[i] = 0; + + for (i = 0, j = 0; i < rmesa->vbuf.num_attribs; ++i) { + int tmp; + + tmp = attrs[i].data_type | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT); + if (attrs[i]._signed) + tmp |= R300_SIGNED; + if (attrs[i].normalize) + tmp |= R300_NORMALIZE; + + if (i % 2 == 0) { + vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT; + vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT); + } else { + vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT; + vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; + ++j; + } + } + + reg_count = (rmesa->vbuf.num_attribs + 1) >> 1; + if (rmesa->vbuf.num_attribs % 2 != 0) { + vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; + } else { + vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; + } + + R300_STATECHANGE(rmesa, vir[0]); + R300_STATECHANGE(rmesa, vir[1]); + R300_STATECHANGE(rmesa, vof); + R300_STATECHANGE(rmesa, vic); + + if (rmesa->radeon.radeonScreen->kernel_mm) { + rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16; + rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16; + } else { + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count; + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count; + } + + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); +} + +void r300UpdateShaderStates(r300ContextPtr rmesa) +{ + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; + + /* should only happenen once, just after context is created */ + if (!ctx->FragmentProgram._Current) + return; + + r300SetEarlyZState(ctx); + + r300SetupTextures(ctx); + + rmesa->vtbl.SetupPixelShader(ctx); + + rmesa->vtbl.SetupRSUnit(ctx); + + if (rmesa->options.hw_tcl_enabled) { + r300SetupVertexProgram(rmesa); + } +} + +#define EASY_US_OUT_FMT(comps, c0, c1, c2, c3) \ + (R500_OUT_FMT_##comps | R500_C0_SEL_##c0 | R500_C1_SEL_##c1 | \ + R500_C2_SEL_##c2 | R500_C3_SEL_##c3) +static void r300SetupUsOutputFormat(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + uint32_t hw_format; + struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon); + + if (!rrb) { + return; + } + + switch (rrb->base.Format) + { + case MESA_FORMAT_RGBA5551: + case MESA_FORMAT_RGBA8888: + hw_format = EASY_US_OUT_FMT(C4_8, A, B, G, R); + break; + case MESA_FORMAT_RGB565_REV: + case MESA_FORMAT_RGBA8888_REV: + hw_format = EASY_US_OUT_FMT(C4_8, R, G, B, A); + break; + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_ARGB8888: + hw_format = EASY_US_OUT_FMT(C4_8, B, G, R, A); + break; + case MESA_FORMAT_ARGB4444_REV: + case MESA_FORMAT_ARGB1555_REV: + case MESA_FORMAT_XRGB8888_REV: + case MESA_FORMAT_ARGB8888_REV: + hw_format = EASY_US_OUT_FMT(C4_8, A, R, G, B); + break; + case MESA_FORMAT_SRGBA8: + hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, B, G, R); + break; + case MESA_FORMAT_SARGB8: + hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, B, G, R, A); + break; + case MESA_FORMAT_SL8: + hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, A, R, A); + break; + case MESA_FORMAT_A8: + hw_format = EASY_US_OUT_FMT(C4_8, A, A, A, A); + break; + case MESA_FORMAT_L8: + case MESA_FORMAT_I8: + hw_format = EASY_US_OUT_FMT(C4_8, A, A, R, A); + break; + default: + assert(!"Unsupported format"); + break; + } + + R300_STATECHANGE(rmesa, us_out_fmt); + rmesa->hw.us_out_fmt.cmd[1] = hw_format; +} +#undef EASY_US_OUT_FMT + +/** + * Called by Mesa after an internal state update. + */ +static void r300InvalidateState(GLcontext * ctx, GLuint new_state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + _swrast_InvalidateState(ctx, new_state); + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + + if (new_state & _NEW_BUFFERS) { + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + + R300_STATECHANGE(r300, cb); + R300_STATECHANGE(r300, zb); + } + + if (new_state & (_NEW_LIGHT)) { + R300_STATECHANGE(r300, shade2); + if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) + r300->hw.shade2.cmd[1] |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + else + r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + } + + if (new_state & _NEW_BUFFERS) { + r300SetupUsOutputFormat(ctx); + } + + r300->radeon.NewGLState |= new_state; +} + +/** + * Calculate initial hardware state and register state functions. + * Assumes that the command buffer and state atoms have been + * initialized already. + */ +void r300InitState(r300ContextPtr r300) +{ + r300ResetHwState(r300); +} + +static void r300RenderMode(GLcontext * ctx, GLenum mode) +{ + r300SwitchFallback(ctx, R300_FALLBACK_RENDER_MODE, ctx->RenderMode != GL_RENDER); +} + +/** + * Initialize driver's state callback functions + */ +void r300InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions) +{ + + functions->UpdateState = r300InvalidateState; + functions->AlphaFunc = r300AlphaFunc; + functions->BlendColor = r300BlendColor; + functions->BlendEquationSeparate = r300BlendEquationSeparate; + functions->BlendFuncSeparate = r300BlendFuncSeparate; + functions->Enable = r300Enable; + functions->ColorMask = r300ColorMask; + functions->DepthFunc = r300DepthFunc; + functions->DepthMask = r300DepthMask; + functions->CullFace = r300CullFace; + functions->FrontFace = r300FrontFace; + functions->ShadeModel = r300ShadeModel; + functions->LogicOpcode = r300LogicOpcode; + + /* ARB_point_parameters */ + functions->PointParameterfv = r300PointParameter; + + /* Stencil related */ + functions->StencilFuncSeparate = r300StencilFuncSeparate; + functions->StencilMaskSeparate = r300StencilMaskSeparate; + functions->StencilOpSeparate = r300StencilOpSeparate; + + /* Viewport related */ + functions->Viewport = r300Viewport; + functions->DepthRange = r300DepthRange; + functions->PointSize = r300PointSize; + functions->LineWidth = r300LineWidth; + + functions->PolygonOffset = r300PolygonOffset; + functions->PolygonMode = r300PolygonMode; + + functions->RenderMode = r300RenderMode; + + functions->ClipPlane = r300ClipPlane; + functions->Scissor = radeonScissor; + + functions->DrawBuffer = radeonDrawBuffer; + functions->ReadBuffer = radeonReadBuffer; + + functions->CopyPixels = _mesa_meta_CopyPixels; + functions->DrawPixels = _mesa_meta_DrawPixels; + if (radeon->radeonScreen->kernel_mm) + functions->ReadPixels = radeonReadPixels; +} + +void r300InitShaderFunctions(r300ContextPtr r300) +{ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r300->vtbl.SetupRSUnit = r500SetupRSUnit; + r300->vtbl.SetupPixelShader = r500SetupPixelShader; + r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; + } else { + r300->vtbl.SetupRSUnit = r300SetupRSUnit; + r300->vtbl.SetupPixelShader = r300SetupPixelShader; + r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; + } +} diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h new file mode 100644 index 0000000000..e70f84f4e4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -0,0 +1,62 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __R300_STATE_H__ +#define __R300_STATE_H__ + +#include "r300_context.h" + +#define R300_NEWPRIM( rmesa ) \ + do { \ + if ( rmesa->radeon.dma.flush ) \ + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ + } while (0) + +#define R300_STATECHANGE(r300, atom) \ + do { \ + R300_NEWPRIM(r300); \ + r300->hw.atom.dirty = GL_TRUE; \ + r300->radeon.hw.is_dirty = GL_TRUE; \ + } while(0) + +void r300UpdateViewportOffset (GLcontext * ctx); +void r300UpdateDrawBuffer (GLcontext * ctx); +void r300UpdateShaders (r300ContextPtr rmesa); +void r300UpdateShaderStates (r300ContextPtr rmesa); +void r300InitState (r300ContextPtr r300); +void r300InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions); +void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count); +void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten); + +#endif /* __R300_STATE_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c new file mode 100644 index 0000000000..4dcc7cb022 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -0,0 +1,683 @@ +/************************************************************************** + +Copyright (C) 2007 Dave Airlie + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Dave Airlie <airlied@linux.ie> + * Maciej Cencora <m.cencora@gmail.com> + */ + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" + +#include "r300_state.h" +#include "r300_swtcl.h" +#include "r300_emit.h" +#include "r300_tex.h" +#include "r300_render.h" +#include "main/simple_list.h" + +#define EMIT_ATTR( ATTR, STYLE ) \ +do { \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ +} while (0) + +#define EMIT_PAD( N ) \ +do { \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ +} while (0) + +#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \ +do { \ + attrs[num_attrs].element = (_attr); \ + attrs[num_attrs].data_type = (_format); \ + attrs[num_attrs].dst_loc = (_dst_loc); \ + attrs[num_attrs].swizzle = (_swizzle); \ + attrs[num_attrs].write_mask = (_write_mask); \ + attrs[num_attrs]._signed = 0; \ + attrs[num_attrs].normalize = (_normalize); \ + ++num_attrs; \ +} while (0) + +void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_OutputsWritten) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + int first_free_tex = 0; + GLuint InputsRead = 0; + GLuint OutputsWritten = 0; + int num_attrs = 0; + GLuint fp_reads = rmesa->selected_fp->InputsRead; + struct vertex_attribute *attrs = rmesa->vbuf.attribs; + + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); + rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; + rmesa->radeon.swtcl.vertex_attr_count = 0; + + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); + + /* We always want non Ndc coords format */ + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + + /* Always write position vector */ + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0); + rmesa->swtcl.coloroffset = 4; + + if (fp_reads & FRAG_BIT_COL0) { + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); +#endif + } + + if (fp_reads & FRAG_BIT_COL1) { + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); +#endif + rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1; + } + + if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { + VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->BackfaceColorPtr; + OutputsWritten |= 1 << VERT_RESULT_BFC0; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); +#endif + if (fp_reads & FRAG_BIT_COL1) { + VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->BackfaceSecondaryColorPtr; + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + OutputsWritten |= 1 << VERT_RESULT_BFC1; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); +#endif + } + } + + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) { + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); + InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE; + OutputsWritten |= 1 << VERT_RESULT_PSIZ; + EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); + ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); + } + + if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; + + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + VB->AttribPtr[_TNL_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); + } + + if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { + int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; + + VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + VB->AttribPtr[_TNL_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; + RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); + } + + /** + * Sending only one texcoord component may lead to lock up, + * so for all textures always output 4 texcoord components to RS. + */ + { + int i; + GLuint swiz, format, hw_format; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (fp_reads & FRAG_BIT_TEX(i)) { + switch (VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size) { + case 1: + format = EMIT_1F; + hw_format = R300_DATA_TYPE_FLOAT_1; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 2: + format = EMIT_2F; + hw_format = R300_DATA_TYPE_FLOAT_2; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 3: + format = EMIT_3F; + hw_format = R300_DATA_TYPE_FLOAT_3; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + break; + case 4: + format = EMIT_4F; + hw_format = R300_DATA_TYPE_FLOAT_4; + swiz = SWIZZLE_XYZW; + break; + default: + continue; + } + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + EMIT_ATTR(_TNL_ATTRIB_TEX(i), format); + ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); + ++first_free_tex; + } + } + } + + if (first_free_tex >= ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); + exit(-1); + } + + R300_NEWPRIM(rmesa); + rmesa->vbuf.num_attribs = num_attrs; + *_InputsRead = InputsRead; + *_OutputsWritten = OutputsWritten; + + RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset); +} + +static void r300PrepareVertices(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint InputsRead, OutputsWritten; + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); + + r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); + r300SetupVAP(ctx, InputsRead, OutputsWritten); + + rmesa->radeon.swtcl.vertex_size = + _tnl_install_attrs( ctx, + rmesa->radeon.swtcl.vertex_attrs, + rmesa->radeon.swtcl.vertex_attr_count, + NULL, 0 ); + + rmesa->radeon.swtcl.vertex_size /= 4; +} + +static void r300_predict_emit_size( r300ContextPtr rmesa ) +{ + if (!rmesa->radeon.swtcl.emit_prediction) { + const int vertex_size = 7; + const int prim_size = 3; + const int cache_flush_size = 4; + const int pre_emit_state = 4; + const int scissor_size = 3; + const int state_size = radeonCountStateEmitSize(&rmesa->radeon); + + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, + state_size + pre_emit_state + scissor_size + + vertex_size + prim_size + cache_flush_size * 2, + __FUNCTION__)) + rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); + else + rmesa->radeon.swtcl.emit_prediction = state_size; + + rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, + "%s, size %d\n", + __func__, rmesa->radeon.cmdbuf.cs->cdw + + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state); + } +} + + +static GLuint reduced_prim[] = { + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, +}; + +static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); + +/*********************************************************************** + * Emit primitives as inline vertices * + ***********************************************************************/ + + +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 0 +#define HAVE_QUAD_STRIPS 0 +#define HAVE_POLYGONS 1 +#define HAVE_ELTS 1 + +static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size) +{ + void *rv; + do { + r300_predict_emit_size( rmesa ); + rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ); + } while (!rv); + return rv; +} + +#undef LOCAL_VARS +#undef ALLOC_VERTS +#define CTX_ARG r300ContextPtr rmesa +#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size +#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size); +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const char *r300verts = (char *)rmesa->radeon.swtcl.verts; +#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) +#define VERTEX r300Vertex +#undef TAG +#define TAG(x) r300_##x +#include "tnl_dd/t_dd_triemit.h" + + + +/*********************************************************************** + * Macros for t_dd_tritmp.h to draw basic primitives * + ***********************************************************************/ + +#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d ) +#define TRI( a, b, c ) r300_triangle( rmesa, a, b, c ) +#define LINE( a, b ) r300_line( rmesa, a, b ) +#define POINT( a ) r300_point( rmesa, a ) + +/*********************************************************************** + * Build render functions from dd templates * + ***********************************************************************/ + +#define R300_UNFILLED_BIT 0x01 +#define R300_MAX_TRIFUNC 0x02 + +static struct { + tnl_points_func points; + tnl_line_func line; + tnl_triangle_func triangle; + tnl_quad_func quad; +} rast_tab[R300_MAX_TRIFUNC]; + +#define DO_FALLBACK 0 +#define DO_UNFILLED (IND & R300_UNFILLED_BIT) +#define DO_TWOSIDE 0 +#define DO_FLAT 0 +#define DO_OFFSET 0 +#define DO_TRI 1 +#define DO_QUAD 1 +#define DO_LINE 1 +#define DO_POINTS 1 +#define DO_FULL_QUAD 1 + +#define HAVE_SPEC 1 +#define HAVE_BACK_COLORS 0 +#define HAVE_HW_FLATSHADE 1 +#define TAB rast_tab + +#define DEPTH_SCALE 1.0 +#define UNFILLED_TRI unfilled_tri +#define UNFILLED_QUAD unfilled_quad +#define VERT_X(_v) _v->v.x +#define VERT_Y(_v) _v->v.y +#define VERT_Z(_v) _v->v.z +#define AREA_IS_CCW( a ) (a < 0) +#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int))) + +#define VERT_SET_RGBA( v, c ) \ +do { \ + r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \ +} while (0) + +#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset] + +#define VERT_SET_SPEC( v0, c ) \ +do { \ + if (specoffset) { \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \ + } \ +} while (0) + +#define VERT_COPY_SPEC( v0, v1 ) \ +do { \ + if (specoffset) { \ + v0->v.specular.red = v1->v.specular.red; \ + v0->v.specular.green = v1->v.specular.green; \ + v0->v.specular.blue = v1->v.specular.blue; \ + } \ +} while (0) + +#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] +#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] + +#undef LOCAL_VARS +#undef TAG +#undef INIT + +#define LOCAL_VARS(n) \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + GLuint color[n] = { 0, }, spec[n] = { 0, }; \ + GLuint coloroffset = rmesa->swtcl.coloroffset; \ + GLuint specoffset = rmesa->swtcl.specoffset; \ + (void) color; (void) spec; (void) coloroffset; (void) specoffset; + +/*********************************************************************** + * Helpers for rendering unfilled primitives * + ***********************************************************************/ + +#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] ) +#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive +#undef TAG +#define TAG(x) x +#include "tnl_dd/t_dd_unfilled.h" +#undef IND + + +/*********************************************************************** + * Generate GL render functions * + ***********************************************************************/ + + +#define IND (0) +#define TAG(x) x +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_UNFILLED_BIT) +#define TAG(x) x##_unfilled +#include "tnl_dd/t_dd_tritmp.h" + + +static void init_rast_tab( void ) +{ + init(); + init_unfilled(); +} + +/**********************************************************************/ +/* Render unclipped begin/end objects */ +/**********************************************************************/ + +#define RENDER_POINTS( start, count ) \ + for ( ; start < count ; start++) \ + r300_point( rmesa, VERT(start) ) +#define RENDER_LINE( v0, v1 ) \ + r300_line( rmesa, VERT(v0), VERT(v1) ) +#define RENDER_TRI( v0, v1, v2 ) \ + r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) ) +#define RENDER_QUAD( v0, v1, v2, v3 ) \ + r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) ) +#define INIT(x) do { \ + r300RenderPrimitive( ctx, x ); \ +} while (0) +#undef LOCAL_VARS +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \ + const char *r300verts = (char *)rmesa->radeon.swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +#define RESET_STIPPLE //if ( stipple ) r200ResetLineStipple( ctx ); +#define RESET_OCCLUSION +#define PRESERVE_VB_DEFS +#define ELT(x) (x) +#define TAG(x) r300_##x##_verts +#include "tnl/t_vb_rendertmp.h" +#undef ELT +#undef TAG +#define TAG(x) r300_##x##_elts +#define ELT(x) elt[x] +#include "tnl/t_vb_rendertmp.h" + + + + +/**********************************************************************/ +/* Choose render functions */ +/**********************************************************************/ +static void r300ChooseRenderState( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); + + if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; + + if (index != rmesa->radeon.swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; + tnl->Driver.Render.Triangle = rast_tab[index].triangle; + tnl->Driver.Render.Quad = rast_tab[index].quad; + + if (index == 0) { + tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts; + tnl->Driver.Render.PrimTabElts = r300_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly; + } else { + tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; + tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + + rmesa->radeon.swtcl.RenderIndex = index; + } +} + +void r300RenderStart(GLcontext *ctx) +{ + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + + r300ChooseRenderState(ctx); + + r300UpdateShaders(rmesa); + + r300PrepareVertices(ctx); + + r300ValidateBuffers(ctx); + + r300UpdateShaderStates(rmesa); + + + /* investigate if we can put back flush optimisation if needed */ + if (rmesa->radeon.dma.flush != NULL) { + rmesa->radeon.dma.flush(ctx); + } +} + +void r300RenderFinish(GLcontext *ctx) +{ +} + +static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); + + if (rmesa->radeon.swtcl.hw_primitive != hwprim) { + R300_NEWPRIM( rmesa ); + rmesa->radeon.swtcl.hw_primitive = hwprim; + } +} + +void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +{ + + r300ContextPtr rmesa = R300_CONTEXT(ctx); + rmesa->radeon.swtcl.render_primitive = prim; + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); + + if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) + return; + + r300RasterPrimitive( ctx, reduced_prim[prim] ); +} + +void r300ResetLineStipple(GLcontext *ctx) +{ + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); +} + +void r300InitSwtcl(GLcontext *ctx) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + static int firsttime = 1; + radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "%s\n", __func__); + + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } + rmesa->radeon.swtcl.emit_prediction = 0; + + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + /* FIXME: what are these numbers? */ + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 48 * sizeof(GLfloat) ); + + rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->radeon.swtcl.RenderIndex = ~0; + rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; + rmesa->radeon.swtcl.hw_primitive = 0; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + + _tnl_need_projected_coords( ctx, GL_FALSE ); +} + +void r300DestroySwtcl(GLcontext *ctx) +{ +} + +static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset) +{ + BATCH_LOCALS(&rmesa->radeon); + + radeon_print(RADEON_SWRENDER, RADEON_TRACE, + "%s: vertex_size %d, offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + BEGIN_BATCH(7); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2); + OUT_BATCH(1); + OUT_BATCH(vertex_size | (vertex_size << 8)); + OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); +} + +static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) +{ + BATCH_LOCALS(&rmesa->radeon); + int type, num_verts; + if (RADEON_DEBUG & RADEON_VERTS) + fprintf(stderr, "%s\n", __func__); + + type = r300PrimitiveType(rmesa, primitive); + num_verts = r300NumVerts(rmesa, vertex_nr, primitive); + + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); + END_BATCH(); +} + +void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) +{ + radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + r300EmitCacheFlush(rmesa); + + radeonEmitState(&rmesa->radeon); + r300_emit_scissor(ctx); + r300EmitVertexAOS(rmesa, + rmesa->radeon.swtcl.vertex_size, + rmesa->radeon.swtcl.bo, + current_offset); + + r300EmitVbufPrim(rmesa, + rmesa->radeon.swtcl.hw_primitive, + rmesa->radeon.swtcl.numverts); + r300EmitCacheFlush(rmesa); + if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", + rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction ); + rmesa->radeon.swtcl.emit_prediction = 0; + COMMIT_BATCH(); +} diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h new file mode 100644 index 0000000000..c271d26546 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_swtcl.h @@ -0,0 +1,65 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> - original r200 code + * Dave Airlie <airlied@linux.ie> + */ + +#ifndef __R300_SWTCL_H__ +#define __R300_SWTCL_H__ + +#include "main/mtypes.h" +#include "swrast/swrast.h" +#include "r300_context.h" + +/* + * Here are definitions of OVM locations of vertex attributes for non TCL hw + */ +#define SWTCL_OVM_POS 0 +#define SWTCL_OVM_COLOR0 2 +#define SWTCL_OVM_COLOR1 3 +#define SWTCL_OVM_COLOR2 4 +#define SWTCL_OVM_COLOR3 5 +#define SWTCL_OVM_TEX(n) ((n) + 6) +#define SWTCL_OVM_POINT_SIZE 15 + +extern void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *InputsRead, GLuint *OutputsWritten); + +extern void r300InitSwtcl( GLcontext *ctx ); +extern void r300DestroySwtcl( GLcontext *ctx ); + +extern void r300RenderStart(GLcontext *ctx); +extern void r300RenderFinish(GLcontext *ctx); +extern void r300RenderPrimitive(GLcontext *ctx, GLenum prim); +extern void r300ResetLineStipple(GLcontext *ctx); + +extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c new file mode 100644 index 0000000000..baef206bc2 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -0,0 +1,386 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/colormac.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/mipmap.h" +#include "main/simple_list.h" +#include "main/texstore.h" +#include "main/texobj.h" + +#include "texmem.h" + +#include "r300_context.h" +#include "radeon_mipmap_tree.h" +#include "r300_tex.h" + + +static unsigned int translate_wrap_mode(GLenum wrapmode) +{ + switch(wrapmode) { + case GL_REPEAT: return R300_TX_REPEAT; + case GL_CLAMP: return R300_TX_CLAMP; + case GL_CLAMP_TO_EDGE: return R300_TX_CLAMP_TO_EDGE; + case GL_CLAMP_TO_BORDER: return R300_TX_CLAMP_TO_BORDER; + case GL_MIRRORED_REPEAT: return R300_TX_REPEAT | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_EXT: return R300_TX_CLAMP | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; + default: + _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__); + return 0; + } +} + + +/** + * Update the cached hardware registers based on the current texture wrap modes. + * + * \param t Texture object whose wrap modes are to be set + */ +static void r300UpdateTexWrap(radeonTexObjPtr t) +{ + struct gl_texture_object *tObj = &t->base; + + t->pp_txfilter &= + ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK); + + t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT; + + if (tObj->Target != GL_TEXTURE_1D) { + t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT; + + if (tObj->Target == GL_TEXTURE_3D) + t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT; + } +} + +static GLuint aniso_filter(GLfloat anisotropy) +{ + if (anisotropy >= 16.0) { + return R300_TX_MAX_ANISO_16_TO_1; + } else if (anisotropy >= 8.0) { + return R300_TX_MAX_ANISO_8_TO_1; + } else if (anisotropy >= 4.0) { + return R300_TX_MAX_ANISO_4_TO_1; + } else if (anisotropy >= 2.0) { + return R300_TX_MAX_ANISO_2_TO_1; + } else { + return R300_TX_MAX_ANISO_1_TO_1; + } +} + +/** + * Set the texture magnification and minification modes. + * + * \param t Texture whose filter modes are to be set + * \param minf Texture minification mode + * \param magf Texture magnification mode + * \param anisotropy Maximum anisotropy level + */ +static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) +{ + /* Force revalidation to account for switches from/to mipmapping. */ + t->validated = GL_FALSE; + + t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); + t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; + + /* Note that EXT_texture_filter_anisotropic is extremely vague about + * how anisotropic filtering interacts with the "normal" filter modes. + * When anisotropic filtering is enabled, we override min and mag + * filter settings completely. This includes driconf's settings. + */ + if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) { + t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO + | R300_TX_MIN_FILTER_ANISO + | R300_TX_MIN_FILTER_MIP_LINEAR + | aniso_filter(anisotropy); + if (RADEON_DEBUG & RADEON_TEXTURE) + fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy); + return; + } + + switch (minf) { + case GL_NEAREST: + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + case GL_LINEAR_MIPMAP_NEAREST: + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_LINEAR_MIPMAP_LINEAR: + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + } + + /* Note we don't have 3D mipmaps so only use the mag filter setting + * to set the 3D texture filter mode. + */ + switch (magf) { + case GL_NEAREST: + t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR; + break; + } +} + +static void r300SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4]) +{ + GLubyte c[4]; + CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); + t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); +} + +/** + * Changes variables and flags for a state update, which will happen at the + * next UpdateTextureState + */ + +static void r300TexParameter(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat * params) +{ + radeonTexObj* t = radeon_tex_obj(texObj); + GLenum texBaseFormat; + + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { + fprintf(stderr, "%s( %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(pname)); + } + + switch (pname) { + case GL_TEXTURE_MIN_FILTER: + case GL_TEXTURE_MAG_FILTER: + case GL_TEXTURE_MAX_ANISOTROPY_EXT: + r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); + break; + + case GL_TEXTURE_WRAP_S: + case GL_TEXTURE_WRAP_T: + case GL_TEXTURE_WRAP_R: + r300UpdateTexWrap(t); + break; + + case GL_TEXTURE_BORDER_COLOR: + r300SetTexBorderColor(t, texObj->BorderColor.f); + break; + + case GL_TEXTURE_BASE_LEVEL: + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: + t->validated = GL_FALSE; + break; + + case GL_DEPTH_TEXTURE_MODE: + if (!texObj->Image[0][texObj->BaseLevel]) + return; + texBaseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat; + + if (texBaseFormat == GL_DEPTH_COMPONENT || + texBaseFormat == GL_DEPTH_STENCIL) { + r300SetDepthTexMode(texObj); + break; + } else { + /* If the texture isn't a depth texture, changing this + * state won't cause any changes to the hardware. + * Don't force a flush of texture state. + */ + return; + } + + default: + return; + } +} + +static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeonTexObj* t = radeon_tex_obj(texObj); + + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + (void *)texObj, + _mesa_lookup_enum_by_nr(texObj->Target)); + } + + if (rmesa) { + int i; + struct radeon_bo *bo; + bo = !t->mt ? t->bo : t->mt->bo; + if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->radeon.cmdbuf.cs)) { + radeon_firevertices(&rmesa->radeon); + } + + for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i) + if (rmesa->hw.textures[i] == t) + rmesa->hw.textures[i] = 0; + } + + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + + radeon_miptree_unreference(&t->mt); + + _mesa_delete_texture_object(ctx, texObj); +} + +/** + * Allocate a new texture object. + * Called via ctx->Driver.NewTextureObject. + * Note: this function will be called during context creation to + * allocate the default texture objects. + * Fixup MaxAnisotropy according to user preference. + */ +static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, + GLuint name, + GLenum target) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); + + + if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + t, _mesa_lookup_enum_by_nr(target)); + } + + _mesa_initialize_texture_object(&t->base, name, target); + t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; + + /* Initialize hardware state */ + r300UpdateTexWrap(t); + r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); + r300SetTexBorderColor(t, t->base.BorderColor.f); + + return &t->base; +} + +unsigned r300IsFormatRenderable(gl_format mesa_format) +{ + switch (mesa_format) + { + case MESA_FORMAT_RGB565: + case MESA_FORMAT_RGBA5551: + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_RGB565_REV: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_ARGB4444_REV: + case MESA_FORMAT_ARGB1555_REV: + case MESA_FORMAT_XRGB8888_REV: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: + case MESA_FORMAT_SL8: + case MESA_FORMAT_A8: + case MESA_FORMAT_L8: + case MESA_FORMAT_I8: + case MESA_FORMAT_Z16: + return 1; + default: + return 0; + } +} + +unsigned r500IsFormatRenderable(gl_format mesa_format) +{ + if (mesa_format == MESA_FORMAT_S8_Z24) { + return 1; + } else { + return r300IsFormatRenderable(mesa_format); + } +} + +void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) +{ + /* Note: we only plug in the functions we implement in the driver + * since _mesa_init_driver_functions() was already called. + */ + functions->NewTextureImage = radeonNewTextureImage; + functions->FreeTexImageData = radeonFreeTexImageData; + functions->MapTexture = radeonMapTexture; + functions->UnmapTexture = radeonUnmapTexture; + + functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa; + functions->TexImage1D = radeonTexImage1D; + functions->TexImage2D = radeonTexImage2D; + functions->TexImage3D = radeonTexImage3D; + functions->TexSubImage1D = radeonTexSubImage1D; + functions->TexSubImage2D = radeonTexSubImage2D; + functions->TexSubImage3D = radeonTexSubImage3D; + functions->GetTexImage = radeonGetTexImage; + functions->GetCompressedTexImage = radeonGetCompressedTexImage; + functions->NewTextureObject = r300NewTextureObject; + functions->DeleteTexture = r300DeleteTexture; + functions->IsTextureResident = driIsTextureResident; + + functions->TexParameter = r300TexParameter; + + functions->CompressedTexImage2D = radeonCompressedTexImage2D; + functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + + functions->GenerateMipmap = radeonGenerateMipmap; + + driInitTextureFormats(); +} diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h new file mode 100644 index 0000000000..aca44cd766 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -0,0 +1,59 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __r300_TEX_H__ +#define __r300_TEX_H__ + +extern void r300SetDepthTexMode(struct gl_texture_object *tObj); + +extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, + __DRIdrawable *dPriv); + +extern void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint format, __DRIdrawable *dPriv); + +extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, + GLuint pitch); + +extern GLboolean r300ValidateBuffers(GLcontext * ctx); + +extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions); + +int32_t r300TranslateTexFormat(gl_format mesaFormat); + +unsigned r300IsFormatRenderable(gl_format mesaFormat); +unsigned r500IsFormatRenderable(gl_format mesaFormat); + +#endif /* __r300_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c new file mode 100644 index 0000000000..4ba6740e3d --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -0,0 +1,524 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * + * \todo Enable R300 texture tiling code? + */ + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/context.h" +#include "main/macros.h" +#include "main/teximage.h" +#include "main/texobj.h" +#include "main/enums.h" +#include "main/simple_list.h" + +#include "r300_context.h" +#include "radeon_mipmap_tree.h" +#include "r300_tex.h" +#include "r300_reg.h" + +/* + * Note that the _REV formats are the same as the non-REV formats. This is + * because the REV and non-REV formats are identical as a byte string, but + * differ when accessed as 16-bit or 32-bit words depending on the endianness of + * the host. Since the textures are transferred to the R300 as a byte string + * (i.e. without any byte-swapping), the R300 sees the REV and non-REV formats + * identically. -- paulus + */ + +int32_t r300TranslateTexFormat(gl_format mesaFormat) +{ + switch (mesaFormat) + { +#ifdef MESA_LITTLE_ENDIAN + case MESA_FORMAT_RGBA8888: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case MESA_FORMAT_RGBA8888_REV: + return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888_REV: + return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8); +#else + case MESA_FORMAT_RGBA8888: + return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8); + case MESA_FORMAT_RGBA8888_REV: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888: + return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8); + case MESA_FORMAT_ARGB8888_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); +#endif + case MESA_FORMAT_XRGB8888: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + case MESA_FORMAT_RGB888: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + case MESA_FORMAT_RGB565: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + case MESA_FORMAT_RGB565_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + case MESA_FORMAT_ARGB4444: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4); + case MESA_FORMAT_ARGB4444_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4); + case MESA_FORMAT_ARGB1555: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5); + case MESA_FORMAT_ARGB1555_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5); + case MESA_FORMAT_AL88: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + case MESA_FORMAT_AL88_REV: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); + case MESA_FORMAT_RGB332: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2); + case MESA_FORMAT_A8: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8); + case MESA_FORMAT_L8: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); + case MESA_FORMAT_I8: + return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case MESA_FORMAT_CI8: + return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case MESA_FORMAT_YCBCR: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE; + case MESA_FORMAT_YCBCR_REV: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE; + case MESA_FORMAT_RGB_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1); + case MESA_FORMAT_RGBA_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1); + case MESA_FORMAT_RGBA_DXT3: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3); + case MESA_FORMAT_RGBA_DXT5: + return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5); + case MESA_FORMAT_RGBA_FLOAT32: + return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32); + case MESA_FORMAT_RGBA_FLOAT16: + return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16); + case MESA_FORMAT_ALPHA_FLOAT32: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32); + case MESA_FORMAT_ALPHA_FLOAT16: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16); + case MESA_FORMAT_LUMINANCE_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32); + case MESA_FORMAT_LUMINANCE_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16); + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32); + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16); + case MESA_FORMAT_INTENSITY_FLOAT32: + return R300_EASY_TX_FORMAT(X, X, X, X, FL_I32); + case MESA_FORMAT_INTENSITY_FLOAT16: + return R300_EASY_TX_FORMAT(X, X, X, X, FL_I16); + case MESA_FORMAT_Z16: + return R300_EASY_TX_FORMAT(X, X, X, X, X16); + case MESA_FORMAT_Z24_S8: + return R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8); + case MESA_FORMAT_S8_Z24: + return R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8); + case MESA_FORMAT_Z32: + return R300_EASY_TX_FORMAT(X, X, X, X, X32); + /* EXT_texture_sRGB */ + case MESA_FORMAT_SRGBA8: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SLA8: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SL8: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGB_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT1: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT3: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SRGBA_DXT5: + return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5) | R300_TX_FORMAT_GAMMA; + default: + return -1; + } +}; + +void r300SetDepthTexMode(struct gl_texture_object *tObj) +{ + static const GLuint formats[3][3] = { + { + R300_EASY_TX_FORMAT(X, X, X, ONE, X16), + R300_EASY_TX_FORMAT(X, X, X, X, X16), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16), + }, + { + R300_EASY_TX_FORMAT(Y, Y, Y, ONE, X24_Y8), + R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, Y, X24_Y8), + }, + { + R300_EASY_TX_FORMAT(X, X, X, ONE, X32), + R300_EASY_TX_FORMAT(X, X, X, X, X32), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X32), + }, + }; + const GLuint *format; + radeonTexObjPtr t; + + if (!tObj) + return; + + t = radeon_tex_obj(tObj); + + switch (tObj->Image[0][tObj->BaseLevel]->TexFormat) { + case MESA_FORMAT_Z16: + format = formats[0]; + break; + case MESA_FORMAT_S8_Z24: + format = formats[1]; + break; + case MESA_FORMAT_Z32: + format = formats[2]; + break; + default: + /* Error...which should have already been caught by higher + * levels of Mesa. + */ + ASSERT(0); + return; + } + + switch (tObj->DepthMode) { + case GL_LUMINANCE: + t->pp_txformat = format[0]; + break; + case GL_INTENSITY: + t->pp_txformat = format[1]; + break; + case GL_ALPHA: + t->pp_txformat = format[2]; + break; + default: + /* Error...which should have already been caught by higher + * levels of Mesa. + */ + ASSERT(0); + return; + } +} + + +/** + * Compute the cached hardware register values for the given texture object. + * + * \param rmesa Context pointer + * \param t the r300 texture object + */ +static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) +{ + const struct gl_texture_image *firstImage; + firstImage = t->base.Image[0][t->minLod]; + + if (!t->image_override) { + if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + r300SetDepthTexMode(&t->base); + } else { + int32_t txformat = r300TranslateTexFormat(firstImage->TexFormat); + if (txformat < 0) { + _mesa_problem(rmesa->radeon.glCtx, "%s: Invalid format %s", + __FUNCTION__, _mesa_get_format_name(firstImage->TexFormat)); + exit(1); + } + t->pp_txformat = (uint32_t) txformat; + } + } + + if (t->image_override && t->bo) + return; + + t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT))) + | ((R300_TX_HEIGHTMASK_MASK & ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT))) + | ((R300_TX_DEPTHMASK_MASK & ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT))) + | ((R300_TX_MAX_MIP_LEVEL_MASK & ((t->maxLod - t->minLod) << R300_TX_MAX_MIP_LEVEL_SHIFT)))); + + t->tile_bits = 0; + + if (t->base.Target == GL_TEXTURE_CUBE_MAP) + t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP; + if (t->base.Target == GL_TEXTURE_3D) + t->pp_txformat |= R300_TX_FORMAT_3D; + + + if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { + unsigned int align = (64 / _mesa_get_format_bytes(firstImage->TexFormat)) - 1; + t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; + if (!t->image_override) + t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1; + } + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (firstImage->Width > 2048) + t->pp_txpitch |= R500_TXWIDTH_BIT11; + else + t->pp_txpitch &= ~R500_TXWIDTH_BIT11; + if (firstImage->Height > 2048) + t->pp_txpitch |= R500_TXHEIGHT_BIT11; + else + t->pp_txpitch &= ~R500_TXHEIGHT_BIT11; + } +} + +/** + * Ensure the given texture is ready for rendering. + * + * Mostly this means populating the texture object's mipmap tree. + */ +static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeonTexObj *t = radeon_tex_obj(texObj); + + if (!radeon_validate_texture_miptree(ctx, texObj)) + return GL_FALSE; + + /* Configure the hardware registers (more precisely, the cached version + * of the hardware registers). */ + setup_hardware_state(rmesa, t); + + t->validated = GL_TRUE; + return GL_TRUE; +} + +/** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. + */ +GLboolean r300ValidateBuffers(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + int i; + int ret; + + radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs); + + rrb = radeon_get_colorbuffer(&rmesa->radeon); + /* color buffer */ + if (rrb && rrb->bo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + rrb->bo, 0, + RADEON_GEM_DOMAIN_VRAM); + } + + /* depth buffer */ + rrb = radeon_get_depthbuffer(&rmesa->radeon); + if (rrb && rrb->bo) { + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + rrb->bo, 0, + RADEON_GEM_DOMAIN_VRAM); + } + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { + radeonTexObj *t; + + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; + + if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) { + _mesa_warning(ctx, + "failed to validate texture for unit %d.\n", + i); + } + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (t->image_override && t->bo) + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + t->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + else if (t->mt->bo) + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, + t->mt->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + } + + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + return GL_TRUE; +} + +void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) +{ + r300ContextPtr rmesa = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); + radeonTexObjPtr t = radeon_tex_obj(tObj); + uint32_t pitch_val; + + if (!tObj) + return; + + t->image_override = GL_TRUE; + + if (!offset) + return; + + t->bo = NULL; + t->override_offset = offset; + t->pp_txpitch &= (1 << 13) -1; + pitch_val = pitch; + + switch (depth) { + case 32: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + pitch_val /= 4; + break; + case 24: + default: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + pitch_val /= 4; + break; + case 16: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + pitch_val /= 2; + break; + } + pitch_val--; + + t->pp_txpitch |= pitch_val; +} + +void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format, __DRIdrawable *dPriv) +{ + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + struct radeon_renderbuffer *rb; + radeon_texture_image *rImage; + radeonContextPtr radeon; + r300ContextPtr rmesa; + struct radeon_framebuffer *rfb; + radeonTexObjPtr t; + uint32_t pitch_val; + uint32_t internalFormat, type, format; + + type = GL_BGRA; + format = GL_UNSIGNED_BYTE; + internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); + + radeon = pDRICtx->driverPrivate; + rmesa = pDRICtx->driverPrivate; + + rfb = dPriv->driverPrivate; + texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; + texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); + texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0); + + rImage = get_radeon_texture_image(texImage); + t = radeon_tex_obj(texObj); + if (t == NULL) { + return; + } + + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); + rb = rfb->color_rb[0]; + if (rb->bo == NULL) { + /* Failed to BO for the buffer */ + return; + } + + _mesa_lock_texture(radeon->glCtx, texObj); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + if (rImage->bo) { + radeon_bo_unref(rImage->bo); + rImage->bo = NULL; + } + + radeon_miptree_unreference(&t->mt); + radeon_miptree_unreference(&rImage->mt); + + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->base.Width, rb->base.Height, 1, 0, rb->cpp); + texImage->RowStride = rb->pitch / rb->cpp; + rImage->bo = rb->bo; + radeon_bo_ref(rImage->bo); + t->bo = rb->bo; + radeon_bo_ref(t->bo); + t->tile_bits = 0; + t->image_override = GL_TRUE; + t->override_offset = 0; + t->pp_txpitch &= (1 << 13) -1; + pitch_val = rb->pitch; + switch (rb->cpp) { + case 4: + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + else + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + pitch_val /= 4; + break; + case 3: + default: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + pitch_val /= 4; + break; + case 2: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + pitch_val /= 2; + break; + } + pitch_val--; + t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT))) + | ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT)))); + t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; + t->pp_txpitch |= pitch_val; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (rb->base.Width > 2048) + t->pp_txpitch |= R500_TXWIDTH_BIT11; + else + t->pp_txpitch &= ~R500_TXWIDTH_BIT11; + if (rb->base.Height > 2048) + t->pp_txpitch |= R500_TXHEIGHT_BIT11; + else + t->pp_txpitch &= ~R500_TXHEIGHT_BIT11; + } + t->validated = GL_TRUE; + _mesa_unlock_texture(radeon->glCtx, texObj); + return; +} + +void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + r300SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); +} diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c new file mode 100644 index 0000000000..a1fe378029 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -0,0 +1,414 @@ +/************************************************************************** + +Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org> +Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com> + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* Radeon R5xx Acceleration, Revision 1.2 */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" +#include "shader/programopt.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" +#include "tnl/tnl.h" + +#include "compiler/radeon_compiler.h" +#include "radeon_mesa_to_rc.h" +#include "r300_context.h" +#include "r300_fragprog_common.h" +#include "r300_state.h" + +/** + * Write parameter array for the given vertex program into dst. + * Return the total number of components written. + */ +static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst) +{ + int i; + + if (vp->Base->IsNVProgram) { + _mesa_load_tracked_matrices(ctx); + } else { + if (vp->Base->Base.Parameters) { + _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters); + } + } + + for(i = 0; i < vp->code.constants.Count; ++i) { + const float * src = 0; + const struct rc_constant * constant = &vp->code.constants.Constants[i]; + + switch(constant->Type) { + case RC_CONSTANT_EXTERNAL: + if (vp->Base->IsNVProgram) { + src = ctx->VertexProgram.Parameters[constant->u.External]; + } else { + src = vp->Base->Base.Parameters->ParameterValues[constant->u.External]; + } + break; + + case RC_CONSTANT_IMMEDIATE: + src = constant->u.Immediate; + break; + } + + assert(src); + dst[4*i] = src[0]; + dst[4*i + 1] = src[1]; + dst[4*i + 2] = src[2]; + dst[4*i + 3] = src[3]; + } + + return 4 * vp->code.constants.Count; +} + +static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads) +{ + GLbitfield outputs = 0; + int i; + +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if (fpreads & (1 << (fp_attr))) \ + outputs |= (1 << (vp_result)); \ + } while (0) + + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); + + for (i = 0; i <= 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); + } + +#undef ADD_OUTPUT + + if ((fpreads & (1 << FRAG_ATTRIB_COL0)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0))) + outputs |= 1 << VERT_RESULT_BFC0; + if ((fpreads & (1 << FRAG_ATTRIB_COL1)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1))) + outputs |= 1 << VERT_RESULT_BFC1; + + outputs |= 1 << VERT_RESULT_HPOS; + if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + outputs |= 1 << VERT_RESULT_PSIZ; + + return outputs; +} + + +static void t_inputs_outputs(struct r300_vertex_program_compiler * c) +{ + int i; + int cur_reg; + GLuint OutputsWritten, InputsRead; + + OutputsWritten = c->Base.Program.OutputsWritten; + InputsRead = c->Base.Program.InputsRead; + + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) + c->code->inputs[i] = ++cur_reg; + else + c->code->inputs[i] = -1; + } + + cur_reg = 0; + for (i = 0; i < VERT_RESULT_MAX; i++) + c->code->outputs[i] = -1; + + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { + c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } + + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { + c->code->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { + c->code->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; + } + + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (OutputsWritten & (1 << i)) { + c->code->outputs[i] = cur_reg++; + } + } + + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; + } +} + +/** + * The NV_vertex_program spec mandates that all registers be + * initialized to zero. We do this here unconditionally. + * + * \note We rely on dead-code elimination in the compiler. + */ +static void initialize_NV_registers(struct radeon_compiler * compiler) +{ + unsigned int reg; + struct rc_instruction * inst; + + for(reg = 0; reg < 12; ++reg) { + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = reg; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + } + + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->U.I.Opcode = RC_OPCODE_ARL; + inst->U.I.DstReg.File = RC_FILE_ADDRESS; + inst->U.I.DstReg.Index = 0; + inst->U.I.DstReg.WriteMask = WRITEMASK_X; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; +} + +static struct r300_vertex_program *build_program(GLcontext *ctx, + struct r300_vertex_program_key *wanted_key, + const struct gl_vertex_program *mesa_vp) +{ + struct r300_vertex_program *vp; + struct r300_vertex_program_compiler compiler; + + vp = calloc(1, sizeof(*vp)); + vp->Base = _mesa_clone_vertex_program(ctx, mesa_vp); + memcpy(&vp->key, wanted_key, sizeof(vp->key)); + + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE; + + compiler.code = &vp->code; + compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads); + compiler.SetHwInputOutput = &t_inputs_outputs; + + if (compiler.Base.Debug) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(&vp->Base->Base); + fflush(stderr); + } + + if (mesa_vp->IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, vp->Base); + } + + radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); + + if (mesa_vp->IsNVProgram) + initialize_NV_registers(&compiler.Base); + + rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); + + if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { + unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0; + + /* Set empty writemask for instructions writing to vp_wpos_attr + * before moving the wpos attr there. + * Such instructions will be removed by DCE. + */ + rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0); + rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr); + } + + if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { + unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0; + + /* Set empty writemask for instructions writing to vp_fog_attr + * before moving the fog attr there. + * Such instructions will be removed by DCE. + */ + rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0); + rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X); + } + + r3xx_compile_vertex_program(&compiler); + + if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) { + rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n"); + } + + vp->error = compiler.Base.Error; + + vp->Base->Base.InputsRead = vp->code.InputsRead; + vp->Base->Base.OutputsWritten = vp->code.OutputsWritten; + + rc_destroy(&compiler.Base); + + return vp; +} + +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_program_key wanted_key = { 0 }; + struct r300_vertex_program_cont *vpc; + struct r300_vertex_program *vp; + + vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + + if (!r300->selected_fp) { + /* This can happen when GetProgramiv is called to check + * whether the program runs natively. + * + * To be honest, this is not a very good solution, + * but solving the problem of reporting good values + * for those queries is tough anyway considering that + * we recompile vertex programs based on the precise + * fragment program that is in use. + */ + r300SelectAndTranslateFragmentShader(ctx); + } + + assert(r300->selected_fp); + wanted_key.FpReads = r300->selected_fp->InputsRead; + wanted_key.FogAttr = r300->selected_fp->fog_attr; + wanted_key.WPosAttr = r300->selected_fp->wpos_attr; + + for (vp = vpc->progs; vp; vp = vp->next) { + if (memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { + return r300->selected_vp = vp; + } + } + + vp = build_program(ctx, &wanted_key, &vpc->mesa_program); + vp->next = vpc->progs; + vpc->progs = vp; + + return r300->selected_vp = vp; +} + +#define bump_vpu_count(ptr, new_count) do { \ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \ + int _nc=(new_count)/4; \ + if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ + } while(0) + +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) +{ + int i; + + assert((code->length > 0) && (code->length % 4 == 0)); + + switch ((dest >> 8) & 0xf) { + case 0: + R300_STATECHANGE(r300, vpi); + for (i = 0; i < code->length; i++) + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff)); + break; + case 2: + R300_STATECHANGE(r300, vpp); + for (i = 0; i < code->length; i++) + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff)); + break; + case 4: + R300_STATECHANGE(r300, vps); + for (i = 0; i < code->length; i++) + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff)); + break; + default: + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); + exit(-1); + } +} + +void r300SetupVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_vertex_program *prog = rmesa->selected_vp; + int inst_count = 0; + int param_count = 0; + + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + + R300_STATECHANGE(rmesa, vap_cntl); + R300_STATECHANGE(rmesa, vpp); + param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) { + WARN_ONCE("Too many VP params, expect rendering errors\n"); + } + /* Prevent the overflow (vpu.count is u8) */ + bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count)); + param_count /= 4; + + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; + + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); + + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +} diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h new file mode 100644 index 0000000000..ccec896be4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -0,0 +1,11 @@ +#ifndef __R300_VERTPROG_H_ +#define __R300_VERTPROG_H_ + +#include "r300_reg.h" + + +void r300SetupVertexProgram(r300ContextPtr rmesa); + +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx); + +#endif diff --git a/src/mesa/drivers/dri/r300/radeon_bo.c b/src/mesa/drivers/dri/r300/radeon_bo.c new file mode 120000 index 0000000000..9448ffee54 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_bo.c @@ -0,0 +1 @@ +../radeon/radeon_bo.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h b/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h new file mode 120000 index 0000000000..029450928b --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h @@ -0,0 +1 @@ +../radeon/radeon_bo_int_drm.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.c b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c new file mode 120000 index 0000000000..79ad050e6b --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c @@ -0,0 +1 @@ +../radeon/radeon_bo_legacy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.h b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h new file mode 120000 index 0000000000..83b0f7ffab --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h @@ -0,0 +1 @@ +../radeon/radeon_bo_legacy.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h new file mode 120000 index 0000000000..ca894b2443 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h @@ -0,0 +1 @@ +../radeon/radeon_bocs_wrapper.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.c b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c new file mode 120000 index 0000000000..f6a5f66470 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.h b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h new file mode 120000 index 0000000000..2f134fd17b --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_chipset.h b/src/mesa/drivers/dri/r300/radeon_chipset.h new file mode 120000 index 0000000000..eba99001ff --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_chipset.h @@ -0,0 +1 @@ +../radeon/radeon_chipset.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cmdbuf.h b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h new file mode 120000 index 0000000000..a799e1dc6d --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h @@ -0,0 +1 @@ +../radeon/radeon_cmdbuf.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_common.c b/src/mesa/drivers/dri/r300/radeon_common.c new file mode 120000 index 0000000000..67b19ba940 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_common.c @@ -0,0 +1 @@ +../radeon/radeon_common.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_common.h b/src/mesa/drivers/dri/r300/radeon_common.h new file mode 120000 index 0000000000..5bcb696a9f --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_common.h @@ -0,0 +1 @@ +../radeon/radeon_common.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.c b/src/mesa/drivers/dri/r300/radeon_common_context.c new file mode 120000 index 0000000000..86800f3819 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_common_context.c @@ -0,0 +1 @@ +../radeon/radeon_common_context.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.h b/src/mesa/drivers/dri/r300/radeon_common_context.h new file mode 120000 index 0000000000..4d66312550 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_common_context.h @@ -0,0 +1 @@ +../radeon/radeon_common_context.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h new file mode 100644 index 0000000000..da4812d323 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_context.h @@ -0,0 +1,62 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + * Kevin E. Martin <martin@valinux.com> + * Nicolai Haehnle <prefect_@gmx.net> + */ + +#ifndef __RADEON_CONTEXT_H__ +#define __RADEON_CONTEXT_H__ + +#include "main/mtypes.h" +#include "main/colormac.h" +#include "radeon_screen.h" +#include "drm.h" +#include "dri_util.h" + +#include "radeon_screen.h" + +#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__); + +/* TCL fallbacks */ +extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode); + +#define TCL_FALLBACK( ctx, bit, mode ) ; + + +#endif /* __RADEON_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r300/radeon_cs.c b/src/mesa/drivers/dri/r300/radeon_cs.c new file mode 120000 index 0000000000..66b7ad1eb0 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cs.c @@ -0,0 +1 @@ +../radeon/radeon_cs.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h b/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h new file mode 120000 index 0000000000..462f5245d0 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h @@ -0,0 +1 @@ +../radeon/radeon_cs_int_drm.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.c b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c new file mode 120000 index 0000000000..006720f8a4 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c @@ -0,0 +1 @@ +../radeon/radeon_cs_legacy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.h b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h new file mode 120000 index 0000000000..a5f95e0a3d --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h @@ -0,0 +1 @@ +../radeon/radeon_cs_legacy.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c new file mode 120000 index 0000000000..c248ea7d1a --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c @@ -0,0 +1 @@ +../radeon/radeon_cs_space_drm.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_debug.c b/src/mesa/drivers/dri/r300/radeon_debug.c new file mode 120000 index 0000000000..c98c2e074c --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_debug.c @@ -0,0 +1 @@ +../radeon/radeon_debug.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_debug.h b/src/mesa/drivers/dri/r300/radeon_debug.h new file mode 120000 index 0000000000..bd8aa28e89 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_debug.h @@ -0,0 +1 @@ +../radeon/radeon_debug.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_dma.c b/src/mesa/drivers/dri/r300/radeon_dma.c new file mode 120000 index 0000000000..43be000625 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_dma.c @@ -0,0 +1 @@ +../radeon/radeon_dma.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_dma.h b/src/mesa/drivers/dri/r300/radeon_dma.h new file mode 120000 index 0000000000..82e50634e3 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_dma.h @@ -0,0 +1 @@ +../radeon/radeon_dma.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_fbo.c b/src/mesa/drivers/dri/r300/radeon_fbo.c new file mode 120000 index 0000000000..0d738d8d78 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_fbo.c @@ -0,0 +1 @@ +../radeon/radeon_fbo.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c new file mode 120000 index 0000000000..af4108a8e3 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_lock.c @@ -0,0 +1 @@ +../radeon/radeon_lock.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h new file mode 120000 index 0000000000..64bdf94ee7 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_lock.h @@ -0,0 +1 @@ +../radeon/radeon_lock.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c new file mode 100644 index 0000000000..9f9dec840b --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_mesa_to_rc.h" + +#include "main/mtypes.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" + +#include "compiler/radeon_compiler.h" +#include "compiler/radeon_program.h" + + +static rc_opcode translate_opcode(gl_inst_opcode opcode) +{ + switch(opcode) { + case OPCODE_NOP: return RC_OPCODE_NOP; + case OPCODE_ABS: return RC_OPCODE_ABS; + case OPCODE_ADD: return RC_OPCODE_ADD; + case OPCODE_ARL: return RC_OPCODE_ARL; + case OPCODE_CMP: return RC_OPCODE_CMP; + case OPCODE_COS: return RC_OPCODE_COS; + case OPCODE_DDX: return RC_OPCODE_DDX; + case OPCODE_DDY: return RC_OPCODE_DDY; + case OPCODE_DP3: return RC_OPCODE_DP3; + case OPCODE_DP4: return RC_OPCODE_DP4; + case OPCODE_DPH: return RC_OPCODE_DPH; + case OPCODE_DST: return RC_OPCODE_DST; + case OPCODE_EX2: return RC_OPCODE_EX2; + case OPCODE_EXP: return RC_OPCODE_EXP; + case OPCODE_FLR: return RC_OPCODE_FLR; + case OPCODE_FRC: return RC_OPCODE_FRC; + case OPCODE_KIL: return RC_OPCODE_KIL; + case OPCODE_LG2: return RC_OPCODE_LG2; + case OPCODE_LIT: return RC_OPCODE_LIT; + case OPCODE_LOG: return RC_OPCODE_LOG; + case OPCODE_LRP: return RC_OPCODE_LRP; + case OPCODE_MAD: return RC_OPCODE_MAD; + case OPCODE_MAX: return RC_OPCODE_MAX; + case OPCODE_MIN: return RC_OPCODE_MIN; + case OPCODE_MOV: return RC_OPCODE_MOV; + case OPCODE_MUL: return RC_OPCODE_MUL; + case OPCODE_POW: return RC_OPCODE_POW; + case OPCODE_RCP: return RC_OPCODE_RCP; + case OPCODE_RSQ: return RC_OPCODE_RSQ; + case OPCODE_SCS: return RC_OPCODE_SCS; + case OPCODE_SEQ: return RC_OPCODE_SEQ; + case OPCODE_SFL: return RC_OPCODE_SFL; + case OPCODE_SGE: return RC_OPCODE_SGE; + case OPCODE_SGT: return RC_OPCODE_SGT; + case OPCODE_SIN: return RC_OPCODE_SIN; + case OPCODE_SLE: return RC_OPCODE_SLE; + case OPCODE_SLT: return RC_OPCODE_SLT; + case OPCODE_SNE: return RC_OPCODE_SNE; + case OPCODE_SUB: return RC_OPCODE_SUB; + case OPCODE_SWZ: return RC_OPCODE_SWZ; + case OPCODE_TEX: return RC_OPCODE_TEX; + case OPCODE_TXB: return RC_OPCODE_TXB; + case OPCODE_TXD: return RC_OPCODE_TXD; + case OPCODE_TXL: return RC_OPCODE_TXL; + case OPCODE_TXP: return RC_OPCODE_TXP; + case OPCODE_XPD: return RC_OPCODE_XPD; + default: return RC_OPCODE_ILLEGAL_OPCODE; + } +} + +static rc_saturate_mode translate_saturate(unsigned int saturate) +{ + switch(saturate) { + default: + case SATURATE_OFF: return RC_SATURATE_NONE; + case SATURATE_ZERO_ONE: return RC_SATURATE_ZERO_ONE; + } +} + +static rc_register_file translate_register_file(unsigned int file) +{ + switch(file) { + case PROGRAM_TEMPORARY: return RC_FILE_TEMPORARY; + case PROGRAM_INPUT: return RC_FILE_INPUT; + case PROGRAM_OUTPUT: return RC_FILE_OUTPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: return RC_FILE_CONSTANT; + case PROGRAM_ADDRESS: return RC_FILE_ADDRESS; + default: return RC_FILE_NONE; + } +} + +static void translate_srcreg(struct rc_src_register * dest, struct prog_src_register * src) +{ + dest->File = translate_register_file(src->File); + dest->Index = src->Index; + dest->RelAddr = src->RelAddr; + dest->Swizzle = src->Swizzle; + dest->Abs = src->Abs; + dest->Negate = src->Negate; +} + +static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_register * src) +{ + dest->File = translate_register_file(src->File); + dest->Index = src->Index; + dest->RelAddr = src->RelAddr; + dest->WriteMask = src->WriteMask; +} + +static rc_texture_target translate_tex_target(gl_texture_index target) +{ + switch(target) { + case TEXTURE_2D_ARRAY_INDEX: return RC_TEXTURE_2D_ARRAY; + case TEXTURE_1D_ARRAY_INDEX: return RC_TEXTURE_1D_ARRAY; + case TEXTURE_CUBE_INDEX: return RC_TEXTURE_CUBE; + case TEXTURE_3D_INDEX: return RC_TEXTURE_3D; + case TEXTURE_RECT_INDEX: return RC_TEXTURE_RECT; + default: + case TEXTURE_2D_INDEX: return RC_TEXTURE_2D; + case TEXTURE_1D_INDEX: return RC_TEXTURE_1D; + } +} + +static void translate_instruction(struct radeon_compiler * c, + struct rc_instruction * dest, struct prog_instruction * src) +{ + const struct rc_opcode_info * opcode; + unsigned int i; + + dest->U.I.Opcode = translate_opcode(src->Opcode); + if (dest->U.I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) { + rc_error(c, "Unsupported opcode %i\n", src->Opcode); + return; + } + dest->U.I.SaturateMode = translate_saturate(src->SaturateMode); + + opcode = rc_get_opcode_info(dest->U.I.Opcode); + + for(i = 0; i < opcode->NumSrcRegs; ++i) + translate_srcreg(&dest->U.I.SrcReg[i], &src->SrcReg[i]); + + if (opcode->HasDstReg) + translate_dstreg(&dest->U.I.DstReg, &src->DstReg); + + if (opcode->HasTexture) { + dest->U.I.TexSrcUnit = src->TexSrcUnit; + dest->U.I.TexSrcTarget = translate_tex_target(src->TexSrcTarget); + dest->U.I.TexShadow = src->TexShadow; + } +} + +void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) +{ + struct prog_instruction *source; + unsigned int i; + + for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { + struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); + translate_instruction(c, dest, source); + } + + c->Program.ShadowSamplers = program->ShadowSamplers; + c->Program.InputsRead = program->InputsRead; + c->Program.OutputsWritten = program->OutputsWritten; + + int isNVProgram = 0; + + if (program->Target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program * vp = (struct gl_vertex_program *) program; + isNVProgram = vp->IsNVProgram; + } + + if (isNVProgram) { + /* NV_vertex_program has a fixed-sized constant environment. + * This could be handled more efficiently for programs that + * do not use relative addressing. + */ + for(i = 0; i < 96; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } + } else { + for(i = 0; i < program->Parameters->NumParameters; ++i) { + struct rc_constant constant; + + constant.Type = RC_CONSTANT_EXTERNAL; + constant.Size = 4; + constant.u.External = i; + + rc_constants_add(&c->Program.Constants, &constant); + } + } +} diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h new file mode 100644 index 0000000000..9511a04f36 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_MESA_TO_RC_H +#define RADEON_MESA_TO_RC_H + +struct gl_program; +struct radeon_compiler; + +void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program); + +#endif /* RADEON_MESA_TO_RC_H */ diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c new file mode 120000 index 0000000000..31c0cfbe94 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c @@ -0,0 +1 @@ +../radeon/radeon_mipmap_tree.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h new file mode 120000 index 0000000000..254d50cf8c --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h @@ -0,0 +1 @@ +../radeon/radeon_mipmap_tree.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_pixel_read.c b/src/mesa/drivers/dri/r300/radeon_pixel_read.c new file mode 120000 index 0000000000..3b03803126 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_pixel_read.c @@ -0,0 +1 @@ +../radeon/radeon_pixel_read.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.c b/src/mesa/drivers/dri/r300/radeon_queryobj.c new file mode 120000 index 0000000000..1d6ebc1c48 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_queryobj.c @@ -0,0 +1 @@ +../radeon/radeon_queryobj.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.h b/src/mesa/drivers/dri/r300/radeon_queryobj.h new file mode 120000 index 0000000000..8f6f842b0a --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_queryobj.h @@ -0,0 +1 @@ +../radeon/radeon_queryobj.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_screen.c b/src/mesa/drivers/dri/r300/radeon_screen.c new file mode 120000 index 0000000000..86161118dd --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_screen.c @@ -0,0 +1 @@ +../radeon/radeon_screen.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_screen.h b/src/mesa/drivers/dri/r300/radeon_screen.h new file mode 120000 index 0000000000..23bb6bd459 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_screen.h @@ -0,0 +1 @@ +../radeon/radeon_screen.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c new file mode 120000 index 0000000000..232868c4c9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_span.c @@ -0,0 +1 @@ +../radeon/radeon_span.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_span.h b/src/mesa/drivers/dri/r300/radeon_span.h new file mode 120000 index 0000000000..f9d634508c --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_span.h @@ -0,0 +1 @@ +../radeon/radeon_span.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_tex_copy.c b/src/mesa/drivers/dri/r300/radeon_tex_copy.c new file mode 120000 index 0000000000..dfa5ba34e6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tex_copy.c @@ -0,0 +1 @@ +../radeon/radeon_tex_copy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_tex_getimage.c b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c new file mode 120000 index 0000000000..d9836d7326 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c @@ -0,0 +1 @@ +../radeon/radeon_tex_getimage.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_texture.c b/src/mesa/drivers/dri/r300/radeon_texture.c new file mode 120000 index 0000000000..a822710915 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_texture.c @@ -0,0 +1 @@ +../radeon/radeon_texture.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_texture.h b/src/mesa/drivers/dri/r300/radeon_texture.h new file mode 120000 index 0000000000..17fac3d5ea --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_texture.h @@ -0,0 +1 @@ +../radeon/radeon_texture.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_tile.c b/src/mesa/drivers/dri/r300/radeon_tile.c new file mode 120000 index 0000000000..d4bfe27da6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tile.c @@ -0,0 +1 @@ +../radeon/radeon_tile.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/radeon_tile.h b/src/mesa/drivers/dri/r300/radeon_tile.h new file mode 120000 index 0000000000..31074c581e --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tile.h @@ -0,0 +1 @@ +../radeon/radeon_tile.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon.h b/src/mesa/drivers/dri/r300/server/radeon.h new file mode 120000 index 0000000000..81274a54f1 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon.h @@ -0,0 +1 @@ +../../radeon/server/radeon.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.h b/src/mesa/drivers/dri/r300/server/radeon_dri.h new file mode 120000 index 0000000000..27c591d3c9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon_dri.h @@ -0,0 +1 @@ +../../radeon/server/radeon_dri.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_macros.h b/src/mesa/drivers/dri/r300/server/radeon_macros.h new file mode 120000 index 0000000000..c56cd735b8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon_macros.h @@ -0,0 +1 @@ +../../radeon/server/radeon_macros.h
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/server/radeon_reg.h b/src/mesa/drivers/dri/r300/server/radeon_reg.h new file mode 120000 index 0000000000..e2349dcb68 --- /dev/null +++ b/src/mesa/drivers/dri/r300/server/radeon_reg.h @@ -0,0 +1 @@ +../../radeon/server/radeon_reg.h
\ No newline at end of file |