From 5da8289e9cc086ac9c010ee41d0c06161c240dbd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 17 Mar 2008 19:05:15 +1000 Subject: r500: fragprog --- src/mesa/drivers/dri/r300/Makefile | 1 + src/mesa/drivers/dri/r300/r500_fragprog.c | 2476 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r500_fragprog.h | 104 ++ 3 files changed, 2581 insertions(+) create mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.h (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 44248964fd..5b2bd0bc2b 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -39,6 +39,7 @@ DRIVER_SOURCES = \ r300_texstate.c \ r300_vertprog.c \ r300_fragprog.c \ + r500_fragprog.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c new file mode 100644 index 0000000000..3638a94380 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -0,0 +1,2476 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * \file
 *
 * \author Ben Skeggs
 *
 * \author Jerome Glisse
 *
 * \todo Depth write, WPOS/FOGC inputs
 *
 * \todo FogOption
 *
 * \todo Verify results of opcodes for accuracy, I've only checked them in
 * specific cases.
 */

#include "glheader.h"
#include "macros.h"
#include "enums.h"
#include "shader/prog_instruction.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"

#include "r300_context.h"
#include "r300_fragprog.h"
#include "r300_reg.h"
#include "r300_state.h"

/*
 * Useful macros and values
 */

/* Print a message prefixed with file and function name to stderr and mark
 * the program as failed (fp->error).  Requires a variable named `fp` in
 * scope at the point of use.  A newline is appended by the macro itself.
 */
#define ERROR(fmt, args...) do {			\
		fprintf(stderr, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##args);	\
		fp->error = GL_TRUE;			\
	} while(0)

/* Sentinel used to terminate the swizzle tables and to poison swizzles. */
#define PFS_INVAL 0xFFFFFFFF
/* Declares the local `cs` shortcut to the compile state; needs `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Values stored in the VSWZ field of a register word. */
#define SWIZZLE_XYZ		0
#define SWIZZLE_XXX		1
#define SWIZZLE_YYY		2
#define SWIZZLE_ZZZ		3
#define SWIZZLE_WWW		4
#define SWIZZLE_YZX		5
#define SWIZZLE_ZXY		6
#define SWIZZLE_WZY		7
#define SWIZZLE_111		8
#define SWIZZLE_000		9
#define SWIZZLE_HHH		10

/* Convenience wrapper around do_swizzle(): packs four component selectors
 * (3 bits each) and passes no negation.  Requires `fp` in scope.
 */
#define swizzle(r, x, y, z, w) do_swizzle(fp, r,		\
					  ((SWIZZLE_##x<<0)|	\
					   (SWIZZLE_##y<<3)|	\
					   (SWIZZLE_##z<<6)|	\
					   (SWIZZLE_##w<<9)),	\
					  0)

/* Register file selector stored in the TYPE field of a register word. */
#define REG_TYPE_INPUT		0
#define REG_TYPE_OUTPUT		1
#define REG_TYPE_TEMP		2
#define REG_TYPE_CONST		3

/* Bit positions of the fields packed into a GLuint register word. */
#define REG_TYPE_SHIFT		0
#define REG_INDEX_SHIFT		2
#define REG_VSWZ_SHIFT		8
#define REG_SSWZ_SHIFT		13
#define REG_NEGV_SHIFT		18
#define REG_NEGS_SHIFT		19
#define REG_ABS_SHIFT		20
#define REG_NO_USE_SHIFT	21 // Hack for refcounting
#define REG_VALID_SHIFT		22 // Does the register contain a defined value?
#define REG_BUILTIN_SHIFT	23 // Is it a builtin (like all zero/all one)?
+ +#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) +#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) +#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) +#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) +#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) +#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) +#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) +#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) +#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) +#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) + +#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ + (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_GET_TYPE(reg) \ + ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) +#define REG_GET_INDEX(reg) \ + ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) +#define REG_GET_VSWZ(reg) \ + ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) +#define REG_GET_SSWZ(reg) \ + ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) +#define REG_GET_NO_USE(reg) \ + ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) +#define REG_GET_VALID(reg) \ + ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) +#define REG_GET_BUILTIN(reg) \ + ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) +#define REG_SET_TYPE(reg, type) \ + reg = ((reg & ~REG_TYPE_MASK) | \ + ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) +#define REG_SET_INDEX(reg, index) \ + reg = ((reg & ~REG_INDEX_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) +#define REG_SET_VSWZ(reg, vswz) \ + reg = ((reg & ~REG_VSWZ_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) +#define REG_SET_SSWZ(reg, sswz) \ + reg = ((reg & ~REG_SSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_SET_NO_USE(reg, nouse) \ + reg = ((reg & ~REG_NO_USE_MASK) | 
\ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) +#define REG_SET_VALID(reg, valid) \ + reg = ((reg & ~REG_VALID_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) +#define REG_SET_BUILTIN(reg, builtin) \ + reg = ((reg & ~REG_BUILTIN_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) +#define REG_ABS(reg) \ + reg = (reg | REG_ABS_MASK) +#define REG_NEGV(reg) \ + reg = (reg | REG_NEGV_MASK) +#define REG_NEGS(reg) \ + reg = (reg | REG_NEGS_MASK) + +/* + * Datas structures for fragment program generation + */ + +/* description of r300 native hw instructions */ +static const struct { + const char *name; + int argc; + int v_op; + int s_op; +} r300_fpop[] = { + /* *INDENT-OFF* */ + {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD}, + {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4}, + {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4}, + {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN}, + {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX}, + {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP}, + {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC}, + {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2}, + {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2}, + {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP}, + {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ}, + {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL}, + {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL}, + /* *INDENT-ON* */ +}; + +/* vector swizzles r300 can support natively, with a couple of + * cases we handle specially + * + * REG_VSWZ/REG_SSWZ is an index into this table + */ + +/* mapping from SWIZZLE_* to r300 native values for scalar insns */ +#define SWIZZLE_HALF 6 + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ + SWIZZLE_##y, \ + SWIZZLE_##z, \ + SWIZZLE_ZERO)) +/* native swizzles */ +static const struct r300_pfs_swizzle { + GLuint hash; /* swizzle value this matches */ + GLuint base; /* base value for hw swizzle */ + GLuint stride; /* 
difference in base between arg0/1/2 */ + GLuint flags; +} v_swiz[] = { + /* *INDENT-OFF* */ + {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, + {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, + {PFS_INVAL, 0, 0, 0}, + /* *INDENT-ON* */ +}; + +/* used during matching of non-native swizzles */ +#define SWZ_X_MASK (7 << 0) +#define SWZ_Y_MASK (7 << 3) +#define SWZ_Z_MASK (7 << 6) +#define SWZ_W_MASK (7 << 9) +static const struct { + GLuint hash; /* used to mask matching swizzle components */ + int mask; /* actual outmask */ + int count; /* count of components matched */ +} s_mask[] = { + /* *INDENT-OFF* */ + {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3}, + {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2}, + {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2}, + {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2}, + {SWZ_X_MASK, 1, 1}, + {SWZ_Y_MASK, 2, 1}, + {SWZ_Z_MASK, 4, 1}, + {PFS_INVAL, PFS_INVAL, PFS_INVAL} + /* *INDENT-ON* */ +}; + +static const struct { + int base; /* hw value of swizzle */ + int stride; /* difference between SRC0/1/2 */ + GLuint flags; +} s_swiz[] = { + /* *INDENT-OFF* */ + {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, + {R300_FPI2_ARGA_ZERO, 0, 0}, + {R300_FPI2_ARGA_ONE, 0, 0}, + {R300_FPI2_ARGA_HALF, 0, 0} + /* 
*INDENT-ON* */ +}; + +/* boiler-plate reg, for convenience */ +static const GLuint undef = REG(REG_TYPE_TEMP, + 0, + SWIZZLE_XYZ, + SWIZZLE_W, + GL_FALSE, + GL_FALSE, + GL_FALSE); + +/* constant one source */ +static const GLuint pfs_one = REG(REG_TYPE_CONST, + 0, + SWIZZLE_111, + SWIZZLE_ONE, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant half source */ +static const GLuint pfs_half = REG(REG_TYPE_CONST, + 0, + SWIZZLE_HHH, + SWIZZLE_HALF, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant zero source */ +static const GLuint pfs_zero = REG(REG_TYPE_CONST, + 0, + SWIZZLE_000, + SWIZZLE_ZERO, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* + * Common functions prototypes + */ +static void dump_program(struct r300_fragment_program *fp); +static void emit_arith(struct r300_fragment_program *fp, int op, + GLuint dest, int mask, + GLuint src0, GLuint src1, GLuint src2, int flags); + +/** + * Get an R300 temporary that can be written to in the given slot. + */ +static int get_hw_temp(struct r300_fragment_program *fp, int slot) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) { + ERROR("Out of hardware temps\n"); + return 0; + } + // Reserved is used to avoid the following scenario: + // R300 temporary X is first assigned to Mesa temporary Y during vector ops + // R300 temporary X is then assigned to Mesa temporary Z for further vector ops + // Then scalar ops on Mesa temporary Z are emitted and move back in time + // to overwrite the value of temporary Y. + // End scenario. + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. 
+ cs->hwtemps[r].vector_valid = 0; + cs->hwtemps[r].scalar_valid = 0; + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + +/** + * Get an R300 temporary that will act as a TEX destination register. + */ +static int get_hw_temp_tex(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->used_in_node & (1 << r)) + continue; + + // Note: Be very careful here + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) + return get_hw_temp(fp, 0); /* Will cause an indirection */ + + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = cs->nrslots; + cs->hwtemps[r].scalar_valid = cs->nrslots; + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + +/** + * Mark the given hardware register as free. + */ +static void free_hw_temp(struct r300_fragment_program *fp, int idx) +{ + COMPILE_STATE; + + // Be very careful here. Consider sequences like + // MAD r0, r1,r2,r3 + // TEX r4, ... + // The TEX instruction may be moved in front of the MAD instruction + // due to the way nodes work. We don't want to alias r1 and r4 in + // this case. + // I'm certain the register allocation could be further sanitized, + // but it's tricky because of stuff that can happen inside emit_tex + // and emit_arith. + cs->hwtemps[idx].free = cs->nrslots + 1; +} + +/** + * Create a new Mesa temporary register. 
+ */ +static GLuint get_temp_reg(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = -1; + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Create a new Mesa temporary register that will act as the destination + * register for a texture read. + */ +static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = get_hw_temp_tex(fp); + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Free a Mesa temporary and the associated R300 temporary. + */ +static void free_temp(struct r300_fragment_program *fp, GLuint r) +{ + COMPILE_STATE; + GLuint index = REG_GET_INDEX(r); + + if (!(cs->temp_in_use & (1 << index))) + return; + + if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { + free_hw_temp(fp, cs->temps[index].reg); + cs->temps[index].reg = -1; + cs->temp_in_use &= ~(1 << index); + } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { + free_hw_temp(fp, cs->inputs[index].reg); + cs->inputs[index].reg = -1; + } +} + +/** + * Emit a hardware constant/parameter. + * + * \p cp Stable pointer to an array of 4 floats. + * The pointer must be stable in the sense that it remains to be valid + * and hold the contents of the constant/parameter throughout the lifetime + * of the fragment program (actually, up until the next time the fragment + * program is translated). 
+ */ +static GLuint emit_const4fv(struct r300_fragment_program *fp, + const GLfloat * cp) +{ + GLuint reg = undef; + int index; + + for (index = 0; index < fp->const_nr; ++index) { + if (fp->constant[index] == cp) + break; + } + + if (index >= fp->const_nr) { + if (index >= PFS_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } + + fp->const_nr++; + fp->constant[index] = cp; + } + + REG_SET_TYPE(reg, REG_TYPE_CONST); + REG_SET_INDEX(reg, index); + REG_SET_VALID(reg, GL_TRUE); + return reg; +} + +static inline GLuint negate(GLuint r) +{ + REG_NEGS(r); + REG_NEGV(r); + return r; +} + +/* Hack, to prevent clobbering sources used multiple times when + * emulating non-native instructions + */ +static inline GLuint keep(GLuint r) +{ + REG_SET_NO_USE(r, GL_TRUE); + return r; +} + +static inline GLuint absolute(GLuint r) +{ + REG_ABS(r); + return r; +} + +static int swz_native(struct r300_fragment_program *fp, + GLuint src, GLuint * r, GLuint arbneg) +{ + /* Native swizzle, handle negation */ + src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); + + if ((arbneg & 0x7) == 0x0) { + src = src & ~REG_NEGV_MASK; + *r = src; + } else if ((arbneg & 0x7) == 0x7) { + src |= REG_NEGV_MASK; + *r = src; + } else { + if (!REG_GET_VALID(*r)) + *r = get_temp_reg(fp); + src |= REG_NEGV_MASK; + emit_arith(fp, + PFS_OP_MAD, + *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); + src = src & ~REG_NEGV_MASK; + emit_arith(fp, + PFS_OP_MAD, + *r, + (arbneg ^ 0x7) | WRITEMASK_W, + src, pfs_one, pfs_zero, 0); + } + + return 3; +} + +static int swz_emit_partial(struct r300_fragment_program *fp, + GLuint src, + GLuint * r, int mask, int mc, GLuint arbneg) +{ + GLuint tmp; + GLuint wmask = 0; + + if (!REG_GET_VALID(*r)) + *r = get_temp_reg(fp); + + /* A partial match, VSWZ/mask define what parts of the + * desired swizzle we match + */ + if (mc + s_mask[mask].count == 3) { + wmask = WRITEMASK_W; + src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT; + } + + tmp = 
arbneg & s_mask[mask].mask; + if (tmp) { + tmp = tmp ^ s_mask[mask].mask; + if (tmp) { + emit_arith(fp, + PFS_OP_MAD, + *r, + arbneg & s_mask[mask].mask, + keep(src) | REG_NEGV_MASK, + pfs_one, pfs_zero, 0); + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, + PFS_OP_MAD, + *r, tmp | wmask, src, pfs_one, pfs_zero, 0); + } else { + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, + PFS_OP_MAD, + *r, + (arbneg & s_mask[mask].mask) | wmask, + src | REG_NEGV_MASK, pfs_one, pfs_zero, 0); + } + } else { + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, PFS_OP_MAD, + *r, + s_mask[mask].mask | wmask, + src, pfs_one, pfs_zero, 0); + } + + return s_mask[mask].count; +} + +static GLuint do_swizzle(struct r300_fragment_program *fp, + GLuint src, GLuint arbswz, GLuint arbneg) +{ + GLuint r = undef; + GLuint vswz; + int c_mask = 0; + int v_match = 0; + + /* If swizzling from something without an XYZW native swizzle, + * emit result to a temp, and do new swizzle from the temp. + */ +#if 0 + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint temp = get_temp_reg(fp); + emit_arith(fp, + PFS_OP_MAD, + temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0); + src = temp; + } +#endif + + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint vsrcswz = + (v_swiz[REG_GET_VSWZ(src)]. + hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) | + REG_GET_SSWZ(src) << 9; + GLint i; + + GLuint newswz = 0; + GLuint offset; + for (i = 0; i < 4; ++i) { + offset = GET_SWZ(arbswz, i); + + newswz |= + (offset <= 3) ? 
GET_SWZ(vsrcswz, + offset) << i * + 3 : offset << i * 3; + } + + arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK); + REG_SET_SSWZ(src, GET_SWZ(newswz, 3)); + } else { + /* set scalar swizzling */ + REG_SET_SSWZ(src, GET_SWZ(arbswz, 3)); + + } + do { + vswz = REG_GET_VSWZ(src); + do { + int chash; + + REG_SET_VSWZ(src, vswz); + chash = v_swiz[REG_GET_VSWZ(src)].hash & + s_mask[c_mask].hash; + + if (chash == (arbswz & s_mask[c_mask].hash)) { + if (s_mask[c_mask].count == 3) { + v_match += swz_native(fp, + src, &r, arbneg); + } else { + v_match += swz_emit_partial(fp, + src, + &r, + c_mask, + v_match, + arbneg); + } + + if (v_match == 3) + return r; + + /* Fill with something invalid.. all 0's was + * wrong before, matched SWIZZLE_X. So all + * 1's will be okay for now + */ + arbswz |= (PFS_INVAL & s_mask[c_mask].hash); + } + } while (v_swiz[++vswz].hash != PFS_INVAL); + REG_SET_VSWZ(src, SWIZZLE_XYZ); + } while (s_mask[++c_mask].hash != PFS_INVAL); + + ERROR("should NEVER get here\n"); + return r; +} + +static GLuint t_src(struct r300_fragment_program *fp, + struct prog_src_register fpsrc) +{ + GLuint r = undef; + + switch (fpsrc.File) { + case PROGRAM_TEMPORARY: + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); + break; + case PROGRAM_INPUT: + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_INPUT); + break; + case PROGRAM_LOCAL_PARAM: + r = emit_const4fv(fp, + fp->mesa_program.Base.LocalParams[fpsrc. + Index]); + break; + case PROGRAM_ENV_PARAM: + r = emit_const4fv(fp, + fp->ctx->FragmentProgram.Parameters[fpsrc. + Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + r = emit_const4fv(fp, + fp->mesa_program.Base.Parameters-> + ParameterValues[fpsrc.Index]); + break; + default: + ERROR("unknown SrcReg->File %x\n", fpsrc.File); + return r; + } + + /* no point swizzling ONE/ZERO/HALF constants... 
*/ + if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) + r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase); + return r; +} + +static GLuint t_scalar_src(struct r300_fragment_program *fp, + struct prog_src_register fpsrc) +{ + struct prog_src_register src = fpsrc; + int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ + + src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); + + return t_src(fp, src); +} + +static GLuint t_dst(struct r300_fragment_program *fp, + struct prog_dst_register dest) +{ + GLuint r = undef; + + switch (dest.File) { + case PROGRAM_TEMPORARY: + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); + return r; + case PROGRAM_OUTPUT: + REG_SET_TYPE(r, REG_TYPE_OUTPUT); + switch (dest.Index) { + case FRAG_RESULT_COLR: + case FRAG_RESULT_DEPR: + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + return r; + default: + ERROR("Bad DstReg->Index 0x%x\n", dest.Index); + return r; + } + default: + ERROR("Bad DstReg->File 0x%x\n", dest.File); + return r; + } +} + +static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) +{ + COMPILE_STATE; + int idx; + int index = REG_GET_INDEX(src); + + switch (REG_GET_TYPE(src)) { + case REG_TYPE_TEMP: + /* NOTE: if reg==-1 here, a source is being read that + * hasn't been written to. Undefined results. 
+ */ + if (cs->temps[index].reg == -1) + cs->temps[index].reg = get_hw_temp(fp, cs->nrslots); + + idx = cs->temps[index].reg; + + if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) + free_temp(fp, src); + break; + case REG_TYPE_INPUT: + idx = cs->inputs[index].reg; + + if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) + free_hw_temp(fp, cs->inputs[index].reg); + break; + case REG_TYPE_CONST: + return (index | SRC_CONST); + default: + ERROR("Invalid type for source reg\n"); + return (0 | SRC_CONST); + } + + if (!tex) + cs->used_in_node |= (1 << idx); + + return idx; +} + +static int t_hw_dst(struct r300_fragment_program *fp, + GLuint dest, GLboolean tex, int slot) +{ + COMPILE_STATE; + int idx; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { + if (!tex) { + cs->temps[index].reg = get_hw_temp(fp, slot); + } else { + cs->temps[index].reg = get_hw_temp_tex(fp); + } + } + idx = cs->temps[index].reg; + + if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) + free_temp(fp, dest); + + cs->dest_in_node |= (1 << idx); + cs->used_in_node |= (1 << idx); + break; + case REG_TYPE_OUTPUT: + switch (index) { + case FRAG_RESULT_COLR: + fp->node[fp->cur_node].flags |= + R300_PFS_NODE_OUTPUT_COLOR; + break; + case FRAG_RESULT_DEPR: + fp->node[fp->cur_node].flags |= + R300_PFS_NODE_OUTPUT_DEPTH; + break; + } + return index; + break; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + return idx; +} + +static void emit_nop(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return; + } + + fp->alu.inst[cs->nrslots].inst0 = NOP_INST0; + fp->alu.inst[cs->nrslots].inst1 = NOP_INST1; + fp->alu.inst[cs->nrslots].inst2 = NOP_INST2; + fp->alu.inst[cs->nrslots].inst3 = NOP_INST3; + cs->nrslots++; +} + 
+static void emit_tex(struct r300_fragment_program *fp, + struct prog_instruction *fpi, int opcode) +{ + COMPILE_STATE; + GLuint coord = t_src(fp, fpi->SrcReg[0]); + GLuint dest = undef, rdest = undef; + GLuint din, uin; + int unit = fpi->TexSrcUnit; + int hwsrc, hwdest; + GLuint tempreg = 0; + + uin = cs->used_in_node; + din = cs->dest_in_node; + + /* Resolve source/dest to hardware registers */ + if (opcode != R300_FPITX_OP_KIL) { + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { + /** + * Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + * + * \todo Refactor this once we have proper rewriting/optimization + * support for programs. + */ + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + int factor_index; + GLuint factorreg; + + tokens[2] = unit; + factor_index = + _mesa_add_state_reference(fp->mesa_program.Base. + Parameters, tokens); + factorreg = + emit_const4fv(fp, + fp->mesa_program.Base.Parameters-> + ParameterValues[factor_index]); + tempreg = keep(get_temp_reg(fp)); + + emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + coord, factorreg, pfs_zero, 0); + + /* Ensure correct node indirection */ + uin = cs->used_in_node; + din = cs->dest_in_node; + + hwsrc = t_hw_src(fp, tempreg, GL_TRUE); + } else { + hwsrc = t_hw_src(fp, coord, GL_TRUE); + } + + dest = t_dst(fp, fpi->DstReg); + + /* r300 doesn't seem to be able to do TEX->output reg */ + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + rdest = dest; + dest = get_temp_reg_tex(fp); + } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { + /* in case write mask isn't XYZW */ + rdest = dest; + dest = get_temp_reg_tex(fp); + } + hwdest = + t_hw_dst(fp, dest, GL_TRUE, + fp->node[fp->cur_node].alu_offset); + + /* Use a temp that hasn't been used in this node, rather + * than causing an indirection + */ + if (uin & (1 << hwdest)) { + free_hw_temp(fp, hwdest); + hwdest = 
get_hw_temp_tex(fp); + cs->temps[REG_GET_INDEX(dest)].reg = hwdest; + } + } else { + hwdest = 0; + unit = 0; + hwsrc = t_hw_src(fp, coord, GL_TRUE); + } + + /* Indirection if source has been written in this node, or if the + * dest has been read/written in this node + */ + if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && + (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { + + /* Finish off current node */ + if (fp->node[fp->cur_node].alu_offset == cs->nrslots) + emit_nop(fp); + + fp->node[fp->cur_node].alu_end = + cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + assert(fp->node[fp->cur_node].alu_end >= 0); + + if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) { + ERROR("too many levels of texture indirection\n"); + return; + } + + /* Start new node */ + fp->node[fp->cur_node].tex_offset = fp->tex.length; + fp->node[fp->cur_node].alu_offset = cs->nrslots; + fp->node[fp->cur_node].tex_end = -1; + fp->node[fp->cur_node].alu_end = -1; + fp->node[fp->cur_node].flags = 0; + cs->used_in_node = 0; + cs->dest_in_node = 0; + } + + if (fp->cur_node == 0) + fp->first_node_has_tex = 1; + + fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT) + | (hwdest << R300_FPITX_DST_SHIFT) + | (unit << R300_FPITX_IMAGE_SHIFT) + /* not entirely sure about this */ + | (opcode << R300_FPITX_OPCODE_SHIFT); + + cs->dest_in_node |= (1 << hwdest); + if (REG_GET_TYPE(coord) != REG_TYPE_CONST) + cs->used_in_node |= (1 << hwsrc); + + fp->node[fp->cur_node].tex_end++; + + /* Copy from temp to output if needed */ + if (REG_GET_VALID(rdest)) { + emit_arith(fp, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, + pfs_one, pfs_zero, 0); + free_temp(fp, dest); + } + + /* Free temp register */ + if (tempreg != 0) + free_temp(fp, tempreg); +} + +/** + * Returns the first slot where we could possibly allow writing to dest, + * according to register allocation. 
+ */ +static int get_earliest_allowed_write(struct r300_fragment_program *fp, + GLuint dest, int mask) +{ + COMPILE_STATE; + int idx; + int pos; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[index].reg == -1) + return 0; + + idx = cs->temps[index].reg; + break; + case REG_TYPE_OUTPUT: + return 0; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + pos = cs->hwtemps[idx].reserved; + if (mask & WRITEMASK_XYZ) { + if (pos < cs->hwtemps[idx].vector_lastread) + pos = cs->hwtemps[idx].vector_lastread; + } + if (mask & WRITEMASK_W) { + if (pos < cs->hwtemps[idx].scalar_lastread) + pos = cs->hwtemps[idx].scalar_lastread; + } + + return pos; +} + +/** + * Allocates a slot for an ALU instruction that can consist of + * a vertex part or a scalar part or both. + * + * Sources from src (src[0] to src[argc-1]) are added to the slot in the + * appropriate position (vector and/or scalar), and their positions are + * recorded in the srcpos array. + * + * This function emits instruction code for the source fetch and the + * argument selection. It does not emit instruction code for the + * opcode or the destination selection. 
+ * + * @return the index of the slot + */ +static int find_and_prepare_slot(struct r300_fragment_program *fp, + GLboolean emit_vop, + GLboolean emit_sop, + int argc, GLuint * src, GLuint dest, int mask) +{ + COMPILE_STATE; + int hwsrc[3]; + int srcpos[3]; + unsigned int used; + int tempused; + int tempvsrc[3]; + int tempssrc[3]; + int pos; + int regnr; + int i, j; + + // Determine instruction slots, whether sources are required on + // vector or scalar side, and the smallest slot number where + // all source registers are available + used = 0; + if (emit_vop) + used |= SLOT_OP_VECTOR; + if (emit_sop) + used |= SLOT_OP_SCALAR; + + pos = get_earliest_allowed_write(fp, dest, mask); + + if (fp->node[fp->cur_node].alu_offset > pos) + pos = fp->node[fp->cur_node].alu_offset; + for (i = 0; i < argc; ++i) { + if (!REG_GET_BUILTIN(src[i])) { + if (emit_vop) + used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; + if (emit_sop) + used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; + } + + hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ + regnr = hwsrc[i] & 31; + + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_valid > pos) + pos = cs->hwtemps[regnr].vector_valid; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_valid > pos) + pos = cs->hwtemps[regnr].scalar_valid; + } + } + } + + // Find a slot that fits + for (;; ++pos) { + if (cs->slot[pos].used & used & SLOT_OP_BOTH) + continue; + + if (pos >= cs->nrslots) { + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return -1; + } + + fp->alu.inst[pos].inst0 = NOP_INST0; + fp->alu.inst[pos].inst1 = NOP_INST1; + fp->alu.inst[pos].inst2 = NOP_INST2; + fp->alu.inst[pos].inst3 = NOP_INST3; + + cs->nrslots++; + } + // Note: When we need both parts (vector and scalar) of a source, + // we always try to put them into the same position. 
This makes the + // code easier to read, and it is optimal (i.e. one doesn't gain + // anything by splitting the parts). + // It also avoids headaches with swizzles that access both parts (i.e WXY) + tempused = cs->slot[pos].used; + for (i = 0; i < 3; ++i) { + tempvsrc[i] = cs->slot[pos].vsrc[i]; + tempssrc[i] = cs->slot[pos].ssrc[i]; + } + + for (i = 0; i < argc; ++i) { + int flags = (used >> i) & SLOT_SRC_BOTH; + + if (!flags) { + srcpos[i] = 0; + continue; + } + + for (j = 0; j < 3; ++j) { + if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { + if (tempvsrc[j] != hwsrc[i]) + continue; + } + + if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { + if (tempssrc[j] != hwsrc[i]) + continue; + } + + break; + } + + if (j == 3) + break; + + srcpos[i] = j; + tempused |= flags << j; + if (flags & SLOT_SRC_VECTOR) + tempvsrc[j] = hwsrc[i]; + if (flags & SLOT_SRC_SCALAR) + tempssrc[j] = hwsrc[i]; + } + + if (i == argc) + break; + } + + // Found a slot, reserve it + cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); + for (i = 0; i < 3; ++i) { + cs->slot[pos].vsrc[i] = tempvsrc[i]; + cs->slot[pos].ssrc[i] = tempssrc[i]; + } + + for (i = 0; i < argc; ++i) { + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + int regnr = hwsrc[i] & 31; + + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_lastread < pos) + cs->hwtemps[regnr].vector_lastread = + pos; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_lastread < pos) + cs->hwtemps[regnr].scalar_lastread = + pos; + } + } + } + + // Emit the source fetch code + fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; + fp->alu.inst[pos].inst1 |= + ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); + + fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; + fp->alu.inst[pos].inst3 |= + ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | + 
(cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); + + // Emit the argument selection code + if (emit_vop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + + (srcpos[i] * + v_swiz[REG_GET_VSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_FPI0_ARGC_ZERO; + } + } + + fp->alu.inst[pos].inst0 &= + ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK | + R300_FPI0_ARG2C_MASK); + fp->alu.inst[pos].inst0 |= + (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] << + R300_FPI0_ARG1C_SHIFT) + | (swz[2] << R300_FPI0_ARG2C_SHIFT); + } + + if (emit_sop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + + (srcpos[i] * + s_swiz[REG_GET_SSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_FPI2_ARGA_ZERO; + } + } + + fp->alu.inst[pos].inst2 &= + ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK | + R300_FPI2_ARG2A_MASK); + fp->alu.inst[pos].inst2 |= + (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] << + R300_FPI2_ARG1A_SHIFT) + | (swz[2] << R300_FPI2_ARG2A_SHIFT); + } + + return pos; +} + +/** + * Append an ALU instruction to the instruction list. 
+ */ +static void emit_arith(struct r300_fragment_program *fp, + int op, + GLuint dest, + int mask, + GLuint src0, GLuint src1, GLuint src2, int flags) +{ + COMPILE_STATE; + GLuint src[3] = { src0, src1, src2 }; + int hwdest; + GLboolean emit_vop, emit_sop; + int vop, sop, argc; + int pos; + + vop = r300_fpop[op].v_op; + sop = r300_fpop[op].s_op; + argc = r300_fpop[op].argc; + + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && + REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { + if (mask & WRITEMASK_Z) { + mask = WRITEMASK_W; + } else { + return; + } + } + + emit_vop = GL_FALSE; + emit_sop = GL_FALSE; + if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) + emit_vop = GL_TRUE; + if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) + emit_sop = GL_TRUE; + + pos = + find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest, + mask); + if (pos < 0) + return; + + hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ + + if (flags & PFS_FLAG_SAT) { + vop |= R300_FPI0_OUTC_SAT; + sop |= R300_FPI2_OUTA_SAT; + } + + /* Throw the pieces together and get FPI0/1 */ + if (emit_vop) { + fp->alu.inst[pos].inst0 |= vop; + + fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; + + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { + fp->alu.inst[pos].inst1 |= + (mask & WRITEMASK_XYZ) << + R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; + } else + assert(0); + } else { + fp->alu.inst[pos].inst1 |= + (mask & WRITEMASK_XYZ) << + R300_FPI1_DSTC_REG_MASK_SHIFT; + + cs->hwtemps[hwdest].vector_valid = pos + 1; + } + } + + /* And now FPI2/3 */ + if (emit_sop) { + fp->alu.inst[pos].inst2 |= sop; + + if (mask & WRITEMASK_W) { + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { + fp->alu.inst[pos].inst3 |= + (hwdest << R300_FPI3_DSTA_SHIFT) | + R300_FPI3_DSTA_OUTPUT; + } else if (REG_GET_INDEX(dest) == + FRAG_RESULT_DEPR) { + fp->alu.inst[pos].inst3 |= + 
R300_FPI3_DSTA_DEPTH; + } else + assert(0); + } else { + fp->alu.inst[pos].inst3 |= + (hwdest << R300_FPI3_DSTA_SHIFT) | + R300_FPI3_DSTA_REG; + + cs->hwtemps[hwdest].scalar_valid = pos + 1; + } + } + } + + return; +} + +#if 0 +static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + GLuint r = undef; + + if (!(mp->Base.InputsRead & (1 << attr))) { + ERROR("Attribute %d was not provided!\n", attr); + return undef; + } + + REG_SET_TYPE(r, REG_TYPE_INPUT); + REG_SET_INDEX(r, attr); + REG_SET_VALID(r, GL_TRUE); + return r; +} +#endif + +static GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.0, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } +}; + +/** + * Emit a LIT instruction. + * \p flags may be PFS_FLAG_SAT + * + * Definition of LIT (from ARB_fragment_program): + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots. So unless there's some special undocumented opcode, + * this implementation is potentially optimal. Unfortunately, + * emit_arith is a bit too conservative because it doesn't understand + * partial writes to the vector component. 
+ */ +static const GLfloat LitConst[4] = + { 127.999999, 127.999999, 127.999999, -127.999999 }; + +static void emit_lit(struct r300_fragment_program *fp, + GLuint dest, int mask, GLuint src, int flags) +{ + COMPILE_STATE; + GLuint cnst; + int needTemporary; + GLuint temp; + + cnst = emit_const4fv(fp, LitConst); + + needTemporary = 0; + if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = keep(get_temp_reg(fp)); + } else { + temp = keep(dest); + } + + // Note: The order of emit_arith inside the slots is relevant, + // because emit_arith only looks at scalar vs. vector when resolving + // dependencies, and it does not consider individual vector components, + // so swizzling between the two parts can create fake dependencies. + + // First slot + emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY, + keep(src), pfs_zero, undef, 0); + emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); + + // Second slot + emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), cnst, undef, 0); + emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W, + swizzle(temp, Y, Y, Y, Y), undef, undef, 0); + + // Third slot + // If desired, we saturate the y result here. 
+ // This does not affect the use as a condition variable in the CMP later + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, + temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); + + // Fourth slot + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X, + pfs_one, pfs_one, pfs_zero, 0); + emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); + + // Fifth slot + emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z, + pfs_zero, swizzle(temp, W, W, W, W), + negate(swizzle(temp, Y, Y, Y, Y)), flags); + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, + pfs_zero, 0); + + if (needTemporary) { + emit_arith(fp, PFS_OP_MAD, dest, mask, + temp, pfs_one, pfs_zero, flags); + free_temp(fp, temp); + } else { + // Decrease refcount of the destination + t_hw_dst(fp, dest, GL_FALSE, cs->nrslots); + } +} + +static GLboolean parse_program(struct r300_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + const struct prog_instruction *inst = mp->Base.Instructions; + struct prog_instruction *fpi; + GLuint src[3], dest, temp[2]; + int flags, mask = 0; + int const_sin[2]; + + if (!inst || inst[0].Opcode == OPCODE_END) { + ERROR("empty program?\n"); + return GL_FALSE; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + if (fpi->SaturateMode == SATURATE_ZERO_ONE) + flags = PFS_FLAG_SAT; + else + flags = 0; + + if (fpi->Opcode != OPCODE_KIL) { + dest = t_dst(fp, fpi->DstReg); + mask = fpi->DstReg.WriteMask; + } + + switch (fpi->Opcode) { + case OPCODE_ABS: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + absolute(src[0]), pfs_one, pfs_zero, flags); + break; + case OPCODE_ADD: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, src[1], flags); + break; + case OPCODE_CMP: + src[0] = t_src(fp, fpi->SrcReg[0]); + 
src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c + * r300 - if src2.c < 0.0 ? src1.c : src0.c + */ + emit_arith(fp, PFS_OP_CMP, dest, mask, + src[2], src[1], src[0], flags); + break; + case OPCODE_COS: + /* + * cos using a parabola (see SIN): + * cos(x): + * x = (x/(2*PI))+0.75 + * x = frac(x) + * x = (x*2*PI)-PI + * result = sin(x) + */ + temp[0] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* add 0.5*PI and do range reduction */ + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(src[0], X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI + 0); + + /* SIN */ + + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(fp, PFS_OP_MAD, dest, mask, + swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(fp, temp[0]); + break; + case OPCODE_DP3: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_DP3, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DP4: + src[0] = t_src(fp, fpi->SrcReg[0]); + 
src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_DP4, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DPH: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + /* src0.xyz1 -> temp + * DP4 dest, temp, src1 + */ +#if 0 + temp[0] = get_temp_reg(fp); + src[0].s_swz = SWIZZLE_ONE; + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, pfs_zero, 0); + emit_arith(fp, PFS_OP_DP4, dest, mask, + temp[0], src[1], undef, flags); + free_temp(fp, temp[0]); +#else + emit_arith(fp, PFS_OP_DP4, dest, mask, + swizzle(src[0], X, Y, Z, ONE), src[1], + undef, flags); +#endif + break; + case OPCODE_DST: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + /* dest.y = src0.y * src1.y */ + if (mask & WRITEMASK_Y) + emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y, + keep(src[0]), keep(src[1]), + pfs_zero, flags); + /* dest.z = src0.z */ + if (mask & WRITEMASK_Z) + emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z, + src[0], pfs_one, pfs_zero, flags); + /* result.x = 1.0 + * result.w = src1.w */ + if (mask & WRITEMASK_XW) { + REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & WRITEMASK_XW, + src[1], pfs_one, pfs_zero, flags); + } + break; + case OPCODE_EX2: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_EX2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_FLR: + src[0] = t_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(fp); + /* FRC temp, src0 + * MAD dest, src0, 1.0, -temp + */ + emit_arith(fp, PFS_OP_FRC, temp[0], mask, + keep(src[0]), undef, undef, 0); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(temp[0]), flags); + free_temp(fp, temp[0]); + break; + case OPCODE_FRC: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_FRC, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_KIL: + emit_tex(fp, fpi, R300_FPITX_OP_KIL); + break; + case OPCODE_LG2: + src[0] = t_scalar_src(fp, 
fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_LG2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_LIT: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_lit(fp, dest, mask, src[0], flags); + break; + case OPCODE_LRP: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + /* result = tmp0tmp1 + (1 - tmp0)tmp2 + * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 + * MAD temp, -tmp0, tmp2, tmp2 + * MAD result, tmp0, tmp1, temp + */ + temp[0] = get_temp_reg(fp); + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + negate(keep(src[0])), keep(src[2]), src[2], + 0); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], temp[0], flags); + free_temp(fp, temp[0]); + break; + case OPCODE_MAD: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], src[2], flags); + break; + case OPCODE_MAX: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAX, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MIN: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MIN, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MOV: + case OPCODE_SWZ: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, pfs_zero, flags); + break; + case OPCODE_MUL: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], pfs_zero, flags); + break; + case OPCODE_POW: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + src[1] = t_scalar_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W, + src[0], undef, undef, 0); + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + temp[0], src[1], pfs_zero, 0); + emit_arith(fp, PFS_OP_EX2, dest, 
fpi->DstReg.WriteMask, + temp[0], undef, undef, 0); + free_temp(fp, temp[0]); + break; + case OPCODE_RCP: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_RCP, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_RSQ: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_RSQ, dest, mask, + absolute(src[0]), pfs_zero, pfs_zero, flags); + break; + case OPCODE_SCS: + /* + * scs using a parabola : + * scs(x): + * result.x = sin(-abs(x)+0.5*PI) (cos) + * result.y = sin(x) (sin) + * + */ + temp[0] = get_temp_reg(fp); + temp[1] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* x = -abs(x)+0.5*PI */ + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI + pfs_half, + negate(abs + (swizzle(keep(src[0]), X, X, X, X))), + 0); + + /* C*x (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + swizzle(const_sin[0], Y, Y, Y, Y), + swizzle(keep(src[0]), X, X, X, X), + pfs_zero, 0); + + /* B*x, C*x (cos) */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + /* B*x (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(const_sin[0], X, X, X, X), + keep(src[0]), pfs_zero, 0); + + /* y = B*x + C*x*abs(x) (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z, + absolute(src[0]), + swizzle(temp[0], W, W, W, W), + swizzle(temp[1], W, W, W, W), 0); + + /* y = B*x + C*x*abs(x) (cos) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], + W, Z, Y, + X), + absolute(swizzle(temp[1], W, Z, Y, X)), + negate(swizzle(temp[1], W, Z, Y, X)), 
0); + + /* dest.xy = mad(temp.xy, P, temp2.wz) */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[1], W, Z, Y, X), flags); + + free_temp(fp, temp[0]); + free_temp(fp, temp[1]); + break; + case OPCODE_SGE: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 0 : 1 + */ + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(fp, PFS_OP_CMP, dest, mask, + pfs_one, pfs_zero, temp[0], 0); + free_temp(fp, temp[0]); + break; + case OPCODE_SIN: + /* + * using a parabola: + * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) + * extra precision is obtained by weighting against + * itself squared. + */ + + temp[0] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* do range reduction */ + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(keep(src[0]), X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + pfs_half, 0); + + emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI + 0); + + /* SIN */ + + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(fp, PFS_OP_MAD, dest, mask, + 
swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(fp, temp[0]); + break; + case OPCODE_SLT: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 1 : 0 + */ + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(fp, PFS_OP_CMP, dest, mask, + pfs_zero, pfs_one, temp[0], 0); + free_temp(fp, temp[0]); + break; + case OPCODE_SUB: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(src[1]), flags); + break; + case OPCODE_TEX: + emit_tex(fp, fpi, R300_FPITX_OP_TEX); + break; + case OPCODE_TXB: + emit_tex(fp, fpi, R300_FPITX_OP_TXB); + break; + case OPCODE_TXP: + emit_tex(fp, fpi, R300_FPITX_OP_TXP); + break; + case OPCODE_XPD:{ + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0.zxy * src1.yzx */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_XYZ, swizzle(keep(src[0]), + Z, X, Y, W), + swizzle(keep(src[1]), Y, Z, X, W), + pfs_zero, 0); + /* dest.xyz = src0.yzx * src1.zxy - temp + * dest.w = undefined + * */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & WRITEMASK_XYZ, swizzle(src[0], + Y, Z, + X, W), + swizzle(src[1], Z, X, Y, W), + negate(temp[0]), flags); + /* cleanup */ + free_temp(fp, temp[0]); + break; + } + default: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + } + + if (fp->error) + return GL_FALSE; + + } + + return GL_TRUE; +} + +static void insert_wpos(struct gl_program *prog) +{ + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... 
*/ + prog->NumTemporaries++; + + fpi = _mesa_alloc_instructions(prog->NumInstructions + 3); + _mesa_init_instructions(fpi, prog->NumInstructions + 3); + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(prog->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + _mesa_copy_instructions(&fpi[i], prog->Instructions, + prog->NumInstructions); + + free(prog->Instructions); + + prog->Instructions = fpi; + + prog->NumInstructions += i; + fpi = 
&prog->Instructions[prog->NumInstructions - 1]; + + assert(fpi->Opcode == OPCODE_END); + + for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) { + for (i = 0; i < 3; i++) + if (fpi->SrcReg[i].File == PROGRAM_INPUT && + fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { + fpi->SrcReg[i].File = PROGRAM_TEMPORARY; + fpi->SrcReg[i].Index = tempregi; + } + } +} + +/* - Init structures + * - Determine what hwregs each input corresponds to + */ +static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) +{ + struct r300_pfs_compile_state *cs = NULL; + struct gl_fragment_program *mp = &fp->mesa_program; + struct prog_instruction *fpi; + GLuint InputsRead = mp->Base.InputsRead; + GLuint temps_used = 0; /* for fp->temps[] */ + int i, j; + + /* New compile, reset tracking data */ + fp->optimization = + driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); + fp->translated = GL_FALSE; + fp->error = GL_FALSE; + fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); + fp->tex.length = 0; + fp->cur_node = 0; + fp->first_node_has_tex = 0; + fp->const_nr = 0; + fp->max_temp_idx = 0; + fp->node[0].alu_end = -1; + fp->node[0].tex_end = -1; + + _mesa_memset(cs, 0, sizeof(*fp->cs)); + for (i = 0; i < PFS_MAX_ALU_INST; i++) { + for (j = 0; j < 3; j++) { + cs->slot[i].vsrc[j] = SRC_CONST; + cs->slot[i].ssrc[j] = SRC_CONST; + } + } + + /* Work out what temps the Mesa inputs correspond to, this must match + * what setup_rs_unit does, which shouldn't be a problem as rs_unit + * configures itself based on the fragprog's InputsRead + * + * NOTE: this depends on get_hw_temp() allocating registers in order, + * starting from register 0. 
+ */ + + /* Texcoords come first */ + for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; + cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = + get_hw_temp(fp, 0); + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) { + cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); + insert_wpos(&mp->Base); + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) { + cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); + } + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i = 0; i < 32; i++) + if (InputsRead & (1 << i)) + cs->inputs[i].reg = 0; + } + + /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. 
+ * That way, we can free up the reg when it's no longer needed + */ + if (!mp->Base.Instructions) { + ERROR("No instructions found in program\n"); + return; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + int idx; + + for (i = 0; i < 3; i++) { + idx = fpi->SrcReg[i].Index; + switch (fpi->SrcReg[i].File) { + case PROGRAM_TEMPORARY: + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + break; + case PROGRAM_INPUT: + cs->inputs[idx].refcount++; + break; + default: + break; + } + } + + idx = fpi->DstReg.Index; + if (fpi->DstReg.File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + } + } + cs->temp_in_use = temps_used; +} + +static void update_params(struct r300_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); +} + +void r500TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp) +{ + struct r300_pfs_compile_state *cs = NULL; + + if (!fp->translated) { + + init_program(r300, fp); + cs = fp->cs; + + if (parse_program(fp) == GL_FALSE) { + dump_program(fp); + return; + } + + /* Finish off */ + fp->node[fp->cur_node].alu_end = + cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + if (fp->node[fp->cur_node].tex_end < 0) + fp->node[fp->cur_node].tex_end = 0; + fp->alu_offset = 0; + fp->alu_end = cs->nrslots - 1; + fp->tex_offset = 0; + fp->tex_end = fp->tex.length ? 
fp->tex.length - 1 : 0; + assert(fp->node[fp->cur_node].alu_end >= 0); + assert(fp->alu_end >= 0); + + fp->translated = GL_TRUE; + if (RADEON_DEBUG & DEBUG_PIXEL) + dump_program(fp); + r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + } + + update_params(fp); +} + +/* just some random things... */ +static void dump_program(struct r300_fragment_program *fp) +{ + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_print_program(&fp->mesa_program.Base); + fflush(stdout); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n < (fp->cur_node + 1); n++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d\n", n, + fp->node[n].alu_offset, + fp->node[n].tex_offset, + fp->node[n].alu_end, fp->node[n].tex_end); + + if (fp->tex.length) { + fprintf(stderr, " TEX:\n"); + for (i = fp->node[n].tex_offset; + i <= fp->node[n].tex_offset + fp->node[n].tex_end; + ++i) { + const char *instr; + + switch ((fp->tex. + inst[i] >> R300_FPITX_OPCODE_SHIFT) & + 15) { + case R300_FPITX_OP_TEX: + instr = "TEX"; + break; + case R300_FPITX_OP_KIL: + instr = "KIL"; + break; + case R300_FPITX_OP_TXP: + instr = "TXP"; + break; + case R300_FPITX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (fp->tex. + inst[i] >> R300_FPITX_DST_SHIFT) & 31, + (fp->tex. + inst[i] & R300_FPITX_SRC_CONST) ? 'c' : + 't', + (fp->tex. + inst[i] >> R300_FPITX_SRC_SHIFT) & 31, + (fp->tex. 
+ inst[i] & R300_FPITX_IMAGE_MASK) >> + R300_FPITX_IMAGE_SHIFT, + fp->tex.inst[i]); + } + } + + for (i = fp->node[n].alu_offset; + i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = fp->alu.inst[i].inst1 >> (j * 6); + int rega = fp->alu.inst[i].inst3 >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (fp->alu.inst[i]. + inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (fp->alu.inst[i]. + inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (fp->alu.inst[i]. + inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + } + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (fp->alu.inst[i]. 
+ inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + fp->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, fp->alu.inst[i].inst3); + + for (j = 0; j < 3; ++j) { + int regc = fp->alu.inst[i].inst0 >> (j * 7); + int rega = fp->alu.inst[i].inst2 >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_FPI0_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? 
"|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + fp->alu.inst[i].inst0, arga[0], arga[1], + arga[2], fp->alu.inst[i].inst2); + } + } +} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h new file mode 100644 index 0000000000..72fca77845 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs
+ * Jerome Glisse
+ */
+/*
+ * Declarations shared by the fragment program compiler: the swizzle
+ * descriptor, the PFS_OP_* opcode indices, argument/source field layout
+ * constants and the four-word NOP instruction.
+ *
+ * NOTE(review): the include guard and the exported function below still use
+ * r300 names although this file is r500_fragprog.h - presumably it was copied
+ * from r300_fragprog.h. If that header uses the same guard, only whichever of
+ * the two is included first takes effect. Confirm before renaming.
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+
+#include "r300_context.h"
+
+/* Swizzle descriptor: a length plus fixed-size src[4] and inst[8] word arrays
+ * (how the arrays are filled is defined by the compiler, not visible here). */
+typedef struct r300_fragment_program_swizzle {
+	GLuint length;
+	GLuint src[4];
+	GLuint inst[8];
+} r300_fragment_program_swizzle_t;
+
+/* supported hw opcodes */
+#define PFS_OP_MAD 0
+#define PFS_OP_DP3 1
+#define PFS_OP_DP4 2
+#define PFS_OP_MIN 3
+#define PFS_OP_MAX 4
+#define PFS_OP_CMP 5
+#define PFS_OP_FRC 6
+#define PFS_OP_EX2 7
+#define PFS_OP_LG2 8
+#define PFS_OP_RCP 9
+#define PFS_OP_RSQ 10
+#define PFS_OP_REPL_ALPHA 11
+#define PFS_OP_CMPH 12
+/* Highest opcode index above (equal to PFS_OP_CMPH). */
+#define MAX_PFS_OP 12
+
+/* Per-instruction flag bits. */
+#define PFS_FLAG_SAT (1 << 0)
+#define PFS_FLAG_ABS (1 << 1)
+
+/* Argument selector fields are ARG_STRIDE (7) bits wide; ARG_MASK covers the
+ * whole field and ARG_NEG / ARG_ABS are its two top bits. */
+#define ARG_NEG (1 << 5)
+#define ARG_ABS (1 << 6)
+#define ARG_MASK (127 << 0)
+#define ARG_STRIDE 7
+/* Source register fields are SRC_STRIDE (6) bits wide; SRC_MASK covers the
+ * field and SRC_CONST is its top bit. */
+#define SRC_CONST (1 << 5)
+#define SRC_MASK (63 << 0)
+#define SRC_STRIDE 6
+
+/* NOP_INST0..NOP_INST3: the four instruction words of a do-nothing ALU op.
+ * Words 0 and 2 select a MAD with all three arguments set to ZERO; words 1
+ * and 3 point all three sources at index 0 with SRC_CONST set. */
+#define NOP_INST0 ( \
+	(R300_FPI0_OUTC_MAD) | \
+	(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \
+	(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \
+	(R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT))
+#define NOP_INST1 ( \
+	((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \
+	((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \
+	((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT))
+#define NOP_INST2 ( \
+	(R300_FPI2_OUTA_MAD) | \
+	(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \
+	(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \
+	(R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT))
+#define NOP_INST3 ( \
+	((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \
+	((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \
+	((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT))
+
+/* Values for the fragment program optimization driconf option. */
+#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
+#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
+
+struct r300_fragment_program;
+
+/* NOTE(review): declares the r300 entry point, not an r500 one - confirm this
+ * is intentional for this header. */
+extern void r300TranslateFragmentShader(r300ContextPtr r300,
+					struct r300_fragment_program *fp);
+
+#endif
-- cgit v1.2.3 From 1c71ec4d45a8da2a5c83b09e2e39d4a7c2ecc99b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 17 Mar 2008 21:09:49 +1000 Subject: some basic r500 portage --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 62 +++- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_emit.h | 12 + src/mesa/drivers/dri/r300/r300_reg.h | 477 ++++++++++++++++++++++++++- src/mesa/drivers/dri/r300/r300_state.c | 208 +++++++++++- src/mesa/drivers/dri/radeon/radeon_chipset.h | 2 + 6 files changed, 742 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 1abc92ec49..883b41a349 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -281,10 +281,14 @@ void r300InitCmdBuf(r300ContextPtr r300) { int size, mtu; int has_tcl = 1; + int is_r500 = 0; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ mtu = r300->radeon.glCtx->Const.MaxTextureUnits; @@ -374,10 +378,17 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2); ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); - r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); - ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1); + if (is_r500) { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); + } else { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + 
r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1); + } ALLOC_STATE(sc_hyperz, always, 3, 0); r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); ALLOC_STATE(sc_screendoor, always, 2, 0); @@ -389,14 +400,25 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); ALLOC_STATE(us_out_fmt, always, 6, 0); r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); - ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); - r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); - ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); - r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); - ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); - r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); - ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); - r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + + if (is_r500) { + ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 1); + } + + if (0/*is_r500*/) { + + }/* else*/ + { + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(FG_FOG_BLEND, 1); ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); @@ -456,10 +478,18 @@ void r300InitCmdBuf(r300ContextPtr r300) 
r300->hw.vps.cmd[R300_VPS_CMD_0] = cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1); - for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + if (is_r500) { + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R500_PVS_UPLOAD_CLIP_PLANE0+i, 1); + } + } else { + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + } } } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 993aa51990..1b8b9a8222 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -487,6 +487,7 @@ struct r300_hw_state { struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */ struct r300_state_atom fpt; /* texi - (4620) */ struct r300_state_atom us_out_fmt; /* (46A4) */ + struct r300_state_atom r500fp; /* r500 fp instructions */ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ struct r300_state_atom fogs; /* fog state (4BC0) */ struct r300_state_atom fogc; /* fog color (4BC8) */ diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index a6d69ec5ff..a4f6ab997e 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -74,6 +74,18 @@ static inline uint32_t cmdvpu(int addr, int count) return cmd.u; } +static inline uint32_t cmdr500fp(int addr, int count) +{ + drm_r300_cmd_header_t cmd; + + cmd.vpu.cmd_type = R300_CMD_R500FP; + cmd.vpu.count = count; + cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; + cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + static inline uint32_t cmdpacket3(int packet) { drm_r300_cmd_header_t cmd; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h 
b/src/mesa/drivers/dri/r300/r300_reg.h index 0d78fb8e84..5ddb973ca1 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -657,7 +657,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* This table specifies the source location and format for up to 16 texture * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) */ -#define R500_RS_IP_0 0x4074 +#define R500_RS_IP_0 0x4074 #define R500_RS_IP_1 0x4078 #define R500_RS_IP_2 0x407C #define R500_RS_IP_3 0x4080 @@ -1151,7 +1151,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* */ -#define R500_RS_INST_0 0x4320 +#define R500_RS_INST_0 0x4320 #define R500_RS_INST_1 0x4324 #define R500_RS_INST_2 0x4328 #define R500_RS_INST_3 0x432c @@ -2544,6 +2544,479 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_PRIM_NUM_VERTICES_SHIFT 16 #define R300_PRIM_NUM_VERTICES_MASK 0xffff + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader. You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. 
+ */ +#define R500_US_ALU_ALPHA_INST_0 0xa800 +# define R500_ALPHA_OP_MAD 0 +# define R500_ALPHA_OP_DP 1 +# define R500_ALPHA_OP_MIN 2 +# define R500_ALPHA_OP_MAX 3 +/* #define R500_ALPHA_OP_RESERVED 4 */ +# define R500_ALPHA_OP_CND 5 +# define R500_ALPHA_OP_CMP 6 +# define R500_ALPHA_OP_FRC 7 +# define R500_ALPHA_OP_EX2 8 +# define R500_ALPHA_OP_LN2 9 +# define R500_ALPHA_OP_RCP 10 +# define R500_ALPHA_OP_RSQ 11 +# define R500_ALPHA_OP_SIN 12 +# define R500_ALPHA_OP_COS 13 +# define R500_ALPHA_OP_MDH 14 +# define R500_ALPHA_OP_MDV 15 +# define R500_ALPHA_ADDRD(x) (x << 4) +# define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SRC0 (0 << 12) +# define R500_ALPHA_SEL_A_SRC1 (1 << 12) +# define R500_ALPHA_SEL_A_SRC2 (2 << 12) +# define R500_ALPHA_SEL_A_SRCP (3 << 12) +# define R500_ALPHA_SWIZ_A_R (0 << 14) +# define R500_ALPHA_SWIZ_A_G (1 << 14) +# define R500_ALPHA_SWIZ_A_B (2 << 14) +# define R500_ALPHA_SWIZ_A_A (3 << 14) +# define R500_ALPHA_SWIZ_A_0 (4 << 14) +# define R500_ALPHA_SWIZ_A_HALF (5 << 14) +# define R500_ALPHA_SWIZ_A_1 (6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ +# define R500_ALPHA_MOD_A_NOP (0 << 17) +# define R500_ALPHA_MOD_A_NEG (1 << 17) +# define R500_ALPHA_MOD_A_ABS (2 << 17) +# define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SRC0 (0 << 19) +# define R500_ALPHA_SEL_B_SRC1 (1 << 19) +# define R500_ALPHA_SEL_B_SRC2 (2 << 19) +# define R500_ALPHA_SEL_B_SRCP (3 << 19) +# define R500_ALPHA_SWIZ_B_R (0 << 21) +# define R500_ALPHA_SWIZ_B_G (1 << 21) +# define R500_ALPHA_SWIZ_B_B (2 << 21) +# define R500_ALPHA_SWIZ_B_A (3 << 21) +# define R500_ALPHA_SWIZ_B_0 (4 << 21) +# define R500_ALPHA_SWIZ_B_HALF (5 << 21) +# define R500_ALPHA_SWIZ_B_1 (6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALPHA_MOD_B_NOP (0 << 24) +# define R500_ALPHA_MOD_B_NEG (1 << 24) +# define R500_ALPHA_MOD_B_ABS (2 << 24) +# define R500_ALPHA_MOD_B_NAB (3 << 24) +# define R500_ALPHA_OMOD_IDENTITY (0 << 26) +# 
define R500_ALPHA_OMOD_MUL_2 (1 << 26) +# define R500_ALPHA_OMOD_MUL_4 (2 << 26) +# define R500_ALPHA_OMOD_MUL_8 (3 << 26) +# define R500_ALPHA_OMOD_DIV_2 (4 << 26) +# define R500_ALPHA_OMOD_DIV_4 (5 << 26) +# define R500_ALPHA_OMOD_DIV_8 (6 << 26) +# define R500_ALPHA_OMOD_DISABLE (7 << 26) +# define R500_ALPHA_TARGET(x) (x << 29) +# define R500_ALPHA_W_OMASK (1 << 31) +#define R500_US_ALU_ALPHA_ADDR_0 0x9800 +# define R500_ALPHA_ADDR0(x) (x << 0) +# define R500_ALPHA_ADDR0_CONST (1 << 8) +# define R500_ALPHA_ADDR0_REL (1 << 9) +# define R500_ALPHA_ADDR1(x) (x << 10) +# define R500_ALPHA_ADDR1_CONST (1 << 18) +# define R500_ALPHA_ADDR1_REL (1 << 19) +# define R500_ALPHA_ADDR2(x) (x << 20) +# define R500_ALPHA_ADDR2_CONST (1 << 28) +# define R500_ALPHA_ADDR2_REL (1 << 29) +# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) +# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) +# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) +# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30) +#define R500_US_ALU_RGBA_INST_0 0xb000 +# define R500_ALU_RGBA_OP_MAD (0 << 0) +# define R500_ALU_RGBA_OP_DP3 (1 << 0) +# define R500_ALU_RGBA_OP_DP4 (2 << 0) +# define R500_ALU_RGBA_OP_D2A (3 << 0) +# define R500_ALU_RGBA_OP_MIN (4 << 0) +# define R500_ALU_RGBA_OP_MAX (5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ +# define R500_ALU_RGBA_OP_CND (7 << 0) +# define R500_ALU_RGBA_OP_CMP (8 << 0) +# define R500_ALU_RGBA_OP_FRC (9 << 0) +# define R500_ALU_RGBA_OP_SOP (10 << 0) +# define R500_ALU_RGBA_OP_MDH (11 << 0) +# define R500_ALU_RGBA_OP_MDV (12 << 0) +# define R500_ALU_RGBA_ADDRD(x) (x << 4) +# define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) +# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) +# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) +# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) +# define R500_ALU_RGBA_R_SWIZ_G (1 << 14) +# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) +# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) +# 
define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) +# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) +# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ +# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) +# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) +# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) +# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) +# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) +# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) +# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ +# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) +# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) +# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) +# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) +# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) +# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) +# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ +# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) +# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) +# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) +# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) +# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) +# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) +# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) +# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) +# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) +# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) +# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ +# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30) +#define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) +# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) +# define R500_ALU_RGB_SEL_A_SRC2 (2 << 
0) +# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) +# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) +# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) +# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) +# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) +# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) +# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) +# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ +# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) +# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) +# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) +# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) +# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) +# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) +# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ +# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) +# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) +# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) +# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) +# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) +# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) +# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ +# define R500_ALU_RGB_MOD_A_NOP (0 << 11) +# define R500_ALU_RGB_MOD_A_NEG (1 << 11) +# define R500_ALU_RGB_MOD_A_ABS (2 << 11) +# define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) +# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) +# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) +# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) +# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) +# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) +# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) +# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) +# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) +# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) +# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ +# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) +# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) +# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) +# define R500_ALU_RGB_G_SWIZ_B_A (3 
<< 18) +# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) +# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) +# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ +# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) +# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) +# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) +# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) +# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) +# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) +# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALU_RGB_MOD_B_NOP (0 << 24) +# define R500_ALU_RGB_MOD_B_NEG (1 << 24) +# define R500_ALU_RGB_MOD_B_ABS (2 << 24) +# define R500_ALU_RGB_MOD_B_NAB (3 << 24) +# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26) +# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26) +# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26) +# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26) +# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26) +# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) +# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) +# define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +# define R500_ALU_RGB_TARGET(x) (x << 29) +# define R500_ALU_RGB_WMASK (1 << 31) +#define R500_US_ALU_RGB_ADDR_0 0x9000 +# define R500_RGB_ADDR0(x) (x << 0) +# define R500_RGB_ADDR0_CONST (1 << 8) +# define R500_RGB_ADDR0_REL (1 << 9) +# define R500_RGB_ADDR1(x) (x << 10) +# define R500_RGB_ADDR1_CONST (1 << 18) +# define R500_RGB_ADDR1_REL (1 << 19) +# define R500_RGB_ADDR2(x) (x << 20) +# define R500_RGB_ADDR2_CONST (1 << 28) +# define R500_RGB_ADDR2_REL (1 << 29) +# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) +# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) +# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) +#define R500_US_CMN_INST_0 0xb800 +# define R500_INST_TYPE_ALU (0 << 0) +# define R500_INST_TYPE_OUT (1 << 0) +# define R500_INST_TYPE_FC (2 << 0) +# define R500_INST_TYPE_TEX (3 << 0) +# define R500_INST_TEX_SEM_WAIT (1 << 2) +# 
define R500_INST_RGB_PRED_SEL_NONE (0 << 3) +# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) +# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) +# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) +# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) +# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) +# define R500_INST_RGB_PRED_INV (1 << 6) +# define R500_INST_WRITE_INACTIVE (1 << 7) +# define R500_INST_LAST (1 << 8) +# define R500_INST_NOP (1 << 9) +# define R500_INST_ALU_WAIT (1 << 10) +# define R500_INST_RGB_WMASK_R (1 << 11) +# define R500_INST_RGB_WMASK_G (1 << 12) +# define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_ALPHA_WMASK (1 << 14) +# define R500_INST_RGB_OMASK_R (1 << 15) +# define R500_INST_RGB_OMASK_G (1 << 16) +# define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_ALPHA_OMASK (1 << 18) +# define R500_INST_RGB_CLAMP (1 << 19) +# define R500_INST_ALPHA_CLAMP (1 << 20) +# define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALPHA_PRED_INV (1 << 22) +# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) +# define R500_INST_ALU_RESULT_OP_LT (1 << 23) +# define R500_INST_ALU_RESULT_OP_GE (2 << 23) +# define R500_INST_ALU_RESULT_OP_NE (3 << 23) +# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) +# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) +# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) +# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) +# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) +# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) +/* XXX next four are kind of guessed */ +# define R500_INST_STAT_WE_R (1 << 28) +# define R500_INST_STAT_WE_G (1 << 29) +# define R500_INST_STAT_WE_B (1 << 30) +# define R500_INST_STAT_WE_A (1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR 0x4630 +# define R500_US_CODE_START_ADDR(x) (x << 0) +# define R500_US_CODE_END_ADDR(x) (x << 16) +#define R500_US_CODE_OFFSET 0x4638 +# define R500_US_CODE_OFFSET_ADDR(x) (x << 0) +#define R500_US_CODE_RANGE 0x4634 +# 
define R500_US_CODE_RANGE_ADDR(x) (x << 0) +# define R500_US_CODE_RANGE_SIZE(x) (x << 16) +#define R500_US_CONFIG 0x4600 +# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) +#define R500_US_FC_ADDR_0 0xa000 +# define R500_FC_BOOL_ADDR(x) (x << 0) +# define R500_FC_INT_ADDR(x) (x << 8) +# define R500_FC_JUMP_ADDR(x) (x << 16) +# define R500_FC_JUMP_GLOBAL (1 << 31) +#define R500_US_FC_BOOL_CONST 0x4620 +# define R500_FC_KBOOL(x) (x) +#define R500_US_FC_CTRL 0x4624 +# define R500_FC_TEST_EN (1 << 30) +# define R500_FC_FULL_FC_EN (1 << 31) +#define R500_US_FC_INST_0 0x9800 +# define R500_FC_OP_JUMP (0 << 0) +# define R500_FC_OP_LOOP (1 << 0) +# define R500_FC_OP_ENDLOOP (2 << 0) +# define R500_FC_OP_REP (3 << 0) +# define R500_FC_OP_ENDREP (4 << 0) +# define R500_FC_OP_BREAKLOOP (5 << 0) +# define R500_FC_OP_BREAKREP (6 << 0) +# define R500_FC_OP_CONTINUE (7 << 0) +# define R500_FC_B_ELSE (1 << 4) +# define R500_FC_JUMP_ANY (1 << 5) +# define R500_FC_A_OP_NONE (0 << 6) +# define R500_FC_A_OP_POP (1 << 6) +# define R500_FC_A_OP_PUSH (2 << 6) +# define R500_FC_JUMP_FUNC(x) (x << 8) +# define R500_FC_B_POP_CNT(x) (x << 16) +# define R500_FC_B_OP0_NONE (0 << 24) +# define R500_FC_B_OP0_DECR (1 << 24) +# define R500_FC_B_OP0_INCR (2 << 24) +# define R500_FC_B_OP1_DECR (0 << 26) +# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_INCR (2 << 26) +# define R500_FC_IGNORE_UNCOVERED (1 << 28) +#define R500_US_FC_INT_CONST_0 0x4c00 +# define R500_FC_INT_CONST_KR(x) (x << 0) +# define R500_FC_INT_CONST_KG(x) (x << 8) +# define R500_FC_INT_CONST_KB(x) (x << 16) +/* _0 through _15 */ +#define R500_US_FORMAT0_0 0x4640 +# define R500_FORMAT_TXWIDTH(x) (x << 0) +# define R500_FORMAT_TXHEIGHT(x) (x << 11) +# define R500_FORMAT_TXDEPTH(x) (x << 22) +/* _0 through _3 */ +#define R500_US_OUT_FMT_0 0x46a4 +# define R500_OUT_FMT_C4_8 (0 << 0) +# define R500_OUT_FMT_C4_10 (1 << 0) +# define R500_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R500_OUT_FMT_C_16 (3 << 0) +# define 
R500_OUT_FMT_C2_16 (4 << 0) +# define R500_OUT_FMT_C4_16 (5 << 0) +# define R500_OUT_FMT_C_16_MPEG (6 << 0) +# define R500_OUT_FMT_C2_16_MPEG (7 << 0) +# define R500_OUT_FMT_C2_4 (8 << 0) +# define R500_OUT_FMT_C_3_3_2 (9 << 0) +# define R500_OUT_FMT_C_6_5_6 (10 << 0) +# define R500_OUT_FMT_C_11_11_10 (11 << 0) +# define R500_OUT_FMT_C_10_11_11 (12 << 0) +# define R500_OUT_FMT_C_2_10_10_10 (13 << 0) +/* #define R500_OUT_FMT_RESERVED (14 << 0) */ +# define R500_OUT_FMT_UNUSED (15 << 0) +# define R500_OUT_FMT_C_16_FP (16 << 0) +# define R500_OUT_FMT_C2_16_FP (17 << 0) +# define R500_OUT_FMT_C4_16_FP (18 << 0) +# define R500_OUT_FMT_C_32_FP (19 << 0) +# define R500_OUT_FMT_C2_32_FP (20 << 0) +# define R500_OUT_FMT_C4_32_FP (21 << 0) +# define R500_C0_SEL_A (0 << 8) +# define R500_C0_SEL_R (1 << 8) +# define R500_C0_SEL_G (2 << 8) +# define R500_C0_SEL_B (3 << 8) +# define R500_C1_SEL_A (0 << 10) +# define R500_C1_SEL_R (1 << 10) +# define R500_C1_SEL_G (2 << 10) +# define R500_C1_SEL_B (3 << 10) +# define R500_C2_SEL_A (0 << 12) +# define R500_C2_SEL_R (1 << 12) +# define R500_C2_SEL_G (2 << 12) +# define R500_C2_SEL_B (3 << 12) +# define R500_C3_SEL_A (0 << 14) +# define R500_C3_SEL_R (1 << 14) +# define R500_C3_SEL_G (2 << 14) +# define R500_C3_SEL_B (3 << 14) +# define R500_OUT_SIGN(x) (x << 16) +# define R500_ROUND_ADJ (1 << 20) +#define R500_US_PIXSIZE 0x4604 +# define R500_PIX_SIZE(x) (x) +#define R500_US_TEX_ADDR_0 0x9800 +# define R500_TEX_SRC_ADDR(x) (x << 0) +# define R500_TEX_SRC_ADDR_REL (1 << 7) +# define R500_TEX_SRC_S_SWIZ_R (0 << 8) +# define R500_TEX_SRC_S_SWIZ_G (1 << 8) +# define R500_TEX_SRC_S_SWIZ_B (2 << 8) +# define R500_TEX_SRC_S_SWIZ_A (3 << 8) +# define R500_TEX_SRC_T_SWIZ_R (0 << 10) +# define R500_TEX_SRC_T_SWIZ_G (1 << 10) +# define R500_TEX_SRC_T_SWIZ_B (2 << 10) +# define R500_TEX_SRC_T_SWIZ_A (3 << 10) +# define R500_TEX_SRC_R_SWIZ_R (0 << 12) +# define R500_TEX_SRC_R_SWIZ_G (1 << 12) +# define R500_TEX_SRC_R_SWIZ_B (2 << 12) +# define 
R500_TEX_SRC_R_SWIZ_A (3 << 12)
+#   define R500_TEX_SRC_Q_SWIZ_R (0 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
+/* Function-like field macros parenthesise their argument so expressions such
+ * as R500_TEX_DST_ADDR(a | b) shift the whole value. */
+#   define R500_TEX_DST_ADDR(x) ((x) << 16)
+#   define R500_TEX_DST_ADDR_REL (1 << 23)
+#   define R500_TEX_DST_R_SWIZ_R (0 << 24)
+#   define R500_TEX_DST_R_SWIZ_G (1 << 24)
+#   define R500_TEX_DST_R_SWIZ_B (2 << 24)
+#   define R500_TEX_DST_R_SWIZ_A (3 << 24)
+#   define R500_TEX_DST_G_SWIZ_R (0 << 26)
+#   define R500_TEX_DST_G_SWIZ_G (1 << 26)
+#   define R500_TEX_DST_G_SWIZ_B (2 << 26)
+#   define R500_TEX_DST_G_SWIZ_A (3 << 26)
+#   define R500_TEX_DST_B_SWIZ_R (0 << 28)
+#   define R500_TEX_DST_B_SWIZ_G (1 << 28)
+#   define R500_TEX_DST_B_SWIZ_B (2 << 28)
+#   define R500_TEX_DST_B_SWIZ_A (3 << 28)
+#   define R500_TEX_DST_A_SWIZ_R (0 << 30)
+#   define R500_TEX_DST_A_SWIZ_G (1 << 30)
+#   define R500_TEX_DST_A_SWIZ_B (2 << 30)
+#   define R500_TEX_DST_A_SWIZ_A (3 << 30)
+/* Texture derivative addresses: DX occupies the low half-word and DY the
+ * high half-word, each laid out as address, REL bit, then S/T/R/Q swizzles. */
+#define R500_US_TEX_ADDR_DXDY_0 0xa000
+#   define R500_DX_ADDR(x) ((x) << 0)
+#   define R500_DX_ADDR_REL (1 << 7)
+#   define R500_DX_S_SWIZ_R (0 << 8)
+#   define R500_DX_S_SWIZ_G (1 << 8)
+#   define R500_DX_S_SWIZ_B (2 << 8)
+#   define R500_DX_S_SWIZ_A (3 << 8)
+#   define R500_DX_T_SWIZ_R (0 << 10)
+#   define R500_DX_T_SWIZ_G (1 << 10)
+#   define R500_DX_T_SWIZ_B (2 << 10)
+#   define R500_DX_T_SWIZ_A (3 << 10)
+#   define R500_DX_R_SWIZ_R (0 << 12)
+#   define R500_DX_R_SWIZ_G (1 << 12)
+#   define R500_DX_R_SWIZ_B (2 << 12)
+#   define R500_DX_R_SWIZ_A (3 << 12)
+#   define R500_DX_Q_SWIZ_R (0 << 14)
+#   define R500_DX_Q_SWIZ_G (1 << 14)
+#   define R500_DX_Q_SWIZ_B (2 << 14)
+#   define R500_DX_Q_SWIZ_A (3 << 14)
+#   define R500_DY_ADDR(x) ((x) << 16)
+/* Was (1 << 17), which lies inside the DY_ADDR field that starts at bit 16.
+ * The REL bit sits directly above the address, as for DX_ADDR_REL (bit 7)
+ * and TEX_DST_ADDR_REL (bit 23). */
+#   define R500_DY_ADDR_REL (1 << 23)
+#   define R500_DY_S_SWIZ_R (0 << 24)
+#   define R500_DY_S_SWIZ_G (1 << 24)
+#   define R500_DY_S_SWIZ_B (2 << 24)
+#   define R500_DY_S_SWIZ_A (3 << 24)
+#   define R500_DY_T_SWIZ_R (0 << 26)
+#   define R500_DY_T_SWIZ_G (1 << 26)
+#   define R500_DY_T_SWIZ_B (2 << 26)
+#   define R500_DY_T_SWIZ_A (3 << 26)
+#   define R500_DY_R_SWIZ_R (0 << 28)
+#   define R500_DY_R_SWIZ_G (1 << 28)
+#   define R500_DY_R_SWIZ_B (2 << 28)
+#   define R500_DY_R_SWIZ_A (3 << 28)
+#   define R500_DY_Q_SWIZ_R (0 << 30)
+#   define R500_DY_Q_SWIZ_G (1 << 30)
+#   define R500_DY_Q_SWIZ_B (2 << 30)
+#   define R500_DY_Q_SWIZ_A (3 << 30)
+#define R500_US_TEX_INST_0 0x9000
+#   define R500_TEX_ID(x) ((x) << 16)
+#   define R500_TEX_INST_NOP (0 << 22)
+#   define R500_TEX_INST_LD (1 << 22)
+#   define R500_TEX_INST_TEXKILL (2 << 22)
+#   define R500_TEX_INST_PROJ (3 << 22)
+#   define R500_TEX_INST_LODBIAS (4 << 22)
+#   define R500_TEX_INST_LOD (5 << 22)
+#   define R500_TEX_INST_DXDY (6 << 22)
+#   define R500_TEX_SEM_ACQUIRE (1 << 25)
+#   define R500_TEX_IGNORE_UNCOVERED (1 << 26)
+#   define R500_TEX_UNSCALED (1 << 27)
+#define R500_US_W_FMT 0x46b4
+#   define R500_W_FMT_W0 (0 << 0)
+#   define R500_W_FMT_W24 (1 << 0)
+#   define R500_W_FMT_W24FP (2 << 0)
+#   define R500_W_SRC_US (0 << 2)
+#   define R500_W_SRC_RAS (1 << 2)
+
+
 /* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
  * Two parameter dwords:
  * 0.
VAP_VTX_FMT: The first parameter is not written to hardware
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index ec9bf25bab..1f3779cade 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1603,6 +1603,128 @@ static void r300SetupRSUnit(GLcontext * ctx)
 		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
 }
 
+/**
+ * Program the rasterizer state atoms (ri, rc, rr) for the current fragment
+ * program.
+ *
+ * Matches the inputs the fragment program reads (InputsRead) against the
+ * outputs the vertex stage provides (OutputsWritten), assigns each satisfied
+ * input the next fragment register (fp_reg), and records the interpolator and
+ * route counts in the rc atom. Inputs that cannot be satisfied are reported
+ * through WARN_ONCE. Returns early when there is no current fragment program.
+ *
+ * NOTE(review): the body still uses the R300_RS_* bit layouts and emits a
+ * cmdpacket0(R300_RS_ROUTE_0, ...) header, whereas r300InitCmdBuf sets up the
+ * rr atom with R500_RS_INST_0 on r500 - looks like a straight copy of
+ * r300SetupRSUnit that has not been ported yet; confirm.
+ */
+static void r500SetupRSUnit(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	/* I'm still unsure if these are needed */
+	/* NOTE(review): 8 entries, indexed below by i < MaxTextureUnits -
+	 * assumes MaxTextureUnits <= 8; confirm. */
+	GLuint interp_magic[8] = {
+		0x00,
+		R300_RS_COL_PTR(1),
+		R300_RS_COL_PTR(2),
+		R300_RS_COL_PTR(3),
+		0x00,
+		0x00,
+		0x00,
+		0x00
+	};
+	union r300_outputs_written OutputsWritten;
+	GLuint InputsRead;
+	int fp_reg, high_rr;
+	int in_texcoords, col_interp_nr;
+	int i;
+
+	/* Outputs come from the vertex shader key with hardware TCL, otherwise
+	 * from the software render-inputs bitset. */
+	if (hw_tcl_on)
+		OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
+	else
+		RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset);
+
+	if (ctx->FragmentProgram._Current)
+		InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+	else {
+		fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+		return;	/* This should only ever happen once.. */
+	}
+
+	R300_STATECHANGE(r300, ri);
+	R300_STATECHANGE(r300, rc);
+	R300_STATECHANGE(r300, rr);
+
+	fp_reg = in_texcoords = col_interp_nr = high_rr = 0;
+
+	r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0;
+
+	/* WPOS is passed through the first texcoord slot the program does not
+	 * already read; abort if every slot is taken. */
+	if (InputsRead & FRAG_BIT_WPOS) {
+		for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+			if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
+				break;
+
+		if (i == ctx->Const.MaxTextureUnits) {
+			fprintf(stderr, "\tno free texcoord found...\n");
+			_mesa_exit(-1);
+		}
+
+		InputsRead |= (FRAG_BIT_TEX0 << i);
+		InputsRead &= ~FRAG_BIT_WPOS;
+	}
+
+	/* One interpolator word per texture unit; a route entry is enabled only
+	 * for units the fragment program actually reads. */
+	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT)
+		    | interp_magic[i];
+
+		r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0;
+		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+			//assert(r300->state.texture.tc_count != 0);
+			r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i	/* source INTERP */
+			    | (fp_reg << R300_RS_ROUTE_DEST_SHIFT);
+			/* remember the highest route slot written so far */
+			high_rr = fp_reg;
+
+			/* Passing invalid data here can lock the GPU. */
+			if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+				InputsRead &= ~(FRAG_BIT_TEX0 << i);
+				fp_reg++;
+			} else {
+				WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+			}
+		}
+		/* Need to count all coords enabled at vof */
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+			in_texcoords++;
+		}
+	}
+
+	/* Primary colour goes through route word 0. */
+	if (InputsRead & FRAG_BIT_COL0) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+			r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+			InputsRead &= ~FRAG_BIT_COL0;
+			col_interp_nr++;
+		} else {
+			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+		}
+	}
+
+	/* Secondary colour goes through route word 1, so at least two route
+	 * words must be emitted (high_rr >= 1). */
+	if (InputsRead & FRAG_BIT_COL1) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+			r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT);
+			InputsRead &= ~FRAG_BIT_COL1;
+			if (high_rr < 1)
+				high_rr = 1;
+			col_interp_nr++;
+		} else {
+			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+		}
+	}
+
+	/* Need at least one. This might still lock as the values are undefined... */
+	if (in_texcoords == 0 && col_interp_nr == 0) {
+		r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
+		col_interp_nr++;
+	}
+
+	/* The texcoord count is scaled by 4 (<< 2) - presumably components per
+	 * coordinate; verify against the register description. */
+	r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT)
+	    | (col_interp_nr << R300_IC_COUNT_SHIFT)
+	    | R300_HIRES_EN;
+
+	assert(high_rr >= 0);
+	/* Resize the route packet to cover slots 0..high_rr. */
+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, high_rr + 1);
+	r300->hw.rc.cmd[2] = 0xC0 | high_rr;
+
+	/* Any bit still set is an input nothing above could route. */
+	if (InputsRead)
+		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
+}
+
+
+
+
 #define bump_vpu_count(ptr, new_count)   do{\
 	drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
 	int _nc=(new_count)/4; \
@@ -2163,6 +2285,81 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
 	}
 }
 
+static void r500SetupPixelShader(r300ContextPtr rmesa)
+{
+	GLcontext *ctx = rmesa->radeon.glCtx;
+	struct r300_fragment_program *fp = (struct r300_fragment_program *)
+	    (char *)ctx->FragmentProgram._Current;
+	int i, k;
+
+	if (!fp)		/* should only happen once, just after context is created */
+		return;
+
+	/* emit the standard zero shader */
+	R300_STATECHANGE(rmesa, r500fp);
+	i = 1;
+	rmesa->hw.r500fp.cmd[i++] = 0x7807;
+	rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
+	rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R |
+		R500_TEX_SRC_T_SWIZ_G |
+		R500_TEX_DST_ADDR(0) |
+		R500_TEX_DST_R_SWIZ_R |
+		R500_TEX_DST_G_SWIZ_G |
+		R500_TEX_DST_B_SWIZ_B |
+		R500_TEX_DST_A_SWIZ_A;
+	rmesa->hw.r500fp.cmd[i++] = R500_DX_ADDR(0) |
+		R500_DX_S_SWIZ_R |
+		R500_DX_T_SWIZ_R |
+		R500_DX_R_SWIZ_R |
+		R500_DX_Q_SWIZ_R |
+		R500_DY_ADDR(0) |
+		R500_DY_S_SWIZ_R |
+		R500_DY_T_SWIZ_R |
+		R500_DY_R_SWIZ_R |
+		R500_DY_Q_SWIZ_R;
+	rmesa->hw.r500fp.cmd[i++] = 0x0;
+	rmesa->hw.r500fp.cmd[i++] = 0x0;
+
+	rmesa->hw.r500fp.cmd[i++] = R500_INST_TYPE_OUT |
+		R500_INST_TEX_SEM_WAIT |
+		R500_INST_LAST |
+		R500_INST_RGB_OMASK_R |
+
R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK; + + rmesa->hw.r500fp.cmd[i++] = R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0; + rmesa->hw.r500fp.cmd[i++] = R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0; + rmesa->hw.r500fp.cmd[i++] = R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1; + rmesa->hw.r500fp.cmd[i++] = R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1; + rmesa->hw.r500fp.cmd[i++] = R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + +} + void r300UpdateShaderStates(r300ContextPtr rmesa) { GLcontext *ctx; @@ -2170,12 +2367,19 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r300UpdateTextureState(ctx); - r300SetupPixelShader(rmesa); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupPixelShader(rmesa); + else + r300SetupPixelShader(rmesa); r300SetupTextures(ctx); if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) r300SetupVertexProgram(rmesa); - r300SetupRSUnit(ctx); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupRSUnit(ctx); + else + r300SetupRSUnit(ctx); } /** diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 4dece95a98..c54fa90327 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -168,6 +168,8 @@ enum { CHIP_FAMILY_RV410, CHIP_FAMILY_RS400, CHIP_FAMILY_RS690, + CHIP_FAMILY_RV515, + CHIP_FAMILY_R520, CHIP_FAMILY_LAST }; -- cgit v1.2.3 From 
57028236c8b6ab0a56cba3a504d1d8ff12ab6c0d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 19 Mar 2008 16:29:11 +1000 Subject: more r500 vs r300 kickin --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 20 ++++---- src/mesa/drivers/dri/r300/r300_context.h | 2 + src/mesa/drivers/dri/r300/r300_ioctl.c | 79 ++++++++++++++++++-------------- 3 files changed, 56 insertions(+), 45 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 883b41a349..a92bb87d7d 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -379,7 +379,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); if (is_r500) { - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); @@ -393,23 +393,23 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); ALLOC_STATE(sc_screendoor, always, 2, 0); r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1); - ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); - r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); - r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); - ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); - r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); ALLOC_STATE(us_out_fmt, always, 6, 0); r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); - r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 1); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); } - if (0/*is_r500*/) { + if (is_r500) { + + } else { + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 
0); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); - }/* else*/ - { ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 1b8b9a8222..4f8b7591c2 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -330,6 +330,8 @@ struct r300_state_atom { #define R300_RI_INTERP_7 8 #define R300_RI_CMDSIZE 9 +#define R500_RI_CMDSIZE 17 + #define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */ #define R300_RR_ROUTE_0 1 #define R300_RR_ROUTE_1 2 diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 02c67e8ef9..2ad1bc3bcd 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -186,10 +186,15 @@ static void r300EmitClearState(GLcontext * ctx) int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; int has_tcl = 1; + int is_r500 = 0; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + + /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are * quite complex; see the functions in r300_emit.c. 
@@ -271,49 +276,53 @@ static void r300EmitClearState(GLcontext * ctx) e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); - R300_STATECHANGE(r300, ri); - reg_start(R300_RS_IP_0, 8); - for (i = 0; i < 8; ++i) { - e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); - } + if (!is_r500) { + R300_STATECHANGE(r300, ri); + reg_start(R300_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { + e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. */ - reg_start(R300_RS_COUNT, 1); - e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - e32(0x0); + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); - R300_STATECHANGE(r300, rr); - reg_start(R300_RS_ROUTE_0, 0); - e32(R300_RS_ROUTE_0_COLOR); + R300_STATECHANGE(r300, rr); + reg_start(R300_RS_ROUTE_0, 0); + e32(R300_RS_ROUTE_0_COLOR); + } - R300_STATECHANGE(r300, fp); - reg_start(R300_PFS_CNTL_0, 2); - e32(0x0); - e32(0x0); - e32(0x0); - reg_start(R300_PFS_NODE_0, 3); - e32(0x0); - e32(0x0); - e32(0x0); - e32(R300_PFS_NODE_OUTPUT_COLOR); + if (!is_r500) { + R300_STATECHANGE(r300, fp); + reg_start(R300_PFS_CNTL_0, 2); + e32(0x0); + e32(0x0); + e32(0x0); + reg_start(R300_PFS_NODE_0, 3); + e32(0x0); + e32(0x0); + e32(0x0); + e32(R300_PFS_NODE_OUTPUT_COLOR); - R300_STATECHANGE(r300, fpi[0]); - R300_STATECHANGE(r300, fpi[1]); - R300_STATECHANGE(r300, fpi[2]); - R300_STATECHANGE(r300, fpi[3]); + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); - reg_start(R300_PFS_INSTR0_0, 0); - e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + reg_start(R300_PFS_INSTR0_0, 0); + e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - 
reg_start(R300_PFS_INSTR1_0, 0); - e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + reg_start(R300_PFS_INSTR1_0, 0); + e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - reg_start(R300_PFS_INSTR2_0, 0); - e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + reg_start(R300_PFS_INSTR2_0, 0); + e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - reg_start(R300_PFS_INSTR3_0, 0); - e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + reg_start(R300_PFS_INSTR3_0, 0); + e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + } if (has_tcl) { R300_STATECHANGE(r300, pvs); -- cgit v1.2.3 From 9e6e4ae49a49c60f9b15d9276a44a1ad25865c81 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 13:55:56 +1000 Subject: r500 RS unit setup --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 ---- src/mesa/drivers/dri/r300/r300_reg.h | 22 +++++++++++----------- src/mesa/drivers/dri/r300/r300_state.c | 28 ++++++++++++++++++---------- 3 files changed, 29 insertions(+), 25 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index a92bb87d7d..3cfb7cf2cd 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -399,10 +399,6 @@ void r300InitCmdBuf(r300ContextPtr r300) if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); - } - - if (is_r500) { - } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 5ddb973ca1..d1c33d5227 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -673,12 +673,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R500_RS_IP_13 0x40A8 #define R500_RS_IP_14 0x40AC #define R500_RS_IP_15 0x40B0 -#define R500_RS_IP_TEX_PTR_S_SHIFT 0 -#define R500_RS_IP_TEX_PTR_T_SHIFT 6 -#define R500_RS_IP_TEX_PTR_R_SHIFT 12 -#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 -#define R500_RS_IP_COL_PTR_SHIFT 24 -#define R500_RS_IP_COL_FMT_SHIFT 27 +#define R500_RS_IP_TEX_PTR_S_SHIFT 0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 +#define R500_RS_IP_COL_PTR_SHIFT 24 +#define R500_RS_IP_COL_FMT_SHIFT 27 #define R500_RS_IP_COL_FMT_RGBA (0 << 27) #define R500_RS_IP_COL_FMT_RGB0 (1 << 27) #define R500_RS_IP_COL_FMT_RGB1 (2 << 27) @@ -692,7 +692,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_COL_FMT_1111 (10 << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) -#define R500_RS_IP_OFFSET_EN (1 << 31) +#define R500_RS_IP_OFFSET_EN (1 << 31) /* gap */ @@ -1138,10 +1138,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_COL_FMT_111A 8 # define R300_RS_COL_FMT_1110 9 # define R300_RS_COL_FMT_1111 10 -# define R300_RS_SEL_S(x) (x << 13) -# define R300_RS_SEL_T(x) (x << 16) -# define R300_RS_SEL_R(x) (x << 19) -# define R300_RS_SEL_Q(x) (x << 22) +# define R300_RS_SEL_S(x) (x << 13) +# define R300_RS_SEL_T(x) (x << 16) +# define R300_RS_SEL_R(x) (x << 19) +# define R300_RS_SEL_Q(x) (x << 22) # define R300_RS_SEL_C0 0 # define R300_RS_SEL_C1 1 # define R300_RS_SEL_C2 2 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 1f3779cade..e30404f652 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1609,9 +1609,9 @@ static void r500SetupRSUnit(GLcontext * ctx) /* I'm still unsure if these are needed */ GLuint interp_magic[8] = { 0x00, - R300_RS_COL_PTR(1), - R300_RS_COL_PTR(2), - R300_RS_COL_PTR(3), + 1 << 24, + 2 << 24, + 3 << 24, 0x00, 0x00, 0x00, @@ -1658,14 +1658,20 @@ static void r500SetupRSUnit(GLcontext * ctx) } for 
(i = 0; i < ctx->Const.MaxTextureUnits; i++) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - | interp_magic[i]; + + // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_TEX_PTR_S_SHIFT) | + (1 << R500_TEX_PTR_T_SHIFT) | + (2 << R500_TEX_PTR_R_SHIFT) | + (3 << R500_TEX_PTR_Q_SHIFT) | + (in_texcoords << 0) | interp_magic[i]; r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */ - | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); high_rr = fp_reg; /* Passing invalid data here can lock the GPU. 
*/ @@ -1684,7 +1690,8 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1694,7 +1701,8 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITER300_RS_ROUTE_1_UNKNOWN11 | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; @@ -1706,7 +1714,7 @@ static void r500SetupRSUnit(GLcontext * ctx) /* Need at least one. This might still lock as the values are undefined... 
*/ if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); col_interp_nr++; } -- cgit v1.2.3 From 2b8e422b3173388075b010f10e935c735edcd9a1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:21:10 +1000 Subject: mesa: cleanup state emission and rs for r500 trivial clear app now renders --- src/mesa/drivers/dri/r300/r300_emit.h | 13 +++++ src/mesa/drivers/dri/r300/r300_ioctl.c | 86 ++++++++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_state.c | 10 ++-- 3 files changed, 104 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index a4f6ab997e..50e7e4f149 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -178,6 +178,19 @@ static inline uint32_t cmdpacify(void) cmd[0].i = cmdvpu((dest), _n/4); \ } while (0); +#define r500fp_start_fragment(dest, length) \ + do { \ + int _n; \ + _n = (length); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+1), \ + __FUNCTION__); \ + cmd_reserved = _n+1; \ + cmd_written =1; \ + cmd[0].i = cmdr500fp((dest), _n/6); \ + } while (0); + #define start_packet3(packet, count) \ { \ int _n; \ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 2ad1bc3bcd..63555d5f3a 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -292,6 +292,26 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, rr); reg_start(R300_RS_ROUTE_0, 0); e32(R300_RS_ROUTE_0_COLOR); + } else { + + R300_STATECHANGE(r300, ri); + reg_start(R500_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { + e32((1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + 
(3 << R500_RS_IP_TEX_PTR_Q_SHIFT) ); + } + + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); + + R300_STATECHANGE(r300, rr); + reg_start(R500_RS_INST_0, 0); + e32(R500_RS_INST_COL_CN_WRITE); + } if (!is_r500) { @@ -322,6 +342,72 @@ static void r300EmitClearState(GLcontext * ctx) reg_start(R300_PFS_INSTR3_0, 0); e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + } else { + R300_STATECHANGE(r300, r500fp); + r500fp_start_fragment(0, 12); + + e32(0x7808); + e32(R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED); + e32(R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A); + e32(R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R); + e32(0x0); + e32(0x0); + + e32(R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK); + + e32(R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0); + + e32(R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0); + + e32(R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1); + + e32(R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1); + + e32(R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | 
+ R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0); } if (has_tcl) { diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e30404f652..e7e10ba935 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1661,10 +1661,10 @@ static void r500SetupRSUnit(GLcontext * ctx) // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_TEX_PTR_S_SHIFT) | - (1 << R500_TEX_PTR_T_SHIFT) | - (2 << R500_TEX_PTR_R_SHIFT) | - (3 << R500_TEX_PTR_Q_SHIFT) | + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | (in_texcoords << 0) | interp_magic[i]; r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; @@ -1702,7 +1702,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITER300_RS_ROUTE_1_UNKNOWN11 | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; -- cgit v1.2.3 From d019f101e840e20b0f47799c17336599d829ae54 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:30:59 +1000 Subject: r300: this code really shouldn't be here. For R500 just ignore it for now while I do something interesting like run glxgears. 
--- src/mesa/drivers/dri/r300/r300_state.c | 55 ++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 26 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e7e10ba935..aa833e870f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1436,40 +1436,43 @@ static void r300SetupTextures(GLcontext * ctx) if (!fp) /* should only happenen once, just after context is created */ return; - R300_STATECHANGE(r300, fpt); - for (i = 0; i < fp->tex.length; i++) { - int unit; - int opcode; - unsigned long val; + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + R300_STATECHANGE(r300, fpt); - unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; - unit &= 15; - - val = fp->tex.inst[i]; - val &= ~R300_FPITX_IMAGE_MASK; - - opcode = - (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; - if (opcode == R300_FPITX_OP_KIL) { - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - if (tmu_mappings[unit] >= 0) { - val |= - tmu_mappings[unit] << - R300_FPITX_IMAGE_SHIFT; + for (i = 0; i < fp->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; + unit &= 15; + + val = fp->tex.inst[i]; + val &= ~R300_FPITX_IMAGE_MASK; + + opcode = + (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; + if (opcode == R300_FPITX_OP_KIL) { r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; } else { - // We get here when the corresponding texture image is incomplete - // (e.g. incomplete mipmaps etc.) - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_FPITX_IMAGE_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) 
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } } } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); } - r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); - if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu); -- cgit v1.2.3 From ed18005a9940c26cafa5ed8ccadc46e2fbe63f9e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:34:04 +1000 Subject: r300: add rv530 pci id for the t60p laptop --- src/mesa/drivers/dri/radeon/radeon_chipset.h | 3 +++ src/mesa/drivers/dri/radeon/radeon_screen.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index c54fa90327..3c981bf85e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -146,6 +146,9 @@ #define PCI_CHIP_RV410_5E4C 0x5E4C #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F + +#define PCI_CHIP_RV530_71C4 0x71C4 + #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 #define PCI_CHIP_RS690_791E 0x791E diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 93b239ae9f..42e7b82ebe 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -693,6 +693,11 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) fprintf(stderr, "Warning, RS690 detected, 3D support is incomplete.\n"); break; + case PCI_CHIP_RV530_71C4: + screen->chip_family = CHIP_FAMILY_R520; + fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", dri_priv->deviceID); -- cgit v1.2.3 From 3eb8e5871f3b3e572ae5e281f55fb7282c82c47e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 21 Mar 2008 17:05:29 +1000 
Subject: r500: setup fragment program constant emission atom --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 +++- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_emit.h | 14 ++++++++------ 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3cfb7cf2cd..248de7e34a 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -398,7 +398,9 @@ void r300InitCmdBuf(r300ContextPtr r300) if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); - r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); + ALLOC_STATE(r500fp_const, variable, R300_FPI_CMDSIZE, 0); + r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 4f8b7591c2..982882f5b1 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -490,6 +490,7 @@ struct r300_hw_state { struct r300_state_atom fpt; /* texi - (4620) */ struct r300_state_atom us_out_fmt; /* (46A4) */ struct r300_state_atom r500fp; /* r500 fp instructions */ + struct r300_state_atom r500fp_const; /* r500 fp constants */ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ struct r300_state_atom fogs; /* fog state (4BC0) */ struct r300_state_atom fogc; /* fog color (4BC8) */ diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 50e7e4f149..51302301f7 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -74,14 +74,16 @@ static inline uint32_t cmdvpu(int addr, int count) return cmd.u; } -static inline 
uint32_t cmdr500fp(int addr, int count) +static inline uint32_t cmdr500fp(int addr, int count, int type, int clamp) { drm_r300_cmd_header_t cmd; - cmd.vpu.cmd_type = R300_CMD_R500FP; - cmd.vpu.count = count; - cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; - cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + cmd.r500fp.cmd_type = R300_CMD_R500FP; + cmd.r500fp.count = count; + cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; + cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0; + cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0; + cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF); return cmd.u; } @@ -188,7 +190,7 @@ static inline uint32_t cmdpacify(void) __FUNCTION__); \ cmd_reserved = _n+1; \ cmd_written =1; \ - cmd[0].i = cmdr500fp((dest), _n/6); \ + cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \ } while (0); #define start_packet3(packet, count) \ -- cgit v1.2.3 From 583ed4bde3aa450f049ad893820aece9fb6b1a9a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 10:55:32 +1000 Subject: r500: fixup support for emitting fragment program to hardware. 
Also fixup the constant emission this breaks glxgears from what I can see but its another step to correctness --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 23 +++++++++++++++++++---- src/mesa/drivers/dri/r300/r300_state.c | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 248de7e34a..d965a95c0f 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -242,6 +242,7 @@ void r300EmitState(r300ContextPtr r300) #define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) { @@ -262,6 +263,20 @@ static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom) return cnt ? (cnt * 4) + 1 : 0; } +static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 6) + 1 : 0; +} + +static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? 
(cnt * 4) + 1 : 0; +} + #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ do { \ r300->hw.ATOM.cmd_size = (SZ); \ @@ -397,9 +412,9 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { - ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); + ALLOC_STATE(r500fp, r500fp, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); - ALLOC_STATE(r500fp_const, variable, R300_FPI_CMDSIZE, 0); + ALLOC_STATE(r500fp_const, r500fp_const, R300_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); @@ -416,6 +431,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(FG_FOG_BLEND, 1); @@ -425,8 +442,6 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(FG_ALPHA_FUNC, 2); ALLOC_STATE(fg_depth_src, always, 2, 0); r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1); - ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); - r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); ALLOC_STATE(rb3d_cctl, always, 2, 0); r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1); ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index aa833e870f..906053dd04 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2296,6 +2296,20 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } } +#define bump_r500fp_count(ptr, new_count) do{\ 
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/6; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +#define bump_r500fp_const_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + static void r500SetupPixelShader(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -2368,6 +2382,16 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; + bump_r500fp_count(rmesa->hw.r500fp.cmd, 12); + + R300_STATECHANGE(rmesa, r500fp_const); + for (i = 0; i < fp->const_nr; i++) { + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + } + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); } -- cgit v1.2.3 From 4feac4e40e08d95c4549ef28ad89cc4d3d8f74a7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 10:56:48 +1000 Subject: r500: enable gb tiling for r5xx --- src/mesa/drivers/dri/r300/r300_state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 906053dd04..26cf4605ff 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2046,6 +2046,8 @@ static void r300ResetHwState(r300ContextPtr r300) R300_GB_TILE_PIPE_COUNT_R300; break; case CHIP_FAMILY_R420: + case CHIP_FAMILY_RV515: + case CHIP_FAMILY_R520: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= R300_GB_TILE_PIPE_COUNT_R420; 
break; -- cgit v1.2.3 From 9566cf0e2f326d8fe638748d0bc10626e8c0387e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 11:00:25 +1000 Subject: r500: fixup fake shader to keep gears going --- src/mesa/drivers/dri/r300/r300_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 26cf4605ff..d74d673f36 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2325,7 +2325,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) /* emit the standard zero shader */ R300_STATECHANGE(rmesa, r500fp); i = 1; - rmesa->hw.r500fp.cmd[i++] = 0x7807; + rmesa->hw.r500fp.cmd[i++] = 0x7808; rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | -- cgit v1.2.3 From eff6f1203222a776c5990b5d104b57a7f69b9aab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Apr 2008 20:42:15 +1000 Subject: fixup r500 bits for renaming --- src/mesa/drivers/dri/r300/r300_state.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 6dc76aed87..8f12266a5f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1644,7 +1644,7 @@ static void r500SetupRSUnit(GLcontext * ctx) fp_reg = in_texcoords = col_interp_nr = high_rr = 0; - r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + r300->hw.rr.cmd[R300_RR_INST_1] = 0; if (InputsRead & FRAG_BIT_WPOS) { for (i = 0; i < ctx->Const.MaxTextureUnits; i++) @@ -1670,10 +1670,10 @@ static void r500SetupRSUnit(GLcontext * ctx) (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | (in_texcoords << 0) | interp_magic[i]; - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; + 
r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); high_rr = fp_reg; @@ -1694,7 +1694,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1705,7 +1705,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; @@ -1717,7 +1717,7 @@ static void r500SetupRSUnit(GLcontext * ctx) /* Need at least one. This might still lock as the values are undefined... 
*/ if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); col_interp_nr++; } @@ -1726,7 +1726,7 @@ static void r500SetupRSUnit(GLcontext * ctx) | R300_HIRES_EN; assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); r300->hw.rc.cmd[2] = 0xC0 | high_rr; if (InputsRead) -- cgit v1.2.3 From f25b37c1da2c9f1109b9169b89216c2be4750f98 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 May 2008 16:02:58 -0400 Subject: R5xx: Add R5xx pci ids --- src/mesa/drivers/dri/r300/r300_state.c | 4 + src/mesa/drivers/dri/radeon/radeon_chipset.h | 104 +++++++++++++++++++++++ src/mesa/drivers/dri/radeon/radeon_screen.c | 120 ++++++++++++++++++++++++++- 3 files changed, 227 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 8f12266a5f..7419b15a55 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2048,6 +2048,10 @@ static void r300ResetHwState(r300ContextPtr r300) case CHIP_FAMILY_R420: case CHIP_FAMILY_RV515: case CHIP_FAMILY_R520: + case CHIP_FAMILY_RV530: + case CHIP_FAMILY_R580: + case CHIP_FAMILY_RV560: + case CHIP_FAMILY_RV570: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= R300_GB_TILE_PIPE_COUNT_R420; break; diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index dc1b8a9c8e..2821ecc0c0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -147,12 +147,111 @@ #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F +#define PCI_CHIP_R520_7100 0x7100 +#define 
PCI_CHIP_R520_7101 0x7101 +#define PCI_CHIP_R520_7102 0x7102 +#define PCI_CHIP_R520_7103 0x7103 +#define PCI_CHIP_R520_7104 0x7104 +#define PCI_CHIP_R520_7105 0x7105 +#define PCI_CHIP_R520_7106 0x7106 +#define PCI_CHIP_R520_7108 0x7108 +#define PCI_CHIP_R520_7109 0x7109 +#define PCI_CHIP_R520_710A 0x710A +#define PCI_CHIP_R520_710B 0x710B +#define PCI_CHIP_R520_710C 0x710C +#define PCI_CHIP_R520_710E 0x710E +#define PCI_CHIP_R520_710F 0x710F +#define PCI_CHIP_RV515_7140 0x7140 +#define PCI_CHIP_RV515_7141 0x7141 +#define PCI_CHIP_RV515_7142 0x7142 +#define PCI_CHIP_RV515_7143 0x7143 +#define PCI_CHIP_RV515_7144 0x7144 +#define PCI_CHIP_RV515_7145 0x7145 +#define PCI_CHIP_RV515_7146 0x7146 +#define PCI_CHIP_RV515_7147 0x7147 +#define PCI_CHIP_RV515_7149 0x7149 +#define PCI_CHIP_RV515_714A 0x714A +#define PCI_CHIP_RV515_714B 0x714B +#define PCI_CHIP_RV515_714C 0x714C +#define PCI_CHIP_RV515_714D 0x714D +#define PCI_CHIP_RV515_714E 0x714E +#define PCI_CHIP_RV515_714F 0x714F +#define PCI_CHIP_RV515_7151 0x7151 +#define PCI_CHIP_RV515_7152 0x7152 +#define PCI_CHIP_RV515_7153 0x7153 +#define PCI_CHIP_RV515_715E 0x715E +#define PCI_CHIP_RV515_715F 0x715F +#define PCI_CHIP_RV515_7180 0x7180 +#define PCI_CHIP_RV515_7181 0x7181 +#define PCI_CHIP_RV515_7183 0x7183 +#define PCI_CHIP_RV515_7186 0x7186 +#define PCI_CHIP_RV515_7187 0x7187 +#define PCI_CHIP_RV515_7188 0x7188 +#define PCI_CHIP_RV515_718A 0x718A +#define PCI_CHIP_RV515_718B 0x718B +#define PCI_CHIP_RV515_718C 0x718C +#define PCI_CHIP_RV515_718D 0x718D +#define PCI_CHIP_RV515_718F 0x718F +#define PCI_CHIP_RV515_7193 0x7193 +#define PCI_CHIP_RV515_7196 0x7196 +#define PCI_CHIP_RV515_719B 0x719B +#define PCI_CHIP_RV515_719F 0x719F +#define PCI_CHIP_RV530_71C0 0x71C0 +#define PCI_CHIP_RV530_71C1 0x71C1 +#define PCI_CHIP_RV530_71C2 0x71C2 +#define PCI_CHIP_RV530_71C3 0x71C3 #define PCI_CHIP_RV530_71C4 0x71C4 +#define PCI_CHIP_RV530_71C5 0x71C5 +#define PCI_CHIP_RV530_71C6 0x71C6 +#define PCI_CHIP_RV530_71C7 0x71C7 
+#define PCI_CHIP_RV530_71CD 0x71CD +#define PCI_CHIP_RV530_71CE 0x71CE +#define PCI_CHIP_RV530_71D2 0x71D2 +#define PCI_CHIP_RV530_71D4 0x71D4 +#define PCI_CHIP_RV530_71D5 0x71D5 +#define PCI_CHIP_RV530_71D6 0x71D6 +#define PCI_CHIP_RV530_71DA 0x71DA +#define PCI_CHIP_RV530_71DE 0x71DE +#define PCI_CHIP_RV515_7200 0x7200 +#define PCI_CHIP_RV515_7210 0x7210 +#define PCI_CHIP_RV515_7211 0x7211 +#define PCI_CHIP_R580_7240 0x7240 +#define PCI_CHIP_R580_7243 0x7243 +#define PCI_CHIP_R580_7244 0x7244 +#define PCI_CHIP_R580_7245 0x7245 +#define PCI_CHIP_R580_7246 0x7246 +#define PCI_CHIP_R580_7247 0x7247 +#define PCI_CHIP_R580_7248 0x7248 +#define PCI_CHIP_R580_7249 0x7249 +#define PCI_CHIP_R580_724A 0x724A +#define PCI_CHIP_R580_724B 0x724B +#define PCI_CHIP_R580_724C 0x724C +#define PCI_CHIP_R580_724D 0x724D +#define PCI_CHIP_R580_724E 0x724E +#define PCI_CHIP_R580_724F 0x724F +#define PCI_CHIP_RV570_7280 0x7280 +#define PCI_CHIP_RV560_7281 0x7281 +#define PCI_CHIP_RV560_7283 0x7283 +#define PCI_CHIP_R580_7284 0x7284 +#define PCI_CHIP_RV560_7287 0x7287 +#define PCI_CHIP_RV570_7288 0x7288 +#define PCI_CHIP_RV570_7289 0x7289 +#define PCI_CHIP_RV570_728B 0x728B +#define PCI_CHIP_RV570_728C 0x728C +#define PCI_CHIP_RV560_7290 0x7290 +#define PCI_CHIP_RV560_7291 0x7291 +#define PCI_CHIP_RV560_7293 0x7293 +#define PCI_CHIP_RV560_7297 0x7297 #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 #define PCI_CHIP_RS690_791E 0x791E #define PCI_CHIP_RS690_791F 0x791F +#define PCI_CHIP_RS740_796C 0x796C +#define PCI_CHIP_RS740_796D 0x796D +#define PCI_CHIP_RS740_796E 0x796E +#define PCI_CHIP_RS740_796F 0x796F + enum { CHIP_FAMILY_R100, @@ -172,8 +271,13 @@ enum { CHIP_FAMILY_RV410, CHIP_FAMILY_RS400, CHIP_FAMILY_RS690, + CHIP_FAMILY_RS740, CHIP_FAMILY_RV515, CHIP_FAMILY_R520, + CHIP_FAMILY_RV530, + CHIP_FAMILY_R580, + CHIP_FAMILY_RV560, + CHIP_FAMILY_RV570, CHIP_FAMILY_LAST }; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c 
b/src/mesa/drivers/dri/radeon/radeon_screen.c index 1a1666ccfe..6f9d912442 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -682,15 +682,133 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) break; case PCI_CHIP_RS690_791E: + case PCI_CHIP_RS690_791F: screen->chip_family = CHIP_FAMILY_RS690; fprintf(stderr, "Warning, RS690 detected, 3D support is incomplete.\n"); break; + case PCI_CHIP_RS740_796C: + case PCI_CHIP_RS740_796D: + case PCI_CHIP_RS740_796E: + case PCI_CHIP_RS740_796F: + screen->chip_family = CHIP_FAMILY_RS740; + fprintf(stderr, "Warning, RS740 detected, 3D support is incomplete.\n"); + break; - case PCI_CHIP_RV530_71C4: + case PCI_CHIP_R520_7100: + case PCI_CHIP_R520_7101: + case PCI_CHIP_R520_7102: + case PCI_CHIP_R520_7103: + case PCI_CHIP_R520_7104: + case PCI_CHIP_R520_7105: + case PCI_CHIP_R520_7106: + case PCI_CHIP_R520_7108: + case PCI_CHIP_R520_7109: + case PCI_CHIP_R520_710A: + case PCI_CHIP_R520_710B: + case PCI_CHIP_R520_710C: + case PCI_CHIP_R520_710E: + case PCI_CHIP_R520_710F: screen->chip_family = CHIP_FAMILY_R520; fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; + case PCI_CHIP_RV515_7140: + case PCI_CHIP_RV515_7141: + case PCI_CHIP_RV515_7142: + case PCI_CHIP_RV515_7143: + case PCI_CHIP_RV515_7144: + case PCI_CHIP_RV515_7145: + case PCI_CHIP_RV515_7146: + case PCI_CHIP_RV515_7147: + case PCI_CHIP_RV515_7149: + case PCI_CHIP_RV515_714A: + case PCI_CHIP_RV515_714B: + case PCI_CHIP_RV515_714C: + case PCI_CHIP_RV515_714D: + case PCI_CHIP_RV515_714E: + case PCI_CHIP_RV515_714F: + case PCI_CHIP_RV515_7151: + case PCI_CHIP_RV515_7152: + case PCI_CHIP_RV515_7153: + case PCI_CHIP_RV515_715E: + case PCI_CHIP_RV515_715F: + case PCI_CHIP_RV515_7180: + case PCI_CHIP_RV515_7181: + case PCI_CHIP_RV515_7183: + case PCI_CHIP_RV515_7186: + case PCI_CHIP_RV515_7187: + case PCI_CHIP_RV515_7188: + case PCI_CHIP_RV515_718A: + case PCI_CHIP_RV515_718B: + case 
PCI_CHIP_RV515_718C: + case PCI_CHIP_RV515_718D: + case PCI_CHIP_RV515_718F: + case PCI_CHIP_RV515_7193: + case PCI_CHIP_RV515_7196: + case PCI_CHIP_RV515_719B: + case PCI_CHIP_RV515_719F: + case PCI_CHIP_RV515_7200: + case PCI_CHIP_RV515_7210: + case PCI_CHIP_RV515_7211: + screen->chip_family = CHIP_FAMILY_RV515; + fprintf(stderr, "Warning, RV515 detected, 3D HAHAHAHAHA!!.\n"); + break; + + case PCI_CHIP_RV530_71C0: + case PCI_CHIP_RV530_71C1: + case PCI_CHIP_RV530_71C2: + case PCI_CHIP_RV530_71C3: + case PCI_CHIP_RV530_71C4: + case PCI_CHIP_RV530_71C5: + case PCI_CHIP_RV530_71C6: + case PCI_CHIP_RV530_71C7: + case PCI_CHIP_RV530_71CD: + case PCI_CHIP_RV530_71CE: + case PCI_CHIP_RV530_71D2: + case PCI_CHIP_RV530_71D4: + case PCI_CHIP_RV530_71D5: + case PCI_CHIP_RV530_71D6: + case PCI_CHIP_RV530_71DA: + case PCI_CHIP_RV530_71DE: + screen->chip_family = CHIP_FAMILY_RV530; + fprintf(stderr, "Warning, RV530 detected, 3D HAHAHAHAHA!!.\n"); + break; + + case PCI_CHIP_R580_7240: + case PCI_CHIP_R580_7243: + case PCI_CHIP_R580_7244: + case PCI_CHIP_R580_7245: + case PCI_CHIP_R580_7246: + case PCI_CHIP_R580_7247: + case PCI_CHIP_R580_7248: + case PCI_CHIP_R580_7249: + case PCI_CHIP_R580_724A: + case PCI_CHIP_R580_724B: + case PCI_CHIP_R580_724C: + case PCI_CHIP_R580_724D: + case PCI_CHIP_R580_724E: + case PCI_CHIP_R580_724F: + case PCI_CHIP_R580_7284: + screen->chip_family = CHIP_FAMILY_R580; + fprintf(stderr, "Warning, R580 detected, 3D HAHAHAHAHA!!.\n"); + break; + + case PCI_CHIP_RV570_7280: + case PCI_CHIP_RV560_7281: + case PCI_CHIP_RV560_7283: + case PCI_CHIP_RV560_7287: + case PCI_CHIP_RV570_7288: + case PCI_CHIP_RV570_7289: + case PCI_CHIP_RV570_728B: + case PCI_CHIP_RV570_728C: + case PCI_CHIP_RV560_7290: + case PCI_CHIP_RV560_7291: + case PCI_CHIP_RV560_7293: + case PCI_CHIP_RV560_7297: + screen->chip_family = CHIP_FAMILY_RV560; + fprintf(stderr, "Warning, RV560 detected, 3D HAHAHAHAHA!!.\n"); + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't 
guess.\n", dri_priv->deviceID); -- cgit v1.2.3 From 279ea105d8e91aa922ad946b66ee076e5e7e21c7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 10:51:40 -0400 Subject: R5xx: various updates - fixup VAP_CNTL setup - remove extra instruction in r5xx passthrough shader - add notes about pipe config --- src/mesa/drivers/dri/r300/r300_reg.h | 9 +++- src/mesa/drivers/dri/r300/r300_state.c | 71 ++++++++++++++++------------- src/mesa/drivers/dri/radeon/radeon_screen.c | 9 +++- 3 files changed, 56 insertions(+), 33 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 2822b1d4c3..fee21dae67 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -70,6 +70,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Stolen from r200 code from Christoph Brill (It's a guess!) */ #define R300_VAP_CNTL 0x2080 +# define R300_PVS_NUM_SLOTS_SHIFT 0 +# define R300_PVS_NUM_CNTLRS_SHIFT 4 +# define R300_PVS_NUM_FPUS_SHIFT 8 +# define R300_VF_MAX_VTX_NUM_SHIFT 18 +# define R300_GL_CLIP_SPACE_DEF (0 << 22) +# define R300_DX_CLIP_SPACE_DEF (1 << 22) +# define R500_TCL_STATE_OPTIMIZATION (1 << 23) /* This register is written directly and also starts data section * in many 3d CP_PACKET3's @@ -375,7 +382,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_CLIP_CNTL 0x221C # define R300_221C_NORMAL 0x00000000 # define R300_221C_CLEAR 0x0001C000 -#define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_0 (1 << 0) /* These seem to be per-pixel and per-vertex X and Y clipping planes. The first * plane is per-pixel and the second plane is per-vertex. 
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 7419b15a55..0740d7ea4a 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1981,10 +1981,34 @@ static void r300ResetHwState(r300ContextPtr r300) r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); - if (!has_tcl) - r300->hw.vap_cntl.cmd[1] = 0x0014045a; + /* setup the VAP */ + /* PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted + * dynamically. PVS_NUM_FPUS is fixed based on asic + */ + if (has_tcl) { + r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r300->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION; + } else + r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + r300->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) + r300->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) + r300->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + r300->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT); else - r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ + r300->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT); r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA @@ 
-2035,20 +2059,27 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; - /* XXX: Other families? */ + /* num pipes needs to be read back from the GB_PIPE_SELECT register + * on r4xx/r5xx/rs4xx/rs6xx + * should move this to the drm + */ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16; + R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/; switch (r300->radeon.radeonScreen->chip_family) { case CHIP_FAMILY_R300: case CHIP_FAMILY_R350: - case CHIP_FAMILY_RV410: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= R300_GB_TILE_PIPE_COUNT_R300; break; - case CHIP_FAMILY_R420: + case CHIP_FAMILY_RV350: case CHIP_FAMILY_RV515: - case CHIP_FAMILY_R520: case CHIP_FAMILY_RV530: + case CHIP_FAMILY_RV410: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_RV300; + break; + case CHIP_FAMILY_R420: + case CHIP_FAMILY_R520: case CHIP_FAMILY_R580: case CHIP_FAMILY_RV560: case CHIP_FAMILY_RV570: @@ -2329,28 +2360,6 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) /* emit the standard zero shader */ R300_STATECHANGE(rmesa, r500fp); i = 1; - rmesa->hw.r500fp.cmd[i++] = 0x7808; - rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A; - rmesa->hw.r500fp.cmd[i++] = R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R; - rmesa->hw.r500fp.cmd[i++] = 0x0; - rmesa->hw.r500fp.cmd[i++] = 0x0; - rmesa->hw.r500fp.cmd[i++] = R500_INST_TYPE_OUT | 
R500_INST_TEX_SEM_WAIT | R500_INST_LAST | @@ -2388,7 +2397,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; - bump_r500fp_count(rmesa->hw.r500fp.cmd, 12); + bump_r500fp_count(rmesa->hw.r500fp.cmd, 6); R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < fp->const_nr; i++) { diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 6f9d912442..2f57d289fe 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -649,7 +649,9 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->chip_flags = RADEON_CHIPSET_TCL; break; - /* RV410 SE chips have half the pipes of regular RV410 */ + /* RV410 SE chips have half the pipes of regular RV410 + * Need to get num pipes from the GB_PIPE_SELECT register + */ case PCI_CHIP_RV410_5E4C: case PCI_CHIP_RV410_5E4F: screen->chip_family = CHIP_FAMILY_RV380; @@ -709,6 +711,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R520_710E: case PCI_CHIP_R520_710F: screen->chip_family = CHIP_FAMILY_R520; + //screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -751,6 +754,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV515_7210: case PCI_CHIP_RV515_7211: screen->chip_family = CHIP_FAMILY_RV515; + //screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV515 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -771,6 +775,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV530_71DA: case PCI_CHIP_RV530_71DE: screen->chip_family = CHIP_FAMILY_RV530; + //screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV530 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -790,6 +795,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R580_724F: case PCI_CHIP_R580_7284: screen->chip_family = CHIP_FAMILY_R580; + //screen->chip_flags = RADEON_CHIPSET_TCL;
fprintf(stderr, "Warning, R580 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -806,6 +812,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV560_7293: case PCI_CHIP_RV560_7297: screen->chip_family = CHIP_FAMILY_RV560; + //screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV560 detected, 3D HAHAHAHAHA!!.\n"); break; -- cgit v1.2.3 From 45077fc3232eebf2b657c552afa92b24e4770bb0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 10:54:20 -0400 Subject: Update comment --- src/mesa/drivers/dri/r300/r300_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 0740d7ea4a..57ff9e9a73 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1982,7 +1982,7 @@ static void r300ResetHwState(r300ContextPtr r300) r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); /* setup the VAP */ - /* PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted + /* for tcl, PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted * dynamically. 
PVS_NUM_FPUS is fixed based on asic */ if (has_tcl) { -- cgit v1.2.3 From db1fc51ccc24745e83bd2f635bed97787873341b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 12:29:40 -0400 Subject: R300: clean up VAP_PROG_STREAM_CNTL* register usage --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 +- src/mesa/drivers/dri/r300/r300_emit.c | 28 +++++++---- src/mesa/drivers/dri/r300/r300_ioctl.c | 25 ++++++++-- src/mesa/drivers/dri/r300/r300_reg.h | 88 ++++++++++++++++++++------------- src/mesa/drivers/dri/r300/r300_swtcl.c | 27 +++++----- 5 files changed, 109 insertions(+), 63 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 5d81fcfadf..53cbfb8e64 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -328,10 +328,10 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); r300->hw.vir[0].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1); + cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1); ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); r300->hw.vir[1].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_INPUT_ROUTE_1_0, 1); + cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2); ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index e7371133d3..0eeb8bf98a 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -216,14 +216,18 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, for (i = 0; i < nr; i += 2) { /* make sure input is valid, would lockup the gpu */ assert(inputs[tab[i]] != -1); - dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[i]] << 8) | 
(attribptr[tab[i]]->size - 1); + dw = (R300_SIGNED | + (inputs[tab[i]] << R300_DST_VEC_LOC_SHIFT) | + (attribptr[tab[i]]->size - 1)) << R300_DATA_TYPE_0_SHIFT; if (i + 1 == nr) { - dw |= R300_VAP_INPUT_ROUTE_END; + dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; } else { assert(inputs[tab[i + 1]] != -1); - dw |= (R300_INPUT_ROUTE_FLOAT | (inputs[tab[i + 1]] << 8) | (attribptr[tab[i + 1]]->size - 1)) << 16; + dw |= (R300_SIGNED | + (inputs[tab[i + 1]] << R300_DST_VEC_LOC_SHIFT) | + (attribptr[tab[i + 1]]->size - 1)) << R300_DATA_TYPE_1_SHIFT; if (i + 2 == nr) { - dw |= (R300_VAP_INPUT_ROUTE_END << 16); + dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; } } dst[i >> 1] = dw; @@ -234,10 +238,10 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) { - return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) | - (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) | - (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) | - (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT); + return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT); } GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) @@ -245,9 +249,13 @@ GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) GLuint i, dw; for (i = 0; i < nr; i += 2) { - dw = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE; + dw = (r300VAPInputRoute1Swizzle(swizzle[i]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT; if (i + 1 < nr) { - dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16; + dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; } dst[i >> 1] = dw; } diff --git 
a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 9f6f2307f5..cfd04f1e26 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -204,11 +204,15 @@ static void r300EmitClearState(GLcontext * ctx) * these registers, as well as the actual values used for rendering. */ R300_STATECHANGE(r300, vir[0]); - reg_start(R300_VAP_INPUT_ROUTE_0_0, 0); + reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); if (!has_tcl) - e32(0x22030003); + /*e32(0x22030003);*/ + e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); else - e32(0x21030003); + /*e32(0x21030003);*/ + e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); /* disable fog */ R300_STATECHANGE(r300, fogs); @@ -216,8 +220,19 @@ static void r300EmitClearState(GLcontext * ctx) e32(0x0); R300_STATECHANGE(r300, vir[1]); - reg_start(R300_VAP_INPUT_ROUTE_1_0, 0); - e32(0xF688F688); + reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); + e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE0_SHIFT) | + (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE1_SHIFT))); /* 
R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ R300_STATECHANGE(r300, vic); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index fee21dae67..f65aac3ca4 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -218,27 +218,31 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Always set COMPONENTS_4 in immediate mode. */ -#define R300_VAP_INPUT_ROUTE_0_0 0x2150 -# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */ -# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8 -# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */ -# define R300_VAP_INPUT_ROUTE_END (1 << 13) -# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */ -#define R300_VAP_INPUT_ROUTE_0_1 0x2154 -#define R300_VAP_INPUT_ROUTE_0_2 0x2158 -#define R300_VAP_INPUT_ROUTE_0_3 0x215C -#define R300_VAP_INPUT_ROUTE_0_4 0x2160 -#define R300_VAP_INPUT_ROUTE_0_5 0x2164 -#define R300_VAP_INPUT_ROUTE_0_6 0x2168 -#define R300_VAP_INPUT_ROUTE_0_7 0x216C - +#define R300_VAP_PROG_STREAM_CNTL_0 0x2150 +# define R300_DATA_TYPE_0_SHIFT 0 +# define R300_DATA_TYPE_FLOAT_1 0 +# define R300_DATA_TYPE_FLOAT_2 1 +# define R300_DATA_TYPE_FLOAT_3 2 +# define R300_DATA_TYPE_FLOAT_4 3 +# define R300_DATA_TYPE_BYTE 4 +# define R300_DATA_TYPE_D3DCOLOR 5 +# define R300_DATA_TYPE_SHORT_2 6 +# define R300_DATA_TYPE_SHORT_4 7 +# define R300_DATA_TYPE_VECTOR_3_TTT 8 +# define R300_DATA_TYPE_VECTOR_3_EET 9 +# define R300_SKIP_DWORDS_SHIFT 4 +# define R300_DST_VEC_LOC_SHIFT 8 +# define R300_LAST_VEC (1 << 13) +# define R300_SIGNED (1 << 14) +# define R300_NORMALIZE (1 << 15) +# define 
R300_DATA_TYPE_1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_1 0x2154 +#define R300_VAP_PROG_STREAM_CNTL_2 0x2158 +#define R300_VAP_PROG_STREAM_CNTL_3 0x215C +#define R300_VAP_PROG_STREAM_CNTL_4 0x2160 +#define R300_VAP_PROG_STREAM_CNTL_5 0x2164 +#define R300_VAP_PROG_STREAM_CNTL_6 0x2168 +#define R300_VAP_PROG_STREAM_CNTL_7 0x216C /* gap */ /* Notes: @@ -276,26 +280,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * mode, the swizzling pattern is e.g. used to set zw components in texture * coordinates with only tweo components. */ -#define R300_VAP_INPUT_ROUTE_1_0 0x21E0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 +# define R300_SWIZZLE0_SHIFT 0 +# define R300_SWIZZLE_SELECT_X_SHIFT 0 +# define R300_SWIZZLE_SELECT_Y_SHIFT 3 +# define R300_SWIZZLE_SELECT_Z_SHIFT 6 +# define R300_SWIZZLE_SELECT_W_SHIFT 9 + +# define R300_SWIZZLE_SELECT_X 0 +# define R300_SWIZZLE_SELECT_Y 1 +# define R300_SWIZZLE_SELECT_Z 2 +# define R300_SWIZZLE_SELECT_W 3 +# define R300_SWIZZLE_SELECT_FP_ZERO 4 +# define R300_SWIZZLE_SELECT_FP_ONE 5 +/* alternate forms for r300_emit.c */ # define R300_INPUT_ROUTE_SELECT_X 0 # define R300_INPUT_ROUTE_SELECT_Y 1 # define R300_INPUT_ROUTE_SELECT_Z 2 # define R300_INPUT_ROUTE_SELECT_W 3 # define R300_INPUT_ROUTE_SELECT_ZERO 4 # define R300_INPUT_ROUTE_SELECT_ONE 5 -# define R300_INPUT_ROUTE_SELECT_MASK 7 -# define R300_INPUT_ROUTE_X_SHIFT 0 -# define R300_INPUT_ROUTE_Y_SHIFT 3 -# define R300_INPUT_ROUTE_Z_SHIFT 6 -# define R300_INPUT_ROUTE_W_SHIFT 9 -# define R300_INPUT_ROUTE_ENABLE (15 << 12) -#define R300_VAP_INPUT_ROUTE_1_1 0x21E4 -#define R300_VAP_INPUT_ROUTE_1_2 0x21E8 -#define R300_VAP_INPUT_ROUTE_1_3 0x21EC -#define R300_VAP_INPUT_ROUTE_1_4 0x21F0 -#define R300_VAP_INPUT_ROUTE_1_5 0x21F4 -#define R300_VAP_INPUT_ROUTE_1_6 0x21F8 -#define R300_VAP_INPUT_ROUTE_1_7 0x21FC + +# define R300_WRITE_ENA_SHIFT 12 +# define R300_WRITE_ENA_X 1 +# define R300_WRITE_ENA_Y 2 +# define R300_WRITE_ENA_Z 4 +# define R300_WRITE_ENA_W 8 +# define 
R300_SWIZZLE1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec +#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc /* END: Vertex data assembly */ diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index a732bdb559..699499d7cc 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -85,21 +85,26 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, GLuint i, dw; /* type, inputs, stop bit, size */ - for (i = 0; i + 1 < nr; i += 2) { - dw = (inputs[tab[i]] << 8) | 0x3; - dw |= ((inputs[tab[i + 1]] << 8) | 0x3) << 16; - if (i + 2 == nr) { - dw |= (R300_VAP_INPUT_ROUTE_END << 16); + for (i = 0; i < nr; i += 2) { + /* make sure input is valid, would lockup the gpu */ + assert(inputs[tab[i]] != -1); + dw = (R300_SIGNED | + (inputs[tab[i]] << R300_DST_VEC_LOC_SHIFT) | + (attribptr[tab[i]]->size - 1)) << R300_DATA_TYPE_0_SHIFT; + if (i + 1 == nr) { + dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; + } else { + assert(inputs[tab[i + 1]] != -1); + dw |= (R300_SIGNED | + (inputs[tab[i + 1]] << R300_DST_VEC_LOC_SHIFT) | + (attribptr[tab[i + 1]]->size - 1)) << R300_DATA_TYPE_1_SHIFT; + if (i + 2 == nr) { + dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; + } } dst[i >> 1] = dw; } - if (nr & 1) { - dw = (inputs[tab[nr - 1]] << 8) | 0x3; - dw |= R300_VAP_INPUT_ROUTE_END; - dst[nr >> 1] = dw; - } - return (nr + 1) >> 1; } -- cgit v1.2.3 From 734ef96d5f7dae620115f328296d7e560e624042 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 12:48:39 -0400 Subject: R300: cleanup VAP_CLIP_CNTL --- src/mesa/drivers/dri/r300/r300_ioctl.c | 2 +- src/mesa/drivers/dri/r300/r300_reg.h | 18 +++++++++++++++--- 
src/mesa/drivers/dri/r300/r300_state.c | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index cfd04f1e26..279cbb4eb0 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -283,7 +283,7 @@ static void r300EmitClearState(GLcontext * ctx) if (has_tcl) { R300_STATECHANGE(r300, vap_clip_cntl); reg_start(R300_VAP_CLIP_CNTL, 0); - e32(R300_221C_CLEAR); + e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); } R300_STATECHANGE(r300, ps); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f65aac3ca4..7b71eeab93 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -398,9 +398,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ #define R300_VAP_CLIP_CNTL 0x221C -# define R300_221C_NORMAL 0x00000000 -# define R300_221C_CLEAR 0x0001C000 -# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_1 (1 << 1) +# define R300_VAP_UCP_ENABLE_2 (1 << 2) +# define R300_VAP_UCP_ENABLE_3 (1 << 3) +# define R300_VAP_UCP_ENABLE_4 (1 << 4) +# define R300_VAP_UCP_ENABLE_5 (1 << 5) +# define R300_PS_UCP_MODE_DIST_COP (0 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14) +# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14) +# define R300_CLIP_DISABLE (1 << 16) +# define R300_UCP_CULL_ONLY_ENABLE (1 << 17) +# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18) +# define R500_COLOR2_IS_TEXTURE (1 << 20) +# define R500_COLOR3_IS_TEXTURE (1 << 21) /* These seem to be per-pixel and per-vertex X and Y clipping planes. The first * plane is per-pixel and the second plane is per-vertex. 
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 57ff9e9a73..27615fd568 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2035,7 +2035,7 @@ static void r300ResetHwState(r300ContextPtr r300) /* XXX: Other families? */ if (has_tcl) { - r300->hw.vap_clip_cntl.cmd[1] = R300_221C_NORMAL; + r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP; r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */ r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */ -- cgit v1.2.3 From 25d9f2dd247f9d9cc4bb18032d10542fd67ccea2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 13:03:48 -0400 Subject: R300: fix VAP_OUTPUT_VTX_FMT_1 defines --- src/mesa/drivers/dri/r300/r300_reg.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 7b71eeab93..81b5c3faf3 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -132,11 +132,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 -# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT (1<<0) -# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT (1<<1) -# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS (1<<2) -# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS (1<<3) -# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS (1<<4) +# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1 +# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 +# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 #define R300_SE_VTE_CNTL 0x20b0 # define R300_VPORT_X_SCALE_ENA 0x00000001 -- cgit v1.2.3 From 1226aba119c46c09ec6620dc0be29b63fb3440a7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 13:56:56 -0400 Subject: R3/4/5: fix TCL on r5xx, cleanup PVS code --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 30 ++++++----- src/mesa/drivers/dri/r300/r300_ioctl.c | 4 +- src/mesa/drivers/dri/r300/r300_reg.h | 77 ++++++++++++++++------------- src/mesa/drivers/dri/r300/r300_state.c | 4 +- src/mesa/drivers/dri/radeon/radeon_screen.c | 10 ++-- 5 files changed, 72 insertions(+), 53 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 53cbfb8e64..3f9d9da399 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -333,7 +333,7 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vir[1].cmd[R300_VIR_CMD_0] = cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); - r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2); + r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2); ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); r300->hw.vap_psc_sgn_norm_cntl.cmd[0] 
= cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); @@ -481,27 +481,35 @@ void r300InitCmdBuf(r300ContextPtr r300) int i; ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[R300_VPI_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_PROGRAM, 0); + cmdvpu(R300_PVS_CODE_START, 0); - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[R300_VPP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_PARAMETERS, 0); + if (is_r500) { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R500_PVS_CONST_START, 0); - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[R300_VPS_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1); + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1); - if (is_r500) { for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R500_PVS_UPLOAD_CLIP_PLANE0+i, 1); + cmdvpu(R500_PVS_UCP_START + i, 1); } } else { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R300_PVS_CONST_START, 0); + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1); + for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + cmdvpu(R300_PVS_UCP_START + i, 1); } } } diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 279cbb4eb0..04dbb957c2 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -236,8 +236,8 @@ static void r300EmitClearState(GLcontext * ctx) /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ R300_STATECHANGE(r300, vic); - reg_start(R300_VAP_INPUT_CNTL_0, 1); - e32(R300_INPUT_CNTL_0_COLOR); + reg_start(R300_VAP_VTX_STATE_CNTL, 1); + e32((R300_SEL_USER_COLOR_0 << 
R300_COLOR_0_ASSEMBLY_SHIFT)); e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); R300_STATECHANGE(r300, vte); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 81b5c3faf3..a6719d6553 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -139,17 +139,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 #define R300_SE_VTE_CNTL 0x20b0 -# define R300_VPORT_X_SCALE_ENA 0x00000001 -# define R300_VPORT_X_OFFSET_ENA 0x00000002 -# define R300_VPORT_Y_SCALE_ENA 0x00000004 -# define R300_VPORT_Y_OFFSET_ENA 0x00000008 -# define R300_VPORT_Z_SCALE_ENA 0x00000010 -# define R300_VPORT_Z_OFFSET_ENA 0x00000020 -# define R300_VTX_XY_FMT 0x00000100 -# define R300_VTX_Z_FMT 0x00000200 -# define R300_VTX_W0_FMT 0x00000400 -# define R300_VTX_W0_NORMALIZE 0x00000800 -# define R300_VTX_ST_DENORMALIZED 0x00001000 +# define R300_VPORT_X_SCALE_ENA (1 << 0) +# define R300_VPORT_X_OFFSET_ENA (1 << 1) +# define R300_VPORT_Y_SCALE_ENA (1 << 2) +# define R300_VPORT_Y_OFFSET_ENA (1 << 3) +# define R300_VPORT_Z_SCALE_ENA (1 << 4) +# define R300_VPORT_Z_OFFSET_ENA (1 << 5) +# define R300_VTX_XY_FMT (1 << 8) +# define R300_VTX_Z_FMT (1 << 9) +# define R300_VTX_W0_FMT (1 << 10) +# define R300_SERIAL_PROC_ENA (1 << 11) /* BEGIN: Vertex data assembly - lots of uncertainties */ @@ -250,9 +249,26 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * if vertex program uses only position, fglrx will set normal, too * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. 
*/ -#define R300_VAP_INPUT_CNTL_0 0x2180 -# define R300_INPUT_CNTL_0_COLOR 0x00000001 -#define R300_VAP_INPUT_CNTL_1 0x2184 +#define R300_VAP_VTX_STATE_CNTL 0x2180 +# define R300_COLOR_0_ASSEMBLY_SHIFT 0 +# define R300_SEL_COLOR 0 +# define R300_SEL_USER_COLOR_0 1 +# define R300_SEL_USER_COLOR_1 2 +# define R300_COLOR_1_ASSEMBLY_SHIFT 2 +# define R300_COLOR_2_ASSEMBLY_SHIFT 4 +# define R300_COLOR_3_ASSEMBLY_SHIFT 6 +# define R300_COLOR_4_ASSEMBLY_SHIFT 8 +# define R300_COLOR_5_ASSEMBLY_SHIFT 10 +# define R300_COLOR_6_ASSEMBLY_SHIFT 12 +# define R300_COLOR_7_ASSEMBLY_SHIFT 14 +# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16) + +/* + * Each bit in this field applies to the corresponding vector in the VSM + * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit + * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. + */ +#define R300_VAP_VSM_VTX_ASSM 0x2184 # define R300_INPUT_CNTL_POS 0x00000001 # define R300_INPUT_CNTL_NORMAL 0x00000002 # define R300_INPUT_CNTL_COLOR 0x00000004 @@ -345,25 +361,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Multiple vertex programs and parameter sets can be loaded at once, * which could explain the size discrepancy. 
*/ -#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 -# define R300_PVS_UPLOAD_PROGRAM 0x00000000 -/* gap */ -# define R300_PVS_UPLOAD_PARAMETERS 0x00000200 -/* gap */ -# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400 -# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401 -# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402 -# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403 -# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404 -# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405 -# define R300_PVS_UPLOAD_POINTSIZE 0x00000406 - -# define R500_PVS_UPLOAD_CLIP_PLANE0 0x00000600 -# define R500_PVS_UPLOAD_CLIP_PLANE1 0x00000601 -# define R500_PVS_UPLOAD_CLIP_PLANE2 0x00000602 -# define R500_PVS_UPLOAD_CLIP_PLANE3 0x00000603 -# define R500_PVS_UPLOAD_CLIP_PLANE4 0x00000604 -# define R500_PVS_UPLOAD_CLIP_PLANE5 0x00000605 +#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 +# define R300_PVS_CODE_START 0 +# define R300_MAX_PVS_CODE_LINES 256 +# define R500_MAX_PVS_CODE_LINES 1024 +# define R300_PVS_CONST_START 512 +# define R500_PVS_CONST_START 1024 +# define R300_MAX_PVS_CONST_VECS 256 +# define R500_MAX_PVS_CONST_VECS 1024 +# define R300_PVS_UCP_START 1024 +# define R500_PVS_UCP_START 1536 +# define R300_POINT_VPORT_SCALE_OFFSET 1030 +# define R500_POINT_VPORT_SCALE_OFFSET 1542 +# define R300_POINT_GEN_TEX_OFFSET 1031 +# define R500_POINT_GEN_TEX_OFFSET 1543 /* * These are obsolete defines form r300_context.h, but they might give some diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 27615fd568..89a0827b2f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1802,7 +1802,7 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) prog->program.length = program_end; - r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); inst_count = (prog->program.length / 4) - 1; @@ -1837,7 +1837,7 @@ static void 
r300SetupRealVertexProgram(r300ContextPtr rmesa) bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, &(prog->program)); + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); inst_count = (prog->program.length / 4) - 1; R300_STATECHANGE(rmesa, pvs); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 2f57d289fe..0f716a0b70 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -711,7 +711,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R520_710E: case PCI_CHIP_R520_710F: screen->chip_family = CHIP_FAMILY_R520; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -754,7 +754,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV515_7210: case PCI_CHIP_RV515_7211: screen->chip_family = CHIP_FAMILY_RV515; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV515 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -775,7 +775,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV530_71DA: case PCI_CHIP_RV530_71DE: screen->chip_family = CHIP_FAMILY_RV530; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV530 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -795,7 +795,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R580_724F: case PCI_CHIP_R580_7284: screen->chip_family = CHIP_FAMILY_R580; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, R580 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -812,7 +812,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV560_7293: case PCI_CHIP_RV560_7297: 
screen->chip_family = CHIP_FAMILY_RV560; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV560 detected, 3D HAHAHAHAHA!!.\n"); break; -- cgit v1.2.3 From cd66f0e2d9e79b03b4773ccacf758fd3d141ccab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 17 Mar 2008 19:05:15 +1000 Subject: r500: fragprog --- src/mesa/drivers/dri/r300/Makefile | 1 + src/mesa/drivers/dri/r300/r500_fragprog.c | 2476 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r500_fragprog.h | 104 ++ 3 files changed, 2581 insertions(+) create mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.h (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 44248964fd..5b2bd0bc2b 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -39,6 +39,7 @@ DRIVER_SOURCES = \ r300_texstate.c \ r300_vertprog.c \ r300_fragprog.c \ + r500_fragprog.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c new file mode 100644 index 0000000000..3638a94380 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -0,0 +1,2476 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs + * + * \author Jerome Glisse + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + * \todo Verify results of opcodes for accuracy, I've only checked them in + * specific cases. + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r300_context.h" +#include "r300_fragprog.h" +#include "r300_reg.h" +#include "r300_state.h" + +/* + * Usefull macros and values + */ +#define ERROR(fmt, args...) 
do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + fp->error = GL_TRUE; \ + } while(0) + +#define PFS_INVAL 0xFFFFFFFF +#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs + +#define SWIZZLE_XYZ 0 +#define SWIZZLE_XXX 1 +#define SWIZZLE_YYY 2 +#define SWIZZLE_ZZZ 3 +#define SWIZZLE_WWW 4 +#define SWIZZLE_YZX 5 +#define SWIZZLE_ZXY 6 +#define SWIZZLE_WZY 7 +#define SWIZZLE_111 8 +#define SWIZZLE_000 9 +#define SWIZZLE_HHH 10 + +#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \ + ((SWIZZLE_##x<<0)| \ + (SWIZZLE_##y<<3)| \ + (SWIZZLE_##z<<6)| \ + (SWIZZLE_##w<<9)), \ + 0) + +#define REG_TYPE_INPUT 0 +#define REG_TYPE_OUTPUT 1 +#define REG_TYPE_TEMP 2 +#define REG_TYPE_CONST 3 + +#define REG_TYPE_SHIFT 0 +#define REG_INDEX_SHIFT 2 +#define REG_VSWZ_SHIFT 8 +#define REG_SSWZ_SHIFT 13 +#define REG_NEGV_SHIFT 18 +#define REG_NEGS_SHIFT 19 +#define REG_ABS_SHIFT 20 +#define REG_NO_USE_SHIFT 21 // Hack for refcounting +#define REG_VALID_SHIFT 22 // Does the register contain a defined value? +#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? 
+ +#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) +#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) +#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) +#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) +#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) +#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) +#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) +#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) +#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) +#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) + +#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ + (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_GET_TYPE(reg) \ + ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) +#define REG_GET_INDEX(reg) \ + ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) +#define REG_GET_VSWZ(reg) \ + ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) +#define REG_GET_SSWZ(reg) \ + ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) +#define REG_GET_NO_USE(reg) \ + ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) +#define REG_GET_VALID(reg) \ + ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) +#define REG_GET_BUILTIN(reg) \ + ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) +#define REG_SET_TYPE(reg, type) \ + reg = ((reg & ~REG_TYPE_MASK) | \ + ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) +#define REG_SET_INDEX(reg, index) \ + reg = ((reg & ~REG_INDEX_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) +#define REG_SET_VSWZ(reg, vswz) \ + reg = ((reg & ~REG_VSWZ_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) +#define REG_SET_SSWZ(reg, sswz) \ + reg = ((reg & ~REG_SSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_SET_NO_USE(reg, nouse) \ + reg = ((reg & ~REG_NO_USE_MASK) | 
\ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) +#define REG_SET_VALID(reg, valid) \ + reg = ((reg & ~REG_VALID_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) +#define REG_SET_BUILTIN(reg, builtin) \ + reg = ((reg & ~REG_BUILTIN_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) +#define REG_ABS(reg) \ + reg = (reg | REG_ABS_MASK) +#define REG_NEGV(reg) \ + reg = (reg | REG_NEGV_MASK) +#define REG_NEGS(reg) \ + reg = (reg | REG_NEGS_MASK) + +/* + * Datas structures for fragment program generation + */ + +/* description of r300 native hw instructions */ +static const struct { + const char *name; + int argc; + int v_op; + int s_op; +} r300_fpop[] = { + /* *INDENT-OFF* */ + {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD}, + {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4}, + {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4}, + {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN}, + {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX}, + {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP}, + {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC}, + {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2}, + {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2}, + {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP}, + {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ}, + {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL}, + {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL}, + /* *INDENT-ON* */ +}; + +/* vector swizzles r300 can support natively, with a couple of + * cases we handle specially + * + * REG_VSWZ/REG_SSWZ is an index into this table + */ + +/* mapping from SWIZZLE_* to r300 native values for scalar insns */ +#define SWIZZLE_HALF 6 + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ + SWIZZLE_##y, \ + SWIZZLE_##z, \ + SWIZZLE_ZERO)) +/* native swizzles */ +static const struct r300_pfs_swizzle { + GLuint hash; /* swizzle value this matches */ + GLuint base; /* base value for hw swizzle */ + GLuint stride; /* 
difference in base between arg0/1/2 */ + GLuint flags; +} v_swiz[] = { + /* *INDENT-OFF* */ + {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, + {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, + {PFS_INVAL, 0, 0, 0}, + /* *INDENT-ON* */ +}; + +/* used during matching of non-native swizzles */ +#define SWZ_X_MASK (7 << 0) +#define SWZ_Y_MASK (7 << 3) +#define SWZ_Z_MASK (7 << 6) +#define SWZ_W_MASK (7 << 9) +static const struct { + GLuint hash; /* used to mask matching swizzle components */ + int mask; /* actual outmask */ + int count; /* count of components matched */ +} s_mask[] = { + /* *INDENT-OFF* */ + {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3}, + {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2}, + {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2}, + {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2}, + {SWZ_X_MASK, 1, 1}, + {SWZ_Y_MASK, 2, 1}, + {SWZ_Z_MASK, 4, 1}, + {PFS_INVAL, PFS_INVAL, PFS_INVAL} + /* *INDENT-ON* */ +}; + +static const struct { + int base; /* hw value of swizzle */ + int stride; /* difference between SRC0/1/2 */ + GLuint flags; +} s_swiz[] = { + /* *INDENT-OFF* */ + {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, + {R300_FPI2_ARGA_ZERO, 0, 0}, + {R300_FPI2_ARGA_ONE, 0, 0}, + {R300_FPI2_ARGA_HALF, 0, 0} + /* 
*INDENT-ON* */ +}; + +/* boiler-plate reg, for convenience */ +static const GLuint undef = REG(REG_TYPE_TEMP, + 0, + SWIZZLE_XYZ, + SWIZZLE_W, + GL_FALSE, + GL_FALSE, + GL_FALSE); + +/* constant one source */ +static const GLuint pfs_one = REG(REG_TYPE_CONST, + 0, + SWIZZLE_111, + SWIZZLE_ONE, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant half source */ +static const GLuint pfs_half = REG(REG_TYPE_CONST, + 0, + SWIZZLE_HHH, + SWIZZLE_HALF, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant zero source */ +static const GLuint pfs_zero = REG(REG_TYPE_CONST, + 0, + SWIZZLE_000, + SWIZZLE_ZERO, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* + * Common functions prototypes + */ +static void dump_program(struct r300_fragment_program *fp); +static void emit_arith(struct r300_fragment_program *fp, int op, + GLuint dest, int mask, + GLuint src0, GLuint src1, GLuint src2, int flags); + +/** + * Get an R300 temporary that can be written to in the given slot. + */ +static int get_hw_temp(struct r300_fragment_program *fp, int slot) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) { + ERROR("Out of hardware temps\n"); + return 0; + } + // Reserved is used to avoid the following scenario: + // R300 temporary X is first assigned to Mesa temporary Y during vector ops + // R300 temporary X is then assigned to Mesa temporary Z for further vector ops + // Then scalar ops on Mesa temporary Z are emitted and move back in time + // to overwrite the value of temporary Y. + // End scenario. + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. 
+ cs->hwtemps[r].vector_valid = 0; + cs->hwtemps[r].scalar_valid = 0; + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + +/** + * Get an R300 temporary that will act as a TEX destination register. + */ +static int get_hw_temp_tex(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->used_in_node & (1 << r)) + continue; + + // Note: Be very careful here + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) + return get_hw_temp(fp, 0); /* Will cause an indirection */ + + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = cs->nrslots; + cs->hwtemps[r].scalar_valid = cs->nrslots; + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + +/** + * Mark the given hardware register as free. + */ +static void free_hw_temp(struct r300_fragment_program *fp, int idx) +{ + COMPILE_STATE; + + // Be very careful here. Consider sequences like + // MAD r0, r1,r2,r3 + // TEX r4, ... + // The TEX instruction may be moved in front of the MAD instruction + // due to the way nodes work. We don't want to alias r1 and r4 in + // this case. + // I'm certain the register allocation could be further sanitized, + // but it's tricky because of stuff that can happen inside emit_tex + // and emit_arith. + cs->hwtemps[idx].free = cs->nrslots + 1; +} + +/** + * Create a new Mesa temporary register. 
+ */ +static GLuint get_temp_reg(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = -1; + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Create a new Mesa temporary register that will act as the destination + * register for a texture read. + */ +static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = get_hw_temp_tex(fp); + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Free a Mesa temporary and the associated R300 temporary. + */ +static void free_temp(struct r300_fragment_program *fp, GLuint r) +{ + COMPILE_STATE; + GLuint index = REG_GET_INDEX(r); + + if (!(cs->temp_in_use & (1 << index))) + return; + + if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { + free_hw_temp(fp, cs->temps[index].reg); + cs->temps[index].reg = -1; + cs->temp_in_use &= ~(1 << index); + } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { + free_hw_temp(fp, cs->inputs[index].reg); + cs->inputs[index].reg = -1; + } +} + +/** + * Emit a hardware constant/parameter. + * + * \p cp Stable pointer to an array of 4 floats. + * The pointer must be stable in the sense that it remains to be valid + * and hold the contents of the constant/parameter throughout the lifetime + * of the fragment program (actually, up until the next time the fragment + * program is translated). 
 *
 * Constants are identified by pointer: if \p cp is already present in
 * fp->constant[] its existing slot is reused, otherwise it is appended.
 *
 * \return a valid REG_TYPE_CONST descriptor, or \c undef when all
 * PFS_NUM_CONST_REGS slots are in use.
 */
static GLuint emit_const4fv(struct r300_fragment_program *fp,
			    const GLfloat * cp)
{
	GLuint reg = undef;
	int index;

	/* Look for a previous registration of the very same pointer. */
	for (index = 0; index < fp->const_nr; ++index) {
		if (fp->constant[index] == cp)
			break;
	}

	/* Not found: index == const_nr, i.e. the next unused slot. */
	if (index >= fp->const_nr) {
		if (index >= PFS_NUM_CONST_REGS) {
			ERROR("Out of hw constants!\n");
			return reg;
		}

		fp->const_nr++;
		fp->constant[index] = cp;
	}

	REG_SET_TYPE(reg, REG_TYPE_CONST);
	REG_SET_INDEX(reg, index);
	REG_SET_VALID(reg, GL_TRUE);
	return reg;
}

/* Return \p r with negation applied to both its scalar (REG_NEGS) and
 * vector (REG_NEGV) parts.
 */
static inline GLuint negate(GLuint r)
{
	REG_NEGS(r);
	REG_NEGV(r);
	return r;
}

/* Hack, to prevent clobbering sources used multiple times when
 * emulating non-native instructions
 *
 * Sets the NO_USE flag, which makes t_hw_src()/t_hw_dst() skip the
 * refcount decrement for this use of the register.
 */
static inline GLuint keep(GLuint r)
{
	REG_SET_NO_USE(r, GL_TRUE);
	return r;
}

/* Return \p r with the absolute-value modifier (REG_ABS) applied. */
static inline GLuint absolute(GLuint r)
{
	REG_ABS(r);
	return r;
}

/**
 * Apply an ARB negate mask to a source whose vector swizzle is native.
 *
 * Bit 3 of \p arbneg becomes the scalar negate flag.  For the xyz bits:
 * none set or all set only adjusts the vector negate flag and returns the
 * source itself in \p *r; a mixed mask needs two MADs into a temp - the
 * first writes the negated components, the second the remaining ones
 * together with W.
 *
 * \return 3, the number of vector components that are now resolved.
 */
static int swz_native(struct r300_fragment_program *fp,
		      GLuint src, GLuint * r, GLuint arbneg)
{
	/* Native swizzle, handle negation */
	src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT);

	if ((arbneg & 0x7) == 0x0) {
		src = src & ~REG_NEGV_MASK;
		*r = src;
	} else if ((arbneg & 0x7) == 0x7) {
		src |= REG_NEGV_MASK;
		*r = src;
	} else {
		if (!REG_GET_VALID(*r))
			*r = get_temp_reg(fp);
		/* negated components first; keep() so src survives for the
		 * second MAD */
		src |= REG_NEGV_MASK;
		emit_arith(fp,
			   PFS_OP_MAD,
			   *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0);
		/* then the non-negated components plus W */
		src = src & ~REG_NEGV_MASK;
		emit_arith(fp,
			   PFS_OP_MAD,
			   *r,
			   (arbneg ^ 0x7) | WRITEMASK_W,
			   src, pfs_one, pfs_zero, 0);
	}

	return 3;
}

/**
 * Copy the components selected by s_mask[\p mask] from \p src into the
 * temp \p *r (allocated here if not yet valid), honouring the per-component
 * negation in \p arbneg.
 *
 * \param mc  number of vector components matched so far; when this call
 *            completes the triple, W is written along with it.
 * \return the number of components covered by s_mask[\p mask].
 */
static int swz_emit_partial(struct r300_fragment_program *fp,
			    GLuint src,
			    GLuint * r, int mask, int mc, GLuint arbneg)
{
	GLuint tmp;
	GLuint wmask = 0;

	if (!REG_GET_VALID(*r))
		*r = get_temp_reg(fp);

	/* A partial match, VSWZ/mask define what parts of the
	 * desired swizzle we match
	 */
	if (mc + s_mask[mask].count == 3) {
		wmask = WRITEMASK_W;
		src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT;
	}

	/* In every branch below the final MAD sets NO_USE while more
	 * partial copies will follow (wmask == 0) and clears it on the
	 * last one, so the source's refcount drops exactly once.
	 */
	tmp = arbneg & s_mask[mask].mask;
	if (tmp) {
		tmp = tmp ^ s_mask[mask].mask;
		if (tmp) {
			/* mixed: negated components, then the rest */
			emit_arith(fp,
				   PFS_OP_MAD,
				   *r,
				   arbneg & s_mask[mask].mask,
				   keep(src) | REG_NEGV_MASK,
				   pfs_one, pfs_zero, 0);
			if (!wmask) {
				REG_SET_NO_USE(src, GL_TRUE);
			} else {
				REG_SET_NO_USE(src, GL_FALSE);
			}
			emit_arith(fp,
				   PFS_OP_MAD,
				   *r, tmp | wmask, src, pfs_one, pfs_zero, 0);
		} else {
			/* every selected component is negated */
			if (!wmask) {
				REG_SET_NO_USE(src, GL_TRUE);
			} else {
				REG_SET_NO_USE(src, GL_FALSE);
			}
			emit_arith(fp,
				   PFS_OP_MAD,
				   *r,
				   (arbneg & s_mask[mask].mask) | wmask,
				   src | REG_NEGV_MASK, pfs_one, pfs_zero, 0);
		}
	} else {
		/* no selected component is negated */
		if (!wmask) {
			REG_SET_NO_USE(src, GL_TRUE);
		} else {
			REG_SET_NO_USE(src, GL_FALSE);
		}
		emit_arith(fp, PFS_OP_MAD,
			   *r,
			   s_mask[mask].mask | wmask,
			   src, pfs_one, pfs_zero, 0);
	}

	return s_mask[mask].count;
}

/**
 * Produce a register that reads \p src through the ARB swizzle \p arbswz
 * with the ARB negate mask \p arbneg.
 *
 * The s_mask table (outer loop) and the native swizzles in v_swiz (inner
 * loop) are walked until all three vector components have been matched,
 * either natively (swz_native) or by partial copies into a temp
 * (swz_emit_partial).
 *
 * \return the resulting register; \c undef after an error.
 */
static GLuint do_swizzle(struct r300_fragment_program *fp,
			 GLuint src, GLuint arbswz, GLuint arbneg)
{
	GLuint r = undef;
	GLuint vswz;
	int c_mask = 0;
	int v_match = 0;

	/* If swizzling from something without an XYZW native swizzle,
	 * emit result to a temp, and do new swizzle from the temp.
	 */
#if 0
	if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) {
		GLuint temp = get_temp_reg(fp);
		emit_arith(fp,
			   PFS_OP_MAD,
			   temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0);
		src = temp;
	}
#endif

	if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) {
		/* src already carries a swizzle: compose it with arbswz so
		 * the search below works on the combined selection */
		GLuint vsrcswz =
		    (v_swiz[REG_GET_VSWZ(src)].
		     hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) |
		    REG_GET_SSWZ(src) << 9;
		GLint i;

		GLuint newswz = 0;
		GLuint offset;
		for (i = 0; i < 4; ++i) {
			offset = GET_SWZ(arbswz, i);

			/* selectors 0..3 pick a component and are looked up
			 * in the existing swizzle; larger selectors are
			 * passed through unchanged */
			newswz |=
			    (offset <= 3) ? GET_SWZ(vsrcswz,
						    offset) << i *
			    3 : offset << i * 3;
		}

		arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK);
		REG_SET_SSWZ(src, GET_SWZ(newswz, 3));
	} else {
		/* set scalar swizzling */
		REG_SET_SSWZ(src, GET_SWZ(arbswz, 3));

	}
	do {
		vswz = REG_GET_VSWZ(src);
		do {
			int chash;

			REG_SET_VSWZ(src, vswz);
			chash = v_swiz[REG_GET_VSWZ(src)].hash &
			    s_mask[c_mask].hash;

			if (chash == (arbswz & s_mask[c_mask].hash)) {
				if (s_mask[c_mask].count == 3) {
					v_match += swz_native(fp,
							      src, &r, arbneg);
				} else {
					v_match += swz_emit_partial(fp,
								    src,
								    &r,
								    c_mask,
								    v_match,
								    arbneg);
				}

				if (v_match == 3)
					return r;

				/* Fill with something invalid.. all 0's was
				 * wrong before, matched SWIZZLE_X.  So all
				 * 1's will be okay for now
				 */
				arbswz |= (PFS_INVAL & s_mask[c_mask].hash);
			}
		} while (v_swiz[++vswz].hash != PFS_INVAL);
		REG_SET_VSWZ(src, SWIZZLE_XYZ);
	} while (s_mask[++c_mask].hash != PFS_INVAL);

	ERROR("should NEVER get here\n");
	return r;
}

/**
 * Translate a Mesa source register into the internal register descriptor,
 * registering parameters as hardware constants and applying the operand's
 * swizzle and negation.
 *
 * \return the descriptor; \c undef for an unsupported register file.
 */
static GLuint t_src(struct r300_fragment_program *fp,
		    struct prog_src_register fpsrc)
{
	GLuint r = undef;

	switch (fpsrc.File) {
	case PROGRAM_TEMPORARY:
		REG_SET_INDEX(r, fpsrc.Index);
		REG_SET_VALID(r, GL_TRUE);
		REG_SET_TYPE(r, REG_TYPE_TEMP);
		break;
	case PROGRAM_INPUT:
		REG_SET_INDEX(r, fpsrc.Index);
		REG_SET_VALID(r, GL_TRUE);
		REG_SET_TYPE(r, REG_TYPE_INPUT);
		break;
	case PROGRAM_LOCAL_PARAM:
		r = emit_const4fv(fp,
				  fp->mesa_program.Base.LocalParams[fpsrc.
								    Index]);
		break;
	case PROGRAM_ENV_PARAM:
		r = emit_const4fv(fp,
				  fp->ctx->FragmentProgram.Parameters[fpsrc.
								      Index]);
		break;
	case PROGRAM_STATE_VAR:
	case PROGRAM_NAMED_PARAM:
		r = emit_const4fv(fp,
				  fp->mesa_program.Base.Parameters->
				  ParameterValues[fpsrc.Index]);
		break;
	default:
		ERROR("unknown SrcReg->File %x\n", fpsrc.File);
		return r;
	}

	/* no point swizzling ONE/ZERO/HALF constants... */
	if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO)
		r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase);
	return r;
}

/**
 * Like t_src(), but for scalar operands: the selector found in the X
 * position of the swizzle is replicated to all four positions first.
 */
static GLuint t_scalar_src(struct r300_fragment_program *fp,
			   struct prog_src_register fpsrc)
{
	struct prog_src_register src = fpsrc;
	int sc = GET_SWZ(fpsrc.Swizzle, 0);	/* X */

	src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9));

	return t_src(fp, src);
}

/**
 * Translate a Mesa destination register into the internal descriptor.
 *
 * Only temporaries and the colour/depth outputs are accepted.
 *
 * \return the descriptor; on error a descriptor that was never marked
 * valid is returned.
 */
static GLuint t_dst(struct r300_fragment_program *fp,
		    struct prog_dst_register dest)
{
	GLuint r = undef;

	switch (dest.File) {
	case PROGRAM_TEMPORARY:
		REG_SET_INDEX(r, dest.Index);
		REG_SET_VALID(r, GL_TRUE);
		REG_SET_TYPE(r, REG_TYPE_TEMP);
		return r;
	case PROGRAM_OUTPUT:
		REG_SET_TYPE(r, REG_TYPE_OUTPUT);
		switch (dest.Index) {
		case FRAG_RESULT_COLR:
		case FRAG_RESULT_DEPR:
			REG_SET_INDEX(r, dest.Index);
			REG_SET_VALID(r, GL_TRUE);
			return r;
		default:
			ERROR("Bad DstReg->Index 0x%x\n", dest.Index);
			return r;
		}
	default:
		ERROR("Bad DstReg->File 0x%x\n", dest.File);
		return r;
	}
}

/**
 * Resolve a source register descriptor to a hardware source index.
 *
 * Side effects: binds a hardware temp to a Mesa temp that has none yet,
 * decrements the register's refcount (unless the NO_USE flag is set) and
 * frees it when the count reaches zero, and - unless \p tex is set -
 * records the hardware register in cs->used_in_node.
 *
 * \return the hardware temp index, or (constant index | SRC_CONST) for
 * constants.
 */
static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex)
{
	COMPILE_STATE;
	int idx;
	int index = REG_GET_INDEX(src);

	switch (REG_GET_TYPE(src)) {
	case REG_TYPE_TEMP:
		/* NOTE: if reg==-1 here, a source is being read that
		 *       hasn't been written to. Undefined results.
		 */
		if (cs->temps[index].reg == -1)
			cs->temps[index].reg = get_hw_temp(fp, cs->nrslots);

		idx = cs->temps[index].reg;

		if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0))
			free_temp(fp, src);
		break;
	case REG_TYPE_INPUT:
		idx = cs->inputs[index].reg;

		if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0))
			free_hw_temp(fp, cs->inputs[index].reg);
		break;
	case REG_TYPE_CONST:
		return (index | SRC_CONST);
	default:
		ERROR("Invalid type for source reg\n");
		return (0 | SRC_CONST);
	}

	if (!tex)
		cs->used_in_node |= (1 << idx);

	return idx;
}

/**
 * Resolve a destination register descriptor to a hardware index.
 *
 * For temps a hardware register is bound on first use (through
 * get_hw_temp_tex() when \p tex is set, otherwise get_hw_temp() with
 * \p slot), the refcount is decremented unless NO_USE is set, and the
 * register is recorded in both cs->dest_in_node and cs->used_in_node.
 * For outputs the matching output flag of the current node is set and the
 * output index is returned.
 *
 * \return hardware temp index or output index; 0 after an error.
 */
static int t_hw_dst(struct r300_fragment_program *fp,
		    GLuint dest, GLboolean tex, int slot)
{
	COMPILE_STATE;
	int idx;
	GLuint index = REG_GET_INDEX(dest);
	assert(REG_GET_VALID(dest));

	switch (REG_GET_TYPE(dest)) {
	case REG_TYPE_TEMP:
		if (cs->temps[REG_GET_INDEX(dest)].reg == -1) {
			if (!tex) {
				cs->temps[index].reg = get_hw_temp(fp, slot);
			} else {
				cs->temps[index].reg = get_hw_temp_tex(fp);
			}
		}
		idx = cs->temps[index].reg;

		if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0))
			free_temp(fp, dest);

		cs->dest_in_node |= (1 << idx);
		cs->used_in_node |= (1 << idx);
		break;
	case REG_TYPE_OUTPUT:
		switch (index) {
		case FRAG_RESULT_COLR:
			fp->node[fp->cur_node].flags |=
			    R300_PFS_NODE_OUTPUT_COLOR;
			break;
		case FRAG_RESULT_DEPR:
			fp->node[fp->cur_node].flags |=
			    R300_PFS_NODE_OUTPUT_DEPTH;
			break;
		}
		return index;
		/* NOTE(review): the break below can never be reached */
		break;
	default:
		ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
		return 0;
	}

	return idx;
}

/**
 * Append a NOP ALU instruction, or report an error when all
 * PFS_MAX_ALU_INST slots are already in use.
 */
static void emit_nop(struct r300_fragment_program *fp)
{
	COMPILE_STATE;

	if (cs->nrslots >= PFS_MAX_ALU_INST) {
		ERROR("Out of ALU instruction slots\n");
		return;
	}

	fp->alu.inst[cs->nrslots].inst0 = NOP_INST0;
	fp->alu.inst[cs->nrslots].inst1 = NOP_INST1;
	fp->alu.inst[cs->nrslots].inst2 = NOP_INST2;
	fp->alu.inst[cs->nrslots].inst3 = NOP_INST3;
	cs->nrslots++;
}
/**
 * Emit one texture instruction (or a KIL) for \p fpi.
 *
 * Resolves the coordinate and destination to hardware registers, starts a
 * new node when the instruction depends on something read or written in
 * the current node (a texture indirection), appends the encoded
 * instruction to fp->tex.inst[], and finally copies the result to the real
 * destination if a temporary had to stand in for it.
 *
 * \param opcode  R300_FPITX_OP_* value stored in the instruction's opcode
 *                field; R300_FPITX_OP_KIL skips destination handling.
 */
static void emit_tex(struct r300_fragment_program *fp,
		     struct prog_instruction *fpi, int opcode)
{
	COMPILE_STATE;
	GLuint coord = t_src(fp, fpi->SrcReg[0]);
	GLuint dest = undef, rdest = undef;
	GLuint din, uin;
	int unit = fpi->TexSrcUnit;
	int hwsrc, hwdest;
	GLuint tempreg = 0;

	/* Snapshot of the node's register usage before this instruction
	 * touches anything; the indirection test below works on it. */
	uin = cs->used_in_node;
	din = cs->dest_in_node;

	/* Resolve source/dest to hardware registers */
	if (opcode != R300_FPITX_OP_KIL) {
		if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
			/**
			 * Hardware uses [0..1]x[0..1] range for rectangle textures
			 * instead of [0..Width]x[0..Height].
			 * Add a scaling instruction.
			 *
			 * \todo Refactor this once we have proper rewriting/optimization
			 * support for programs.
			 */
			gl_state_index tokens[STATE_LENGTH] = {
				STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
				0
			};
			int factor_index;
			GLuint factorreg;

			tokens[2] = unit;
			factor_index =
			    _mesa_add_state_reference(fp->mesa_program.Base.
						      Parameters, tokens);
			factorreg =
			    emit_const4fv(fp,
					  fp->mesa_program.Base.Parameters->
					  ParameterValues[factor_index]);
			tempreg = keep(get_temp_reg(fp));

			/* tempreg = coord * factor */
			emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
				   coord, factorreg, pfs_zero, 0);

			/* Ensure correct node indirection */
			uin = cs->used_in_node;
			din = cs->dest_in_node;

			hwsrc = t_hw_src(fp, tempreg, GL_TRUE);
		} else {
			hwsrc = t_hw_src(fp, coord, GL_TRUE);
		}

		dest = t_dst(fp, fpi->DstReg);

		/* r300 doesn't seem to be able to do TEX->output reg */
		if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
			rdest = dest;
			dest = get_temp_reg_tex(fp);
		} else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) {
			/* in case write mask isn't XYZW */
			rdest = dest;
			dest = get_temp_reg_tex(fp);
		}
		hwdest =
		    t_hw_dst(fp, dest, GL_TRUE,
			     fp->node[fp->cur_node].alu_offset);

		/* Use a temp that hasn't been used in this node, rather
		 * than causing an indirection
		 */
		if (uin & (1 << hwdest)) {
			free_hw_temp(fp, hwdest);
			hwdest = get_hw_temp_tex(fp);
			cs->temps[REG_GET_INDEX(dest)].reg = hwdest;
		}
	} else {
		/* KIL has no destination and no texture unit */
		hwdest = 0;
		unit = 0;
		hwsrc = t_hw_src(fp, coord, GL_TRUE);
	}

	/* Indirection if source has been written in this node, or if the
	 * dest has been read/written in this node
	 */
	if ((REG_GET_TYPE(coord) != REG_TYPE_CONST &&
	     (din & (1 << hwsrc))) || (uin & (1 << hwdest))) {

		/* Finish off current node */
		if (fp->node[fp->cur_node].alu_offset == cs->nrslots)
			emit_nop(fp);

		fp->node[fp->cur_node].alu_end =
		    cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
		assert(fp->node[fp->cur_node].alu_end >= 0);

		/* NOTE(review): this early return leaves tempreg (if one was
		 * allocated above) unreleased - confirm whether that matters
		 * once the program has been flagged as failed. */
		if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) {
			ERROR("too many levels of texture indirection\n");
			return;
		}

		/* Start new node */
		fp->node[fp->cur_node].tex_offset = fp->tex.length;
		fp->node[fp->cur_node].alu_offset = cs->nrslots;
		fp->node[fp->cur_node].tex_end = -1;
		fp->node[fp->cur_node].alu_end = -1;
		fp->node[fp->cur_node].flags = 0;
		cs->used_in_node = 0;
		cs->dest_in_node = 0;
	}

	if (fp->cur_node == 0)
		fp->first_node_has_tex = 1;

	fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT)
	    | (hwdest << R300_FPITX_DST_SHIFT)
	    | (unit << R300_FPITX_IMAGE_SHIFT)
	    /* not entirely sure about this */
	    | (opcode << R300_FPITX_OPCODE_SHIFT);

	cs->dest_in_node |= (1 << hwdest);
	if (REG_GET_TYPE(coord) != REG_TYPE_CONST)
		cs->used_in_node |= (1 << hwsrc);

	fp->node[fp->cur_node].tex_end++;

	/* Copy from temp to output if needed */
	if (REG_GET_VALID(rdest)) {
		emit_arith(fp, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest,
			   pfs_one, pfs_zero, 0);
		free_temp(fp, dest);
	}

	/* Free temp register */
	if (tempreg != 0)
		free_temp(fp, tempreg);
}

/**
 * Returns the first slot where we could possibly allow writing to dest,
 * according to register allocation.
+ */ +static int get_earliest_allowed_write(struct r300_fragment_program *fp, + GLuint dest, int mask) +{ + COMPILE_STATE; + int idx; + int pos; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[index].reg == -1) + return 0; + + idx = cs->temps[index].reg; + break; + case REG_TYPE_OUTPUT: + return 0; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + pos = cs->hwtemps[idx].reserved; + if (mask & WRITEMASK_XYZ) { + if (pos < cs->hwtemps[idx].vector_lastread) + pos = cs->hwtemps[idx].vector_lastread; + } + if (mask & WRITEMASK_W) { + if (pos < cs->hwtemps[idx].scalar_lastread) + pos = cs->hwtemps[idx].scalar_lastread; + } + + return pos; +} + +/** + * Allocates a slot for an ALU instruction that can consist of + * a vertex part or a scalar part or both. + * + * Sources from src (src[0] to src[argc-1]) are added to the slot in the + * appropriate position (vector and/or scalar), and their positions are + * recorded in the srcpos array. + * + * This function emits instruction code for the source fetch and the + * argument selection. It does not emit instruction code for the + * opcode or the destination selection. 
+ * + * @return the index of the slot + */ +static int find_and_prepare_slot(struct r300_fragment_program *fp, + GLboolean emit_vop, + GLboolean emit_sop, + int argc, GLuint * src, GLuint dest, int mask) +{ + COMPILE_STATE; + int hwsrc[3]; + int srcpos[3]; + unsigned int used; + int tempused; + int tempvsrc[3]; + int tempssrc[3]; + int pos; + int regnr; + int i, j; + + // Determine instruction slots, whether sources are required on + // vector or scalar side, and the smallest slot number where + // all source registers are available + used = 0; + if (emit_vop) + used |= SLOT_OP_VECTOR; + if (emit_sop) + used |= SLOT_OP_SCALAR; + + pos = get_earliest_allowed_write(fp, dest, mask); + + if (fp->node[fp->cur_node].alu_offset > pos) + pos = fp->node[fp->cur_node].alu_offset; + for (i = 0; i < argc; ++i) { + if (!REG_GET_BUILTIN(src[i])) { + if (emit_vop) + used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; + if (emit_sop) + used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; + } + + hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ + regnr = hwsrc[i] & 31; + + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_valid > pos) + pos = cs->hwtemps[regnr].vector_valid; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_valid > pos) + pos = cs->hwtemps[regnr].scalar_valid; + } + } + } + + // Find a slot that fits + for (;; ++pos) { + if (cs->slot[pos].used & used & SLOT_OP_BOTH) + continue; + + if (pos >= cs->nrslots) { + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return -1; + } + + fp->alu.inst[pos].inst0 = NOP_INST0; + fp->alu.inst[pos].inst1 = NOP_INST1; + fp->alu.inst[pos].inst2 = NOP_INST2; + fp->alu.inst[pos].inst3 = NOP_INST3; + + cs->nrslots++; + } + // Note: When we need both parts (vector and scalar) of a source, + // we always try to put them into the same position. 
This makes the + // code easier to read, and it is optimal (i.e. one doesn't gain + // anything by splitting the parts). + // It also avoids headaches with swizzles that access both parts (i.e WXY) + tempused = cs->slot[pos].used; + for (i = 0; i < 3; ++i) { + tempvsrc[i] = cs->slot[pos].vsrc[i]; + tempssrc[i] = cs->slot[pos].ssrc[i]; + } + + for (i = 0; i < argc; ++i) { + int flags = (used >> i) & SLOT_SRC_BOTH; + + if (!flags) { + srcpos[i] = 0; + continue; + } + + for (j = 0; j < 3; ++j) { + if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { + if (tempvsrc[j] != hwsrc[i]) + continue; + } + + if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { + if (tempssrc[j] != hwsrc[i]) + continue; + } + + break; + } + + if (j == 3) + break; + + srcpos[i] = j; + tempused |= flags << j; + if (flags & SLOT_SRC_VECTOR) + tempvsrc[j] = hwsrc[i]; + if (flags & SLOT_SRC_SCALAR) + tempssrc[j] = hwsrc[i]; + } + + if (i == argc) + break; + } + + // Found a slot, reserve it + cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); + for (i = 0; i < 3; ++i) { + cs->slot[pos].vsrc[i] = tempvsrc[i]; + cs->slot[pos].ssrc[i] = tempssrc[i]; + } + + for (i = 0; i < argc; ++i) { + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + int regnr = hwsrc[i] & 31; + + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_lastread < pos) + cs->hwtemps[regnr].vector_lastread = + pos; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_lastread < pos) + cs->hwtemps[regnr].scalar_lastread = + pos; + } + } + } + + // Emit the source fetch code + fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; + fp->alu.inst[pos].inst1 |= + ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); + + fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; + fp->alu.inst[pos].inst3 |= + ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | + 
(cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); + + // Emit the argument selection code + if (emit_vop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + + (srcpos[i] * + v_swiz[REG_GET_VSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_FPI0_ARGC_ZERO; + } + } + + fp->alu.inst[pos].inst0 &= + ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK | + R300_FPI0_ARG2C_MASK); + fp->alu.inst[pos].inst0 |= + (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] << + R300_FPI0_ARG1C_SHIFT) + | (swz[2] << R300_FPI0_ARG2C_SHIFT); + } + + if (emit_sop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + + (srcpos[i] * + s_swiz[REG_GET_SSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_FPI2_ARGA_ZERO; + } + } + + fp->alu.inst[pos].inst2 &= + ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK | + R300_FPI2_ARG2A_MASK); + fp->alu.inst[pos].inst2 |= + (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] << + R300_FPI2_ARG1A_SHIFT) + | (swz[2] << R300_FPI2_ARG2A_SHIFT); + } + + return pos; +} + +/** + * Append an ALU instruction to the instruction list. 
 *
 * The slot and the source/argument encoding come from
 * find_and_prepare_slot(); this function adds the opcode and destination
 * fields and records when the written part(s) of the destination become
 * valid.  Nothing is emitted when no slot could be found, or for a depth
 * output whose write mask does not include Z.
 *
 * \param op     PFS_OP_* index into r300_fpop[]
 * \param mask   destination write mask
 * \param flags  PFS_FLAG_SAT to saturate the result, otherwise 0
 */
static void emit_arith(struct r300_fragment_program *fp,
		       int op,
		       GLuint dest,
		       int mask,
		       GLuint src0, GLuint src1, GLuint src2, int flags)
{
	COMPILE_STATE;
	GLuint src[3] = { src0, src1, src2 };
	int hwdest;
	GLboolean emit_vop, emit_sop;
	int vop, sop, argc;
	int pos;

	vop = r300_fpop[op].v_op;
	sop = r300_fpop[op].s_op;
	argc = r300_fpop[op].argc;

	/* Depth output: a Z write is carried out as a W (scalar) write,
	 * anything else is dropped. */
	if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT &&
	    REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) {
		if (mask & WRITEMASK_Z) {
			mask = WRITEMASK_W;
		} else {
			return;
		}
	}

	/* Decide which halves of the instruction are needed. */
	emit_vop = GL_FALSE;
	emit_sop = GL_FALSE;
	if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3)
		emit_vop = GL_TRUE;
	if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA)
		emit_sop = GL_TRUE;

	pos =
	    find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest,
				  mask);
	if (pos < 0)
		return;

	hwdest = t_hw_dst(fp, dest, GL_FALSE, pos);	/* Note: Side effects wrt register allocation */

	if (flags & PFS_FLAG_SAT) {
		vop |= R300_FPI0_OUTC_SAT;
		sop |= R300_FPI2_OUTA_SAT;
	}

	/* Throw the pieces together and get FPI0/1 */
	if (emit_vop) {
		fp->alu.inst[pos].inst0 |= vop;

		fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;

		if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
			if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
				fp->alu.inst[pos].inst1 |=
				    (mask & WRITEMASK_XYZ) <<
				    R300_FPI1_DSTC_OUTPUT_MASK_SHIFT;
			} else
				assert(0);
		} else {
			fp->alu.inst[pos].inst1 |=
			    (mask & WRITEMASK_XYZ) <<
			    R300_FPI1_DSTC_REG_MASK_SHIFT;

			/* vector part is readable from the next slot on */
			cs->hwtemps[hwdest].vector_valid = pos + 1;
		}
	}

	/* And now FPI2/3 */
	if (emit_sop) {
		fp->alu.inst[pos].inst2 |= sop;

		if (mask & WRITEMASK_W) {
			if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
				if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
					fp->alu.inst[pos].inst3 |=
					    (hwdest << R300_FPI3_DSTA_SHIFT) |
					    R300_FPI3_DSTA_OUTPUT;
				} else if (REG_GET_INDEX(dest) ==
					   FRAG_RESULT_DEPR) {
					fp->alu.inst[pos].inst3 |=
					    R300_FPI3_DSTA_DEPTH;
				} else
					assert(0);
			} else {
				fp->alu.inst[pos].inst3 |=
				    (hwdest << R300_FPI3_DSTA_SHIFT) |
				    R300_FPI3_DSTA_REG;

				/* scalar part is readable from the next
				 * slot on */
				cs->hwtemps[hwdest].scalar_valid = pos + 1;
			}
		}
	}

	return;
}

#if 0
/* Disabled: build an input register descriptor for attribute attr,
 * failing when the program does not read that attribute. */
static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr)
{
	struct gl_fragment_program *mp = &fp->mesa_program;
	GLuint r = undef;

	if (!(mp->Base.InputsRead & (1 << attr))) {
		ERROR("Attribute %d was not provided!\n", attr);
		return undef;
	}

	REG_SET_TYPE(r, REG_TYPE_INPUT);
	REG_SET_INDEX(r, attr);
	REG_SET_VALID(r, GL_TRUE);
	return r;
}
#endif

/* Constants for the parabola-based SIN/COS/SCS approximation; each row is
 * registered as one hardware constant through emit_const4fv(). */
static GLfloat SinCosConsts[2][4] = {
	{
	 1.273239545,		// 4/PI
	 -0.405284735,		// -4/(PI*PI)
	 3.141592654,		// PI
	 0.2225			// weight
	 },
	{
	 0.75,
	 0.0,
	 0.159154943,		// 1/(2*PI)
	 6.283185307		// 2*PI
	 }
};

/**
 * Emit a LIT instruction.
 * \p flags may be PFS_FLAG_SAT
 *
 * Definition of LIT (from ARB_fragment_program):
 * tmp = VectorLoad(op0);
 * if (tmp.x < 0) tmp.x = 0;
 * if (tmp.y < 0) tmp.y = 0;
 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
 * result.x = 1.0;
 * result.y = tmp.x;
 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
 * result.w = 1.0;
 *
 * The longest path of computation is the one leading to result.z,
 * consisting of 5 operations. This implementation of LIT takes
 * 5 slots. So unless there's some special undocumented opcode,
 * this implementation is potentially optimal. Unfortunately,
 * emit_arith is a bit too conservative because it doesn't understand
 * partial writes to the vector component.
 *
 * The result is built in a temporary whenever the write mask is not XYZW
 * or the destination is an output register, and is copied to \p dest with
 * one additional MAD at the end.
 */

/* Clamp bounds for the LIT exponent (tmp.w): the W entry is the lower
 * bound used by the MAX, the XYZ entries the upper bound used by the MIN. */
static const GLfloat LitConst[4] =
    { 127.999999, 127.999999, 127.999999, -127.999999 };

static void emit_lit(struct r300_fragment_program *fp,
		     GLuint dest, int mask, GLuint src, int flags)
{
	COMPILE_STATE;
	GLuint cnst;
	int needTemporary;
	GLuint temp;

	cnst = emit_const4fv(fp, LitConst);

	needTemporary = 0;
	if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) {
		needTemporary = 1;
	} else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
		// LIT is typically followed by DP3/DP4, so there's no point
		// in creating special code for this case
		needTemporary = 1;
	}

	// keep(): the working register is read and written many times
	// below, so its refcount must not be touched by each use
	if (needTemporary) {
		temp = keep(get_temp_reg(fp));
	} else {
		temp = keep(dest);
	}

	// Note: The order of emit_arith inside the slots is relevant,
	// because emit_arith only looks at scalar vs. vector when resolving
	// dependencies, and it does not consider individual vector components,
	// so swizzling between the two parts can create fake dependencies.

	// First slot
	emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY,
		   keep(src), pfs_zero, undef, 0);
	emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0);

	// Second slot
	emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z,
		   swizzle(temp, W, W, W, W), cnst, undef, 0);
	emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W,
		   swizzle(temp, Y, Y, Y, Y), undef, undef, 0);

	// Third slot
	// If desired, we saturate the y result here.
	// This does not affect the use as a condition variable in the CMP later
	emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W,
		   temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0);
	emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y,
		   swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags);

	// Fourth slot
	emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X,
		   pfs_one, pfs_one, pfs_zero, 0);
	emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0);

	// Fifth slot
	emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z,
		   pfs_zero, swizzle(temp, W, W, W, W),
		   negate(swizzle(temp, Y, Y, Y, Y)), flags);
	emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one,
		   pfs_zero, 0);

	if (needTemporary) {
		emit_arith(fp, PFS_OP_MAD, dest, mask,
			   temp, pfs_one, pfs_zero, flags);
		free_temp(fp, temp);
	} else {
		// Decrease refcount of the destination
		t_hw_dst(fp, dest, GL_FALSE, cs->nrslots);
	}
}

static GLboolean parse_program(struct r300_fragment_program *fp)
{
	struct gl_fragment_program *mp = &fp->mesa_program;
	const struct prog_instruction *inst = mp->Base.Instructions;
	struct prog_instruction *fpi;
	GLuint src[3], dest, temp[2];
	int flags, mask = 0;
	int const_sin[2];

	if (!inst || inst[0].Opcode == OPCODE_END) {
		ERROR("empty program?\n");
		return GL_FALSE;
	}

	for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
		if (fpi->SaturateMode == SATURATE_ZERO_ONE)
			flags = PFS_FLAG_SAT;
		else
			flags = 0;

		if (fpi->Opcode != OPCODE_KIL) {
			dest = t_dst(fp, fpi->DstReg);
			mask = fpi->DstReg.WriteMask;
		}

		switch (fpi->Opcode) {
		case OPCODE_ABS:
			src[0] = t_src(fp, fpi->SrcReg[0]);
			emit_arith(fp, PFS_OP_MAD, dest, mask,
				   absolute(src[0]), pfs_one, pfs_zero, flags);
			break;
		case OPCODE_ADD:
			src[0] = t_src(fp, fpi->SrcReg[0]);
			src[1] = t_src(fp, fpi->SrcReg[1]);
			emit_arith(fp, PFS_OP_MAD, dest, mask,
				   src[0], pfs_one, src[1], flags);
			break;
		case OPCODE_CMP:
			src[0] = t_src(fp, fpi->SrcReg[0]);
src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c + * r300 - if src2.c < 0.0 ? src1.c : src0.c + */ + emit_arith(fp, PFS_OP_CMP, dest, mask, + src[2], src[1], src[0], flags); + break; + case OPCODE_COS: + /* + * cos using a parabola (see SIN): + * cos(x): + * x = (x/(2*PI))+0.75 + * x = frac(x) + * x = (x*2*PI)-PI + * result = sin(x) + */ + temp[0] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* add 0.5*PI and do range reduction */ + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(src[0], X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI + 0); + + /* SIN */ + + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(fp, PFS_OP_MAD, dest, mask, + swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(fp, temp[0]); + break; + case OPCODE_DP3: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_DP3, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DP4: + src[0] = t_src(fp, fpi->SrcReg[0]); + 
src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_DP4, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DPH: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + /* src0.xyz1 -> temp + * DP4 dest, temp, src1 + */ +#if 0 + temp[0] = get_temp_reg(fp); + src[0].s_swz = SWIZZLE_ONE; + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, pfs_zero, 0); + emit_arith(fp, PFS_OP_DP4, dest, mask, + temp[0], src[1], undef, flags); + free_temp(fp, temp[0]); +#else + emit_arith(fp, PFS_OP_DP4, dest, mask, + swizzle(src[0], X, Y, Z, ONE), src[1], + undef, flags); +#endif + break; + case OPCODE_DST: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + /* dest.y = src0.y * src1.y */ + if (mask & WRITEMASK_Y) + emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y, + keep(src[0]), keep(src[1]), + pfs_zero, flags); + /* dest.z = src0.z */ + if (mask & WRITEMASK_Z) + emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z, + src[0], pfs_one, pfs_zero, flags); + /* result.x = 1.0 + * result.w = src1.w */ + if (mask & WRITEMASK_XW) { + REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & WRITEMASK_XW, + src[1], pfs_one, pfs_zero, flags); + } + break; + case OPCODE_EX2: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_EX2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_FLR: + src[0] = t_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(fp); + /* FRC temp, src0 + * MAD dest, src0, 1.0, -temp + */ + emit_arith(fp, PFS_OP_FRC, temp[0], mask, + keep(src[0]), undef, undef, 0); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(temp[0]), flags); + free_temp(fp, temp[0]); + break; + case OPCODE_FRC: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_FRC, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_KIL: + emit_tex(fp, fpi, R300_FPITX_OP_KIL); + break; + case OPCODE_LG2: + src[0] = t_scalar_src(fp, 
fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_LG2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_LIT: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_lit(fp, dest, mask, src[0], flags); + break; + case OPCODE_LRP: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + /* result = tmp0tmp1 + (1 - tmp0)tmp2 + * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 + * MAD temp, -tmp0, tmp2, tmp2 + * MAD result, tmp0, tmp1, temp + */ + temp[0] = get_temp_reg(fp); + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + negate(keep(src[0])), keep(src[2]), src[2], + 0); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], temp[0], flags); + free_temp(fp, temp[0]); + break; + case OPCODE_MAD: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], src[2], flags); + break; + case OPCODE_MAX: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAX, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MIN: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MIN, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MOV: + case OPCODE_SWZ: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, pfs_zero, flags); + break; + case OPCODE_MUL: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], pfs_zero, flags); + break; + case OPCODE_POW: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + src[1] = t_scalar_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W, + src[0], undef, undef, 0); + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + temp[0], src[1], pfs_zero, 0); + emit_arith(fp, PFS_OP_EX2, dest, 
fpi->DstReg.WriteMask, + temp[0], undef, undef, 0); + free_temp(fp, temp[0]); + break; + case OPCODE_RCP: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_RCP, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_RSQ: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_RSQ, dest, mask, + absolute(src[0]), pfs_zero, pfs_zero, flags); + break; + case OPCODE_SCS: + /* + * scs using a parabola : + * scs(x): + * result.x = sin(-abs(x)+0.5*PI) (cos) + * result.y = sin(x) (sin) + * + */ + temp[0] = get_temp_reg(fp); + temp[1] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* x = -abs(x)+0.5*PI */ + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI + pfs_half, + negate(abs + (swizzle(keep(src[0]), X, X, X, X))), + 0); + + /* C*x (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + swizzle(const_sin[0], Y, Y, Y, Y), + swizzle(keep(src[0]), X, X, X, X), + pfs_zero, 0); + + /* B*x, C*x (cos) */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + /* B*x (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(const_sin[0], X, X, X, X), + keep(src[0]), pfs_zero, 0); + + /* y = B*x + C*x*abs(x) (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z, + absolute(src[0]), + swizzle(temp[0], W, W, W, W), + swizzle(temp[1], W, W, W, W), 0); + + /* y = B*x + C*x*abs(x) (cos) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], + W, Z, Y, + X), + absolute(swizzle(temp[1], W, Z, Y, X)), + negate(swizzle(temp[1], W, Z, Y, X)), 
0); + + /* dest.xy = mad(temp.xy, P, temp2.wz) */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[1], W, Z, Y, X), flags); + + free_temp(fp, temp[0]); + free_temp(fp, temp[1]); + break; + case OPCODE_SGE: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 0 : 1 + */ + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(fp, PFS_OP_CMP, dest, mask, + pfs_one, pfs_zero, temp[0], 0); + free_temp(fp, temp[0]); + break; + case OPCODE_SIN: + /* + * using a parabola: + * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) + * extra precision is obtained by weighting against + * itself squared. + */ + + temp[0] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* do range reduction */ + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(keep(src[0]), X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + pfs_half, 0); + + emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI + 0); + + /* SIN */ + + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(fp, PFS_OP_MAD, dest, mask, + 
swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(fp, temp[0]); + break; + case OPCODE_SLT: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 1 : 0 + */ + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(fp, PFS_OP_CMP, dest, mask, + pfs_zero, pfs_one, temp[0], 0); + free_temp(fp, temp[0]); + break; + case OPCODE_SUB: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(src[1]), flags); + break; + case OPCODE_TEX: + emit_tex(fp, fpi, R300_FPITX_OP_TEX); + break; + case OPCODE_TXB: + emit_tex(fp, fpi, R300_FPITX_OP_TXB); + break; + case OPCODE_TXP: + emit_tex(fp, fpi, R300_FPITX_OP_TXP); + break; + case OPCODE_XPD:{ + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0.zxy * src1.yzx */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_XYZ, swizzle(keep(src[0]), + Z, X, Y, W), + swizzle(keep(src[1]), Y, Z, X, W), + pfs_zero, 0); + /* dest.xyz = src0.yzx * src1.zxy - temp + * dest.w = undefined + * */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & WRITEMASK_XYZ, swizzle(src[0], + Y, Z, + X, W), + swizzle(src[1], Z, X, Y, W), + negate(temp[0]), flags); + /* cleanup */ + free_temp(fp, temp[0]); + break; + } + default: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + } + + if (fp->error) + return GL_FALSE; + + } + + return GL_TRUE; +} +/* + * Prepend three instructions that derive a position from the WPOS input + * into a newly reserved temporary, then redirect every later source + * operand that reads FRAG_ATTRIB_WPOS to that temporary: + * RCP temp.w, wpos.w + * MUL temp.xyz, wpos, temp.w + * MAD temp.xyz, temp.xyz0, window.xyz0, window.xyz0 + * where 'window' is the STATE_INTERNAL / STATE_R300_WINDOW_DIMENSION + * state parameter added to prog->Parameters. + * + * The old instruction array is freed and replaced, and NumTemporaries and + * NumInstructions are bumped. The program is expected to end with + * OPCODE_END (asserted). The allocation result is not checked here. + */ +static void insert_wpos(struct gl_program *prog) +{ + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... 
*/ + prog->NumTemporaries++; + /* room for the original program and the three new instructions */ + fpi = _mesa_alloc_instructions(prog->NumInstructions + 3); + _mesa_init_instructions(fpi, prog->NumInstructions + 3); + + /* perspective divide: temp.w = 1 / wpos.w */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + /* temp.xyz = wpos.xyz * temp.w */ + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation: temp.xyz = temp * window + window, with the + * w component of every operand swizzled to zero */ + window_index = _mesa_add_state_reference(prog->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + /* append the original program after the new three-instruction prologue */ + _mesa_copy_instructions(&fpi[i], prog->Instructions, + prog->NumInstructions); + + free(prog->Instructions); + + prog->Instructions = fpi; + + prog->NumInstructions += i; + fpi = 
&prog->Instructions[prog->NumInstructions - 1]; + + assert(fpi->Opcode == OPCODE_END); + /* start after the three inserted instructions and retarget WPOS reads */ + for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) { + for (i = 0; i < 3; i++) + if (fpi->SrcReg[i].File == PROGRAM_INPUT && + fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { + fpi->SrcReg[i].File = PROGRAM_TEMPORARY; + fpi->SrcReg[i].Index = tempregi; + } + } +} + +/* - Init structures + * - Determine what hwregs each input corresponds to + */ +static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) +{ + struct r300_pfs_compile_state *cs = NULL; + struct gl_fragment_program *mp = &fp->mesa_program; + struct prog_instruction *fpi; + GLuint InputsRead = mp->Base.InputsRead; + GLuint temps_used = 0; /* for fp->temps[] */ + int i, j; + + /* New compile, reset tracking data */ + fp->optimization = + driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); + fp->translated = GL_FALSE; + fp->error = GL_FALSE; + fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); + fp->tex.length = 0; + fp->cur_node = 0; + fp->first_node_has_tex = 0; + fp->const_nr = 0; + fp->max_temp_idx = 0; + fp->node[0].alu_end = -1; + fp->node[0].tex_end = -1; + + _mesa_memset(cs, 0, sizeof(*fp->cs)); + for (i = 0; i < PFS_MAX_ALU_INST; i++) { + for (j = 0; j < 3; j++) { + cs->slot[i].vsrc[j] = SRC_CONST; + cs->slot[i].ssrc[j] = SRC_CONST; + } + } + + /* Work out what temps the Mesa inputs correspond to, this must match + * what setup_rs_unit does, which shouldn't be a problem as rs_unit + * configures itself based on the fragprog's InputsRead + * + * NOTE: this depends on get_hw_temp() allocating registers in order, + * starting from register 0. 
+ */ + + /* Texcoords come first */ + for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; + cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = + get_hw_temp(fp, 0); + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) { + cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); + insert_wpos(&mp->Base); + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) { + cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); + } + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i = 0; i < 32; i++) + if (InputsRead & (1 << i)) + cs->inputs[i].reg = 0; + } + + /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. 
+ * That way, we can free up the reg when it's no longer needed + */ + if (!mp->Base.Instructions) { + ERROR("No instructions found in program\n"); + return; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + int idx; + + for (i = 0; i < 3; i++) { + idx = fpi->SrcReg[i].Index; + switch (fpi->SrcReg[i].File) { + case PROGRAM_TEMPORARY: + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + break; + case PROGRAM_INPUT: + cs->inputs[idx].refcount++; + break; + default: + break; + } + } + + idx = fpi->DstReg.Index; + if (fpi->DstReg.File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + } + } + cs->temp_in_use = temps_used; +} +/* + * Refresh the values of state-backed parameters: when the program has a + * parameter list, hand it to _mesa_load_state_parameters(). + */ +static void update_params(struct r300_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); +} +/* + * Translate fp on first use and refresh its parameters. + * + * While fp->translated is unset, the compile state is reset by + * init_program() and the Mesa instructions are converted by + * parse_program(). If parsing fails the program is dumped and the + * function returns early, leaving fp->translated unset and skipping + * update_params(). On success the ALU/TEX end markers of the current node + * and of the whole program are derived from cs->nrslots and + * fp->tex.length, fp->translated is set, and the program is dumped when + * RADEON_DEBUG has DEBUG_PIXEL. + */ +void r500TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp) +{ + struct r300_pfs_compile_state *cs = NULL; + + if (!fp->translated) { + + init_program(r300, fp); + cs = fp->cs; + + if (parse_program(fp) == GL_FALSE) { + dump_program(fp); + return; + } + + /* Finish off: alu_end is relative to the node's alu_offset */ + fp->node[fp->cur_node].alu_end = + cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + if (fp->node[fp->cur_node].tex_end < 0) + fp->node[fp->cur_node].tex_end = 0; + fp->alu_offset = 0; + fp->alu_end = cs->nrslots - 1; + fp->tex_offset = 0; + fp->tex_end = fp->tex.length ? 
fp->tex.length - 1 : 0; + assert(fp->node[fp->cur_node].alu_end >= 0); + assert(fp->alu_end >= 0); + + fp->translated = GL_TRUE; + if (RADEON_DEBUG & DEBUG_PIXEL) + dump_program(fp); + r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + } + /* runs on every call that did not bail out above, not only after a compile */ + update_params(fp); +} + +/* just some random things... */ +static void dump_program(struct r300_fragment_program *fp) +{ + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_print_program(&fp->mesa_program.Base); + fflush(stdout); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n < (fp->cur_node + 1); n++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d\n", n, + fp->node[n].alu_offset, + fp->node[n].tex_offset, + fp->node[n].alu_end, fp->node[n].tex_end); + + if (fp->tex.length) { + fprintf(stderr, " TEX:\n"); + for (i = fp->node[n].tex_offset; + i <= fp->node[n].tex_offset + fp->node[n].tex_end; + ++i) { + const char *instr; + + switch ((fp->tex. + inst[i] >> R300_FPITX_OPCODE_SHIFT) & + 15) { + case R300_FPITX_OP_TEX: + instr = "TEX"; + break; + case R300_FPITX_OP_KIL: + instr = "KIL"; + break; + case R300_FPITX_OP_TXP: + instr = "TXP"; + break; + case R300_FPITX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (fp->tex. + inst[i] >> R300_FPITX_DST_SHIFT) & 31, + (fp->tex. + inst[i] & R300_FPITX_SRC_CONST) ? 'c' : + 't', + (fp->tex. + inst[i] >> R300_FPITX_SRC_SHIFT) & 31, + (fp->tex. 
+ inst[i] & R300_FPITX_IMAGE_MASK) >> + R300_FPITX_IMAGE_SHIFT, + fp->tex.inst[i]); + } + } + + for (i = fp->node[n].alu_offset; + i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = fp->alu.inst[i].inst1 >> (j * 6); + int rega = fp->alu.inst[i].inst3 >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (fp->alu.inst[i]. + inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (fp->alu.inst[i]. + inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (fp->alu.inst[i]. + inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + } + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (fp->alu.inst[i]. 
+ inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + fp->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, fp->alu.inst[i].inst3); + + for (j = 0; j < 3; ++j) { + int regc = fp->alu.inst[i].inst0 >> (j * 7); + int rega = fp->alu.inst[i].inst2 >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_FPI0_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? 
"|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + fp->alu.inst[i].inst0, arga[0], arga[1], + arga[2], fp->alu.inst[i].inst2); + } + } +} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h new file mode 100644 index 0000000000..72fca77845 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/* + * Authors: + * Ben Skeggs + * Jerome Glisse + */ + +/* + * Declarations shared by the R500 fragment program translator + * (r500_fragprog.c): hardware opcode numbers, argument/source encoding + * bits, NOP instruction words and the translator entry points. + * + * NOTE(review): the include guard below still carries the R300 name. If + * r300_fragprog.h uses the same macro, whichever of the two headers is + * included second is skipped entirely -- confirm before renaming it, since + * both headers would then define the same struct and typedef. + */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "r300_context.h" + +typedef struct r300_fragment_program_swizzle { + GLuint length; + GLuint src[4]; + GLuint inst[8]; +} r300_fragment_program_swizzle_t; + +/* supported hw opcodes */ +#define PFS_OP_MAD 0 +#define PFS_OP_DP3 1 +#define PFS_OP_DP4 2 +#define PFS_OP_MIN 3 +#define PFS_OP_MAX 4 +#define PFS_OP_CMP 5 +#define PFS_OP_FRC 6 +#define PFS_OP_EX2 7 +#define PFS_OP_LG2 8 +#define PFS_OP_RCP 9 +#define PFS_OP_RSQ 10 +#define PFS_OP_REPL_ALPHA 11 +#define PFS_OP_CMPH 12 +#define MAX_PFS_OP 12 + +#define PFS_FLAG_SAT (1 << 0) +#define PFS_FLAG_ABS (1 << 1) + +#define ARG_NEG (1 << 5) +#define ARG_ABS (1 << 6) +#define ARG_MASK (127 << 0) +#define ARG_STRIDE 7 +#define SRC_CONST (1 << 5) +#define SRC_MASK (63 << 0) +#define SRC_STRIDE 6 + +#define NOP_INST0 ( \ + (R300_FPI0_OUTC_MAD) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)) +#define NOP_INST1 ( \ + ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT)) +#define NOP_INST2 ( \ + (R300_FPI2_OUTA_MAD) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT)) +#define NOP_INST3 ( \ + ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT)) + +#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 +#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 + +struct r300_fragment_program; + +/* Declaration carried over from the R300 header; kept so existing users of + * this header keep compiling. */ +extern void r300TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp); + +/* + * Entry point defined in r500_fragprog.c: translates fp if it has not been + * translated yet, then reloads its state parameters. + */ +extern void r500TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp); + +#endif 
-- cgit v1.2.3 From 2ffa112ed32cf8123e5177a0fe2c12130c6f78c7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 17 Mar 2008 21:09:49 +1000 Subject: some basic r500 portage --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 62 +++- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_emit.h | 12 + src/mesa/drivers/dri/r300/r300_reg.h | 477 ++++++++++++++++++++++++++- src/mesa/drivers/dri/r300/r300_state.c | 208 +++++++++++- src/mesa/drivers/dri/radeon/radeon_chipset.h | 2 + 6 files changed, 742 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3497738eac..883b41a349 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -281,10 +281,14 @@ void r300InitCmdBuf(r300ContextPtr r300) { int size, mtu; int has_tcl = 1; + int is_r500 = 0; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ mtu = r300->radeon.glCtx->Const.MaxTextureUnits; @@ -374,10 +378,17 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2); ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); - r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); - ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1); + if (is_r500) { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); + } else { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + 
r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1); + } ALLOC_STATE(sc_hyperz, always, 3, 0); r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); ALLOC_STATE(sc_screendoor, always, 2, 0); @@ -389,14 +400,25 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); ALLOC_STATE(us_out_fmt, always, 6, 0); r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); - ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); - r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); - ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); - r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); - ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); - r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); - ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); - r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + + if (is_r500) { + ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 1); + } + + if (0/*is_r500*/) { + + }/* else*/ + { + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(FG_FOG_BLEND, 1); ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); @@ -456,10 +478,18 @@ void r300InitCmdBuf(r300ContextPtr r300) 
r300->hw.vps.cmd[R300_VPS_CMD_0] = cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1); - for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + if (is_r500) { + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R500_PVS_UPLOAD_CLIP_PLANE0+i, 1); + } + } else { + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + } } } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 780d9aa5d2..ab45bf80b4 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -487,6 +487,7 @@ struct r300_hw_state { struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */ struct r300_state_atom fpt; /* texi - (4620) */ struct r300_state_atom us_out_fmt; /* (46A4) */ + struct r300_state_atom r500fp; /* r500 fp instructions */ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ struct r300_state_atom fogs; /* fog state (4BC0) */ struct r300_state_atom fogc; /* fog color (4BC8) */ diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index a6d69ec5ff..a4f6ab997e 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -74,6 +74,18 @@ static inline uint32_t cmdvpu(int addr, int count) return cmd.u; } +static inline uint32_t cmdr500fp(int addr, int count) +{ + drm_r300_cmd_header_t cmd; + + cmd.vpu.cmd_type = R300_CMD_R500FP; + cmd.vpu.count = count; + cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; + cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + static inline uint32_t cmdpacket3(int packet) { drm_r300_cmd_header_t cmd; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h 
b/src/mesa/drivers/dri/r300/r300_reg.h index 2200cec6ab..d640d8b7e7 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -657,7 +657,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* This table specifies the source location and format for up to 16 texture * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) */ -#define R500_RS_IP_0 0x4074 +#define R500_RS_IP_0 0x4074 #define R500_RS_IP_1 0x4078 #define R500_RS_IP_2 0x407C #define R500_RS_IP_3 0x4080 @@ -1151,7 +1151,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* */ -#define R500_RS_INST_0 0x4320 +#define R500_RS_INST_0 0x4320 #define R500_RS_INST_1 0x4324 #define R500_RS_INST_2 0x4328 #define R500_RS_INST_3 0x432c @@ -2598,6 +2598,479 @@ enum { #define R300_PRIM_NUM_VERTICES_SHIFT 16 #define R300_PRIM_NUM_VERTICES_MASK 0xffff + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader. You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. 
+ */ +#define R500_US_ALU_ALPHA_INST_0 0xa800 +# define R500_ALPHA_OP_MAD 0 +# define R500_ALPHA_OP_DP 1 +# define R500_ALPHA_OP_MIN 2 +# define R500_ALPHA_OP_MAX 3 +/* #define R500_ALPHA_OP_RESERVED 4 */ +# define R500_ALPHA_OP_CND 5 +# define R500_ALPHA_OP_CMP 6 +# define R500_ALPHA_OP_FRC 7 +# define R500_ALPHA_OP_EX2 8 +# define R500_ALPHA_OP_LN2 9 +# define R500_ALPHA_OP_RCP 10 +# define R500_ALPHA_OP_RSQ 11 +# define R500_ALPHA_OP_SIN 12 +# define R500_ALPHA_OP_COS 13 +# define R500_ALPHA_OP_MDH 14 +# define R500_ALPHA_OP_MDV 15 +# define R500_ALPHA_ADDRD(x) (x << 4) +# define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SRC0 (0 << 12) +# define R500_ALPHA_SEL_A_SRC1 (1 << 12) +# define R500_ALPHA_SEL_A_SRC2 (2 << 12) +# define R500_ALPHA_SEL_A_SRCP (3 << 12) +# define R500_ALPHA_SWIZ_A_R (0 << 14) +# define R500_ALPHA_SWIZ_A_G (1 << 14) +# define R500_ALPHA_SWIZ_A_B (2 << 14) +# define R500_ALPHA_SWIZ_A_A (3 << 14) +# define R500_ALPHA_SWIZ_A_0 (4 << 14) +# define R500_ALPHA_SWIZ_A_HALF (5 << 14) +# define R500_ALPHA_SWIZ_A_1 (6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ +# define R500_ALPHA_MOD_A_NOP (0 << 17) +# define R500_ALPHA_MOD_A_NEG (1 << 17) +# define R500_ALPHA_MOD_A_ABS (2 << 17) +# define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SRC0 (0 << 19) +# define R500_ALPHA_SEL_B_SRC1 (1 << 19) +# define R500_ALPHA_SEL_B_SRC2 (2 << 19) +# define R500_ALPHA_SEL_B_SRCP (3 << 19) +# define R500_ALPHA_SWIZ_B_R (0 << 21) +# define R500_ALPHA_SWIZ_B_G (1 << 21) +# define R500_ALPHA_SWIZ_B_B (2 << 21) +# define R500_ALPHA_SWIZ_B_A (3 << 21) +# define R500_ALPHA_SWIZ_B_0 (4 << 21) +# define R500_ALPHA_SWIZ_B_HALF (5 << 21) +# define R500_ALPHA_SWIZ_B_1 (6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALPHA_MOD_B_NOP (0 << 24) +# define R500_ALPHA_MOD_B_NEG (1 << 24) +# define R500_ALPHA_MOD_B_ABS (2 << 24) +# define R500_ALPHA_MOD_B_NAB (3 << 24) +# define R500_ALPHA_OMOD_IDENTITY (0 << 26) +# 
define R500_ALPHA_OMOD_MUL_2 (1 << 26) +# define R500_ALPHA_OMOD_MUL_4 (2 << 26) +# define R500_ALPHA_OMOD_MUL_8 (3 << 26) +# define R500_ALPHA_OMOD_DIV_2 (4 << 26) +# define R500_ALPHA_OMOD_DIV_4 (5 << 26) +# define R500_ALPHA_OMOD_DIV_8 (6 << 26) +# define R500_ALPHA_OMOD_DISABLE (7 << 26) +/* Function-like field macros parenthesize their argument so that an + * expression such as (a | b) is shifted as a whole; values that reach + * bit 31 are unsigned so the shift does not overflow a signed int. */ +# define R500_ALPHA_TARGET(x) ((x) << 29) +# define R500_ALPHA_W_OMASK (1u << 31) +#define R500_US_ALU_ALPHA_ADDR_0 0x9800 +# define R500_ALPHA_ADDR0(x) ((x) << 0) +# define R500_ALPHA_ADDR0_CONST (1 << 8) +# define R500_ALPHA_ADDR0_REL (1 << 9) +# define R500_ALPHA_ADDR1(x) ((x) << 10) +# define R500_ALPHA_ADDR1_CONST (1 << 18) +# define R500_ALPHA_ADDR1_REL (1 << 19) +# define R500_ALPHA_ADDR2(x) ((x) << 20) +# define R500_ALPHA_ADDR2_CONST (1 << 28) +# define R500_ALPHA_ADDR2_REL (1 << 29) +# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0u << 30) +# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1u << 30) +# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2u << 30) +# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3u << 30) +#define R500_US_ALU_RGBA_INST_0 0xb000 +# define R500_ALU_RGBA_OP_MAD (0 << 0) +# define R500_ALU_RGBA_OP_DP3 (1 << 0) +# define R500_ALU_RGBA_OP_DP4 (2 << 0) +# define R500_ALU_RGBA_OP_D2A (3 << 0) +# define R500_ALU_RGBA_OP_MIN (4 << 0) +# define R500_ALU_RGBA_OP_MAX (5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ +# define R500_ALU_RGBA_OP_CND (7 << 0) +# define R500_ALU_RGBA_OP_CMP (8 << 0) +# define R500_ALU_RGBA_OP_FRC (9 << 0) +# define R500_ALU_RGBA_OP_SOP (10 << 0) +# define R500_ALU_RGBA_OP_MDH (11 << 0) +# define R500_ALU_RGBA_OP_MDV (12 << 0) +# define R500_ALU_RGBA_ADDRD(x) ((x) << 4) +# define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) +# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) +# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) +# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) +# define R500_ALU_RGBA_R_SWIZ_G (1 << 14) +# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) +# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) +# 
define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) +# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) +# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ +# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) +# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) +# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) +# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) +# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) +# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) +# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ +# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) +# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) +# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) +# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) +# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) +# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) +# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ +# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) +# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) +# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) +# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) +# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) +# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) +# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) +# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) +# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) +# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) +# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ +# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30) +#define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) +# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) +# define R500_ALU_RGB_SEL_A_SRC2 (2 << 
0) +# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) +# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) +# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) +# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) +# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) +# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) +# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) +# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ +# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) +# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) +# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) +# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) +# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) +# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) +# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ +# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) +# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) +# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) +# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) +# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) +# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) +# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ +# define R500_ALU_RGB_MOD_A_NOP (0 << 11) +# define R500_ALU_RGB_MOD_A_NEG (1 << 11) +# define R500_ALU_RGB_MOD_A_ABS (2 << 11) +# define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) +# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) +# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) +# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) +# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) +# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) +# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) +# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) +# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) +# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) +# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ +# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) +# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) +# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) +# define R500_ALU_RGB_G_SWIZ_B_A (3 
<< 18) +# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) +# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) +# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ +# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) +# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) +# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) +# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) +# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) +# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) +# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALU_RGB_MOD_B_NOP (0 << 24) +# define R500_ALU_RGB_MOD_B_NEG (1 << 24) +# define R500_ALU_RGB_MOD_B_ABS (2 << 24) +# define R500_ALU_RGB_MOD_B_NAB (3 << 24) +# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26) +# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26) +# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26) +# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26) +# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26) +# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) +# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) +# define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +# define R500_ALU_RGB_TARGET(x) (x << 29) +# define R500_ALU_RGB_WMASK (1 << 31) +#define R500_US_ALU_RGB_ADDR_0 0x9000 +# define R500_RGB_ADDR0(x) (x << 0) +# define R500_RGB_ADDR0_CONST (1 << 8) +# define R500_RGB_ADDR0_REL (1 << 9) +# define R500_RGB_ADDR1(x) (x << 10) +# define R500_RGB_ADDR1_CONST (1 << 18) +# define R500_RGB_ADDR1_REL (1 << 19) +# define R500_RGB_ADDR2(x) (x << 20) +# define R500_RGB_ADDR2_CONST (1 << 28) +# define R500_RGB_ADDR2_REL (1 << 29) +# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) +# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) +# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) +#define R500_US_CMN_INST_0 0xb800 +# define R500_INST_TYPE_ALU (0 << 0) +# define R500_INST_TYPE_OUT (1 << 0) +# define R500_INST_TYPE_FC (2 << 0) +# define R500_INST_TYPE_TEX (3 << 0) +# define R500_INST_TEX_SEM_WAIT (1 << 2) +# 
define R500_INST_RGB_PRED_SEL_NONE (0 << 3) +# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) +# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) +# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) +# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) +# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) +# define R500_INST_RGB_PRED_INV (1 << 6) +# define R500_INST_WRITE_INACTIVE (1 << 7) +# define R500_INST_LAST (1 << 8) +# define R500_INST_NOP (1 << 9) +# define R500_INST_ALU_WAIT (1 << 10) +# define R500_INST_RGB_WMASK_R (1 << 11) +# define R500_INST_RGB_WMASK_G (1 << 12) +# define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_ALPHA_WMASK (1 << 14) +# define R500_INST_RGB_OMASK_R (1 << 15) +# define R500_INST_RGB_OMASK_G (1 << 16) +# define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_ALPHA_OMASK (1 << 18) +# define R500_INST_RGB_CLAMP (1 << 19) +# define R500_INST_ALPHA_CLAMP (1 << 20) +# define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALPHA_PRED_INV (1 << 22) +# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) +# define R500_INST_ALU_RESULT_OP_LT (1 << 23) +# define R500_INST_ALU_RESULT_OP_GE (2 << 23) +# define R500_INST_ALU_RESULT_OP_NE (3 << 23) +# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) +# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) +# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) +# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) +# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) +# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) +/* XXX next four are kind of guessed */ +# define R500_INST_STAT_WE_R (1 << 28) +# define R500_INST_STAT_WE_G (1 << 29) +# define R500_INST_STAT_WE_B (1 << 30) +# define R500_INST_STAT_WE_A (1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR 0x4630 +# define R500_US_CODE_START_ADDR(x) (x << 0) +# define R500_US_CODE_END_ADDR(x) (x << 16) +#define R500_US_CODE_OFFSET 0x4638 +# define R500_US_CODE_OFFSET_ADDR(x) (x << 0) +#define R500_US_CODE_RANGE 0x4634 +# 
define R500_US_CODE_RANGE_ADDR(x) (x << 0) +# define R500_US_CODE_RANGE_SIZE(x) (x << 16) +#define R500_US_CONFIG 0x4600 +# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) +#define R500_US_FC_ADDR_0 0xa000 +# define R500_FC_BOOL_ADDR(x) (x << 0) +# define R500_FC_INT_ADDR(x) (x << 8) +# define R500_FC_JUMP_ADDR(x) (x << 16) +# define R500_FC_JUMP_GLOBAL (1 << 31) +#define R500_US_FC_BOOL_CONST 0x4620 +# define R500_FC_KBOOL(x) (x) +#define R500_US_FC_CTRL 0x4624 +# define R500_FC_TEST_EN (1 << 30) +# define R500_FC_FULL_FC_EN (1 << 31) +#define R500_US_FC_INST_0 0x9800 +# define R500_FC_OP_JUMP (0 << 0) +# define R500_FC_OP_LOOP (1 << 0) +# define R500_FC_OP_ENDLOOP (2 << 0) +# define R500_FC_OP_REP (3 << 0) +# define R500_FC_OP_ENDREP (4 << 0) +# define R500_FC_OP_BREAKLOOP (5 << 0) +# define R500_FC_OP_BREAKREP (6 << 0) +# define R500_FC_OP_CONTINUE (7 << 0) +# define R500_FC_B_ELSE (1 << 4) +# define R500_FC_JUMP_ANY (1 << 5) +# define R500_FC_A_OP_NONE (0 << 6) +# define R500_FC_A_OP_POP (1 << 6) +# define R500_FC_A_OP_PUSH (2 << 6) +# define R500_FC_JUMP_FUNC(x) (x << 8) +# define R500_FC_B_POP_CNT(x) (x << 16) +# define R500_FC_B_OP0_NONE (0 << 24) +# define R500_FC_B_OP0_DECR (1 << 24) +# define R500_FC_B_OP0_INCR (2 << 24) +# define R500_FC_B_OP1_DECR (0 << 26) +# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_INCR (2 << 26) +# define R500_FC_IGNORE_UNCOVERED (1 << 28) +#define R500_US_FC_INT_CONST_0 0x4c00 +# define R500_FC_INT_CONST_KR(x) (x << 0) +# define R500_FC_INT_CONST_KG(x) (x << 8) +# define R500_FC_INT_CONST_KB(x) (x << 16) +/* _0 through _15 */ +#define R500_US_FORMAT0_0 0x4640 +# define R500_FORMAT_TXWIDTH(x) (x << 0) +# define R500_FORMAT_TXHEIGHT(x) (x << 11) +# define R500_FORMAT_TXDEPTH(x) (x << 22) +/* _0 through _3 */ +#define R500_US_OUT_FMT_0 0x46a4 +# define R500_OUT_FMT_C4_8 (0 << 0) +# define R500_OUT_FMT_C4_10 (1 << 0) +# define R500_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R500_OUT_FMT_C_16 (3 << 0) +# define 
R500_OUT_FMT_C2_16 (4 << 0) +# define R500_OUT_FMT_C4_16 (5 << 0) +# define R500_OUT_FMT_C_16_MPEG (6 << 0) +# define R500_OUT_FMT_C2_16_MPEG (7 << 0) +# define R500_OUT_FMT_C2_4 (8 << 0) +# define R500_OUT_FMT_C_3_3_2 (9 << 0) +# define R500_OUT_FMT_C_6_5_6 (10 << 0) +# define R500_OUT_FMT_C_11_11_10 (11 << 0) +# define R500_OUT_FMT_C_10_11_11 (12 << 0) +# define R500_OUT_FMT_C_2_10_10_10 (13 << 0) +/* #define R500_OUT_FMT_RESERVED (14 << 0) */ +# define R500_OUT_FMT_UNUSED (15 << 0) +# define R500_OUT_FMT_C_16_FP (16 << 0) +# define R500_OUT_FMT_C2_16_FP (17 << 0) +# define R500_OUT_FMT_C4_16_FP (18 << 0) +# define R500_OUT_FMT_C_32_FP (19 << 0) +# define R500_OUT_FMT_C2_32_FP (20 << 0) +# define R500_OUT_FMT_C4_32_FP (21 << 0) +# define R500_C0_SEL_A (0 << 8) +# define R500_C0_SEL_R (1 << 8) +# define R500_C0_SEL_G (2 << 8) +# define R500_C0_SEL_B (3 << 8) +# define R500_C1_SEL_A (0 << 10) +# define R500_C1_SEL_R (1 << 10) +# define R500_C1_SEL_G (2 << 10) +# define R500_C1_SEL_B (3 << 10) +# define R500_C2_SEL_A (0 << 12) +# define R500_C2_SEL_R (1 << 12) +# define R500_C2_SEL_G (2 << 12) +# define R500_C2_SEL_B (3 << 12) +# define R500_C3_SEL_A (0 << 14) +# define R500_C3_SEL_R (1 << 14) +# define R500_C3_SEL_G (2 << 14) +# define R500_C3_SEL_B (3 << 14) +# define R500_OUT_SIGN(x) (x << 16) +# define R500_ROUND_ADJ (1 << 20) +#define R500_US_PIXSIZE 0x4604 +# define R500_PIX_SIZE(x) (x) +#define R500_US_TEX_ADDR_0 0x9800 +# define R500_TEX_SRC_ADDR(x) (x << 0) +# define R500_TEX_SRC_ADDR_REL (1 << 7) +# define R500_TEX_SRC_S_SWIZ_R (0 << 8) +# define R500_TEX_SRC_S_SWIZ_G (1 << 8) +# define R500_TEX_SRC_S_SWIZ_B (2 << 8) +# define R500_TEX_SRC_S_SWIZ_A (3 << 8) +# define R500_TEX_SRC_T_SWIZ_R (0 << 10) +# define R500_TEX_SRC_T_SWIZ_G (1 << 10) +# define R500_TEX_SRC_T_SWIZ_B (2 << 10) +# define R500_TEX_SRC_T_SWIZ_A (3 << 10) +# define R500_TEX_SRC_R_SWIZ_R (0 << 12) +# define R500_TEX_SRC_R_SWIZ_G (1 << 12) +# define R500_TEX_SRC_R_SWIZ_B (2 << 12) +# define 
R500_TEX_SRC_R_SWIZ_A (3 << 12) +# define R500_TEX_SRC_Q_SWIZ_R (0 << 14) +# define R500_TEX_SRC_Q_SWIZ_G (1 << 14) +# define R500_TEX_SRC_Q_SWIZ_B (2 << 14) +# define R500_TEX_SRC_Q_SWIZ_A (3 << 14) +# define R500_TEX_DST_ADDR(x) (x << 16) +# define R500_TEX_DST_ADDR_REL (1 << 23) +# define R500_TEX_DST_R_SWIZ_R (0 << 24) +# define R500_TEX_DST_R_SWIZ_G (1 << 24) +# define R500_TEX_DST_R_SWIZ_B (2 << 24) +# define R500_TEX_DST_R_SWIZ_A (3 << 24) +# define R500_TEX_DST_G_SWIZ_R (0 << 26) +# define R500_TEX_DST_G_SWIZ_G (1 << 26) +# define R500_TEX_DST_G_SWIZ_B (2 << 26) +# define R500_TEX_DST_G_SWIZ_A (3 << 26) +# define R500_TEX_DST_B_SWIZ_R (0 << 28) +# define R500_TEX_DST_B_SWIZ_G (1 << 28) +# define R500_TEX_DST_B_SWIZ_B (2 << 28) +# define R500_TEX_DST_B_SWIZ_A (3 << 28) +# define R500_TEX_DST_A_SWIZ_R (0 << 30) +# define R500_TEX_DST_A_SWIZ_G (1 << 30) +# define R500_TEX_DST_A_SWIZ_B (2 << 30) +# define R500_TEX_DST_A_SWIZ_A (3 << 30) +#define R500_US_TEX_ADDR_DXDY_0 0xa000 +# define R500_DX_ADDR(x) (x << 0) +# define R500_DX_ADDR_REL (1 << 7) +# define R500_DX_S_SWIZ_R (0 << 8) +# define R500_DX_S_SWIZ_G (1 << 8) +# define R500_DX_S_SWIZ_B (2 << 8) +# define R500_DX_S_SWIZ_A (3 << 8) +# define R500_DX_T_SWIZ_R (0 << 10) +# define R500_DX_T_SWIZ_G (1 << 10) +# define R500_DX_T_SWIZ_B (2 << 10) +# define R500_DX_T_SWIZ_A (3 << 10) +# define R500_DX_R_SWIZ_R (0 << 12) +# define R500_DX_R_SWIZ_G (1 << 12) +# define R500_DX_R_SWIZ_B (2 << 12) +# define R500_DX_R_SWIZ_A (3 << 12) +# define R500_DX_Q_SWIZ_R (0 << 14) +# define R500_DX_Q_SWIZ_G (1 << 14) +# define R500_DX_Q_SWIZ_B (2 << 14) +# define R500_DX_Q_SWIZ_A (3 << 14) +# define R500_DY_ADDR(x) (x << 16) +# define R500_DY_ADDR_REL (1 << 17) +# define R500_DY_S_SWIZ_R (0 << 24) +# define R500_DY_S_SWIZ_G (1 << 24) +# define R500_DY_S_SWIZ_B (2 << 24) +# define R500_DY_S_SWIZ_A (3 << 24) +# define R500_DY_T_SWIZ_R (0 << 26) +# define R500_DY_T_SWIZ_G (1 << 26) +# define R500_DY_T_SWIZ_B (2 << 26) +# define 
R500_DY_T_SWIZ_A (3 << 26) +# define R500_DY_R_SWIZ_R (0 << 28) +# define R500_DY_R_SWIZ_G (1 << 28) +# define R500_DY_R_SWIZ_B (2 << 28) +# define R500_DY_R_SWIZ_A (3 << 28) +# define R500_DY_Q_SWIZ_R (0 << 30) +# define R500_DY_Q_SWIZ_G (1 << 30) +# define R500_DY_Q_SWIZ_B (2 << 30) +# define R500_DY_Q_SWIZ_A (3 << 30) +#define R500_US_TEX_INST_0 0x9000 +# define R500_TEX_ID(x) (x << 16) +# define R500_TEX_INST_NOP (0 << 22) +# define R500_TEX_INST_LD (1 << 22) +# define R500_TEX_INST_TEXKILL (2 << 22) +# define R500_TEX_INST_PROJ (3 << 22) +# define R500_TEX_INST_LODBIAS (4 << 22) +# define R500_TEX_INST_LOD (5 << 22) +# define R500_TEX_INST_DXDY (6 << 22) +# define R500_TEX_SEM_ACQUIRE (1 << 25) +# define R500_TEX_IGNORE_UNCOVERED (1 << 26) +# define R500_TEX_UNSCALED (1 << 27) +#define R500_US_W_FMT 0x46b4 +# define R500_W_FMT_W0 (0 << 0) +# define R500_W_FMT_W24 (1 << 0) +# define R500_W_FMT_W24FP (2 << 0) +# define R500_W_SRC_US (0 << 2) +# define R500_W_SRC_RAS (1 << 2) + + /* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. * Two parameter dwords: * 0. 
VAP_VTX_FMT: The first parameter is not written to hardware diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e11b5afc30..d2fd04a550 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1603,6 +1603,128 @@ static void r300SetupRSUnit(GLcontext * ctx) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); } +static void r500SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + /* I'm still unsure if these are needed */ + GLuint interp_magic[8] = { + 0x00, + R300_RS_COL_PTR(1), + R300_RS_COL_PTR(2), + R300_RS_COL_PTR(3), + 0x00, + 0x00, + 0x00, + 0x00 + }; + union r300_outputs_written OutputsWritten; + GLuint InputsRead; + int fp_reg, high_rr; + int in_texcoords, col_interp_nr; + int i; + + if (hw_tcl_on) + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + else { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; /* This should only ever happen once.. 
*/ + } + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + + r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + + if (InputsRead & FRAG_BIT_WPOS) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(InputsRead & (FRAG_BIT_TEX0 << i))) + break; + + if (i == ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tno free texcoord found...\n"); + _mesa_exit(-1); + } + + InputsRead |= (FRAG_BIT_TEX0 << i); + InputsRead &= ~FRAG_BIT_WPOS; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) + | interp_magic[i]; + + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + //assert(r300->state.texture.tc_count != 0); + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */ + | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); + high_rr = fp_reg; + + /* Passing invalid data here can lock the GPU. 
*/ + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + InputsRead &= ~(FRAG_BIT_TEX0 << i); + fp_reg++; + } else { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + } + } + /* Need to count all coords enabled at vof */ + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + in_texcoords++; + } + } + + if (InputsRead & FRAG_BIT_COL0) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL0; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL1; + if (high_rr < 1) + high_rr = 1; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } + } + + /* Need at least one. This might still lock as the values are undefined... 
*/ + if (in_texcoords == 0 && col_interp_nr == 0) { + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + col_interp_nr++; + } + + r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT) + | (col_interp_nr << R300_IC_COUNT_SHIFT) + | R300_HIRES_EN; + + assert(high_rr >= 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); + r300->hw.rc.cmd[2] = 0xC0 | high_rr; + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} + + + + #define bump_vpu_count(ptr, new_count) do{\ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ int _nc=(new_count)/4; \ @@ -2163,6 +2285,81 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } } +static void r500SetupPixelShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + int i, k; + + if (!fp) /* should only happen once, just after context is created */ + return; + + /* emit the standard zero shader */ + R300_STATECHANGE(rmesa, r500fp); + i = 1; + rmesa->hw.r500fp.cmd[i++] = 0x7807; + rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A; + rmesa->hw.r500fp.cmd[i++] = R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R; + rmesa->hw.r500fp.cmd[i++] = 0x0; + rmesa->hw.r500fp.cmd[i++] = 0x0; + + rmesa->hw.r500fp.cmd[i++] = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | +
R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK; + + rmesa->hw.r500fp.cmd[i++] = R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0; + rmesa->hw.r500fp.cmd[i++] = R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0; + rmesa->hw.r500fp.cmd[i++] = R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1; + rmesa->hw.r500fp.cmd[i++] = R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1; + rmesa->hw.r500fp.cmd[i++] = R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + +} + void r300UpdateShaderStates(r300ContextPtr rmesa) { GLcontext *ctx; @@ -2170,12 +2367,19 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r300UpdateTextureState(ctx); - r300SetupPixelShader(rmesa); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupPixelShader(rmesa); + else + r300SetupPixelShader(rmesa); r300SetupTextures(ctx); if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) r300SetupVertexProgram(rmesa); - r300SetupRSUnit(ctx); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupRSUnit(ctx); + else + r300SetupRSUnit(ctx); } /** diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 6ad441bdd0..9e375474a0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -169,6 +169,8 @@ enum { CHIP_FAMILY_RV410, CHIP_FAMILY_RS400, CHIP_FAMILY_RS690, + CHIP_FAMILY_RV515, + CHIP_FAMILY_R520, CHIP_FAMILY_LAST }; -- cgit v1.2.3 From 
55418dc87d132875feb50c2bd9531b5f5ed13334 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 19 Mar 2008 16:29:11 +1000 Subject: more r500 vs r300 kickin --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 20 ++++---- src/mesa/drivers/dri/r300/r300_context.h | 2 + src/mesa/drivers/dri/r300/r300_ioctl.c | 79 ++++++++++++++++++-------------- 3 files changed, 56 insertions(+), 45 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 883b41a349..a92bb87d7d 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -379,7 +379,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); if (is_r500) { - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); @@ -393,23 +393,23 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); ALLOC_STATE(sc_screendoor, always, 2, 0); r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1); - ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); - r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); - r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); - ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); - r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); ALLOC_STATE(us_out_fmt, always, 6, 0); r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); - r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 1); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); } - if (0/*is_r500*/) { + if (is_r500) { + + } else { + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 
0); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); - }/* else*/ - { ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index ab45bf80b4..012c0fe6a5 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -330,6 +330,8 @@ struct r300_state_atom { #define R300_RI_INTERP_7 8 #define R300_RI_CMDSIZE 9 +#define R500_RI_CMDSIZE 17 + #define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */ #define R300_RR_INST_0 1 #define R300_RR_INST_1 2 diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 1b405889c3..07656b130c 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -186,10 +186,15 @@ static void r300EmitClearState(GLcontext * ctx) int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; int has_tcl = 1; + int is_r500 = 0; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + + /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are * quite complex; see the functions in r300_emit.c. 
@@ -271,49 +276,53 @@ static void r300EmitClearState(GLcontext * ctx) e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); - R300_STATECHANGE(r300, ri); - reg_start(R300_RS_IP_0, 8); - for (i = 0; i < 8; ++i) { - e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); - } + if (!is_r500) { + R300_STATECHANGE(r300, ri); + reg_start(R300_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { + e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. */ - reg_start(R300_RS_COUNT, 1); - e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - e32(0x0); + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); - R300_STATECHANGE(r300, rr); - reg_start(R300_RS_INST_0, 0); - e32(R300_RS_INST_COL_CN_WRITE); + R300_STATECHANGE(r300, rr); + reg_start(R300_RS_ROUTE_0, 0); + e32(R300_RS_ROUTE_0_COLOR); + } - R300_STATECHANGE(r300, fp); - reg_start(R300_PFS_CNTL_0, 2); - e32(0x0); - e32(0x0); - e32(0x0); - reg_start(R300_PFS_NODE_0, 3); - e32(0x0); - e32(0x0); - e32(0x0); - e32(R300_PFS_NODE_OUTPUT_COLOR); + if (!is_r500) { + R300_STATECHANGE(r300, fp); + reg_start(R300_PFS_CNTL_0, 2); + e32(0x0); + e32(0x0); + e32(0x0); + reg_start(R300_PFS_NODE_0, 3); + e32(0x0); + e32(0x0); + e32(0x0); + e32(R300_PFS_NODE_OUTPUT_COLOR); - R300_STATECHANGE(r300, fpi[0]); - R300_STATECHANGE(r300, fpi[1]); - R300_STATECHANGE(r300, fpi[2]); - R300_STATECHANGE(r300, fpi[3]); + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); - reg_start(R300_PFS_INSTR0_0, 0); - e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + reg_start(R300_PFS_INSTR0_0, 0); + e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - 
reg_start(R300_PFS_INSTR1_0, 0); - e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + reg_start(R300_PFS_INSTR1_0, 0); + e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - reg_start(R300_PFS_INSTR2_0, 0); - e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + reg_start(R300_PFS_INSTR2_0, 0); + e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - reg_start(R300_PFS_INSTR3_0, 0); - e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + reg_start(R300_PFS_INSTR3_0, 0); + e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + } if (has_tcl) { R300_STATECHANGE(r300, pvs); -- cgit v1.2.3 From a87914993d2d4a5ed32adfe16e2a2ac006d997c0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 13:55:56 +1000 Subject: r500 RS unit setup --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 ---- src/mesa/drivers/dri/r300/r300_reg.h | 22 +++++++++++----------- src/mesa/drivers/dri/r300/r300_state.c | 28 ++++++++++++++++++---------- 3 files changed, 29 insertions(+), 25 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index a92bb87d7d..3cfb7cf2cd 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -399,10 +399,6 @@ void r300InitCmdBuf(r300ContextPtr r300) if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); - } - - if (is_r500) { - } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index d640d8b7e7..2822b1d4c3 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -673,12 +673,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R500_RS_IP_13 0x40A8 #define R500_RS_IP_14 0x40AC #define R500_RS_IP_15 0x40B0 -#define R500_RS_IP_TEX_PTR_S_SHIFT 0 -#define R500_RS_IP_TEX_PTR_T_SHIFT 6 -#define R500_RS_IP_TEX_PTR_R_SHIFT 12 -#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 -#define R500_RS_IP_COL_PTR_SHIFT 24 -#define R500_RS_IP_COL_FMT_SHIFT 27 +#define R500_RS_IP_TEX_PTR_S_SHIFT 0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 +#define R500_RS_IP_COL_PTR_SHIFT 24 +#define R500_RS_IP_COL_FMT_SHIFT 27 #define R500_RS_IP_COL_FMT_RGBA (0 << 27) #define R500_RS_IP_COL_FMT_RGB0 (1 << 27) #define R500_RS_IP_COL_FMT_RGB1 (2 << 27) @@ -692,7 +692,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_COL_FMT_1111 (10 << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) -#define R500_RS_IP_OFFSET_EN (1 << 31) +#define R500_RS_IP_OFFSET_EN (1 << 31) /* gap */ @@ -1138,10 +1138,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_COL_FMT_111A 8 # define R300_RS_COL_FMT_1110 9 # define R300_RS_COL_FMT_1111 10 -# define R300_RS_SEL_S(x) (x << 13) -# define R300_RS_SEL_T(x) (x << 16) -# define R300_RS_SEL_R(x) (x << 19) -# define R300_RS_SEL_Q(x) (x << 22) +# define R300_RS_SEL_S(x) (x << 13) +# define R300_RS_SEL_T(x) (x << 16) +# define R300_RS_SEL_R(x) (x << 19) +# define R300_RS_SEL_Q(x) (x << 22) # define R300_RS_SEL_C0 0 # define R300_RS_SEL_C1 1 # define R300_RS_SEL_C2 2 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index d2fd04a550..04ee59da63 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1609,9 +1609,9 @@ static void r500SetupRSUnit(GLcontext * ctx) /* I'm still unsure if these are needed */ GLuint interp_magic[8] = { 0x00, - R300_RS_COL_PTR(1), - R300_RS_COL_PTR(2), - R300_RS_COL_PTR(3), + 1 << 24, + 2 << 24, + 3 << 24, 0x00, 0x00, 0x00, @@ -1658,14 +1658,20 @@ static void r500SetupRSUnit(GLcontext * ctx) } for 
(i = 0; i < ctx->Const.MaxTextureUnits; i++) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - | interp_magic[i]; + + // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_TEX_PTR_S_SHIFT) | + (1 << R500_TEX_PTR_T_SHIFT) | + (2 << R500_TEX_PTR_R_SHIFT) | + (3 << R500_TEX_PTR_Q_SHIFT) | + (in_texcoords << 0) | interp_magic[i]; r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */ - | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); high_rr = fp_reg; /* Passing invalid data here can lock the GPU. 
*/ @@ -1684,7 +1690,8 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1694,7 +1701,8 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITER300_RS_ROUTE_1_UNKNOWN11 | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; @@ -1706,7 +1714,7 @@ static void r500SetupRSUnit(GLcontext * ctx) /* Need at least one. This might still lock as the values are undefined... 
*/ if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); col_interp_nr++; } -- cgit v1.2.3 From 9d9f66cc8d57dc16bb94c092b3821b56afce6cab Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:21:10 +1000 Subject: mesa: cleanup state emission and rs for r500 trivial clear app now renders --- src/mesa/drivers/dri/r300/r300_emit.h | 13 +++++ src/mesa/drivers/dri/r300/r300_ioctl.c | 86 ++++++++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_state.c | 10 ++-- 3 files changed, 104 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index a4f6ab997e..50e7e4f149 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -178,6 +178,19 @@ static inline uint32_t cmdpacify(void) cmd[0].i = cmdvpu((dest), _n/4); \ } while (0); +#define r500fp_start_fragment(dest, length) \ + do { \ + int _n; \ + _n = (length); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+1), \ + __FUNCTION__); \ + cmd_reserved = _n+1; \ + cmd_written =1; \ + cmd[0].i = cmdr500fp((dest), _n/6); \ + } while (0); + #define start_packet3(packet, count) \ { \ int _n; \ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 07656b130c..14258324bd 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -292,6 +292,26 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, rr); reg_start(R300_RS_ROUTE_0, 0); e32(R300_RS_ROUTE_0_COLOR); + } else { + + R300_STATECHANGE(r300, ri); + reg_start(R500_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { + e32((1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + 
(3 << R500_RS_IP_TEX_PTR_Q_SHIFT) ); + } + + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); + + R300_STATECHANGE(r300, rr); + reg_start(R500_RS_INST_0, 0); + e32(R500_RS_INST_COL_CN_WRITE); + } if (!is_r500) { @@ -322,6 +342,72 @@ static void r300EmitClearState(GLcontext * ctx) reg_start(R300_PFS_INSTR3_0, 0); e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + } else { + R300_STATECHANGE(r300, r500fp); + r500fp_start_fragment(0, 12); + + e32(0x7808); + e32(R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED); + e32(R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A); + e32(R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R); + e32(0x0); + e32(0x0); + + e32(R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK); + + e32(R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0); + + e32(R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0); + + e32(R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1); + + e32(R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1); + + e32(R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | 
+ R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0); } if (has_tcl) { diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 04ee59da63..f4ee307adb 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1661,10 +1661,10 @@ static void r500SetupRSUnit(GLcontext * ctx) // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_TEX_PTR_S_SHIFT) | - (1 << R500_TEX_PTR_T_SHIFT) | - (2 << R500_TEX_PTR_R_SHIFT) | - (3 << R500_TEX_PTR_Q_SHIFT) | + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | (in_texcoords << 0) | interp_magic[i]; r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; @@ -1702,7 +1702,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITER300_RS_ROUTE_1_UNKNOWN11 | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; -- cgit v1.2.3 From 99e75135ee5437e47bb64983dbb2deaef131f2d6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:30:59 +1000 Subject: r300: this code really shouldn't be here. For R500 just ignore it for now while I do something interesting like run glxgears. 
--- src/mesa/drivers/dri/r300/r300_state.c | 55 ++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 26 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index f4ee307adb..ffcc311c05 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1436,40 +1436,43 @@ static void r300SetupTextures(GLcontext * ctx) if (!fp) /* should only happenen once, just after context is created */ return; - R300_STATECHANGE(r300, fpt); - for (i = 0; i < fp->tex.length; i++) { - int unit; - int opcode; - unsigned long val; + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + R300_STATECHANGE(r300, fpt); - unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; - unit &= 15; - - val = fp->tex.inst[i]; - val &= ~R300_FPITX_IMAGE_MASK; - - opcode = - (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; - if (opcode == R300_FPITX_OP_KIL) { - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - if (tmu_mappings[unit] >= 0) { - val |= - tmu_mappings[unit] << - R300_FPITX_IMAGE_SHIFT; + for (i = 0; i < fp->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; + unit &= 15; + + val = fp->tex.inst[i]; + val &= ~R300_FPITX_IMAGE_MASK; + + opcode = + (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; + if (opcode == R300_FPITX_OP_KIL) { r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; } else { - // We get here when the corresponding texture image is incomplete - // (e.g. incomplete mipmaps etc.) - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_FPITX_IMAGE_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) 
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } } } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); } - r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); - if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu); -- cgit v1.2.3 From e46c3d7bcf000803e2a7d7339fe36db4fb97cf62 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:34:04 +1000 Subject: r300: add rv530 pci id for the t60p laptop --- src/mesa/drivers/dri/radeon/radeon_chipset.h | 3 +++ src/mesa/drivers/dri/radeon/radeon_screen.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 9e375474a0..dc1b8a9c8e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -146,6 +146,9 @@ #define PCI_CHIP_RV410_5E4C 0x5E4C #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F + +#define PCI_CHIP_RV530_71C4 0x71C4 + #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 #define PCI_CHIP_RS690_791E 0x791E diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 6107577e40..1a1666ccfe 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -686,6 +686,11 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) fprintf(stderr, "Warning, RS690 detected, 3D support is incomplete.\n"); break; + case PCI_CHIP_RV530_71C4: + screen->chip_family = CHIP_FAMILY_R520; + fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", dri_priv->deviceID); -- cgit v1.2.3 From 14c3bdb3f7de153d93eda13980275d2840d62016 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 21 Mar 2008 17:05:29 +1000 
Subject: r500: setup fragment program constant emission atom --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 +++- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_emit.h | 14 ++++++++------ 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3cfb7cf2cd..248de7e34a 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -398,7 +398,9 @@ void r300InitCmdBuf(r300ContextPtr r300) if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); - r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); + ALLOC_STATE(r500fp_const, variable, R300_FPI_CMDSIZE, 0); + r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 012c0fe6a5..45dafd6bcc 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -490,6 +490,7 @@ struct r300_hw_state { struct r300_state_atom fpt; /* texi - (4620) */ struct r300_state_atom us_out_fmt; /* (46A4) */ struct r300_state_atom r500fp; /* r500 fp instructions */ + struct r300_state_atom r500fp_const; /* r500 fp constants */ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ struct r300_state_atom fogs; /* fog state (4BC0) */ struct r300_state_atom fogc; /* fog color (4BC8) */ diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 50e7e4f149..51302301f7 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -74,14 +74,16 @@ static inline uint32_t cmdvpu(int addr, int count) return cmd.u; } -static inline 
uint32_t cmdr500fp(int addr, int count) +static inline uint32_t cmdr500fp(int addr, int count, int type, int clamp) { drm_r300_cmd_header_t cmd; - cmd.vpu.cmd_type = R300_CMD_R500FP; - cmd.vpu.count = count; - cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; - cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + cmd.r500fp.cmd_type = R300_CMD_R500FP; + cmd.r500fp.count = count; + cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; + cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0; + cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0; + cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF); return cmd.u; } @@ -188,7 +190,7 @@ static inline uint32_t cmdpacify(void) __FUNCTION__); \ cmd_reserved = _n+1; \ cmd_written =1; \ - cmd[0].i = cmdr500fp((dest), _n/6); \ + cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \ } while (0); #define start_packet3(packet, count) \ -- cgit v1.2.3 From 70335540c68a35121979ac63d976fb5edc1d68ca Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 10:55:32 +1000 Subject: r500: fixup support for emitting fragment program to hardware. 
Also fixup the constant emission this breaks glxgears from what I can see but its another step to correctness --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 23 +++++++++++++++++++---- src/mesa/drivers/dri/r300/r300_state.c | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 248de7e34a..d965a95c0f 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -242,6 +242,7 @@ void r300EmitState(r300ContextPtr r300) #define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) { @@ -262,6 +263,20 @@ static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom) return cnt ? (cnt * 4) + 1 : 0; } +static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 6) + 1 : 0; +} + +static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? 
(cnt * 4) + 1 : 0; +} + #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ do { \ r300->hw.ATOM.cmd_size = (SZ); \ @@ -397,9 +412,9 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { - ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); + ALLOC_STATE(r500fp, r500fp, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); - ALLOC_STATE(r500fp_const, variable, R300_FPI_CMDSIZE, 0); + ALLOC_STATE(r500fp_const, r500fp_const, R300_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); @@ -416,6 +431,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(FG_FOG_BLEND, 1); @@ -425,8 +442,6 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(FG_ALPHA_FUNC, 2); ALLOC_STATE(fg_depth_src, always, 2, 0); r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1); - ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); - r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); ALLOC_STATE(rb3d_cctl, always, 2, 0); r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1); ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ffcc311c05..eab39ec4b0 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2296,6 +2296,20 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } } +#define bump_r500fp_count(ptr, new_count) do{\ 
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/6; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +#define bump_r500fp_const_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + static void r500SetupPixelShader(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -2368,6 +2382,16 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; + bump_r500fp_count(rmesa->hw.r500fp.cmd, 12); + + R300_STATECHANGE(rmesa, r500fp_const); + for (i = 0; i < fp->const_nr; i++) { + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + } + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); } -- cgit v1.2.3 From 8e33a83b6820af84862c45c30829a8ef52578743 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 10:56:48 +1000 Subject: r500: enable gb tiling for r5xx --- src/mesa/drivers/dri/r300/r300_state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index eab39ec4b0..ac36b3b43c 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2046,6 +2046,8 @@ static void r300ResetHwState(r300ContextPtr r300) R300_GB_TILE_PIPE_COUNT_R300; break; case CHIP_FAMILY_R420: + case CHIP_FAMILY_RV515: + case CHIP_FAMILY_R520: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= R300_GB_TILE_PIPE_COUNT_R420; 
break; -- cgit v1.2.3 From a3996ba2d1b43795c289d3e59e561e4fc84d9b7e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 11:00:25 +1000 Subject: r500: fixup fake shader to keep gears going --- src/mesa/drivers/dri/r300/r300_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ac36b3b43c..6dc76aed87 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2325,7 +2325,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) /* emit the standard zero shader */ R300_STATECHANGE(rmesa, r500fp); i = 1; - rmesa->hw.r500fp.cmd[i++] = 0x7807; + rmesa->hw.r500fp.cmd[i++] = 0x7808; rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | -- cgit v1.2.3 From d06e61aa80fcf6d9681d5112f0625b1602975aed Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Apr 2008 20:42:15 +1000 Subject: fixup r500 bits for renaming --- src/mesa/drivers/dri/r300/r300_state.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 6dc76aed87..8f12266a5f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1644,7 +1644,7 @@ static void r500SetupRSUnit(GLcontext * ctx) fp_reg = in_texcoords = col_interp_nr = high_rr = 0; - r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + r300->hw.rr.cmd[R300_RR_INST_1] = 0; if (InputsRead & FRAG_BIT_WPOS) { for (i = 0; i < ctx->Const.MaxTextureUnits; i++) @@ -1670,10 +1670,10 @@ static void r500SetupRSUnit(GLcontext * ctx) (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | (in_texcoords << 0) | interp_magic[i]; - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; + 
r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); high_rr = fp_reg; @@ -1694,7 +1694,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1705,7 +1705,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; @@ -1717,7 +1717,7 @@ static void r500SetupRSUnit(GLcontext * ctx) /* Need at least one. This might still lock as the values are undefined... 
*/ if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); col_interp_nr++; } @@ -1726,7 +1726,7 @@ static void r500SetupRSUnit(GLcontext * ctx) | R300_HIRES_EN; assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); r300->hw.rc.cmd[2] = 0xC0 | high_rr; if (InputsRead) -- cgit v1.2.3 From c02d1863d1bfa87c8c4fdd0c36f90245613d5bbd Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 29 Apr 2008 13:03:32 -0700 Subject: Add chip id 71D5 (RV530 M66) to radeon_chipset.h --- src/mesa/drivers/dri/radeon/radeon_chipset.h | 3 ++- src/mesa/drivers/dri/radeon/radeon_screen.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index dc1b8a9c8e..5ea8cff1bf 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -147,7 +147,8 @@ #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F -#define PCI_CHIP_RV530_71C4 0x71C4 +#define PCI_CHIP_RV530_71C4 0x71C4 +#define PCI_CHIP_RV530_71D5 0x71D5 #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 1a1666ccfe..661ffd3a3d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -691,6 +691,11 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; + case PCI_CHIP_RV530_71D5: + screen->chip_family = CHIP_FAMILY_R520; + fprintf(stderr, "Warning, RV530 detected, 
all your base belong to us\n"); + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", dri_priv->deviceID); -- cgit v1.2.3 From 6e96ea535a8fe4d2487fed27c06feaeef449470d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 29 Apr 2008 13:04:39 -0700 Subject: Initial r5xx fragment program compiler support. Includes fallback shader and a handful of working opcodes. --- src/mesa/drivers/dri/r300/r300_context.h | 48 + src/mesa/drivers/dri/r300/r300_fragprog.c | 1 + src/mesa/drivers/dri/r300/r300_render.c | 25 +- src/mesa/drivers/dri/r300/r300_state.c | 24 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 2444 +++-------------------------- src/mesa/drivers/dri/r300/r500_fragprog.h | 6 +- 6 files changed, 326 insertions(+), 2222 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 45dafd6bcc..bb5f5c35f0 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -774,6 +774,54 @@ struct r300_fragment_program { GLuint optimization; }; +struct r500_fragment_program { + struct gl_fragment_program mesa_program; + + GLcontext *ctx; + GLboolean translated; + GLboolean error; + struct r300_pfs_compile_state *cs; + + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + GLuint inst4; + GLuint inst5; + } inst[512]; + /* TODO: This is magic! */ + + struct { + int tex_offset; + int tex_end; + int alu_offset; + int alu_end; + int flags; + } node[4]; + int cur_node; + int first_node_has_tex; + + int alu_offset; + int alu_end; + int tex_offset; + int tex_end; + + /* Hardware constants. + * Contains a pointer to the value. The destination of the pointer + * is supposed to be updated when GL state changes. + * Typically, this is either a pointer into + * gl_program_parameter_list::ParameterValues, or a pointer to a + * global constant (e.g. 
for sin/cos-approximation) + */ + const GLfloat *constant[PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; + + GLuint optimization; +}; + #define R300_MAX_AOS_ARRAYS 16 #define REG_COORDS 0 diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index c664fb6562..5ba2971fb9 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -2217,6 +2217,7 @@ static void update_params(struct r300_fragment_program *fp) void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp) { + struct r300_pfs_compile_state *cs = NULL; if (!fp->translated) { diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index eee1e803a0..fc07105c56 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -334,13 +334,26 @@ static GLboolean r300RunRender(GLcontext * ctx, static int r300Fallback(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *fp = (struct r300_fragment_program *) + /* Do we need to use new-style shaders? + * Also is there a better way to do this? 
*/ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; - - if (fp) { - if (!fp->translated) - r300TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); + if (fp) { + if (!fp->translated) { + r500TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } + } else { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r300TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } } FALLBACK_IF(ctx->RenderMode != GL_RENDER); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 8f12266a5f..0ffa5bfd75 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2315,15 +2315,32 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) static void r500SetupPixelShader(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; - struct r300_fragment_program *fp = (struct r300_fragment_program *) + struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; int i, k; - if (!fp) /* should only happenen once, just after context is created */ + if (!fp) /* should only happen once, just after context is created */ return; + r500TranslateFragmentShader(rmesa, fp); + if (!fp->translated) { + fprintf(stderr, "%s: No valid fragment shader, exiting\n", + __FUNCTION__); + return; + } + /* emit the standard zero shader */ R300_STATECHANGE(rmesa, r500fp); + /* Moar magic... 
*/ + for (i = 0; i < fp->cs->nrslots; i++) { + rmesa->hw.r500fp.cmd[i*6+1] = fp->inst[i].inst0; + rmesa->hw.r500fp.cmd[i*6+2] = fp->inst[i].inst1; + rmesa->hw.r500fp.cmd[i*6+3] = fp->inst[i].inst2; + rmesa->hw.r500fp.cmd[i*6+4] = fp->inst[i].inst3; + rmesa->hw.r500fp.cmd[i*6+5] = fp->inst[i].inst4; + rmesa->hw.r500fp.cmd[i*6+6] = fp->inst[i].inst5; + } +#if 0 i = 1; rmesa->hw.r500fp.cmd[i++] = 0x7808; rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; @@ -2383,8 +2400,9 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; +#endif - bump_r500fp_count(rmesa->hw.r500fp.cmd, 12); + bump_r500fp_count(rmesa->hw.r500fp.cmd, i * 6); R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < fp->const_nr; i++) { diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 3638a94380..b976637ee2 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -32,6 +32,8 @@ * * \author Jerome Glisse * + * \author Corbin Simpson + * * \todo Depth write, WPOS/FOGC inputs * * \todo FogOption @@ -48,12 +50,12 @@ #include "shader/prog_print.h" #include "r300_context.h" -#include "r300_fragprog.h" +#include "r500_fragprog.h" #include "r300_reg.h" #include "r300_state.h" /* - * Usefull macros and values + * Useful macros and values */ #define ERROR(fmt, args...) 
do { \ fprintf(stderr, "%s::%s(): " fmt "\n", \ @@ -61,2027 +63,272 @@ fp->error = GL_TRUE; \ } while(0) -#define PFS_INVAL 0xFFFFFFFF #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs -#define SWIZZLE_XYZ 0 -#define SWIZZLE_XXX 1 -#define SWIZZLE_YYY 2 -#define SWIZZLE_ZZZ 3 -#define SWIZZLE_WWW 4 -#define SWIZZLE_YZX 5 -#define SWIZZLE_ZXY 6 -#define SWIZZLE_WZY 7 -#define SWIZZLE_111 8 -#define SWIZZLE_000 9 -#define SWIZZLE_HHH 10 - -#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \ - ((SWIZZLE_##x<<0)| \ - (SWIZZLE_##y<<3)| \ - (SWIZZLE_##z<<6)| \ - (SWIZZLE_##w<<9)), \ - 0) - -#define REG_TYPE_INPUT 0 -#define REG_TYPE_OUTPUT 1 -#define REG_TYPE_TEMP 2 -#define REG_TYPE_CONST 3 - -#define REG_TYPE_SHIFT 0 -#define REG_INDEX_SHIFT 2 -#define REG_VSWZ_SHIFT 8 -#define REG_SSWZ_SHIFT 13 -#define REG_NEGV_SHIFT 18 -#define REG_NEGS_SHIFT 19 -#define REG_ABS_SHIFT 20 -#define REG_NO_USE_SHIFT 21 // Hack for refcounting -#define REG_VALID_SHIFT 22 // Does the register contain a defined value? -#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? 
- -#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) -#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) -#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) -#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) -#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) -#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) -#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) -#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) -#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) -#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) - -#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ - (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ - ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ - ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ - ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ - ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ - ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ - ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) -#define REG_GET_TYPE(reg) \ - ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) -#define REG_GET_INDEX(reg) \ - ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) -#define REG_GET_VSWZ(reg) \ - ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) -#define REG_GET_SSWZ(reg) \ - ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) -#define REG_GET_NO_USE(reg) \ - ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) -#define REG_GET_VALID(reg) \ - ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) -#define REG_GET_BUILTIN(reg) \ - ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) -#define REG_SET_TYPE(reg, type) \ - reg = ((reg & ~REG_TYPE_MASK) | \ - ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) -#define REG_SET_INDEX(reg, index) \ - reg = ((reg & ~REG_INDEX_MASK) | \ - ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) -#define REG_SET_VSWZ(reg, vswz) \ - reg = ((reg & ~REG_VSWZ_MASK) | \ - ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) -#define REG_SET_SSWZ(reg, sswz) \ - reg = ((reg & ~REG_SSWZ_MASK) | \ - ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) -#define REG_SET_NO_USE(reg, nouse) \ - reg = ((reg & ~REG_NO_USE_MASK) | 
\ - ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) -#define REG_SET_VALID(reg, valid) \ - reg = ((reg & ~REG_VALID_MASK) | \ - ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) -#define REG_SET_BUILTIN(reg, builtin) \ - reg = ((reg & ~REG_BUILTIN_MASK) | \ - ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) -#define REG_ABS(reg) \ - reg = (reg | REG_ABS_MASK) -#define REG_NEGV(reg) \ - reg = (reg | REG_NEGV_MASK) -#define REG_NEGS(reg) \ - reg = (reg | REG_NEGS_MASK) - -/* - * Datas structures for fragment program generation - */ - -/* description of r300 native hw instructions */ -static const struct { - const char *name; - int argc; - int v_op; - int s_op; -} r300_fpop[] = { - /* *INDENT-OFF* */ - {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD}, - {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4}, - {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4}, - {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN}, - {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX}, - {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP}, - {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC}, - {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2}, - {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2}, - {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP}, - {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ}, - {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL}, - {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL}, - /* *INDENT-ON* */ -}; - -/* vector swizzles r300 can support natively, with a couple of - * cases we handle specially - * - * REG_VSWZ/REG_SSWZ is an index into this table - */ - -/* mapping from SWIZZLE_* to r300 native values for scalar insns */ -#define SWIZZLE_HALF 6 - -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ - SWIZZLE_##y, \ - SWIZZLE_##z, \ - SWIZZLE_ZERO)) -/* native swizzles */ -static const struct r300_pfs_swizzle { - GLuint hash; /* swizzle value this matches */ - GLuint base; /* base value for hw swizzle */ - GLuint stride; /* 
difference in base between arg0/1/2 */ - GLuint flags; -} v_swiz[] = { - /* *INDENT-OFF* */ - {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, - {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, - {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, - {PFS_INVAL, 0, 0, 0}, - /* *INDENT-ON* */ -}; - -/* used during matching of non-native swizzles */ -#define SWZ_X_MASK (7 << 0) -#define SWZ_Y_MASK (7 << 3) -#define SWZ_Z_MASK (7 << 6) -#define SWZ_W_MASK (7 << 9) -static const struct { - GLuint hash; /* used to mask matching swizzle components */ - int mask; /* actual outmask */ - int count; /* count of components matched */ -} s_mask[] = { - /* *INDENT-OFF* */ - {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3}, - {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2}, - {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2}, - {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2}, - {SWZ_X_MASK, 1, 1}, - {SWZ_Y_MASK, 2, 1}, - {SWZ_Z_MASK, 4, 1}, - {PFS_INVAL, PFS_INVAL, PFS_INVAL} - /* *INDENT-ON* */ -}; - -static const struct { - int base; /* hw value of swizzle */ - int stride; /* difference between SRC0/1/2 */ - GLuint flags; -} s_swiz[] = { - /* *INDENT-OFF* */ - {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, - {R300_FPI2_ARGA_ZERO, 0, 0}, - {R300_FPI2_ARGA_ONE, 0, 0}, - {R300_FPI2_ARGA_HALF, 0, 0} - /* 
*INDENT-ON* */ -}; - -/* boiler-plate reg, for convenience */ -static const GLuint undef = REG(REG_TYPE_TEMP, - 0, - SWIZZLE_XYZ, - SWIZZLE_W, - GL_FALSE, - GL_FALSE, - GL_FALSE); - -/* constant one source */ -static const GLuint pfs_one = REG(REG_TYPE_CONST, - 0, - SWIZZLE_111, - SWIZZLE_ONE, - GL_FALSE, - GL_TRUE, - GL_TRUE); - -/* constant half source */ -static const GLuint pfs_half = REG(REG_TYPE_CONST, - 0, - SWIZZLE_HHH, - SWIZZLE_HALF, - GL_FALSE, - GL_TRUE, - GL_TRUE); - -/* constant zero source */ -static const GLuint pfs_zero = REG(REG_TYPE_CONST, - 0, - SWIZZLE_000, - SWIZZLE_ZERO, - GL_FALSE, - GL_TRUE, - GL_TRUE); - -/* - * Common functions prototypes - */ -static void dump_program(struct r300_fragment_program *fp); -static void emit_arith(struct r300_fragment_program *fp, int op, - GLuint dest, int mask, - GLuint src0, GLuint src1, GLuint src2, int flags); - -/** - * Get an R300 temporary that can be written to in the given slot. - */ -static int get_hw_temp(struct r300_fragment_program *fp, int slot) -{ - COMPILE_STATE; - int r; - - for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { - if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) - break; - } - - if (r >= PFS_NUM_TEMP_REGS) { - ERROR("Out of hardware temps\n"); - return 0; - } - // Reserved is used to avoid the following scenario: - // R300 temporary X is first assigned to Mesa temporary Y during vector ops - // R300 temporary X is then assigned to Mesa temporary Z for further vector ops - // Then scalar ops on Mesa temporary Z are emitted and move back in time - // to overwrite the value of temporary Y. - // End scenario. - cs->hwtemps[r].reserved = cs->hwtemps[r].free; - cs->hwtemps[r].free = -1; - - // Reset to some value that won't mess things up when the user - // tries to read from a temporary that hasn't been assigned a value yet. - // In the normal case, vector_valid and scalar_valid should be set to - // a sane value by the first emit that writes to this temporary. 
- cs->hwtemps[r].vector_valid = 0; - cs->hwtemps[r].scalar_valid = 0; - - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; - - return r; -} - -/** - * Get an R300 temporary that will act as a TEX destination register. - */ -static int get_hw_temp_tex(struct r300_fragment_program *fp) -{ - COMPILE_STATE; - int r; - - for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { - if (cs->used_in_node & (1 << r)) - continue; - - // Note: Be very careful here - if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) - break; - } - - if (r >= PFS_NUM_TEMP_REGS) - return get_hw_temp(fp, 0); /* Will cause an indirection */ - - cs->hwtemps[r].reserved = cs->hwtemps[r].free; - cs->hwtemps[r].free = -1; - - // Reset to some value that won't mess things up when the user - // tries to read from a temporary that hasn't been assigned a value yet. - // In the normal case, vector_valid and scalar_valid should be set to - // a sane value by the first emit that writes to this temporary. - cs->hwtemps[r].vector_valid = cs->nrslots; - cs->hwtemps[r].scalar_valid = cs->nrslots; - - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; - - return r; -} - -/** - * Mark the given hardware register as free. - */ -static void free_hw_temp(struct r300_fragment_program *fp, int idx) -{ - COMPILE_STATE; - - // Be very careful here. Consider sequences like - // MAD r0, r1,r2,r3 - // TEX r4, ... - // The TEX instruction may be moved in front of the MAD instruction - // due to the way nodes work. We don't want to alias r1 and r4 in - // this case. - // I'm certain the register allocation could be further sanitized, - // but it's tricky because of stuff that can happen inside emit_tex - // and emit_arith. - cs->hwtemps[idx].free = cs->nrslots + 1; -} - -/** - * Create a new Mesa temporary register. 
- */ -static GLuint get_temp_reg(struct r300_fragment_program *fp) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint index; - - index = ffs(~cs->temp_in_use); - if (!index) { - ERROR("Out of program temps\n"); - return r; - } - - cs->temp_in_use |= (1 << --index); - cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = -1; - - REG_SET_TYPE(r, REG_TYPE_TEMP); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; +/* "Register" flags */ +#define REG_CONSTANT (1 << 8) + +/* Swizzle tools */ +#define R500_SWIZZLE_ZERO 4 +#define R500_SWIZZLE_HALF 5 +#define R500_SWIZZLE_ONE 6 +#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +/* Swizzles for inst3 */ +#define MAKE_SWIZ_RGB_A(x) (x << 2) +#define MAKE_SWIZ_RGB_B(x) (x << 15) +/* Swizzles for inst4 */ +#define MAKE_SWIZ_ALPHA_A(x) (x << 14) +#define MAKE_SWIZ_ALPHA_B(x) (x << 21) +/* Swizzle for inst5 */ +#define MAKE_SWIZ_RGBA_C(x) (x << 14) +#define MAKE_SWIZ_ALPHA_C(x) (x << 27) + +static inline GLuint make_rgb_swizzle(struct prog_src_register src) { + GLuint swiz = 0x0; + GLuint temp; + /* This could be optimized, but it should be plenty fast already. */ + for (int i = 0; i < 3; i++) { + temp = (src.Swizzle >> i*3) & 0x7; + /* Fix SWIZZLE_ONE */ + if (temp == 5) temp++; + swiz += temp << i*3; + } + return swiz; } -/** - * Create a new Mesa temporary register that will act as the destination - * register for a texture read. 
- */ -static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint index; - - index = ffs(~cs->temp_in_use); - if (!index) { - ERROR("Out of program temps\n"); - return r; - } - - cs->temp_in_use |= (1 << --index); - cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = get_hw_temp_tex(fp); - - REG_SET_TYPE(r, REG_TYPE_TEMP); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; +static inline GLuint make_alpha_swizzle(struct prog_src_register src) { + GLuint swiz = (src.Swizzle >> 12) & 0x7; + if (swiz == 5) swiz++; + return swiz; } -/** - * Free a Mesa temporary and the associated R300 temporary. - */ -static void free_temp(struct r300_fragment_program *fp, GLuint r) -{ - COMPILE_STATE; - GLuint index = REG_GET_INDEX(r); - - if (!(cs->temp_in_use & (1 << index))) - return; - - if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { - free_hw_temp(fp, cs->temps[index].reg); - cs->temps[index].reg = -1; - cs->temp_in_use &= ~(1 << index); - } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { - free_hw_temp(fp, cs->inputs[index].reg); - cs->inputs[index].reg = -1; - } -} - -/** - * Emit a hardware constant/parameter. - * - * \p cp Stable pointer to an array of 4 floats. - * The pointer must be stable in the sense that it remains to be valid - * and hold the contents of the constant/parameter throughout the lifetime - * of the fragment program (actually, up until the next time the fragment - * program is translated). - */ -static GLuint emit_const4fv(struct r300_fragment_program *fp, - const GLfloat * cp) -{ - GLuint reg = undef; - int index; - - for (index = 0; index < fp->const_nr; ++index) { - if (fp->constant[index] == cp) +static GLuint make_src(struct prog_src_register src) { + GLuint reg = src.Index; + switch (src.File) { + case PROGRAM_INPUT: + /* Ugly hack needed to work around Mesa; + * fragments don't get loaded right otherwise! 
*/ + reg = 0x0; + break; + case PROGRAM_CONSTANT: + reg |= REG_CONSTANT; + break; + default: + // ERROR("Can't handle src.File %x\n", src.File); break; } - - if (index >= fp->const_nr) { - if (index >= PFS_NUM_CONST_REGS) { - ERROR("Out of hw constants!\n"); - return reg; - } - - fp->const_nr++; - fp->constant[index] = cp; - } - - REG_SET_TYPE(reg, REG_TYPE_CONST); - REG_SET_INDEX(reg, index); - REG_SET_VALID(reg, GL_TRUE); return reg; } -static inline GLuint negate(GLuint r) -{ - REG_NEGS(r); - REG_NEGV(r); - return r; -} - -/* Hack, to prevent clobbering sources used multiple times when - * emulating non-native instructions - */ -static inline GLuint keep(GLuint r) -{ - REG_SET_NO_USE(r, GL_TRUE); - return r; -} - -static inline GLuint absolute(GLuint r) -{ - REG_ABS(r); - return r; -} - -static int swz_native(struct r300_fragment_program *fp, - GLuint src, GLuint * r, GLuint arbneg) -{ - /* Native swizzle, handle negation */ - src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); - - if ((arbneg & 0x7) == 0x0) { - src = src & ~REG_NEGV_MASK; - *r = src; - } else if ((arbneg & 0x7) == 0x7) { - src |= REG_NEGV_MASK; - *r = src; - } else { - if (!REG_GET_VALID(*r)) - *r = get_temp_reg(fp); - src |= REG_NEGV_MASK; - emit_arith(fp, - PFS_OP_MAD, - *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); - src = src & ~REG_NEGV_MASK; - emit_arith(fp, - PFS_OP_MAD, - *r, - (arbneg ^ 0x7) | WRITEMASK_W, - src, pfs_one, pfs_zero, 0); - } - - return 3; -} - -static int swz_emit_partial(struct r300_fragment_program *fp, - GLuint src, - GLuint * r, int mask, int mc, GLuint arbneg) -{ - GLuint tmp; - GLuint wmask = 0; - - if (!REG_GET_VALID(*r)) - *r = get_temp_reg(fp); - - /* A partial match, VSWZ/mask define what parts of the - * desired swizzle we match - */ - if (mc + s_mask[mask].count == 3) { - wmask = WRITEMASK_W; - src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT; - } - - tmp = arbneg & s_mask[mask].mask; - if (tmp) { - tmp = tmp ^ s_mask[mask].mask; - if 
(tmp) { - emit_arith(fp, - PFS_OP_MAD, - *r, - arbneg & s_mask[mask].mask, - keep(src) | REG_NEGV_MASK, - pfs_one, pfs_zero, 0); - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(fp, - PFS_OP_MAD, - *r, tmp | wmask, src, pfs_one, pfs_zero, 0); - } else { - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(fp, - PFS_OP_MAD, - *r, - (arbneg & s_mask[mask].mask) | wmask, - src | REG_NEGV_MASK, pfs_one, pfs_zero, 0); - } - } else { - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(fp, PFS_OP_MAD, - *r, - s_mask[mask].mask | wmask, - src, pfs_one, pfs_zero, 0); - } - - return s_mask[mask].count; -} - -static GLuint do_swizzle(struct r300_fragment_program *fp, - GLuint src, GLuint arbswz, GLuint arbneg) -{ - GLuint r = undef; - GLuint vswz; - int c_mask = 0; - int v_match = 0; - - /* If swizzling from something without an XYZW native swizzle, - * emit result to a temp, and do new swizzle from the temp. - */ -#if 0 - if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { - GLuint temp = get_temp_reg(fp); - emit_arith(fp, - PFS_OP_MAD, - temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0); - src = temp; - } -#endif - - if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { - GLuint vsrcswz = - (v_swiz[REG_GET_VSWZ(src)]. - hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) | - REG_GET_SSWZ(src) << 9; - GLint i; - - GLuint newswz = 0; - GLuint offset; - for (i = 0; i < 4; ++i) { - offset = GET_SWZ(arbswz, i); - - newswz |= - (offset <= 3) ? 
GET_SWZ(vsrcswz, - offset) << i * - 3 : offset << i * 3; - } - - arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK); - REG_SET_SSWZ(src, GET_SWZ(newswz, 3)); - } else { - /* set scalar swizzling */ - REG_SET_SSWZ(src, GET_SWZ(arbswz, 3)); - - } - do { - vswz = REG_GET_VSWZ(src); - do { - int chash; - - REG_SET_VSWZ(src, vswz); - chash = v_swiz[REG_GET_VSWZ(src)].hash & - s_mask[c_mask].hash; - - if (chash == (arbswz & s_mask[c_mask].hash)) { - if (s_mask[c_mask].count == 3) { - v_match += swz_native(fp, - src, &r, arbneg); - } else { - v_match += swz_emit_partial(fp, - src, - &r, - c_mask, - v_match, - arbneg); - } - - if (v_match == 3) - return r; - - /* Fill with something invalid.. all 0's was - * wrong before, matched SWIZZLE_X. So all - * 1's will be okay for now - */ - arbswz |= (PFS_INVAL & s_mask[c_mask].hash); - } - } while (v_swiz[++vswz].hash != PFS_INVAL); - REG_SET_VSWZ(src, SWIZZLE_XYZ); - } while (s_mask[++c_mask].hash != PFS_INVAL); - - ERROR("should NEVER get here\n"); - return r; -} - -static GLuint t_src(struct r300_fragment_program *fp, - struct prog_src_register fpsrc) -{ - GLuint r = undef; - - switch (fpsrc.File) { - case PROGRAM_TEMPORARY: - REG_SET_INDEX(r, fpsrc.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_TEMP); - break; - case PROGRAM_INPUT: - REG_SET_INDEX(r, fpsrc.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_INPUT); - break; - case PROGRAM_LOCAL_PARAM: - r = emit_const4fv(fp, - fp->mesa_program.Base.LocalParams[fpsrc. - Index]); - break; - case PROGRAM_ENV_PARAM: - r = emit_const4fv(fp, - fp->ctx->FragmentProgram.Parameters[fpsrc. - Index]); - break; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - r = emit_const4fv(fp, - fp->mesa_program.Base.Parameters-> - ParameterValues[fpsrc.Index]); - break; - default: - ERROR("unknown SrcReg->File %x\n", fpsrc.File); - return r; - } - - /* no point swizzling ONE/ZERO/HALF constants... 
*/ - if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) - r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase); - return r; -} - -static GLuint t_scalar_src(struct r300_fragment_program *fp, - struct prog_src_register fpsrc) -{ - struct prog_src_register src = fpsrc; - int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ - - src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); - - return t_src(fp, src); -} - -static GLuint t_dst(struct r300_fragment_program *fp, - struct prog_dst_register dest) -{ - GLuint r = undef; - +static GLuint make_dest(struct prog_dst_register dest) { + GLuint reg = dest.Index; switch (dest.File) { - case PROGRAM_TEMPORARY: - REG_SET_INDEX(r, dest.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_TEMP); - return r; - case PROGRAM_OUTPUT: - REG_SET_TYPE(r, REG_TYPE_OUTPUT); - switch (dest.Index) { - case FRAG_RESULT_COLR: - case FRAG_RESULT_DEPR: - REG_SET_INDEX(r, dest.Index); - REG_SET_VALID(r, GL_TRUE); - return r; - default: - ERROR("Bad DstReg->Index 0x%x\n", dest.Index); - return r; - } - default: - ERROR("Bad DstReg->File 0x%x\n", dest.File); - return r; - } -} - -static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) -{ - COMPILE_STATE; - int idx; - int index = REG_GET_INDEX(src); - - switch (REG_GET_TYPE(src)) { - case REG_TYPE_TEMP: - /* NOTE: if reg==-1 here, a source is being read that - * hasn't been written to. Undefined results. 
- */ - if (cs->temps[index].reg == -1) - cs->temps[index].reg = get_hw_temp(fp, cs->nrslots); - - idx = cs->temps[index].reg; - - if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) - free_temp(fp, src); - break; - case REG_TYPE_INPUT: - idx = cs->inputs[index].reg; - - if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) - free_hw_temp(fp, cs->inputs[index].reg); - break; - case REG_TYPE_CONST: - return (index | SRC_CONST); - default: - ERROR("Invalid type for source reg\n"); - return (0 | SRC_CONST); - } - - if (!tex) - cs->used_in_node |= (1 << idx); - - return idx; -} - -static int t_hw_dst(struct r300_fragment_program *fp, - GLuint dest, GLboolean tex, int slot) -{ - COMPILE_STATE; - int idx; - GLuint index = REG_GET_INDEX(dest); - assert(REG_GET_VALID(dest)); - - switch (REG_GET_TYPE(dest)) { - case REG_TYPE_TEMP: - if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { - if (!tex) { - cs->temps[index].reg = get_hw_temp(fp, slot); - } else { - cs->temps[index].reg = get_hw_temp_tex(fp); - } - } - idx = cs->temps[index].reg; - - if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) - free_temp(fp, dest); - - cs->dest_in_node |= (1 << idx); - cs->used_in_node |= (1 << idx); - break; - case REG_TYPE_OUTPUT: - switch (index) { - case FRAG_RESULT_COLR: - fp->node[fp->cur_node].flags |= - R300_PFS_NODE_OUTPUT_COLOR; + case PROGRAM_OUTPUT: + /* Eventually we may need to handle multiple + * rendering targets... 
*/ break; - case FRAG_RESULT_DEPR: - fp->node[fp->cur_node].flags |= - R300_PFS_NODE_OUTPUT_DEPTH; + case PROGRAM_CONSTANT: + reg |= REG_CONSTANT; break; - } - return index; - break; - default: - ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); - return 0; - } - - return idx; -} - -static void emit_nop(struct r300_fragment_program *fp) -{ - COMPILE_STATE; - - if (cs->nrslots >= PFS_MAX_ALU_INST) { - ERROR("Out of ALU instruction slots\n"); - return; - } - - fp->alu.inst[cs->nrslots].inst0 = NOP_INST0; - fp->alu.inst[cs->nrslots].inst1 = NOP_INST1; - fp->alu.inst[cs->nrslots].inst2 = NOP_INST2; - fp->alu.inst[cs->nrslots].inst3 = NOP_INST3; - cs->nrslots++; -} - -static void emit_tex(struct r300_fragment_program *fp, - struct prog_instruction *fpi, int opcode) -{ - COMPILE_STATE; - GLuint coord = t_src(fp, fpi->SrcReg[0]); - GLuint dest = undef, rdest = undef; - GLuint din, uin; - int unit = fpi->TexSrcUnit; - int hwsrc, hwdest; - GLuint tempreg = 0; - - uin = cs->used_in_node; - din = cs->dest_in_node; - - /* Resolve source/dest to hardware registers */ - if (opcode != R300_FPITX_OP_KIL) { - if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { - /** - * Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - * - * \todo Refactor this once we have proper rewriting/optimization - * support for programs. - */ - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - int factor_index; - GLuint factorreg; - - tokens[2] = unit; - factor_index = - _mesa_add_state_reference(fp->mesa_program.Base. 
- Parameters, tokens); - factorreg = - emit_const4fv(fp, - fp->mesa_program.Base.Parameters-> - ParameterValues[factor_index]); - tempreg = keep(get_temp_reg(fp)); - - emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, factorreg, pfs_zero, 0); - - /* Ensure correct node indirection */ - uin = cs->used_in_node; - din = cs->dest_in_node; - - hwsrc = t_hw_src(fp, tempreg, GL_TRUE); - } else { - hwsrc = t_hw_src(fp, coord, GL_TRUE); - } - - dest = t_dst(fp, fpi->DstReg); - - /* r300 doesn't seem to be able to do TEX->output reg */ - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - rdest = dest; - dest = get_temp_reg_tex(fp); - } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { - /* in case write mask isn't XYZW */ - rdest = dest; - dest = get_temp_reg_tex(fp); - } - hwdest = - t_hw_dst(fp, dest, GL_TRUE, - fp->node[fp->cur_node].alu_offset); - - /* Use a temp that hasn't been used in this node, rather - * than causing an indirection - */ - if (uin & (1 << hwdest)) { - free_hw_temp(fp, hwdest); - hwdest = get_hw_temp_tex(fp); - cs->temps[REG_GET_INDEX(dest)].reg = hwdest; - } - } else { - hwdest = 0; - unit = 0; - hwsrc = t_hw_src(fp, coord, GL_TRUE); - } - - /* Indirection if source has been written in this node, or if the - * dest has been read/written in this node - */ - if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && - (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { - - /* Finish off current node */ - if (fp->node[fp->cur_node].alu_offset == cs->nrslots) - emit_nop(fp); - - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - assert(fp->node[fp->cur_node].alu_end >= 0); - - if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) { - ERROR("too many levels of texture indirection\n"); - return; - } - - /* Start new node */ - fp->node[fp->cur_node].tex_offset = fp->tex.length; - fp->node[fp->cur_node].alu_offset = cs->nrslots; - fp->node[fp->cur_node].tex_end = -1; - fp->node[fp->cur_node].alu_end = -1; - fp->node[fp->cur_node].flags 
= 0; - cs->used_in_node = 0; - cs->dest_in_node = 0; - } - - if (fp->cur_node == 0) - fp->first_node_has_tex = 1; - - fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT) - | (hwdest << R300_FPITX_DST_SHIFT) - | (unit << R300_FPITX_IMAGE_SHIFT) - /* not entirely sure about this */ - | (opcode << R300_FPITX_OPCODE_SHIFT); - - cs->dest_in_node |= (1 << hwdest); - if (REG_GET_TYPE(coord) != REG_TYPE_CONST) - cs->used_in_node |= (1 << hwsrc); - - fp->node[fp->cur_node].tex_end++; - - /* Copy from temp to output if needed */ - if (REG_GET_VALID(rdest)) { - emit_arith(fp, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, - pfs_one, pfs_zero, 0); - free_temp(fp, dest); - } - - /* Free temp register */ - if (tempreg != 0) - free_temp(fp, tempreg); -} - -/** - * Returns the first slot where we could possibly allow writing to dest, - * according to register allocation. - */ -static int get_earliest_allowed_write(struct r300_fragment_program *fp, - GLuint dest, int mask) -{ - COMPILE_STATE; - int idx; - int pos; - GLuint index = REG_GET_INDEX(dest); - assert(REG_GET_VALID(dest)); - - switch (REG_GET_TYPE(dest)) { - case REG_TYPE_TEMP: - if (cs->temps[index].reg == -1) - return 0; - - idx = cs->temps[index].reg; - break; - case REG_TYPE_OUTPUT: - return 0; - default: - ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); - return 0; - } - - pos = cs->hwtemps[idx].reserved; - if (mask & WRITEMASK_XYZ) { - if (pos < cs->hwtemps[idx].vector_lastread) - pos = cs->hwtemps[idx].vector_lastread; - } - if (mask & WRITEMASK_W) { - if (pos < cs->hwtemps[idx].scalar_lastread) - pos = cs->hwtemps[idx].scalar_lastread; - } - - return pos; -} - -/** - * Allocates a slot for an ALU instruction that can consist of - * a vertex part or a scalar part or both. - * - * Sources from src (src[0] to src[argc-1]) are added to the slot in the - * appropriate position (vector and/or scalar), and their positions are - * recorded in the srcpos array. 
- * - * This function emits instruction code for the source fetch and the - * argument selection. It does not emit instruction code for the - * opcode or the destination selection. - * - * @return the index of the slot - */ -static int find_and_prepare_slot(struct r300_fragment_program *fp, - GLboolean emit_vop, - GLboolean emit_sop, - int argc, GLuint * src, GLuint dest, int mask) -{ - COMPILE_STATE; - int hwsrc[3]; - int srcpos[3]; - unsigned int used; - int tempused; - int tempvsrc[3]; - int tempssrc[3]; - int pos; - int regnr; - int i, j; - - // Determine instruction slots, whether sources are required on - // vector or scalar side, and the smallest slot number where - // all source registers are available - used = 0; - if (emit_vop) - used |= SLOT_OP_VECTOR; - if (emit_sop) - used |= SLOT_OP_SCALAR; - - pos = get_earliest_allowed_write(fp, dest, mask); - - if (fp->node[fp->cur_node].alu_offset > pos) - pos = fp->node[fp->cur_node].alu_offset; - for (i = 0; i < argc; ++i) { - if (!REG_GET_BUILTIN(src[i])) { - if (emit_vop) - used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; - if (emit_sop) - used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; - } - - hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! 
*/ - regnr = hwsrc[i] & 31; - - if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { - if (used & (SLOT_SRC_VECTOR << i)) { - if (cs->hwtemps[regnr].vector_valid > pos) - pos = cs->hwtemps[regnr].vector_valid; - } - if (used & (SLOT_SRC_SCALAR << i)) { - if (cs->hwtemps[regnr].scalar_valid > pos) - pos = cs->hwtemps[regnr].scalar_valid; - } - } - } - - // Find a slot that fits - for (;; ++pos) { - if (cs->slot[pos].used & used & SLOT_OP_BOTH) - continue; - - if (pos >= cs->nrslots) { - if (cs->nrslots >= PFS_MAX_ALU_INST) { - ERROR("Out of ALU instruction slots\n"); - return -1; - } - - fp->alu.inst[pos].inst0 = NOP_INST0; - fp->alu.inst[pos].inst1 = NOP_INST1; - fp->alu.inst[pos].inst2 = NOP_INST2; - fp->alu.inst[pos].inst3 = NOP_INST3; - - cs->nrslots++; - } - // Note: When we need both parts (vector and scalar) of a source, - // we always try to put them into the same position. This makes the - // code easier to read, and it is optimal (i.e. one doesn't gain - // anything by splitting the parts). 
- // It also avoids headaches with swizzles that access both parts (i.e WXY) - tempused = cs->slot[pos].used; - for (i = 0; i < 3; ++i) { - tempvsrc[i] = cs->slot[pos].vsrc[i]; - tempssrc[i] = cs->slot[pos].ssrc[i]; - } - - for (i = 0; i < argc; ++i) { - int flags = (used >> i) & SLOT_SRC_BOTH; - - if (!flags) { - srcpos[i] = 0; - continue; - } - - for (j = 0; j < 3; ++j) { - if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { - if (tempvsrc[j] != hwsrc[i]) - continue; - } - - if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { - if (tempssrc[j] != hwsrc[i]) - continue; - } - - break; - } - - if (j == 3) - break; - - srcpos[i] = j; - tempused |= flags << j; - if (flags & SLOT_SRC_VECTOR) - tempvsrc[j] = hwsrc[i]; - if (flags & SLOT_SRC_SCALAR) - tempssrc[j] = hwsrc[i]; - } - - if (i == argc) + default: + // ERROR("Can't handle dest.File %x\n", dest.File); break; } - - // Found a slot, reserve it - cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); - for (i = 0; i < 3; ++i) { - cs->slot[pos].vsrc[i] = tempvsrc[i]; - cs->slot[pos].ssrc[i] = tempssrc[i]; - } - - for (i = 0; i < argc; ++i) { - if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { - int regnr = hwsrc[i] & 31; - - if (used & (SLOT_SRC_VECTOR << i)) { - if (cs->hwtemps[regnr].vector_lastread < pos) - cs->hwtemps[regnr].vector_lastread = - pos; - } - if (used & (SLOT_SRC_SCALAR << i)) { - if (cs->hwtemps[regnr].scalar_lastread < pos) - cs->hwtemps[regnr].scalar_lastread = - pos; - } - } - } - - // Emit the source fetch code - fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; - fp->alu.inst[pos].inst1 |= - ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | - (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | - (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); - - fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; - fp->alu.inst[pos].inst3 |= - ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | - (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | - (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); - - // Emit the 
argument selection code - if (emit_vop) { - int swz[3]; - - for (i = 0; i < 3; ++i) { - if (i < argc) { - swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + - (srcpos[i] * - v_swiz[REG_GET_VSWZ(src[i])]. - stride)) | ((src[i] & REG_NEGV_MASK) - ? ARG_NEG : 0) | ((src[i] - & - REG_ABS_MASK) - ? - ARG_ABS - : 0); - } else { - swz[i] = R300_FPI0_ARGC_ZERO; - } - } - - fp->alu.inst[pos].inst0 &= - ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK | - R300_FPI0_ARG2C_MASK); - fp->alu.inst[pos].inst0 |= - (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] << - R300_FPI0_ARG1C_SHIFT) - | (swz[2] << R300_FPI0_ARG2C_SHIFT); - } - - if (emit_sop) { - int swz[3]; - - for (i = 0; i < 3; ++i) { - if (i < argc) { - swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + - (srcpos[i] * - s_swiz[REG_GET_SSWZ(src[i])]. - stride)) | ((src[i] & REG_NEGV_MASK) - ? ARG_NEG : 0) | ((src[i] - & - REG_ABS_MASK) - ? - ARG_ABS - : 0); - } else { - swz[i] = R300_FPI2_ARGA_ZERO; - } - } - - fp->alu.inst[pos].inst2 &= - ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK | - R300_FPI2_ARG2A_MASK); - fp->alu.inst[pos].inst2 |= - (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] << - R300_FPI2_ARG1A_SHIFT) - | (swz[2] << R300_FPI2_ARG2A_SHIFT); - } - - return pos; -} - -/** - * Append an ALU instruction to the instruction list. 
- */ -static void emit_arith(struct r300_fragment_program *fp, - int op, - GLuint dest, - int mask, - GLuint src0, GLuint src1, GLuint src2, int flags) -{ - COMPILE_STATE; - GLuint src[3] = { src0, src1, src2 }; - int hwdest; - GLboolean emit_vop, emit_sop; - int vop, sop, argc; - int pos; - - vop = r300_fpop[op].v_op; - sop = r300_fpop[op].s_op; - argc = r300_fpop[op].argc; - - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && - REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { - if (mask & WRITEMASK_Z) { - mask = WRITEMASK_W; - } else { - return; - } - } - - emit_vop = GL_FALSE; - emit_sop = GL_FALSE; - if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) - emit_vop = GL_TRUE; - if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) - emit_sop = GL_TRUE; - - pos = - find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest, - mask); - if (pos < 0) - return; - - hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ - - if (flags & PFS_FLAG_SAT) { - vop |= R300_FPI0_OUTC_SAT; - sop |= R300_FPI2_OUTA_SAT; - } - - /* Throw the pieces together and get FPI0/1 */ - if (emit_vop) { - fp->alu.inst[pos].inst0 |= vop; - - fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; - - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - fp->alu.inst[pos].inst1 |= - (mask & WRITEMASK_XYZ) << - R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; - } else - assert(0); - } else { - fp->alu.inst[pos].inst1 |= - (mask & WRITEMASK_XYZ) << - R300_FPI1_DSTC_REG_MASK_SHIFT; - - cs->hwtemps[hwdest].vector_valid = pos + 1; - } - } - - /* And now FPI2/3 */ - if (emit_sop) { - fp->alu.inst[pos].inst2 |= sop; - - if (mask & WRITEMASK_W) { - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - fp->alu.inst[pos].inst3 |= - (hwdest << R300_FPI3_DSTA_SHIFT) | - R300_FPI3_DSTA_OUTPUT; - } else if (REG_GET_INDEX(dest) == - FRAG_RESULT_DEPR) { - fp->alu.inst[pos].inst3 |= - 
R300_FPI3_DSTA_DEPTH; - } else - assert(0); - } else { - fp->alu.inst[pos].inst3 |= - (hwdest << R300_FPI3_DSTA_SHIFT) | - R300_FPI3_DSTA_REG; - - cs->hwtemps[hwdest].scalar_valid = pos + 1; - } - } - } - - return; -} - -#if 0 -static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - GLuint r = undef; - - if (!(mp->Base.InputsRead & (1 << attr))) { - ERROR("Attribute %d was not provided!\n", attr); - return undef; - } - - REG_SET_TYPE(r, REG_TYPE_INPUT); - REG_SET_INDEX(r, attr); - REG_SET_VALID(r, GL_TRUE); - return r; + return reg; } -#endif - -static GLfloat SinCosConsts[2][4] = { - { - 1.273239545, // 4/PI - -0.405284735, // -4/(PI*PI) - 3.141592654, // PI - 0.2225 // weight - }, - { - 0.75, - 0.0, - 0.159154943, // 1/(2*PI) - 6.283185307 // 2*PI - } -}; - -/** - * Emit a LIT instruction. - * \p flags may be PFS_FLAG_SAT - * - * Definition of LIT (from ARB_fragment_program): - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots. So unless there's some special undocumented opcode, - * this implementation is potentially optimal. Unfortunately, - * emit_arith is a bit too conservative because it doesn't understand - * partial writes to the vector component. 
- */ -static const GLfloat LitConst[4] = - { 127.999999, 127.999999, 127.999999, -127.999999 }; -static void emit_lit(struct r300_fragment_program *fp, - GLuint dest, int mask, GLuint src, int flags) +static void dumb_shader(struct r500_fragment_program *fp) { - COMPILE_STATE; - GLuint cnst; - int needTemporary; - GLuint temp; - - cnst = emit_const4fv(fp, LitConst); - - needTemporary = 0; - if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } - - if (needTemporary) { - temp = keep(get_temp_reg(fp)); - } else { - temp = keep(dest); - } - - // Note: The order of emit_arith inside the slots is relevant, - // because emit_arith only looks at scalar vs. vector when resolving - // dependencies, and it does not consider individual vector components, - // so swizzling between the two parts can create fake dependencies. - - // First slot - emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY, - keep(src), pfs_zero, undef, 0); - emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); - - // Second slot - emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z, - swizzle(temp, W, W, W, W), cnst, undef, 0); - emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W, - swizzle(temp, Y, Y, Y, Y), undef, undef, 0); - - // Third slot - // If desired, we saturate the y result here. 
- // This does not affect the use as a condition variable in the CMP later - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, - temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y, - swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); - - // Fourth slot - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X, - pfs_one, pfs_one, pfs_zero, 0); - emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); - - // Fifth slot - emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z, - pfs_zero, swizzle(temp, W, W, W, W), - negate(swizzle(temp, Y, Y, Y, Y)), flags); - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, - pfs_zero, 0); - - if (needTemporary) { - emit_arith(fp, PFS_OP_MAD, dest, mask, - temp, pfs_one, pfs_zero, flags); - free_temp(fp, temp); - } else { - // Decrease refcount of the destination - t_hw_dst(fp, dest, GL_FALSE, cs->nrslots); - } + /* R500_INST_TYPE_TEX */ + fp->inst[0].inst0 = 0x7808; + fp->inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A; + fp->inst[0].inst3 = R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R; + fp->inst[0].inst4 = 0x0; + fp->inst[0].inst5 = 0x0; + + fp->inst[1].inst0 = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK; + fp->inst[1].inst1 = R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0; + fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) | + 
R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0; + fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1; + fp->inst[1].inst4 = R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1; + fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + fp->cs->nrslots = 2; + fp->translated = GL_TRUE; } -static GLboolean parse_program(struct r300_fragment_program *fp) +static GLboolean parse_program(struct r500_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; - int flags, mask = 0; - int const_sin[2]; + int flags, mask, counter = 0; if (!inst || inst[0].Opcode == OPCODE_END) { - ERROR("empty program?\n"); + ERROR("The program is empty!\n"); return GL_FALSE; } for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - if (fpi->SaturateMode == SATURATE_ZERO_ONE) - flags = PFS_FLAG_SAT; - else - flags = 0; if (fpi->Opcode != OPCODE_KIL) { - dest = t_dst(fp, fpi->DstReg); + dest = make_dest(fpi->DstReg); mask = fpi->DstReg.WriteMask; } switch (fpi->Opcode) { - case OPCODE_ABS: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - absolute(src[0]), pfs_one, pfs_zero, flags); - break; - case OPCODE_ADD: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, src[1], flags); - break; - case OPCODE_CMP: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); - /* ARB_f_p - if src0.c < 0.0 ? 
src1.c : src2.c - * r300 - if src2.c < 0.0 ? src1.c : src0.c - */ - emit_arith(fp, PFS_OP_CMP, dest, mask, - src[2], src[1], src[0], flags); - break; - case OPCODE_COS: - /* - * cos using a parabola (see SIN): - * cos(x): - * x = (x/(2*PI))+0.75 - * x = frac(x) - * x = (x*2*PI)-PI - * result = sin(x) - */ - temp[0] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - - /* add 0.5*PI and do range reduction */ - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(src[0], X, X, X, X), - swizzle(const_sin[1], Z, Z, Z, Z), - swizzle(const_sin[1], X, X, X, X), 0); - - emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, - swizzle(temp[0], X, X, X, X), - undef, undef, 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI - negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI - 0); - - /* SIN */ - - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, - swizzle(temp[0], X, X, X, X), - absolute(swizzle(temp[0], X, X, X, X)), - negate(swizzle(temp[0], X, X, X, X)), 0); - - emit_arith(fp, PFS_OP_MAD, dest, mask, - swizzle(temp[0], Y, Y, Y, Y), - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[0], X, X, X, X), flags); - - free_temp(fp, temp[0]); - break; - case OPCODE_DP3: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_DP3, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_DP4: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_DP4, dest, mask, - src[0], src[1], undef, flags); - 
break; - case OPCODE_DPH: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - /* src0.xyz1 -> temp - * DP4 dest, temp, src1 - */ -#if 0 - temp[0] = get_temp_reg(fp); - src[0].s_swz = SWIZZLE_ONE; - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, pfs_zero, 0); - emit_arith(fp, PFS_OP_DP4, dest, mask, - temp[0], src[1], undef, flags); - free_temp(fp, temp[0]); -#else - emit_arith(fp, PFS_OP_DP4, dest, mask, - swizzle(src[0], X, Y, Z, ONE), src[1], - undef, flags); -#endif - break; - case OPCODE_DST: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - /* dest.y = src0.y * src1.y */ - if (mask & WRITEMASK_Y) - emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y, - keep(src[0]), keep(src[1]), - pfs_zero, flags); - /* dest.z = src0.z */ - if (mask & WRITEMASK_Z) - emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z, - src[0], pfs_one, pfs_zero, flags); - /* result.x = 1.0 - * result.w = src1.w */ - if (mask & WRITEMASK_XW) { - REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ - emit_arith(fp, PFS_OP_MAD, dest, - mask & WRITEMASK_XW, - src[1], pfs_one, pfs_zero, flags); - } - break; - case OPCODE_EX2: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_EX2, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_FLR: - src[0] = t_src(fp, fpi->SrcReg[0]); - temp[0] = get_temp_reg(fp); - /* FRC temp, src0 - * MAD dest, src0, 1.0, -temp - */ - emit_arith(fp, PFS_OP_FRC, temp[0], mask, - keep(src[0]), undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(temp[0]), flags); - free_temp(fp, temp[0]); - break; - case OPCODE_FRC: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_FRC, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_KIL: - emit_tex(fp, fpi, R300_FPITX_OP_KIL); - break; - case OPCODE_LG2: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_LG2, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_LIT: 
- src[0] = t_src(fp, fpi->SrcReg[0]); - emit_lit(fp, dest, mask, src[0], flags); - break; - case OPCODE_LRP: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); - /* result = tmp0tmp1 + (1 - tmp0)tmp2 - * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 - * MAD temp, -tmp0, tmp2, tmp2 - * MAD result, tmp0, tmp1, temp - */ - temp[0] = get_temp_reg(fp); - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - negate(keep(src[0])), keep(src[2]), src[2], - 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], src[1], temp[0], flags); - free_temp(fp, temp[0]); - break; - case OPCODE_MAD: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], src[1], src[2], flags); - break; - case OPCODE_MAX: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAX, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_MIN: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MIN, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_MOV: - case OPCODE_SWZ: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, pfs_zero, flags); - break; - case OPCODE_MUL: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], src[1], pfs_zero, flags); - break; - case OPCODE_POW: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - src[1] = t_scalar_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W, - src[0], undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, - temp[0], src[1], pfs_zero, 0); - emit_arith(fp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, - temp[0], undef, undef, 0); - free_temp(fp, temp[0]); - break; - case OPCODE_RCP: - src[0] = t_scalar_src(fp, 
fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_RCP, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_RSQ: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_RSQ, dest, mask, - absolute(src[0]), pfs_zero, pfs_zero, flags); - break; - case OPCODE_SCS: - /* - * scs using a parabola : - * scs(x): - * result.x = sin(-abs(x)+0.5*PI) (cos) - * result.y = sin(x) (sin) - * - */ - temp[0] = get_temp_reg(fp); - temp[1] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - - /* x = -abs(x)+0.5*PI */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI - pfs_half, - negate(abs - (swizzle(keep(src[0]), X, X, X, X))), - 0); - - /* C*x (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, - swizzle(const_sin[0], Y, Y, Y, Y), - swizzle(keep(src[0]), X, X, X, X), - pfs_zero, 0); - - /* B*x, C*x (cos) */ - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - /* B*x (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(const_sin[0], X, X, X, X), - keep(src[0]), pfs_zero, 0); - - /* y = B*x + C*x*abs(x) (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z, - absolute(src[0]), - swizzle(temp[0], W, W, W, W), - swizzle(temp[1], W, W, W, W), 0); - - /* y = B*x + C*x*abs(x) (cos) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], - W, Z, Y, - X), - absolute(swizzle(temp[1], W, Z, Y, X)), - negate(swizzle(temp[1], W, Z, Y, X)), 0); - - /* dest.xy = mad(temp.xy, P, temp2.wz) */ - emit_arith(fp, PFS_OP_MAD, dest, - mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], - 
swizzle(const_sin[0], W, W, W, W), - swizzle(temp[1], W, Z, Y, X), flags); - - free_temp(fp, temp[0]); - free_temp(fp, temp[1]); - break; - case OPCODE_SGE: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 0 : 1 - */ - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(fp, PFS_OP_CMP, dest, mask, - pfs_one, pfs_zero, temp[0], 0); - free_temp(fp, temp[0]); - break; - case OPCODE_SIN: - /* - * using a parabola: - * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) - * extra precision is obtained by weighting against - * itself squared. - */ - - temp[0] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - - /* do range reduction */ - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(keep(src[0]), X, X, X, X), - swizzle(const_sin[1], Z, Z, Z, Z), - pfs_half, 0); - - emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, - swizzle(temp[0], X, X, X, X), - undef, undef, 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI - negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI - 0); - - /* SIN */ - - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, - swizzle(temp[0], X, X, X, X), - absolute(swizzle(temp[0], X, X, X, X)), - negate(swizzle(temp[0], X, X, X, X)), 0); - - emit_arith(fp, PFS_OP_MAD, dest, mask, - swizzle(temp[0], Y, Y, Y, Y), - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[0], X, X, X, X), flags); - - free_temp(fp, temp[0]); - 
break; - case OPCODE_SLT: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 1 : 0 - */ - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(fp, PFS_OP_CMP, dest, mask, - pfs_zero, pfs_one, temp[0], 0); - free_temp(fp, temp[0]); - break; - case OPCODE_SUB: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(src[1]), flags); - break; - case OPCODE_TEX: - emit_tex(fp, fpi, R300_FPITX_OP_TEX); - break; - case OPCODE_TXB: - emit_tex(fp, fpi, R300_FPITX_OP_TXB); - break; - case OPCODE_TXP: - emit_tex(fp, fpi, R300_FPITX_OP_TXP); - break; - case OPCODE_XPD:{ - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - /* temp = src0.zxy * src1.yzx */ - emit_arith(fp, PFS_OP_MAD, temp[0], - WRITEMASK_XYZ, swizzle(keep(src[0]), - Z, X, Y, W), - swizzle(keep(src[1]), Y, Z, X, W), - pfs_zero, 0); - /* dest.xyz = src0.yzx * src1.zxy - temp - * dest.w = undefined - * */ - emit_arith(fp, PFS_OP_MAD, dest, - mask & WRITEMASK_XYZ, swizzle(src[0], - Y, Z, - X, W), - swizzle(src[1], Z, X, Y, W), - negate(temp[0]), flags); - /* cleanup */ - free_temp(fp, temp[0]); + case OPCODE_ADD: + src[0] = make_src(fpi->SrcReg[0]); + src[1] = make_src(fpi->SrcReg[1]); + /* Variation on MAD: 1*src0+src1 */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G + | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = /* 1 */ + MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | 
R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC1 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])); + break; + case OPCODE_MAD: + src[0] = make_src(fpi->SrcReg[0]); + src[1] = make_src(fpi->SrcReg[1]); + src[2] = make_src(fpi->SrcReg[2]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G + | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + break; + case OPCODE_MOV: + src[0] = make_src(fpi->SrcReg[0]); + /* We use MAX, but MIN, CND, and CMP also work. + * Just remember to disable the OMOD! 
*/ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G + | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B + | R500_ALU_RGB_SEL_B_SRC0 + | R500_ALU_RGB_R_SWIZ_B_R | R500_ALU_RGB_G_SWIZ_B_G | R500_ALU_RGB_B_SWIZ_B_B + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(dest); + break; + default: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); break; - } - default: - ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); - break; } + /* Finishing touches */ + if (fpi->SaturateMode == SATURATE_ZERO_ONE) { + fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; + } + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + fp->inst[counter].inst0 |= R500_INST_TYPE_OUT + | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G + | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK; + } + + counter++; + if (fp->error) return GL_FALSE; } - return GL_TRUE; -} + fp->cs->nrslots = counter; -static void insert_wpos(struct gl_program *prog) -{ - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - GLuint tempregi = prog->NumTemporaries; - /* should do something else if no temps left... 
*/ - prog->NumTemporaries++; - - fpi = _mesa_alloc_instructions(prog->NumInstructions + 3); - _mesa_init_instructions(fpi, prog->NumInstructions + 3); - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(prog->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - _mesa_copy_instructions(&fpi[i], prog->Instructions, - prog->NumInstructions); - - free(prog->Instructions); - - prog->Instructions = fpi; - - prog->NumInstructions += i; - fpi = 
&prog->Instructions[prog->NumInstructions - 1]; - - assert(fpi->Opcode == OPCODE_END); - - for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) { - for (i = 0; i < 3; i++) - if (fpi->SrcReg[i].File == PROGRAM_INPUT && - fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { - fpi->SrcReg[i].File = PROGRAM_TEMPORARY; - fpi->SrcReg[i].Index = tempregi; - } + /* Finish him! (If it's an output instruction...) + * Yes, I know it's ugly... */ + if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) { + fp->inst[counter].inst0 |= R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; } + + return GL_TRUE; } -/* - Init structures - * - Determine what hwregs each input corresponds to - */ -static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) +static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) { struct r300_pfs_compile_state *cs = NULL; struct gl_fragment_program *mp = &fp->mesa_program; @@ -2096,7 +343,6 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) fp->translated = GL_FALSE; fp->error = GL_FALSE; fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); - fp->tex.length = 0; fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; @@ -2120,6 +366,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) * starting from register 0. */ +#if 0 /* Texcoords come first */ for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { @@ -2160,6 +407,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) if (InputsRead & (1 << i)) cs->inputs[i].reg = 0; } +#endif /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. 
* That way, we can free up the reg when it's no longer needed @@ -2204,7 +452,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) cs->temp_in_use = temps_used; } -static void update_params(struct r300_fragment_program *fp) +static void update_params(struct r500_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; @@ -2214,17 +462,25 @@ static void update_params(struct r300_fragment_program *fp) } void r500TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp) + struct r500_fragment_program *fp) { + struct r300_pfs_compile_state *cs = NULL; if (!fp->translated) { + /* I need to see what I'm working with! */ + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_print_program(&fp->mesa_program.Base); + fflush(stdout); + init_program(r300, fp); cs = fp->cs; if (parse_program(fp) == GL_FALSE) { - dump_program(fp); + ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n"); + dumb_shader(fp); return; } @@ -2235,242 +491,12 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fp->node[fp->cur_node].tex_end = 0; fp->alu_offset = 0; fp->alu_end = cs->nrslots - 1; - fp->tex_offset = 0; - fp->tex_end = fp->tex.length ? fp->tex.length - 1 : 0; - assert(fp->node[fp->cur_node].alu_end >= 0); - assert(fp->alu_end >= 0); + //assert(fp->node[fp->cur_node].alu_end >= 0); + //assert(fp->alu_end >= 0); fp->translated = GL_TRUE; - if (RADEON_DEBUG & DEBUG_PIXEL) - dump_program(fp); r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); } update_params(fp); } - -/* just some random things... 
*/ -static void dump_program(struct r300_fragment_program *fp) -{ - int n, i, j; - static int pc = 0; - - fprintf(stderr, "pc=%d*************************************\n", pc++); - - fprintf(stderr, "Mesa program:\n"); - fprintf(stderr, "-------------\n"); - _mesa_print_program(&fp->mesa_program.Base); - fflush(stdout); - - fprintf(stderr, "Hardware program\n"); - fprintf(stderr, "----------------\n"); - - for (n = 0; n < (fp->cur_node + 1); n++) { - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d\n", n, - fp->node[n].alu_offset, - fp->node[n].tex_offset, - fp->node[n].alu_end, fp->node[n].tex_end); - - if (fp->tex.length) { - fprintf(stderr, " TEX:\n"); - for (i = fp->node[n].tex_offset; - i <= fp->node[n].tex_offset + fp->node[n].tex_end; - ++i) { - const char *instr; - - switch ((fp->tex. - inst[i] >> R300_FPITX_OPCODE_SHIFT) & - 15) { - case R300_FPITX_OP_TEX: - instr = "TEX"; - break; - case R300_FPITX_OP_KIL: - instr = "KIL"; - break; - case R300_FPITX_OP_TXP: - instr = "TXP"; - break; - case R300_FPITX_OP_TXB: - instr = "TXB"; - break; - default: - instr = "UNKNOWN"; - } - - fprintf(stderr, - " %s t%i, %c%i, texture[%i] (%08x)\n", - instr, - (fp->tex. - inst[i] >> R300_FPITX_DST_SHIFT) & 31, - (fp->tex. - inst[i] & R300_FPITX_SRC_CONST) ? 'c' : - 't', - (fp->tex. - inst[i] >> R300_FPITX_SRC_SHIFT) & 31, - (fp->tex. - inst[i] & R300_FPITX_IMAGE_MASK) >> - R300_FPITX_IMAGE_SHIFT, - fp->tex.inst[i]); - } - } - - for (i = fp->node[n].alu_offset; - i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) { - char srcc[3][10], dstc[20]; - char srca[3][10], dsta[20]; - char argc[3][20]; - char arga[3][20]; - char flags[5], tmp[10]; - - for (j = 0; j < 3; ++j) { - int regc = fp->alu.inst[i].inst1 >> (j * 6); - int rega = fp->alu.inst[i].inst3 >> (j * 6); - - sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); - sprintf(srca[j], "%c%i", - (rega & 32) ? 
'c' : 't', rega & 31); - } - - dstc[0] = 0; - sprintf(flags, "%s%s%s", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(dstc, "t%i.%s ", - (fp->alu.inst[i]. - inst1 >> R300_FPI1_DSTC_SHIFT) & 31, - flags); - } - sprintf(flags, "%s%s%s", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", - (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); - if (flags[0] != 0) { - sprintf(tmp, "o%i.%s", - (fp->alu.inst[i]. - inst1 >> R300_FPI1_DSTC_SHIFT) & 31, - flags); - strcat(dstc, tmp); - } - - dsta[0] = 0; - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { - sprintf(dsta, "t%i.w ", - (fp->alu.inst[i]. - inst3 >> R300_FPI3_DSTA_SHIFT) & 31); - } - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { - sprintf(tmp, "o%i.w ", - (fp->alu.inst[i]. 
- inst3 >> R300_FPI3_DSTA_SHIFT) & 31); - strcat(dsta, tmp); - } - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { - strcat(dsta, "Z"); - } - - fprintf(stderr, - "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" - " w: %3s %3s %3s -> %-20s (%08x)\n", i, - srcc[0], srcc[1], srcc[2], dstc, - fp->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, fp->alu.inst[i].inst3); - - for (j = 0; j < 3; ++j) { - int regc = fp->alu.inst[i].inst0 >> (j * 7); - int rega = fp->alu.inst[i].inst2 >> (j * 7); - int d; - char buf[20]; - - d = regc & 31; - if (d < 12) { - switch (d % 4) { - case R300_FPI0_ARGC_SRC0C_XYZ: - sprintf(buf, "%s.xyz", - srcc[d / 4]); - break; - case R300_FPI0_ARGC_SRC0C_XXX: - sprintf(buf, "%s.xxx", - srcc[d / 4]); - break; - case R300_FPI0_ARGC_SRC0C_YYY: - sprintf(buf, "%s.yyy", - srcc[d / 4]); - break; - case R300_FPI0_ARGC_SRC0C_ZZZ: - sprintf(buf, "%s.zzz", - srcc[d / 4]); - break; - } - } else if (d < 15) { - sprintf(buf, "%s.www", srca[d - 12]); - } else if (d == 20) { - sprintf(buf, "0.0"); - } else if (d == 21) { - sprintf(buf, "1.0"); - } else if (d == 22) { - sprintf(buf, "0.5"); - } else if (d >= 23 && d < 32) { - d -= 23; - switch (d / 3) { - case 0: - sprintf(buf, "%s.yzx", - srcc[d % 3]); - break; - case 1: - sprintf(buf, "%s.zxy", - srcc[d % 3]); - break; - case 2: - sprintf(buf, "%s.Wzy", - srcc[d % 3]); - break; - } - } else { - sprintf(buf, "%i", d); - } - - sprintf(argc[j], "%s%s%s%s", - (regc & 32) ? "-" : "", - (regc & 64) ? "|" : "", - buf, (regc & 64) ? "|" : ""); - - d = rega & 31; - if (d < 9) { - sprintf(buf, "%s.%c", srcc[d / 3], - 'x' + (char)(d % 3)); - } else if (d < 12) { - sprintf(buf, "%s.w", srca[d - 9]); - } else if (d == 16) { - sprintf(buf, "0.0"); - } else if (d == 17) { - sprintf(buf, "1.0"); - } else if (d == 18) { - sprintf(buf, "0.5"); - } else { - sprintf(buf, "%i", d); - } - - sprintf(arga[j], "%s%s%s%s", - (rega & 32) ? "-" : "", - (rega & 64) ? "|" : "", - buf, (rega & 64) ? 
"|" : ""); - } - - fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" - " w: %8s %8s %8s op: %08x\n", - argc[0], argc[1], argc[2], - fp->alu.inst[i].inst0, arga[0], arga[1], - arga[2], fp->alu.inst[i].inst2); - } - } -} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 72fca77845..6678029f49 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -96,9 +96,7 @@ typedef struct r300_fragment_program_swizzle { #define DRI_CONF_FP_OPTIMIZATION_SPEED 0 #define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 -struct r300_fragment_program; - -extern void r300TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp); +extern void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp); #endif -- cgit v1.2.3 From b5246de562706aa2f423edaa060f4530da84f3a0 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 29 Apr 2008 23:13:00 -0700 Subject: Added OPCODE_ABS, slightly fixed ADD/SUB --- src/mesa/drivers/dri/r300/r500_fragprog.c | 79 +++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b976637ee2..6014fd90d6 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -72,6 +72,7 @@ #define R500_SWIZZLE_ZERO 4 #define R500_SWIZZLE_HALF 5 #define R500_SWIZZLE_ONE 6 +#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) /* Swizzles for inst3 */ #define MAKE_SWIZ_RGB_A(x) (x << 2) @@ -223,6 +224,26 @@ static GLboolean parse_program(struct r500_fragment_program *fp) } switch (fpi->Opcode) { + case OPCODE_ABS: + src[0] = make_src(fpi->SrcReg[0]); + /* Variation on MOV */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G + | 
R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 + | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_MOD_A_ABS + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(dest); + break; case OPCODE_ADD: src[0] = make_src(fpi->SrcReg[0]); src[1] = make_src(fpi->SrcReg[1]); @@ -239,7 +260,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE) + /* | R500_ALPHA_SEL_A_SRC0 */ | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD | R500_ALU_RGBA_ADDRD(dest) @@ -256,9 +277,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); @@ -294,6 +315,58 @@ static GLboolean 
parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_MUL: + src[0] = make_src(fpi->SrcReg[0]); + src[1] = make_src(fpi->SrcReg[1]); + /* Variation on MAD: src0*src1+0 */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G + | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + // | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; + case OPCODE_SUB: + src[0] = make_src(fpi->SrcReg[0]); + src[1] = make_src(fpi->SrcReg[1]); + /* Variation on MAD: 1*src0-src1 */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G + | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) + | R500_RGB_ADDR2(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) + | R500_ALPHA_ADDR2(src[1]); + fp->inst[counter].inst3 = /* 1 */ + MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | 
MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_ALPHA_MOD_C_NEG; + break; default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); break; -- cgit v1.2.3 From 029cb1fd0f576dd7587bc306b126318fbfcdde2d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 1 May 2008 11:57:29 -0700 Subject: Start of TEX/TXP support. Still having problems with temps and consts, though... --- src/mesa/drivers/dri/r300/r300_state.c | 66 +-------- src/mesa/drivers/dri/r300/r500_fragprog.c | 215 +++++++++++++++++++++++++----- 2 files changed, 180 insertions(+), 101 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 0ffa5bfd75..dac37ba273 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2329,9 +2329,8 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) return; } - /* emit the standard zero shader */ R300_STATECHANGE(rmesa, r500fp); - /* Moar magic... */ + /* Emit our shader... 
*/ for (i = 0; i < fp->cs->nrslots; i++) { rmesa->hw.r500fp.cmd[i*6+1] = fp->inst[i].inst0; rmesa->hw.r500fp.cmd[i*6+2] = fp->inst[i].inst1; @@ -2340,69 +2339,8 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) rmesa->hw.r500fp.cmd[i*6+5] = fp->inst[i].inst4; rmesa->hw.r500fp.cmd[i*6+6] = fp->inst[i].inst5; } -#if 0 - i = 1; - rmesa->hw.r500fp.cmd[i++] = 0x7808; - rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A; - rmesa->hw.r500fp.cmd[i++] = R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R; - rmesa->hw.r500fp.cmd[i++] = 0x0; - rmesa->hw.r500fp.cmd[i++] = 0x0; - - rmesa->hw.r500fp.cmd[i++] = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK; - - rmesa->hw.r500fp.cmd[i++] = R500_RGB_ADDR0(0) | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST | - R500_RGB_SRCP_OP_1_MINUS_2RGB0; - rmesa->hw.r500fp.cmd[i++] = R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST | - R500_ALPHA_SRCP_OP_1_MINUS_2A0; - rmesa->hw.r500fp.cmd[i++] = R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_1 | - R500_ALU_RGB_B_SWIZ_B_1 | - R500_ALU_RGB_G_SWIZ_B_1; - rmesa->hw.r500fp.cmd[i++] = R500_ALPHA_OP_MAD | - R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_1; - rmesa->hw.r500fp.cmd[i++] = R500_ALU_RGBA_OP_MAD | - 
R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0; -#endif - bump_r500fp_count(rmesa->hw.r500fp.cmd, i * 6); + bump_r500fp_count(rmesa->hw.r500fp.cmd, fp->cs->nrslots * 6); R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < fp->const_nr; i++) { diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 6014fd90d6..b751a2aa83 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -74,6 +74,9 @@ #define R500_SWIZZLE_ONE 6 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +/* Swizzles for inst2 */ +#define MAKE_SWIZ_TEX_STRQ(x) (x << 8) +#define MAKE_SWIZ_TEX_RGBA(x) (x << 24) /* Swizzles for inst3 */ #define MAKE_SWIZ_RGB_A(x) (x << 2) #define MAKE_SWIZ_RGB_B(x) (x << 15) @@ -103,36 +106,80 @@ static inline GLuint make_alpha_swizzle(struct prog_src_register src) { return swiz; } -static GLuint make_src(struct prog_src_register src) { - GLuint reg = src.Index; +static inline GLuint make_strq_swizzle(struct prog_src_register src) { + GLuint swiz = 0x0; + GLuint temp = src.Swizzle; + for (int i = 0; i < 4; i++) { + swiz = (temp & 0x3) << i*2; + temp >>= 3; + } + return swiz; +} + +/* Borrowed verbatim from r300_fragprog since it hasn't changed. 
*/ +static GLuint emit_const4fv(struct r500_fragment_program *fp, + const GLfloat * cp) +{ + GLuint reg = 0x0; + int index; + + for (index = 0; index < fp->const_nr; ++index) { + if (fp->constant[index] == cp) + break; + } + + if (index >= fp->const_nr) { + /* TODO: This should be r5xx nums, not r300 */ + if (index >= PFS_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } + + fp->const_nr++; + fp->constant[index] = cp; + } + + reg = index | REG_CONSTANT; + return reg; +} + +static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) { + GLuint reg; switch (src.File) { + case PROGRAM_TEMPORARY: + reg = src.Index + 1; + break; case PROGRAM_INPUT: /* Ugly hack needed to work around Mesa; * fragments don't get loaded right otherwise! */ reg = 0x0; break; case PROGRAM_CONSTANT: - reg |= REG_CONSTANT; + reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> + ParameterValues[src.Index]); break; default: - // ERROR("Can't handle src.File %x\n", src.File); + ERROR("Can't handle src.File %x\n", src.File); + reg = 0x0; break; } return reg; } -static GLuint make_dest(struct prog_dst_register dest) { - GLuint reg = dest.Index; +static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) { + GLuint reg; switch (dest.File) { + case PROGRAM_TEMPORARY: + reg = dest.Index + 1; + break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple * rendering targets... */ - break; - case PROGRAM_CONSTANT: - reg |= REG_CONSTANT; + reg = dest.Index; break; default: - // ERROR("Can't handle dest.File %x\n", dest.File); + ERROR("Can't handle dest.File %x\n", dest.File); + reg = 0x0; break; } return reg; @@ -140,7 +187,7 @@ static GLuint make_dest(struct prog_dst_register dest) { static void dumb_shader(struct r500_fragment_program *fp) { - /* R500_INST_TYPE_TEX */ + /* R500_INST_TYPE_TEX? 
*/ fp->inst[0].inst0 = 0x7808; fp->inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | @@ -203,6 +250,9 @@ static void dumb_shader(struct r500_fragment_program *fp) fp->translated = GL_TRUE; } +static void emit_alu(struct r500_fragment_program *fp) { +} + static GLboolean parse_program(struct r500_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; @@ -219,17 +269,16 @@ static GLboolean parse_program(struct r500_fragment_program *fp) for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { if (fpi->Opcode != OPCODE_KIL) { - dest = make_dest(fpi->DstReg); - mask = fpi->DstReg.WriteMask; + dest = make_dest(fp, fpi->DstReg); + mask = fpi->DstReg.WriteMask << 11; } switch (fpi->Opcode) { case OPCODE_ABS: - src[0] = make_src(fpi->SrcReg[0]); + src[0] = make_src(fp, fpi->SrcReg[0]); /* Variation on MOV */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -245,12 +294,11 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_ADD: - src[0] = make_src(fpi->SrcReg[0]); - src[1] = make_src(fpi->SrcReg[1]); + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0+src1 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -260,7 +308,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGB_SEL_B_SRC0 | 
MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) - /* | R500_ALPHA_SEL_A_SRC0 */ | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD | R500_ALU_RGBA_ADDRD(dest) @@ -269,13 +317,61 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])); break; + case OPCODE_DP3: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + src[2] = make_src(fp, fpi->SrcReg[2]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + break; + case OPCODE_DP4: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + src[2] = make_src(fp, fpi->SrcReg[2]); + /* Based on DP3 */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + 
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + break; case OPCODE_MAD: - src[0] = make_src(fpi->SrcReg[0]); - src[1] = make_src(fpi->SrcReg[1]); - src[2] = make_src(fpi->SrcReg[2]); + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + src[2] = make_src(fp, fpi->SrcReg[2]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -294,13 +390,46 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); break; + case OPCODE_MAX: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 + | 
MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_MIN: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MIN + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN + | R500_ALU_RGBA_ADDRD(dest); + break; case OPCODE_MOV: - src[0] = make_src(fpi->SrcReg[0]); + src[0] = make_src(fp, fpi->SrcReg[0]); /* We use MAX, but MIN, CND, and CMP also work. * Just remember to disable the OMOD! 
*/ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -316,12 +445,11 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MUL: - src[0] = make_src(fpi->SrcReg[0]); - src[1] = make_src(fpi->SrcReg[1]); + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: src0*src1+0 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -341,12 +469,11 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); break; case OPCODE_SUB: - src[0] = make_src(fpi->SrcReg[0]); - src[1] = make_src(fpi->SrcReg[1]); + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0-src1 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_RGB_WMASK_R | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B | R500_INST_ALPHA_WMASK; + | mask; fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) | R500_RGB_ADDR2(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) @@ -367,6 +494,20 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) | R500_ALU_RGBA_ALPHA_MOD_C_NEG; break; + case OPCODE_TEX: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask; + fp->inst[counter].inst1 = fpi->TexSrcUnit + | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE; + fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) + | 
MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) + | R500_TEX_DST_ADDR(dest) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + fp->inst[counter].inst3 = 0x0; + fp->inst[counter].inst4 = 0x0; + fp->inst[counter].inst5 = 0x0; + break; default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); break; -- cgit v1.2.3 From b15c49e59bdc149b978d2b35a4efcc99d15f16b2 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 2 May 2008 10:15:10 -0700 Subject: r5xx: Fragprog shader now handles TEX/TXP correctly. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b751a2aa83..f94b244232 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -110,7 +110,7 @@ static inline GLuint make_strq_swizzle(struct prog_src_register src) { GLuint swiz = 0x0; GLuint temp = src.Swizzle; for (int i = 0; i < 4; i++) { - swiz = (temp & 0x3) << i*2; + swiz += (temp & 0x3) << i*2; temp >>= 3; } return swiz; @@ -147,7 +147,7 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - reg = src.Index + 1; + reg = (src.Index << 0x1) | 0x1; break; case PROGRAM_INPUT: /* Ugly hack needed to work around Mesa; @@ -170,7 +170,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - reg = dest.Index + 1; + reg = (dest.Index << 0x1) | 0x1; break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -495,12 +495,31 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_MOD_C_NEG; break; case OPCODE_TEX: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = 
R500_INST_TYPE_TEX | mask + | R500_INST_TEX_SEM_WAIT; + fp->inst[counter].inst1 = fpi->TexSrcUnit + | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) + /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ + | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A + | R500_TEX_DST_ADDR(dest) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + fp->inst[counter].inst3 = 0x0; + fp->inst[counter].inst4 = 0x0; + fp->inst[counter].inst5 = 0x0; + break; + case OPCODE_TXP: src[0] = make_src(fp, fpi->SrcReg[0]); fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask; fp->inst[counter].inst1 = fpi->TexSrcUnit - | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE; + | R500_TEX_INST_PROJ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) - | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) + /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ + | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A | R500_TEX_DST_ADDR(dest) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; -- cgit v1.2.3 From 8aa98a409b16cfd1a035c3f60208207eb1cc4d41 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 17 Mar 2008 19:05:15 +1000 Subject: r500: fragprog --- src/mesa/drivers/dri/r300/Makefile | 1 + src/mesa/drivers/dri/r300/r500_fragprog.c | 2476 +++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r500_fragprog.h | 104 ++ 3 files changed, 2581 insertions(+) create mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.c create mode 100644 src/mesa/drivers/dri/r300/r500_fragprog.h (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 44248964fd..5b2bd0bc2b 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ 
b/src/mesa/drivers/dri/r300/Makefile @@ -39,6 +39,7 @@ DRIVER_SOURCES = \ r300_texstate.c \ r300_vertprog.c \ r300_fragprog.c \ + r500_fragprog.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c new file mode 100644 index 0000000000..3638a94380 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -0,0 +1,2476 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs + * + * \author Jerome Glisse + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + * \todo Verify results of opcodes for accuracy, I've only checked them in + * specific cases. 
+ */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r300_context.h" +#include "r300_fragprog.h" +#include "r300_reg.h" +#include "r300_state.h" + +/* + * Usefull macros and values + */ +#define ERROR(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + fp->error = GL_TRUE; \ + } while(0) + +#define PFS_INVAL 0xFFFFFFFF +#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs + +#define SWIZZLE_XYZ 0 +#define SWIZZLE_XXX 1 +#define SWIZZLE_YYY 2 +#define SWIZZLE_ZZZ 3 +#define SWIZZLE_WWW 4 +#define SWIZZLE_YZX 5 +#define SWIZZLE_ZXY 6 +#define SWIZZLE_WZY 7 +#define SWIZZLE_111 8 +#define SWIZZLE_000 9 +#define SWIZZLE_HHH 10 + +#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \ + ((SWIZZLE_##x<<0)| \ + (SWIZZLE_##y<<3)| \ + (SWIZZLE_##z<<6)| \ + (SWIZZLE_##w<<9)), \ + 0) + +#define REG_TYPE_INPUT 0 +#define REG_TYPE_OUTPUT 1 +#define REG_TYPE_TEMP 2 +#define REG_TYPE_CONST 3 + +#define REG_TYPE_SHIFT 0 +#define REG_INDEX_SHIFT 2 +#define REG_VSWZ_SHIFT 8 +#define REG_SSWZ_SHIFT 13 +#define REG_NEGV_SHIFT 18 +#define REG_NEGS_SHIFT 19 +#define REG_ABS_SHIFT 20 +#define REG_NO_USE_SHIFT 21 // Hack for refcounting +#define REG_VALID_SHIFT 22 // Does the register contain a defined value? +#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? 
+ +#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) +#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) +#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) +#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) +#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) +#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) +#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) +#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) +#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) +#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) + +#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ + (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_GET_TYPE(reg) \ + ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) +#define REG_GET_INDEX(reg) \ + ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) +#define REG_GET_VSWZ(reg) \ + ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) +#define REG_GET_SSWZ(reg) \ + ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) +#define REG_GET_NO_USE(reg) \ + ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) +#define REG_GET_VALID(reg) \ + ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) +#define REG_GET_BUILTIN(reg) \ + ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) +#define REG_SET_TYPE(reg, type) \ + reg = ((reg & ~REG_TYPE_MASK) | \ + ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) +#define REG_SET_INDEX(reg, index) \ + reg = ((reg & ~REG_INDEX_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) +#define REG_SET_VSWZ(reg, vswz) \ + reg = ((reg & ~REG_VSWZ_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) +#define REG_SET_SSWZ(reg, sswz) \ + reg = ((reg & ~REG_SSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_SET_NO_USE(reg, nouse) \ + reg = ((reg & ~REG_NO_USE_MASK) | 
\ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) +#define REG_SET_VALID(reg, valid) \ + reg = ((reg & ~REG_VALID_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) +#define REG_SET_BUILTIN(reg, builtin) \ + reg = ((reg & ~REG_BUILTIN_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) +#define REG_ABS(reg) \ + reg = (reg | REG_ABS_MASK) +#define REG_NEGV(reg) \ + reg = (reg | REG_NEGV_MASK) +#define REG_NEGS(reg) \ + reg = (reg | REG_NEGS_MASK) + +/* + * Datas structures for fragment program generation + */ + +/* description of r300 native hw instructions */ +static const struct { + const char *name; + int argc; + int v_op; + int s_op; +} r300_fpop[] = { + /* *INDENT-OFF* */ + {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD}, + {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4}, + {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4}, + {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN}, + {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX}, + {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP}, + {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC}, + {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2}, + {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2}, + {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP}, + {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ}, + {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL}, + {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL}, + /* *INDENT-ON* */ +}; + +/* vector swizzles r300 can support natively, with a couple of + * cases we handle specially + * + * REG_VSWZ/REG_SSWZ is an index into this table + */ + +/* mapping from SWIZZLE_* to r300 native values for scalar insns */ +#define SWIZZLE_HALF 6 + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ + SWIZZLE_##y, \ + SWIZZLE_##z, \ + SWIZZLE_ZERO)) +/* native swizzles */ +static const struct r300_pfs_swizzle { + GLuint hash; /* swizzle value this matches */ + GLuint base; /* base value for hw swizzle */ + GLuint stride; /* 
difference in base between arg0/1/2 */ + GLuint flags; +} v_swiz[] = { + /* *INDENT-OFF* */ + {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, + {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, + {PFS_INVAL, 0, 0, 0}, + /* *INDENT-ON* */ +}; + +/* used during matching of non-native swizzles */ +#define SWZ_X_MASK (7 << 0) +#define SWZ_Y_MASK (7 << 3) +#define SWZ_Z_MASK (7 << 6) +#define SWZ_W_MASK (7 << 9) +static const struct { + GLuint hash; /* used to mask matching swizzle components */ + int mask; /* actual outmask */ + int count; /* count of components matched */ +} s_mask[] = { + /* *INDENT-OFF* */ + {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3}, + {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2}, + {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2}, + {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2}, + {SWZ_X_MASK, 1, 1}, + {SWZ_Y_MASK, 2, 1}, + {SWZ_Z_MASK, 4, 1}, + {PFS_INVAL, PFS_INVAL, PFS_INVAL} + /* *INDENT-ON* */ +}; + +static const struct { + int base; /* hw value of swizzle */ + int stride; /* difference between SRC0/1/2 */ + GLuint flags; +} s_swiz[] = { + /* *INDENT-OFF* */ + {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, + {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, + {R300_FPI2_ARGA_ZERO, 0, 0}, + {R300_FPI2_ARGA_ONE, 0, 0}, + {R300_FPI2_ARGA_HALF, 0, 0} + /* 
*INDENT-ON* */ +}; + +/* boiler-plate reg, for convenience */ +static const GLuint undef = REG(REG_TYPE_TEMP, + 0, + SWIZZLE_XYZ, + SWIZZLE_W, + GL_FALSE, + GL_FALSE, + GL_FALSE); + +/* constant one source */ +static const GLuint pfs_one = REG(REG_TYPE_CONST, + 0, + SWIZZLE_111, + SWIZZLE_ONE, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant half source */ +static const GLuint pfs_half = REG(REG_TYPE_CONST, + 0, + SWIZZLE_HHH, + SWIZZLE_HALF, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant zero source */ +static const GLuint pfs_zero = REG(REG_TYPE_CONST, + 0, + SWIZZLE_000, + SWIZZLE_ZERO, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* + * Common functions prototypes + */ +static void dump_program(struct r300_fragment_program *fp); +static void emit_arith(struct r300_fragment_program *fp, int op, + GLuint dest, int mask, + GLuint src0, GLuint src1, GLuint src2, int flags); + +/** + * Get an R300 temporary that can be written to in the given slot. + */ +static int get_hw_temp(struct r300_fragment_program *fp, int slot) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) { + ERROR("Out of hardware temps\n"); + return 0; + } + // Reserved is used to avoid the following scenario: + // R300 temporary X is first assigned to Mesa temporary Y during vector ops + // R300 temporary X is then assigned to Mesa temporary Z for further vector ops + // Then scalar ops on Mesa temporary Z are emitted and move back in time + // to overwrite the value of temporary Y. + // End scenario. + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. 
+ cs->hwtemps[r].vector_valid = 0; + cs->hwtemps[r].scalar_valid = 0; + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + +/** + * Get an R300 temporary that will act as a TEX destination register. + */ +static int get_hw_temp_tex(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->used_in_node & (1 << r)) + continue; + + // Note: Be very careful here + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) + return get_hw_temp(fp, 0); /* Will cause an indirection */ + + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = cs->nrslots; + cs->hwtemps[r].scalar_valid = cs->nrslots; + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + +/** + * Mark the given hardware register as free. + */ +static void free_hw_temp(struct r300_fragment_program *fp, int idx) +{ + COMPILE_STATE; + + // Be very careful here. Consider sequences like + // MAD r0, r1,r2,r3 + // TEX r4, ... + // The TEX instruction may be moved in front of the MAD instruction + // due to the way nodes work. We don't want to alias r1 and r4 in + // this case. + // I'm certain the register allocation could be further sanitized, + // but it's tricky because of stuff that can happen inside emit_tex + // and emit_arith. + cs->hwtemps[idx].free = cs->nrslots + 1; +} + +/** + * Create a new Mesa temporary register. 
+ */ +static GLuint get_temp_reg(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = -1; + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Create a new Mesa temporary register that will act as the destination + * register for a texture read. + */ +static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = get_hw_temp_tex(fp); + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Free a Mesa temporary and the associated R300 temporary. + */ +static void free_temp(struct r300_fragment_program *fp, GLuint r) +{ + COMPILE_STATE; + GLuint index = REG_GET_INDEX(r); + + if (!(cs->temp_in_use & (1 << index))) + return; + + if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { + free_hw_temp(fp, cs->temps[index].reg); + cs->temps[index].reg = -1; + cs->temp_in_use &= ~(1 << index); + } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { + free_hw_temp(fp, cs->inputs[index].reg); + cs->inputs[index].reg = -1; + } +} + +/** + * Emit a hardware constant/parameter. + * + * \p cp Stable pointer to an array of 4 floats. + * The pointer must be stable in the sense that it remains to be valid + * and hold the contents of the constant/parameter throughout the lifetime + * of the fragment program (actually, up until the next time the fragment + * program is translated). 
+ */ +static GLuint emit_const4fv(struct r300_fragment_program *fp, + const GLfloat * cp) +{ + GLuint reg = undef; + int index; + + for (index = 0; index < fp->const_nr; ++index) { + if (fp->constant[index] == cp) + break; + } + + if (index >= fp->const_nr) { + if (index >= PFS_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } + + fp->const_nr++; + fp->constant[index] = cp; + } + + REG_SET_TYPE(reg, REG_TYPE_CONST); + REG_SET_INDEX(reg, index); + REG_SET_VALID(reg, GL_TRUE); + return reg; +} + +static inline GLuint negate(GLuint r) +{ + REG_NEGS(r); + REG_NEGV(r); + return r; +} + +/* Hack, to prevent clobbering sources used multiple times when + * emulating non-native instructions + */ +static inline GLuint keep(GLuint r) +{ + REG_SET_NO_USE(r, GL_TRUE); + return r; +} + +static inline GLuint absolute(GLuint r) +{ + REG_ABS(r); + return r; +} + +static int swz_native(struct r300_fragment_program *fp, + GLuint src, GLuint * r, GLuint arbneg) +{ + /* Native swizzle, handle negation */ + src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); + + if ((arbneg & 0x7) == 0x0) { + src = src & ~REG_NEGV_MASK; + *r = src; + } else if ((arbneg & 0x7) == 0x7) { + src |= REG_NEGV_MASK; + *r = src; + } else { + if (!REG_GET_VALID(*r)) + *r = get_temp_reg(fp); + src |= REG_NEGV_MASK; + emit_arith(fp, + PFS_OP_MAD, + *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); + src = src & ~REG_NEGV_MASK; + emit_arith(fp, + PFS_OP_MAD, + *r, + (arbneg ^ 0x7) | WRITEMASK_W, + src, pfs_one, pfs_zero, 0); + } + + return 3; +} + +static int swz_emit_partial(struct r300_fragment_program *fp, + GLuint src, + GLuint * r, int mask, int mc, GLuint arbneg) +{ + GLuint tmp; + GLuint wmask = 0; + + if (!REG_GET_VALID(*r)) + *r = get_temp_reg(fp); + + /* A partial match, VSWZ/mask define what parts of the + * desired swizzle we match + */ + if (mc + s_mask[mask].count == 3) { + wmask = WRITEMASK_W; + src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT; + } + + tmp = 
arbneg & s_mask[mask].mask; + if (tmp) { + tmp = tmp ^ s_mask[mask].mask; + if (tmp) { + emit_arith(fp, + PFS_OP_MAD, + *r, + arbneg & s_mask[mask].mask, + keep(src) | REG_NEGV_MASK, + pfs_one, pfs_zero, 0); + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, + PFS_OP_MAD, + *r, tmp | wmask, src, pfs_one, pfs_zero, 0); + } else { + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, + PFS_OP_MAD, + *r, + (arbneg & s_mask[mask].mask) | wmask, + src | REG_NEGV_MASK, pfs_one, pfs_zero, 0); + } + } else { + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(fp, PFS_OP_MAD, + *r, + s_mask[mask].mask | wmask, + src, pfs_one, pfs_zero, 0); + } + + return s_mask[mask].count; +} + +static GLuint do_swizzle(struct r300_fragment_program *fp, + GLuint src, GLuint arbswz, GLuint arbneg) +{ + GLuint r = undef; + GLuint vswz; + int c_mask = 0; + int v_match = 0; + + /* If swizzling from something without an XYZW native swizzle, + * emit result to a temp, and do new swizzle from the temp. + */ +#if 0 + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint temp = get_temp_reg(fp); + emit_arith(fp, + PFS_OP_MAD, + temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0); + src = temp; + } +#endif + + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint vsrcswz = + (v_swiz[REG_GET_VSWZ(src)]. + hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) | + REG_GET_SSWZ(src) << 9; + GLint i; + + GLuint newswz = 0; + GLuint offset; + for (i = 0; i < 4; ++i) { + offset = GET_SWZ(arbswz, i); + + newswz |= + (offset <= 3) ? 
GET_SWZ(vsrcswz, + offset) << i * + 3 : offset << i * 3; + } + + arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK); + REG_SET_SSWZ(src, GET_SWZ(newswz, 3)); + } else { + /* set scalar swizzling */ + REG_SET_SSWZ(src, GET_SWZ(arbswz, 3)); + + } + do { + vswz = REG_GET_VSWZ(src); + do { + int chash; + + REG_SET_VSWZ(src, vswz); + chash = v_swiz[REG_GET_VSWZ(src)].hash & + s_mask[c_mask].hash; + + if (chash == (arbswz & s_mask[c_mask].hash)) { + if (s_mask[c_mask].count == 3) { + v_match += swz_native(fp, + src, &r, arbneg); + } else { + v_match += swz_emit_partial(fp, + src, + &r, + c_mask, + v_match, + arbneg); + } + + if (v_match == 3) + return r; + + /* Fill with something invalid.. all 0's was + * wrong before, matched SWIZZLE_X. So all + * 1's will be okay for now + */ + arbswz |= (PFS_INVAL & s_mask[c_mask].hash); + } + } while (v_swiz[++vswz].hash != PFS_INVAL); + REG_SET_VSWZ(src, SWIZZLE_XYZ); + } while (s_mask[++c_mask].hash != PFS_INVAL); + + ERROR("should NEVER get here\n"); + return r; +} + +static GLuint t_src(struct r300_fragment_program *fp, + struct prog_src_register fpsrc) +{ + GLuint r = undef; + + switch (fpsrc.File) { + case PROGRAM_TEMPORARY: + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); + break; + case PROGRAM_INPUT: + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_INPUT); + break; + case PROGRAM_LOCAL_PARAM: + r = emit_const4fv(fp, + fp->mesa_program.Base.LocalParams[fpsrc. + Index]); + break; + case PROGRAM_ENV_PARAM: + r = emit_const4fv(fp, + fp->ctx->FragmentProgram.Parameters[fpsrc. + Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + r = emit_const4fv(fp, + fp->mesa_program.Base.Parameters-> + ParameterValues[fpsrc.Index]); + break; + default: + ERROR("unknown SrcReg->File %x\n", fpsrc.File); + return r; + } + + /* no point swizzling ONE/ZERO/HALF constants... 
*/ + if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) + r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase); + return r; +} + +static GLuint t_scalar_src(struct r300_fragment_program *fp, + struct prog_src_register fpsrc) +{ + struct prog_src_register src = fpsrc; + int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ + + src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); + + return t_src(fp, src); +} + +static GLuint t_dst(struct r300_fragment_program *fp, + struct prog_dst_register dest) +{ + GLuint r = undef; + + switch (dest.File) { + case PROGRAM_TEMPORARY: + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); + return r; + case PROGRAM_OUTPUT: + REG_SET_TYPE(r, REG_TYPE_OUTPUT); + switch (dest.Index) { + case FRAG_RESULT_COLR: + case FRAG_RESULT_DEPR: + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + return r; + default: + ERROR("Bad DstReg->Index 0x%x\n", dest.Index); + return r; + } + default: + ERROR("Bad DstReg->File 0x%x\n", dest.File); + return r; + } +} + +static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) +{ + COMPILE_STATE; + int idx; + int index = REG_GET_INDEX(src); + + switch (REG_GET_TYPE(src)) { + case REG_TYPE_TEMP: + /* NOTE: if reg==-1 here, a source is being read that + * hasn't been written to. Undefined results. 
+ */ + if (cs->temps[index].reg == -1) + cs->temps[index].reg = get_hw_temp(fp, cs->nrslots); + + idx = cs->temps[index].reg; + + if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) + free_temp(fp, src); + break; + case REG_TYPE_INPUT: + idx = cs->inputs[index].reg; + + if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) + free_hw_temp(fp, cs->inputs[index].reg); + break; + case REG_TYPE_CONST: + return (index | SRC_CONST); + default: + ERROR("Invalid type for source reg\n"); + return (0 | SRC_CONST); + } + + if (!tex) + cs->used_in_node |= (1 << idx); + + return idx; +} + +static int t_hw_dst(struct r300_fragment_program *fp, + GLuint dest, GLboolean tex, int slot) +{ + COMPILE_STATE; + int idx; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { + if (!tex) { + cs->temps[index].reg = get_hw_temp(fp, slot); + } else { + cs->temps[index].reg = get_hw_temp_tex(fp); + } + } + idx = cs->temps[index].reg; + + if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) + free_temp(fp, dest); + + cs->dest_in_node |= (1 << idx); + cs->used_in_node |= (1 << idx); + break; + case REG_TYPE_OUTPUT: + switch (index) { + case FRAG_RESULT_COLR: + fp->node[fp->cur_node].flags |= + R300_PFS_NODE_OUTPUT_COLOR; + break; + case FRAG_RESULT_DEPR: + fp->node[fp->cur_node].flags |= + R300_PFS_NODE_OUTPUT_DEPTH; + break; + } + return index; + break; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + return idx; +} + +static void emit_nop(struct r300_fragment_program *fp) +{ + COMPILE_STATE; + + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return; + } + + fp->alu.inst[cs->nrslots].inst0 = NOP_INST0; + fp->alu.inst[cs->nrslots].inst1 = NOP_INST1; + fp->alu.inst[cs->nrslots].inst2 = NOP_INST2; + fp->alu.inst[cs->nrslots].inst3 = NOP_INST3; + cs->nrslots++; +} + 
+static void emit_tex(struct r300_fragment_program *fp, + struct prog_instruction *fpi, int opcode) +{ + COMPILE_STATE; + GLuint coord = t_src(fp, fpi->SrcReg[0]); + GLuint dest = undef, rdest = undef; + GLuint din, uin; + int unit = fpi->TexSrcUnit; + int hwsrc, hwdest; + GLuint tempreg = 0; + + uin = cs->used_in_node; + din = cs->dest_in_node; + + /* Resolve source/dest to hardware registers */ + if (opcode != R300_FPITX_OP_KIL) { + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { + /** + * Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + * + * \todo Refactor this once we have proper rewriting/optimization + * support for programs. + */ + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + int factor_index; + GLuint factorreg; + + tokens[2] = unit; + factor_index = + _mesa_add_state_reference(fp->mesa_program.Base. + Parameters, tokens); + factorreg = + emit_const4fv(fp, + fp->mesa_program.Base.Parameters-> + ParameterValues[factor_index]); + tempreg = keep(get_temp_reg(fp)); + + emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + coord, factorreg, pfs_zero, 0); + + /* Ensure correct node indirection */ + uin = cs->used_in_node; + din = cs->dest_in_node; + + hwsrc = t_hw_src(fp, tempreg, GL_TRUE); + } else { + hwsrc = t_hw_src(fp, coord, GL_TRUE); + } + + dest = t_dst(fp, fpi->DstReg); + + /* r300 doesn't seem to be able to do TEX->output reg */ + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + rdest = dest; + dest = get_temp_reg_tex(fp); + } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { + /* in case write mask isn't XYZW */ + rdest = dest; + dest = get_temp_reg_tex(fp); + } + hwdest = + t_hw_dst(fp, dest, GL_TRUE, + fp->node[fp->cur_node].alu_offset); + + /* Use a temp that hasn't been used in this node, rather + * than causing an indirection + */ + if (uin & (1 << hwdest)) { + free_hw_temp(fp, hwdest); + hwdest = 
get_hw_temp_tex(fp); + cs->temps[REG_GET_INDEX(dest)].reg = hwdest; + } + } else { + hwdest = 0; + unit = 0; + hwsrc = t_hw_src(fp, coord, GL_TRUE); + } + + /* Indirection if source has been written in this node, or if the + * dest has been read/written in this node + */ + if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && + (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { + + /* Finish off current node */ + if (fp->node[fp->cur_node].alu_offset == cs->nrslots) + emit_nop(fp); + + fp->node[fp->cur_node].alu_end = + cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + assert(fp->node[fp->cur_node].alu_end >= 0); + + if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) { + ERROR("too many levels of texture indirection\n"); + return; + } + + /* Start new node */ + fp->node[fp->cur_node].tex_offset = fp->tex.length; + fp->node[fp->cur_node].alu_offset = cs->nrslots; + fp->node[fp->cur_node].tex_end = -1; + fp->node[fp->cur_node].alu_end = -1; + fp->node[fp->cur_node].flags = 0; + cs->used_in_node = 0; + cs->dest_in_node = 0; + } + + if (fp->cur_node == 0) + fp->first_node_has_tex = 1; + + fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT) + | (hwdest << R300_FPITX_DST_SHIFT) + | (unit << R300_FPITX_IMAGE_SHIFT) + /* not entirely sure about this */ + | (opcode << R300_FPITX_OPCODE_SHIFT); + + cs->dest_in_node |= (1 << hwdest); + if (REG_GET_TYPE(coord) != REG_TYPE_CONST) + cs->used_in_node |= (1 << hwsrc); + + fp->node[fp->cur_node].tex_end++; + + /* Copy from temp to output if needed */ + if (REG_GET_VALID(rdest)) { + emit_arith(fp, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, + pfs_one, pfs_zero, 0); + free_temp(fp, dest); + } + + /* Free temp register */ + if (tempreg != 0) + free_temp(fp, tempreg); +} + +/** + * Returns the first slot where we could possibly allow writing to dest, + * according to register allocation. 
+ */ +static int get_earliest_allowed_write(struct r300_fragment_program *fp, + GLuint dest, int mask) +{ + COMPILE_STATE; + int idx; + int pos; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[index].reg == -1) + return 0; + + idx = cs->temps[index].reg; + break; + case REG_TYPE_OUTPUT: + return 0; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + pos = cs->hwtemps[idx].reserved; + if (mask & WRITEMASK_XYZ) { + if (pos < cs->hwtemps[idx].vector_lastread) + pos = cs->hwtemps[idx].vector_lastread; + } + if (mask & WRITEMASK_W) { + if (pos < cs->hwtemps[idx].scalar_lastread) + pos = cs->hwtemps[idx].scalar_lastread; + } + + return pos; +} + +/** + * Allocates a slot for an ALU instruction that can consist of + * a vertex part or a scalar part or both. + * + * Sources from src (src[0] to src[argc-1]) are added to the slot in the + * appropriate position (vector and/or scalar), and their positions are + * recorded in the srcpos array. + * + * This function emits instruction code for the source fetch and the + * argument selection. It does not emit instruction code for the + * opcode or the destination selection. 
+ * + * @return the index of the slot + */ +static int find_and_prepare_slot(struct r300_fragment_program *fp, + GLboolean emit_vop, + GLboolean emit_sop, + int argc, GLuint * src, GLuint dest, int mask) +{ + COMPILE_STATE; + int hwsrc[3]; + int srcpos[3]; + unsigned int used; + int tempused; + int tempvsrc[3]; + int tempssrc[3]; + int pos; + int regnr; + int i, j; + + // Determine instruction slots, whether sources are required on + // vector or scalar side, and the smallest slot number where + // all source registers are available + used = 0; + if (emit_vop) + used |= SLOT_OP_VECTOR; + if (emit_sop) + used |= SLOT_OP_SCALAR; + + pos = get_earliest_allowed_write(fp, dest, mask); + + if (fp->node[fp->cur_node].alu_offset > pos) + pos = fp->node[fp->cur_node].alu_offset; + for (i = 0; i < argc; ++i) { + if (!REG_GET_BUILTIN(src[i])) { + if (emit_vop) + used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; + if (emit_sop) + used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; + } + + hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ + regnr = hwsrc[i] & 31; + + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_valid > pos) + pos = cs->hwtemps[regnr].vector_valid; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_valid > pos) + pos = cs->hwtemps[regnr].scalar_valid; + } + } + } + + // Find a slot that fits + for (;; ++pos) { + if (cs->slot[pos].used & used & SLOT_OP_BOTH) + continue; + + if (pos >= cs->nrslots) { + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return -1; + } + + fp->alu.inst[pos].inst0 = NOP_INST0; + fp->alu.inst[pos].inst1 = NOP_INST1; + fp->alu.inst[pos].inst2 = NOP_INST2; + fp->alu.inst[pos].inst3 = NOP_INST3; + + cs->nrslots++; + } + // Note: When we need both parts (vector and scalar) of a source, + // we always try to put them into the same position. 
This makes the + // code easier to read, and it is optimal (i.e. one doesn't gain + // anything by splitting the parts). + // It also avoids headaches with swizzles that access both parts (i.e WXY) + tempused = cs->slot[pos].used; + for (i = 0; i < 3; ++i) { + tempvsrc[i] = cs->slot[pos].vsrc[i]; + tempssrc[i] = cs->slot[pos].ssrc[i]; + } + + for (i = 0; i < argc; ++i) { + int flags = (used >> i) & SLOT_SRC_BOTH; + + if (!flags) { + srcpos[i] = 0; + continue; + } + + for (j = 0; j < 3; ++j) { + if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { + if (tempvsrc[j] != hwsrc[i]) + continue; + } + + if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { + if (tempssrc[j] != hwsrc[i]) + continue; + } + + break; + } + + if (j == 3) + break; + + srcpos[i] = j; + tempused |= flags << j; + if (flags & SLOT_SRC_VECTOR) + tempvsrc[j] = hwsrc[i]; + if (flags & SLOT_SRC_SCALAR) + tempssrc[j] = hwsrc[i]; + } + + if (i == argc) + break; + } + + // Found a slot, reserve it + cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); + for (i = 0; i < 3; ++i) { + cs->slot[pos].vsrc[i] = tempvsrc[i]; + cs->slot[pos].ssrc[i] = tempssrc[i]; + } + + for (i = 0; i < argc; ++i) { + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + int regnr = hwsrc[i] & 31; + + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_lastread < pos) + cs->hwtemps[regnr].vector_lastread = + pos; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_lastread < pos) + cs->hwtemps[regnr].scalar_lastread = + pos; + } + } + } + + // Emit the source fetch code + fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; + fp->alu.inst[pos].inst1 |= + ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); + + fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; + fp->alu.inst[pos].inst3 |= + ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | + 
(cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); + + // Emit the argument selection code + if (emit_vop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + + (srcpos[i] * + v_swiz[REG_GET_VSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_FPI0_ARGC_ZERO; + } + } + + fp->alu.inst[pos].inst0 &= + ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK | + R300_FPI0_ARG2C_MASK); + fp->alu.inst[pos].inst0 |= + (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] << + R300_FPI0_ARG1C_SHIFT) + | (swz[2] << R300_FPI0_ARG2C_SHIFT); + } + + if (emit_sop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + + (srcpos[i] * + s_swiz[REG_GET_SSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_FPI2_ARGA_ZERO; + } + } + + fp->alu.inst[pos].inst2 &= + ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK | + R300_FPI2_ARG2A_MASK); + fp->alu.inst[pos].inst2 |= + (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] << + R300_FPI2_ARG1A_SHIFT) + | (swz[2] << R300_FPI2_ARG2A_SHIFT); + } + + return pos; +} + +/** + * Append an ALU instruction to the instruction list. 
+ */ +static void emit_arith(struct r300_fragment_program *fp, + int op, + GLuint dest, + int mask, + GLuint src0, GLuint src1, GLuint src2, int flags) +{ + COMPILE_STATE; + GLuint src[3] = { src0, src1, src2 }; + int hwdest; + GLboolean emit_vop, emit_sop; + int vop, sop, argc; + int pos; + + vop = r300_fpop[op].v_op; + sop = r300_fpop[op].s_op; + argc = r300_fpop[op].argc; + + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && + REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { + if (mask & WRITEMASK_Z) { + mask = WRITEMASK_W; + } else { + return; + } + } + + emit_vop = GL_FALSE; + emit_sop = GL_FALSE; + if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) + emit_vop = GL_TRUE; + if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) + emit_sop = GL_TRUE; + + pos = + find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest, + mask); + if (pos < 0) + return; + + hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ + + if (flags & PFS_FLAG_SAT) { + vop |= R300_FPI0_OUTC_SAT; + sop |= R300_FPI2_OUTA_SAT; + } + + /* Throw the pieces together and get FPI0/1 */ + if (emit_vop) { + fp->alu.inst[pos].inst0 |= vop; + + fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; + + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { + fp->alu.inst[pos].inst1 |= + (mask & WRITEMASK_XYZ) << + R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; + } else + assert(0); + } else { + fp->alu.inst[pos].inst1 |= + (mask & WRITEMASK_XYZ) << + R300_FPI1_DSTC_REG_MASK_SHIFT; + + cs->hwtemps[hwdest].vector_valid = pos + 1; + } + } + + /* And now FPI2/3 */ + if (emit_sop) { + fp->alu.inst[pos].inst2 |= sop; + + if (mask & WRITEMASK_W) { + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { + fp->alu.inst[pos].inst3 |= + (hwdest << R300_FPI3_DSTA_SHIFT) | + R300_FPI3_DSTA_OUTPUT; + } else if (REG_GET_INDEX(dest) == + FRAG_RESULT_DEPR) { + fp->alu.inst[pos].inst3 |= + 
R300_FPI3_DSTA_DEPTH; + } else + assert(0); + } else { + fp->alu.inst[pos].inst3 |= + (hwdest << R300_FPI3_DSTA_SHIFT) | + R300_FPI3_DSTA_REG; + + cs->hwtemps[hwdest].scalar_valid = pos + 1; + } + } + } + + return; +} + +#if 0 +static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + GLuint r = undef; + + if (!(mp->Base.InputsRead & (1 << attr))) { + ERROR("Attribute %d was not provided!\n", attr); + return undef; + } + + REG_SET_TYPE(r, REG_TYPE_INPUT); + REG_SET_INDEX(r, attr); + REG_SET_VALID(r, GL_TRUE); + return r; +} +#endif + +static GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.0, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } +}; + +/** + * Emit a LIT instruction. + * \p flags may be PFS_FLAG_SAT + * + * Definition of LIT (from ARB_fragment_program): + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots. So unless there's some special undocumented opcode, + * this implementation is potentially optimal. Unfortunately, + * emit_arith is a bit too conservative because it doesn't understand + * partial writes to the vector component. 
+ */ +static const GLfloat LitConst[4] = + { 127.999999, 127.999999, 127.999999, -127.999999 }; + +static void emit_lit(struct r300_fragment_program *fp, + GLuint dest, int mask, GLuint src, int flags) +{ + COMPILE_STATE; + GLuint cnst; + int needTemporary; + GLuint temp; + + cnst = emit_const4fv(fp, LitConst); + + needTemporary = 0; + if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = keep(get_temp_reg(fp)); + } else { + temp = keep(dest); + } + + // Note: The order of emit_arith inside the slots is relevant, + // because emit_arith only looks at scalar vs. vector when resolving + // dependencies, and it does not consider individual vector components, + // so swizzling between the two parts can create fake dependencies. + + // First slot + emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY, + keep(src), pfs_zero, undef, 0); + emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); + + // Second slot + emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), cnst, undef, 0); + emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W, + swizzle(temp, Y, Y, Y, Y), undef, undef, 0); + + // Third slot + // If desired, we saturate the y result here. 
+ // This does not affect the use as a condition variable in the CMP later + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, + temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); + + // Fourth slot + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X, + pfs_one, pfs_one, pfs_zero, 0); + emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); + + // Fifth slot + emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z, + pfs_zero, swizzle(temp, W, W, W, W), + negate(swizzle(temp, Y, Y, Y, Y)), flags); + emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, + pfs_zero, 0); + + if (needTemporary) { + emit_arith(fp, PFS_OP_MAD, dest, mask, + temp, pfs_one, pfs_zero, flags); + free_temp(fp, temp); + } else { + // Decrease refcount of the destination + t_hw_dst(fp, dest, GL_FALSE, cs->nrslots); + } +} + +static GLboolean parse_program(struct r300_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + const struct prog_instruction *inst = mp->Base.Instructions; + struct prog_instruction *fpi; + GLuint src[3], dest, temp[2]; + int flags, mask = 0; + int const_sin[2]; + + if (!inst || inst[0].Opcode == OPCODE_END) { + ERROR("empty program?\n"); + return GL_FALSE; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + if (fpi->SaturateMode == SATURATE_ZERO_ONE) + flags = PFS_FLAG_SAT; + else + flags = 0; + + if (fpi->Opcode != OPCODE_KIL) { + dest = t_dst(fp, fpi->DstReg); + mask = fpi->DstReg.WriteMask; + } + + switch (fpi->Opcode) { + case OPCODE_ABS: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + absolute(src[0]), pfs_one, pfs_zero, flags); + break; + case OPCODE_ADD: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, src[1], flags); + break; + case OPCODE_CMP: + src[0] = t_src(fp, fpi->SrcReg[0]); + 
src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c + * r300 - if src2.c < 0.0 ? src1.c : src0.c + */ + emit_arith(fp, PFS_OP_CMP, dest, mask, + src[2], src[1], src[0], flags); + break; + case OPCODE_COS: + /* + * cos using a parabola (see SIN): + * cos(x): + * x = (x/(2*PI))+0.75 + * x = frac(x) + * x = (x*2*PI)-PI + * result = sin(x) + */ + temp[0] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* add 0.5*PI and do range reduction */ + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(src[0], X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI + 0); + + /* SIN */ + + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(fp, PFS_OP_MAD, dest, mask, + swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(fp, temp[0]); + break; + case OPCODE_DP3: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_DP3, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DP4: + src[0] = t_src(fp, fpi->SrcReg[0]); + 
src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_DP4, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DPH: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + /* src0.xyz1 -> temp + * DP4 dest, temp, src1 + */ +#if 0 + temp[0] = get_temp_reg(fp); + src[0].s_swz = SWIZZLE_ONE; + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, pfs_zero, 0); + emit_arith(fp, PFS_OP_DP4, dest, mask, + temp[0], src[1], undef, flags); + free_temp(fp, temp[0]); +#else + emit_arith(fp, PFS_OP_DP4, dest, mask, + swizzle(src[0], X, Y, Z, ONE), src[1], + undef, flags); +#endif + break; + case OPCODE_DST: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + /* dest.y = src0.y * src1.y */ + if (mask & WRITEMASK_Y) + emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y, + keep(src[0]), keep(src[1]), + pfs_zero, flags); + /* dest.z = src0.z */ + if (mask & WRITEMASK_Z) + emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z, + src[0], pfs_one, pfs_zero, flags); + /* result.x = 1.0 + * result.w = src1.w */ + if (mask & WRITEMASK_XW) { + REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & WRITEMASK_XW, + src[1], pfs_one, pfs_zero, flags); + } + break; + case OPCODE_EX2: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_EX2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_FLR: + src[0] = t_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(fp); + /* FRC temp, src0 + * MAD dest, src0, 1.0, -temp + */ + emit_arith(fp, PFS_OP_FRC, temp[0], mask, + keep(src[0]), undef, undef, 0); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(temp[0]), flags); + free_temp(fp, temp[0]); + break; + case OPCODE_FRC: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_FRC, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_KIL: + emit_tex(fp, fpi, R300_FPITX_OP_KIL); + break; + case OPCODE_LG2: + src[0] = t_scalar_src(fp, 
fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_LG2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_LIT: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_lit(fp, dest, mask, src[0], flags); + break; + case OPCODE_LRP: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + /* result = tmp0tmp1 + (1 - tmp0)tmp2 + * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 + * MAD temp, -tmp0, tmp2, tmp2 + * MAD result, tmp0, tmp1, temp + */ + temp[0] = get_temp_reg(fp); + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + negate(keep(src[0])), keep(src[2]), src[2], + 0); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], temp[0], flags); + free_temp(fp, temp[0]); + break; + case OPCODE_MAD: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + src[2] = t_src(fp, fpi->SrcReg[2]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], src[2], flags); + break; + case OPCODE_MAX: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAX, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MIN: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MIN, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MOV: + case OPCODE_SWZ: + src[0] = t_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, pfs_zero, flags); + break; + case OPCODE_MUL: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], src[1], pfs_zero, flags); + break; + case OPCODE_POW: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + src[1] = t_scalar_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W, + src[0], undef, undef, 0); + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + temp[0], src[1], pfs_zero, 0); + emit_arith(fp, PFS_OP_EX2, dest, 
fpi->DstReg.WriteMask, + temp[0], undef, undef, 0); + free_temp(fp, temp[0]); + break; + case OPCODE_RCP: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_RCP, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_RSQ: + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + emit_arith(fp, PFS_OP_RSQ, dest, mask, + absolute(src[0]), pfs_zero, pfs_zero, flags); + break; + case OPCODE_SCS: + /* + * scs using a parabola : + * scs(x): + * result.x = sin(-abs(x)+0.5*PI) (cos) + * result.y = sin(x) (sin) + * + */ + temp[0] = get_temp_reg(fp); + temp[1] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* x = -abs(x)+0.5*PI */ + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI + pfs_half, + negate(abs + (swizzle(keep(src[0]), X, X, X, X))), + 0); + + /* C*x (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + swizzle(const_sin[0], Y, Y, Y, Y), + swizzle(keep(src[0]), X, X, X, X), + pfs_zero, 0); + + /* B*x, C*x (cos) */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + /* B*x (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(const_sin[0], X, X, X, X), + keep(src[0]), pfs_zero, 0); + + /* y = B*x + C*x*abs(x) (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z, + absolute(src[0]), + swizzle(temp[0], W, W, W, W), + swizzle(temp[1], W, W, W, W), 0); + + /* y = B*x + C*x*abs(x) (cos) */ + emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], + W, Z, Y, + X), + absolute(swizzle(temp[1], W, Z, Y, X)), + negate(swizzle(temp[1], W, Z, Y, X)), 
0); + + /* dest.xy = mad(temp.xy, P, temp2.wz) */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[1], W, Z, Y, X), flags); + + free_temp(fp, temp[0]); + free_temp(fp, temp[1]); + break; + case OPCODE_SGE: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 0 : 1 + */ + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(fp, PFS_OP_CMP, dest, mask, + pfs_one, pfs_zero, temp[0], 0); + free_temp(fp, temp[0]); + break; + case OPCODE_SIN: + /* + * using a parabola: + * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) + * extra precision is obtained by weighting against + * itself squared. + */ + + temp[0] = get_temp_reg(fp); + const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); + src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + + /* do range reduction */ + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(keep(src[0]), X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + pfs_half, 0); + + emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI + 0); + + /* SIN */ + + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(fp, PFS_OP_MAD, dest, mask, + 
swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(fp, temp[0]); + break; + case OPCODE_SLT: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 1 : 0 + */ + emit_arith(fp, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(fp, PFS_OP_CMP, dest, mask, + pfs_zero, pfs_one, temp[0], 0); + free_temp(fp, temp[0]); + break; + case OPCODE_SUB: + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(src[1]), flags); + break; + case OPCODE_TEX: + emit_tex(fp, fpi, R300_FPITX_OP_TEX); + break; + case OPCODE_TXB: + emit_tex(fp, fpi, R300_FPITX_OP_TXB); + break; + case OPCODE_TXP: + emit_tex(fp, fpi, R300_FPITX_OP_TXP); + break; + case OPCODE_XPD:{ + src[0] = t_src(fp, fpi->SrcReg[0]); + src[1] = t_src(fp, fpi->SrcReg[1]); + temp[0] = get_temp_reg(fp); + /* temp = src0.zxy * src1.yzx */ + emit_arith(fp, PFS_OP_MAD, temp[0], + WRITEMASK_XYZ, swizzle(keep(src[0]), + Z, X, Y, W), + swizzle(keep(src[1]), Y, Z, X, W), + pfs_zero, 0); + /* dest.xyz = src0.yzx * src1.zxy - temp + * dest.w = undefined + * */ + emit_arith(fp, PFS_OP_MAD, dest, + mask & WRITEMASK_XYZ, swizzle(src[0], + Y, Z, + X, W), + swizzle(src[1], Z, X, Y, W), + negate(temp[0]), flags); + /* cleanup */ + free_temp(fp, temp[0]); + break; + } + default: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + } + + if (fp->error) + return GL_FALSE; + + } + + return GL_TRUE; +} + +static void insert_wpos(struct gl_program *prog) +{ + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... 
*/ + prog->NumTemporaries++; + + fpi = _mesa_alloc_instructions(prog->NumInstructions + 3); + _mesa_init_instructions(fpi, prog->NumInstructions + 3); + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(prog->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + _mesa_copy_instructions(&fpi[i], prog->Instructions, + prog->NumInstructions); + + free(prog->Instructions); + + prog->Instructions = fpi; + + prog->NumInstructions += i; + fpi = 
&prog->Instructions[prog->NumInstructions - 1]; + + assert(fpi->Opcode == OPCODE_END); + + for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) { + for (i = 0; i < 3; i++) + if (fpi->SrcReg[i].File == PROGRAM_INPUT && + fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { + fpi->SrcReg[i].File = PROGRAM_TEMPORARY; + fpi->SrcReg[i].Index = tempregi; + } + } +} + +/* - Init structures + * - Determine what hwregs each input corresponds to + */ +static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) +{ + struct r300_pfs_compile_state *cs = NULL; + struct gl_fragment_program *mp = &fp->mesa_program; + struct prog_instruction *fpi; + GLuint InputsRead = mp->Base.InputsRead; + GLuint temps_used = 0; /* for fp->temps[] */ + int i, j; + + /* New compile, reset tracking data */ + fp->optimization = + driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); + fp->translated = GL_FALSE; + fp->error = GL_FALSE; + fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); + fp->tex.length = 0; + fp->cur_node = 0; + fp->first_node_has_tex = 0; + fp->const_nr = 0; + fp->max_temp_idx = 0; + fp->node[0].alu_end = -1; + fp->node[0].tex_end = -1; + + _mesa_memset(cs, 0, sizeof(*fp->cs)); + for (i = 0; i < PFS_MAX_ALU_INST; i++) { + for (j = 0; j < 3; j++) { + cs->slot[i].vsrc[j] = SRC_CONST; + cs->slot[i].ssrc[j] = SRC_CONST; + } + } + + /* Work out what temps the Mesa inputs correspond to, this must match + * what setup_rs_unit does, which shouldn't be a problem as rs_unit + * configures itself based on the fragprog's InputsRead + * + * NOTE: this depends on get_hw_temp() allocating registers in order, + * starting from register 0. 
+ */ + + /* Texcoords come first */ + for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; + cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = + get_hw_temp(fp, 0); + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) { + cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); + insert_wpos(&mp->Base); + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) { + cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); + } + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i = 0; i < 32; i++) + if (InputsRead & (1 << i)) + cs->inputs[i].reg = 0; + } + + /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. 
+ * That way, we can free up the reg when it's no longer needed + */ + if (!mp->Base.Instructions) { + ERROR("No instructions found in program\n"); + return; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + int idx; + + for (i = 0; i < 3; i++) { + idx = fpi->SrcReg[i].Index; + switch (fpi->SrcReg[i].File) { + case PROGRAM_TEMPORARY: + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + break; + case PROGRAM_INPUT: + cs->inputs[idx].refcount++; + break; + default: + break; + } + } + + idx = fpi->DstReg.Index; + if (fpi->DstReg.File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + } + } + cs->temp_in_use = temps_used; +} + +static void update_params(struct r300_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); +} + +void r500TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp) +{ + struct r300_pfs_compile_state *cs = NULL; + + if (!fp->translated) { + + init_program(r300, fp); + cs = fp->cs; + + if (parse_program(fp) == GL_FALSE) { + dump_program(fp); + return; + } + + /* Finish off */ + fp->node[fp->cur_node].alu_end = + cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + if (fp->node[fp->cur_node].tex_end < 0) + fp->node[fp->cur_node].tex_end = 0; + fp->alu_offset = 0; + fp->alu_end = cs->nrslots - 1; + fp->tex_offset = 0; + fp->tex_end = fp->tex.length ? 
fp->tex.length - 1 : 0; + assert(fp->node[fp->cur_node].alu_end >= 0); + assert(fp->alu_end >= 0); + + fp->translated = GL_TRUE; + if (RADEON_DEBUG & DEBUG_PIXEL) + dump_program(fp); + r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + } + + update_params(fp); +} + +/* just some random things... */ +static void dump_program(struct r300_fragment_program *fp) +{ + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_print_program(&fp->mesa_program.Base); + fflush(stdout); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n < (fp->cur_node + 1); n++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d\n", n, + fp->node[n].alu_offset, + fp->node[n].tex_offset, + fp->node[n].alu_end, fp->node[n].tex_end); + + if (fp->tex.length) { + fprintf(stderr, " TEX:\n"); + for (i = fp->node[n].tex_offset; + i <= fp->node[n].tex_offset + fp->node[n].tex_end; + ++i) { + const char *instr; + + switch ((fp->tex. + inst[i] >> R300_FPITX_OPCODE_SHIFT) & + 15) { + case R300_FPITX_OP_TEX: + instr = "TEX"; + break; + case R300_FPITX_OP_KIL: + instr = "KIL"; + break; + case R300_FPITX_OP_TXP: + instr = "TXP"; + break; + case R300_FPITX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (fp->tex. + inst[i] >> R300_FPITX_DST_SHIFT) & 31, + (fp->tex. + inst[i] & R300_FPITX_SRC_CONST) ? 'c' : + 't', + (fp->tex. + inst[i] >> R300_FPITX_SRC_SHIFT) & 31, + (fp->tex. 
+ inst[i] & R300_FPITX_IMAGE_MASK) >> + R300_FPITX_IMAGE_SHIFT, + fp->tex.inst[i]); + } + } + + for (i = fp->node[n].alu_offset; + i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = fp->alu.inst[i].inst1 >> (j * 6); + int rega = fp->alu.inst[i].inst3 >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (fp->alu.inst[i]. + inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", + (fp->alu.inst[i]. + inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (fp->alu.inst[i]. + inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (fp->alu.inst[i]. + inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + } + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (fp->alu.inst[i]. 
+ inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + fp->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, fp->alu.inst[i].inst3); + + for (j = 0; j < 3; ++j) { + int regc = fp->alu.inst[i].inst0 >> (j * 7); + int rega = fp->alu.inst[i].inst2 >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_FPI0_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? 
"|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + fp->alu.inst[i].inst0, arga[0], arga[1], + arga[2], fp->alu.inst[i].inst2); + } + } +} diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h new file mode 100644 index 0000000000..72fca77845 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/* + * Authors: + * Ben Skeggs + * Jerome Glisse + */ +/* NOTE(review): this header is r500_fragprog.h but its include guard is spelled __R300_FRAGPROG_H_; if r300_fragprog.h uses the same guard, only the first of the two headers included in a translation unit takes effect -- confirm before relying on this file. */ +#ifndef __R300_FRAGPROG_H_ +#define __R300_FRAGPROG_H_ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "r300_context.h" + +/* Swizzle record: a length plus two fixed-size GLuint arrays, src[4] and inst[8]. NOTE(review): how the arrays are indexed is not visible in this header. */ +typedef struct r300_fragment_program_swizzle { + GLuint length; + GLuint src[4]; + GLuint inst[8]; +} r300_fragment_program_swizzle_t; + +/* supported hw opcodes */ +#define PFS_OP_MAD 0 +#define PFS_OP_DP3 1 +#define PFS_OP_DP4 2 +#define PFS_OP_MIN 3 +#define PFS_OP_MAX 4 +#define PFS_OP_CMP 5 +#define PFS_OP_FRC 6 +#define PFS_OP_EX2 7 +#define PFS_OP_LG2 8 +#define PFS_OP_RCP 9 +#define PFS_OP_RSQ 10 +#define PFS_OP_REPL_ALPHA 11 +#define PFS_OP_CMPH 12 +#define MAX_PFS_OP 12 + +#define PFS_FLAG_SAT (1 << 0) +#define PFS_FLAG_ABS (1 << 1) + +/* Per-argument field: ARG_STRIDE (7) bits wide, selected with ARG_MASK; ARG_NEG and ARG_ABS are flag bits 5 and 6 inside it (presumably negate and absolute value). Per-source field: SRC_STRIDE (6) bits wide, selected with SRC_MASK; SRC_CONST is flag bit 5 inside it. */ +#define ARG_NEG (1 << 5) +#define ARG_ABS (1 << 6) +#define ARG_MASK (127 << 0) +#define ARG_STRIDE 7 +#define SRC_CONST (1 << 5) +#define SRC_MASK (63 << 0) +#define SRC_STRIDE 6 + +/* NOP_INST0..NOP_INST3: the four instruction words of a no-op. Words 0 and 2 select the MAD output op with all three arguments set to ZERO; words 1 and 3 set every source field to index 0 with SRC_CONST set. */ +#define NOP_INST0 ( \ + (R300_FPI0_OUTC_MAD) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \ + (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)) +#define NOP_INST1 ( \ + ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT)) +#define NOP_INST2 ( \ + (R300_FPI2_OUTA_MAD) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \ + (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT)) +#define NOP_INST3 ( \ + ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \ + ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT)) + +#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 +#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 + +struct r300_fragment_program; + +/* NOTE(review): the only prototype declared here is r300TranslateFragmentShader; no r500-named entry point is declared by this header. */ +extern void r300TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp); + +#endif 
-- cgit v1.2.3 From 5a143e91dcaf64d77694b85671c214f93e3e8512 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 17 Mar 2008 21:09:49 +1000 Subject: some basic r500 portage --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 62 +++- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_emit.h | 12 + src/mesa/drivers/dri/r300/r300_reg.h | 477 ++++++++++++++++++++++++++- src/mesa/drivers/dri/r300/r300_state.c | 208 +++++++++++- src/mesa/drivers/dri/radeon/radeon_chipset.h | 2 + 6 files changed, 742 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3497738eac..883b41a349 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -281,10 +281,14 @@ void r300InitCmdBuf(r300ContextPtr r300) { int size, mtu; int has_tcl = 1; + int is_r500 = 0; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ mtu = r300->radeon.glCtx->Const.MaxTextureUnits; @@ -374,10 +378,17 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2); ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); - r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); - ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1); + if (is_r500) { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); + } else { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + 
r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1); + } ALLOC_STATE(sc_hyperz, always, 3, 0); r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); ALLOC_STATE(sc_screendoor, always, 2, 0); @@ -389,14 +400,25 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); ALLOC_STATE(us_out_fmt, always, 6, 0); r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); - ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); - r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); - ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); - r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); - ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); - r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); - ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); - r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + + if (is_r500) { + ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 1); + } + + if (0/*is_r500*/) { + + }/* else*/ + { + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(FG_FOG_BLEND, 1); ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); @@ -456,10 +478,18 @@ void r300InitCmdBuf(r300ContextPtr r300) 
r300->hw.vps.cmd[R300_VPS_CMD_0] = cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1); - for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + if (is_r500) { + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R500_PVS_UPLOAD_CLIP_PLANE0+i, 1); + } + } else { + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = + cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + } } } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 780d9aa5d2..ab45bf80b4 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -487,6 +487,7 @@ struct r300_hw_state { struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */ struct r300_state_atom fpt; /* texi - (4620) */ struct r300_state_atom us_out_fmt; /* (46A4) */ + struct r300_state_atom r500fp; /* r500 fp instructions */ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ struct r300_state_atom fogs; /* fog state (4BC0) */ struct r300_state_atom fogc; /* fog color (4BC8) */ diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index a6d69ec5ff..a4f6ab997e 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -74,6 +74,18 @@ static inline uint32_t cmdvpu(int addr, int count) return cmd.u; } +/* Build the 32-bit command header for an R300_CMD_R500FP packet: stores count, and splits the low 16 bits of addr into a high byte (adrhi) and a low byte (adrlo), all written through the vpu member of drm_r300_cmd_header_t; returns the header as cmd.u. NOTE(review): cmd is not initialised before the field writes, so any bits of cmd.u not covered by these four vpu fields are indeterminate -- confirm the vpu fields span the whole word. */ +static inline uint32_t cmdr500fp(int addr, int count) +{ + drm_r300_cmd_header_t cmd; + + cmd.vpu.cmd_type = R300_CMD_R500FP; + cmd.vpu.count = count; + cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; + cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + static inline uint32_t cmdpacket3(int packet) { drm_r300_cmd_header_t cmd; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h
b/src/mesa/drivers/dri/r300/r300_reg.h index 2200cec6ab..d640d8b7e7 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -657,7 +657,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* This table specifies the source location and format for up to 16 texture * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) */ -#define R500_RS_IP_0 0x4074 +#define R500_RS_IP_0 0x4074 #define R500_RS_IP_1 0x4078 #define R500_RS_IP_2 0x407C #define R500_RS_IP_3 0x4080 @@ -1151,7 +1151,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* */ -#define R500_RS_INST_0 0x4320 +#define R500_RS_INST_0 0x4320 #define R500_RS_INST_1 0x4324 #define R500_RS_INST_2 0x4328 #define R500_RS_INST_3 0x432c @@ -2598,6 +2598,479 @@ enum { #define R300_PRIM_NUM_VERTICES_SHIFT 16 #define R300_PRIM_NUM_VERTICES_MASK 0xffff + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader. You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. 
+ */ +#define R500_US_ALU_ALPHA_INST_0 0xa800 +# define R500_ALPHA_OP_MAD 0 +# define R500_ALPHA_OP_DP 1 +# define R500_ALPHA_OP_MIN 2 +# define R500_ALPHA_OP_MAX 3 +/* #define R500_ALPHA_OP_RESERVED 4 */ +# define R500_ALPHA_OP_CND 5 +# define R500_ALPHA_OP_CMP 6 +# define R500_ALPHA_OP_FRC 7 +# define R500_ALPHA_OP_EX2 8 +# define R500_ALPHA_OP_LN2 9 +# define R500_ALPHA_OP_RCP 10 +# define R500_ALPHA_OP_RSQ 11 +# define R500_ALPHA_OP_SIN 12 +# define R500_ALPHA_OP_COS 13 +# define R500_ALPHA_OP_MDH 14 +# define R500_ALPHA_OP_MDV 15 +# define R500_ALPHA_ADDRD(x) (x << 4) +# define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SRC0 (0 << 12) +# define R500_ALPHA_SEL_A_SRC1 (1 << 12) +# define R500_ALPHA_SEL_A_SRC2 (2 << 12) +# define R500_ALPHA_SEL_A_SRCP (3 << 12) +# define R500_ALPHA_SWIZ_A_R (0 << 14) +# define R500_ALPHA_SWIZ_A_G (1 << 14) +# define R500_ALPHA_SWIZ_A_B (2 << 14) +# define R500_ALPHA_SWIZ_A_A (3 << 14) +# define R500_ALPHA_SWIZ_A_0 (4 << 14) +# define R500_ALPHA_SWIZ_A_HALF (5 << 14) +# define R500_ALPHA_SWIZ_A_1 (6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ +# define R500_ALPHA_MOD_A_NOP (0 << 17) +# define R500_ALPHA_MOD_A_NEG (1 << 17) +# define R500_ALPHA_MOD_A_ABS (2 << 17) +# define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SRC0 (0 << 19) +# define R500_ALPHA_SEL_B_SRC1 (1 << 19) +# define R500_ALPHA_SEL_B_SRC2 (2 << 19) +# define R500_ALPHA_SEL_B_SRCP (3 << 19) +# define R500_ALPHA_SWIZ_B_R (0 << 21) +# define R500_ALPHA_SWIZ_B_G (1 << 21) +# define R500_ALPHA_SWIZ_B_B (2 << 21) +# define R500_ALPHA_SWIZ_B_A (3 << 21) +# define R500_ALPHA_SWIZ_B_0 (4 << 21) +# define R500_ALPHA_SWIZ_B_HALF (5 << 21) +# define R500_ALPHA_SWIZ_B_1 (6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALPHA_MOD_B_NOP (0 << 24) +# define R500_ALPHA_MOD_B_NEG (1 << 24) +# define R500_ALPHA_MOD_B_ABS (2 << 24) +# define R500_ALPHA_MOD_B_NAB (3 << 24) +# define R500_ALPHA_OMOD_IDENTITY (0 << 26) +# 
define R500_ALPHA_OMOD_MUL_2 (1 << 26) +# define R500_ALPHA_OMOD_MUL_4 (2 << 26) +# define R500_ALPHA_OMOD_MUL_8 (3 << 26) +# define R500_ALPHA_OMOD_DIV_2 (4 << 26) +# define R500_ALPHA_OMOD_DIV_4 (5 << 26) +# define R500_ALPHA_OMOD_DIV_8 (6 << 26) +# define R500_ALPHA_OMOD_DISABLE (7 << 26) +# define R500_ALPHA_TARGET(x) (x << 29) +# define R500_ALPHA_W_OMASK (1 << 31) +#define R500_US_ALU_ALPHA_ADDR_0 0x9800 +# define R500_ALPHA_ADDR0(x) (x << 0) +# define R500_ALPHA_ADDR0_CONST (1 << 8) +# define R500_ALPHA_ADDR0_REL (1 << 9) +# define R500_ALPHA_ADDR1(x) (x << 10) +# define R500_ALPHA_ADDR1_CONST (1 << 18) +# define R500_ALPHA_ADDR1_REL (1 << 19) +# define R500_ALPHA_ADDR2(x) (x << 20) +# define R500_ALPHA_ADDR2_CONST (1 << 28) +# define R500_ALPHA_ADDR2_REL (1 << 29) +# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) +# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) +# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) +# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30) +#define R500_US_ALU_RGBA_INST_0 0xb000 +# define R500_ALU_RGBA_OP_MAD (0 << 0) +# define R500_ALU_RGBA_OP_DP3 (1 << 0) +# define R500_ALU_RGBA_OP_DP4 (2 << 0) +# define R500_ALU_RGBA_OP_D2A (3 << 0) +# define R500_ALU_RGBA_OP_MIN (4 << 0) +# define R500_ALU_RGBA_OP_MAX (5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ +# define R500_ALU_RGBA_OP_CND (7 << 0) +# define R500_ALU_RGBA_OP_CMP (8 << 0) +# define R500_ALU_RGBA_OP_FRC (9 << 0) +# define R500_ALU_RGBA_OP_SOP (10 << 0) +# define R500_ALU_RGBA_OP_MDH (11 << 0) +# define R500_ALU_RGBA_OP_MDV (12 << 0) +# define R500_ALU_RGBA_ADDRD(x) (x << 4) +# define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) +# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) +# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) +# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) +# define R500_ALU_RGBA_R_SWIZ_G (1 << 14) +# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) +# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) +# 
define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) +# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) +# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ +# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) +# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) +# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) +# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) +# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) +# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) +# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ +# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) +# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) +# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) +# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) +# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) +# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) +# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ +# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) +# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) +# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) +# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) +# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) +# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) +# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) +# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) +# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) +# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) +# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ +# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30) +#define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) +# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) +# define R500_ALU_RGB_SEL_A_SRC2 (2 << 
0) +# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) +# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) +# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) +# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) +# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) +# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) +# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) +# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ +# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) +# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) +# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) +# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) +# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) +# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) +# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ +# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) +# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) +# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) +# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) +# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) +# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) +# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ +# define R500_ALU_RGB_MOD_A_NOP (0 << 11) +# define R500_ALU_RGB_MOD_A_NEG (1 << 11) +# define R500_ALU_RGB_MOD_A_ABS (2 << 11) +# define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) +# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) +# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) +# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) +# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) +# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) +# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) +# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) +# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) +# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) +# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ +# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) +# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) +# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) +# define R500_ALU_RGB_G_SWIZ_B_A (3 
<< 18) +# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) +# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) +# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ +# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) +# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) +# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) +# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) +# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) +# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) +# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALU_RGB_MOD_B_NOP (0 << 24) +# define R500_ALU_RGB_MOD_B_NEG (1 << 24) +# define R500_ALU_RGB_MOD_B_ABS (2 << 24) +# define R500_ALU_RGB_MOD_B_NAB (3 << 24) +# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26) +# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26) +# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26) +# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26) +# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26) +# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) +# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) +# define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +# define R500_ALU_RGB_TARGET(x) (x << 29) +# define R500_ALU_RGB_WMASK (1 << 31) +#define R500_US_ALU_RGB_ADDR_0 0x9000 +# define R500_RGB_ADDR0(x) (x << 0) +# define R500_RGB_ADDR0_CONST (1 << 8) +# define R500_RGB_ADDR0_REL (1 << 9) +# define R500_RGB_ADDR1(x) (x << 10) +# define R500_RGB_ADDR1_CONST (1 << 18) +# define R500_RGB_ADDR1_REL (1 << 19) +# define R500_RGB_ADDR2(x) (x << 20) +# define R500_RGB_ADDR2_CONST (1 << 28) +# define R500_RGB_ADDR2_REL (1 << 29) +# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) +# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) +# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) +#define R500_US_CMN_INST_0 0xb800 +# define R500_INST_TYPE_ALU (0 << 0) +# define R500_INST_TYPE_OUT (1 << 0) +# define R500_INST_TYPE_FC (2 << 0) +# define R500_INST_TYPE_TEX (3 << 0) +# define R500_INST_TEX_SEM_WAIT (1 << 2) +# 
define R500_INST_RGB_PRED_SEL_NONE (0 << 3) +# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) +# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) +# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) +# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) +# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) +# define R500_INST_RGB_PRED_INV (1 << 6) +# define R500_INST_WRITE_INACTIVE (1 << 7) +# define R500_INST_LAST (1 << 8) +# define R500_INST_NOP (1 << 9) +# define R500_INST_ALU_WAIT (1 << 10) +# define R500_INST_RGB_WMASK_R (1 << 11) +# define R500_INST_RGB_WMASK_G (1 << 12) +# define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_ALPHA_WMASK (1 << 14) +# define R500_INST_RGB_OMASK_R (1 << 15) +# define R500_INST_RGB_OMASK_G (1 << 16) +# define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_ALPHA_OMASK (1 << 18) +# define R500_INST_RGB_CLAMP (1 << 19) +# define R500_INST_ALPHA_CLAMP (1 << 20) +# define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALPHA_PRED_INV (1 << 22) +# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) +# define R500_INST_ALU_RESULT_OP_LT (1 << 23) +# define R500_INST_ALU_RESULT_OP_GE (2 << 23) +# define R500_INST_ALU_RESULT_OP_NE (3 << 23) +# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) +# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) +# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) +# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) +# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) +# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) +/* XXX next four are kind of guessed */ +# define R500_INST_STAT_WE_R (1 << 28) +# define R500_INST_STAT_WE_G (1 << 29) +# define R500_INST_STAT_WE_B (1 << 30) +# define R500_INST_STAT_WE_A (1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR 0x4630 +# define R500_US_CODE_START_ADDR(x) (x << 0) +# define R500_US_CODE_END_ADDR(x) (x << 16) +#define R500_US_CODE_OFFSET 0x4638 +# define R500_US_CODE_OFFSET_ADDR(x) (x << 0) +#define R500_US_CODE_RANGE 0x4634 +# 
define R500_US_CODE_RANGE_ADDR(x) (x << 0) +# define R500_US_CODE_RANGE_SIZE(x) (x << 16) +#define R500_US_CONFIG 0x4600 +# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) +#define R500_US_FC_ADDR_0 0xa000 +# define R500_FC_BOOL_ADDR(x) (x << 0) +# define R500_FC_INT_ADDR(x) (x << 8) +# define R500_FC_JUMP_ADDR(x) (x << 16) +# define R500_FC_JUMP_GLOBAL (1 << 31) +#define R500_US_FC_BOOL_CONST 0x4620 +# define R500_FC_KBOOL(x) (x) +#define R500_US_FC_CTRL 0x4624 +# define R500_FC_TEST_EN (1 << 30) +# define R500_FC_FULL_FC_EN (1 << 31) +#define R500_US_FC_INST_0 0x9800 +# define R500_FC_OP_JUMP (0 << 0) +# define R500_FC_OP_LOOP (1 << 0) +# define R500_FC_OP_ENDLOOP (2 << 0) +# define R500_FC_OP_REP (3 << 0) +# define R500_FC_OP_ENDREP (4 << 0) +# define R500_FC_OP_BREAKLOOP (5 << 0) +# define R500_FC_OP_BREAKREP (6 << 0) +# define R500_FC_OP_CONTINUE (7 << 0) +# define R500_FC_B_ELSE (1 << 4) +# define R500_FC_JUMP_ANY (1 << 5) +# define R500_FC_A_OP_NONE (0 << 6) +# define R500_FC_A_OP_POP (1 << 6) +# define R500_FC_A_OP_PUSH (2 << 6) +# define R500_FC_JUMP_FUNC(x) (x << 8) +# define R500_FC_B_POP_CNT(x) (x << 16) +# define R500_FC_B_OP0_NONE (0 << 24) +# define R500_FC_B_OP0_DECR (1 << 24) +# define R500_FC_B_OP0_INCR (2 << 24) +# define R500_FC_B_OP1_DECR (0 << 26) +# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_INCR (2 << 26) +# define R500_FC_IGNORE_UNCOVERED (1 << 28) +#define R500_US_FC_INT_CONST_0 0x4c00 +# define R500_FC_INT_CONST_KR(x) (x << 0) +# define R500_FC_INT_CONST_KG(x) (x << 8) +# define R500_FC_INT_CONST_KB(x) (x << 16) +/* _0 through _15 */ +#define R500_US_FORMAT0_0 0x4640 +# define R500_FORMAT_TXWIDTH(x) (x << 0) +# define R500_FORMAT_TXHEIGHT(x) (x << 11) +# define R500_FORMAT_TXDEPTH(x) (x << 22) +/* _0 through _3 */ +#define R500_US_OUT_FMT_0 0x46a4 +# define R500_OUT_FMT_C4_8 (0 << 0) +# define R500_OUT_FMT_C4_10 (1 << 0) +# define R500_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R500_OUT_FMT_C_16 (3 << 0) +# define 
R500_OUT_FMT_C2_16 (4 << 0) +# define R500_OUT_FMT_C4_16 (5 << 0) +# define R500_OUT_FMT_C_16_MPEG (6 << 0) +# define R500_OUT_FMT_C2_16_MPEG (7 << 0) +# define R500_OUT_FMT_C2_4 (8 << 0) +# define R500_OUT_FMT_C_3_3_2 (9 << 0) +# define R500_OUT_FMT_C_6_5_6 (10 << 0) +# define R500_OUT_FMT_C_11_11_10 (11 << 0) +# define R500_OUT_FMT_C_10_11_11 (12 << 0) +# define R500_OUT_FMT_C_2_10_10_10 (13 << 0) +/* #define R500_OUT_FMT_RESERVED (14 << 0) */ +# define R500_OUT_FMT_UNUSED (15 << 0) +# define R500_OUT_FMT_C_16_FP (16 << 0) +# define R500_OUT_FMT_C2_16_FP (17 << 0) +# define R500_OUT_FMT_C4_16_FP (18 << 0) +# define R500_OUT_FMT_C_32_FP (19 << 0) +# define R500_OUT_FMT_C2_32_FP (20 << 0) +# define R500_OUT_FMT_C4_32_FP (21 << 0) +# define R500_C0_SEL_A (0 << 8) +# define R500_C0_SEL_R (1 << 8) +# define R500_C0_SEL_G (2 << 8) +# define R500_C0_SEL_B (3 << 8) +# define R500_C1_SEL_A (0 << 10) +# define R500_C1_SEL_R (1 << 10) +# define R500_C1_SEL_G (2 << 10) +# define R500_C1_SEL_B (3 << 10) +# define R500_C2_SEL_A (0 << 12) +# define R500_C2_SEL_R (1 << 12) +# define R500_C2_SEL_G (2 << 12) +# define R500_C2_SEL_B (3 << 12) +# define R500_C3_SEL_A (0 << 14) +# define R500_C3_SEL_R (1 << 14) +# define R500_C3_SEL_G (2 << 14) +# define R500_C3_SEL_B (3 << 14) +# define R500_OUT_SIGN(x) (x << 16) +# define R500_ROUND_ADJ (1 << 20) +#define R500_US_PIXSIZE 0x4604 +# define R500_PIX_SIZE(x) (x) +#define R500_US_TEX_ADDR_0 0x9800 +# define R500_TEX_SRC_ADDR(x) (x << 0) +# define R500_TEX_SRC_ADDR_REL (1 << 7) +# define R500_TEX_SRC_S_SWIZ_R (0 << 8) +# define R500_TEX_SRC_S_SWIZ_G (1 << 8) +# define R500_TEX_SRC_S_SWIZ_B (2 << 8) +# define R500_TEX_SRC_S_SWIZ_A (3 << 8) +# define R500_TEX_SRC_T_SWIZ_R (0 << 10) +# define R500_TEX_SRC_T_SWIZ_G (1 << 10) +# define R500_TEX_SRC_T_SWIZ_B (2 << 10) +# define R500_TEX_SRC_T_SWIZ_A (3 << 10) +# define R500_TEX_SRC_R_SWIZ_R (0 << 12) +# define R500_TEX_SRC_R_SWIZ_G (1 << 12) +# define R500_TEX_SRC_R_SWIZ_B (2 << 12) +# define 
R500_TEX_SRC_R_SWIZ_A (3 << 12) +# define R500_TEX_SRC_Q_SWIZ_R (0 << 14) +# define R500_TEX_SRC_Q_SWIZ_G (1 << 14) +# define R500_TEX_SRC_Q_SWIZ_B (2 << 14) +# define R500_TEX_SRC_Q_SWIZ_A (3 << 14) +# define R500_TEX_DST_ADDR(x) (x << 16) +# define R500_TEX_DST_ADDR_REL (1 << 23) +# define R500_TEX_DST_R_SWIZ_R (0 << 24) +# define R500_TEX_DST_R_SWIZ_G (1 << 24) +# define R500_TEX_DST_R_SWIZ_B (2 << 24) +# define R500_TEX_DST_R_SWIZ_A (3 << 24) +# define R500_TEX_DST_G_SWIZ_R (0 << 26) +# define R500_TEX_DST_G_SWIZ_G (1 << 26) +# define R500_TEX_DST_G_SWIZ_B (2 << 26) +# define R500_TEX_DST_G_SWIZ_A (3 << 26) +# define R500_TEX_DST_B_SWIZ_R (0 << 28) +# define R500_TEX_DST_B_SWIZ_G (1 << 28) +# define R500_TEX_DST_B_SWIZ_B (2 << 28) +# define R500_TEX_DST_B_SWIZ_A (3 << 28) +# define R500_TEX_DST_A_SWIZ_R (0 << 30) +# define R500_TEX_DST_A_SWIZ_G (1 << 30) +# define R500_TEX_DST_A_SWIZ_B (2 << 30) +# define R500_TEX_DST_A_SWIZ_A (3 << 30) +#define R500_US_TEX_ADDR_DXDY_0 0xa000 +# define R500_DX_ADDR(x) (x << 0) +# define R500_DX_ADDR_REL (1 << 7) +# define R500_DX_S_SWIZ_R (0 << 8) +# define R500_DX_S_SWIZ_G (1 << 8) +# define R500_DX_S_SWIZ_B (2 << 8) +# define R500_DX_S_SWIZ_A (3 << 8) +# define R500_DX_T_SWIZ_R (0 << 10) +# define R500_DX_T_SWIZ_G (1 << 10) +# define R500_DX_T_SWIZ_B (2 << 10) +# define R500_DX_T_SWIZ_A (3 << 10) +# define R500_DX_R_SWIZ_R (0 << 12) +# define R500_DX_R_SWIZ_G (1 << 12) +# define R500_DX_R_SWIZ_B (2 << 12) +# define R500_DX_R_SWIZ_A (3 << 12) +# define R500_DX_Q_SWIZ_R (0 << 14) +# define R500_DX_Q_SWIZ_G (1 << 14) +# define R500_DX_Q_SWIZ_B (2 << 14) +# define R500_DX_Q_SWIZ_A (3 << 14) +# define R500_DY_ADDR(x) (x << 16) +# define R500_DY_ADDR_REL (1 << 17) +# define R500_DY_S_SWIZ_R (0 << 24) +# define R500_DY_S_SWIZ_G (1 << 24) +# define R500_DY_S_SWIZ_B (2 << 24) +# define R500_DY_S_SWIZ_A (3 << 24) +# define R500_DY_T_SWIZ_R (0 << 26) +# define R500_DY_T_SWIZ_G (1 << 26) +# define R500_DY_T_SWIZ_B (2 << 26) +# define 
R500_DY_T_SWIZ_A (3 << 26) +# define R500_DY_R_SWIZ_R (0 << 28) +# define R500_DY_R_SWIZ_G (1 << 28) +# define R500_DY_R_SWIZ_B (2 << 28) +# define R500_DY_R_SWIZ_A (3 << 28) +# define R500_DY_Q_SWIZ_R (0 << 30) +# define R500_DY_Q_SWIZ_G (1 << 30) +# define R500_DY_Q_SWIZ_B (2 << 30) +# define R500_DY_Q_SWIZ_A (3 << 30) +#define R500_US_TEX_INST_0 0x9000 +# define R500_TEX_ID(x) (x << 16) +# define R500_TEX_INST_NOP (0 << 22) +# define R500_TEX_INST_LD (1 << 22) +# define R500_TEX_INST_TEXKILL (2 << 22) +# define R500_TEX_INST_PROJ (3 << 22) +# define R500_TEX_INST_LODBIAS (4 << 22) +# define R500_TEX_INST_LOD (5 << 22) +# define R500_TEX_INST_DXDY (6 << 22) +# define R500_TEX_SEM_ACQUIRE (1 << 25) +# define R500_TEX_IGNORE_UNCOVERED (1 << 26) +# define R500_TEX_UNSCALED (1 << 27) +#define R500_US_W_FMT 0x46b4 +# define R500_W_FMT_W0 (0 << 0) +# define R500_W_FMT_W24 (1 << 0) +# define R500_W_FMT_W24FP (2 << 0) +# define R500_W_SRC_US (0 << 2) +# define R500_W_SRC_RAS (1 << 2) + + /* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. * Two parameter dwords: * 0. 
VAP_VTX_FMT: The first parameter is not written to hardware diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e11b5afc30..d2fd04a550 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1603,6 +1603,128 @@ static void r300SetupRSUnit(GLcontext * ctx) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); } +static void r500SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + /* I'm still unsure if these are needed */ + GLuint interp_magic[8] = { + 0x00, + R300_RS_COL_PTR(1), + R300_RS_COL_PTR(2), + R300_RS_COL_PTR(3), + 0x00, + 0x00, + 0x00, + 0x00 + }; + union r300_outputs_written OutputsWritten; + GLuint InputsRead; + int fp_reg, high_rr; + int in_texcoords, col_interp_nr; + int i; + + if (hw_tcl_on) + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + else { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; /* This should only ever happen once.. 
*/ + } + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + + r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + + if (InputsRead & FRAG_BIT_WPOS) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(InputsRead & (FRAG_BIT_TEX0 << i))) + break; + + if (i == ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tno free texcoord found...\n"); + _mesa_exit(-1); + } + + InputsRead |= (FRAG_BIT_TEX0 << i); + InputsRead &= ~FRAG_BIT_WPOS; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) + | interp_magic[i]; + + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + //assert(r300->state.texture.tc_count != 0); + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */ + | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); + high_rr = fp_reg; + + /* Passing invalid data here can lock the GPU. 
*/ + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + InputsRead &= ~(FRAG_BIT_TEX0 << i); + fp_reg++; + } else { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + } + } + /* Need to count all coords enabled at vof */ + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + in_texcoords++; + } + } + + if (InputsRead & FRAG_BIT_COL0) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL0; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + InputsRead &= ~FRAG_BIT_COL1; + if (high_rr < 1) + high_rr = 1; + col_interp_nr++; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } + } + + /* Need at least one. This might still lock as the values are undefined... 
*/ + if (in_texcoords == 0 && col_interp_nr == 0) { + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + col_interp_nr++; + } + + r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT) + | (col_interp_nr << R300_IC_COUNT_SHIFT) + | R300_HIRES_EN; + + assert(high_rr >= 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); + r300->hw.rc.cmd[2] = 0xC0 | high_rr; + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} + + + + #define bump_vpu_count(ptr, new_count) do{\ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ int _nc=(new_count)/4; \ @@ -2163,6 +2285,81 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } } +/* Fill the r500fp state atom with a fixed, hard-coded instruction sequence (the "standard zero shader"): after marking the atom changed, the words are written to rmesa->hw.r500fp.cmd[] starting at index 1. The current fragment program is only tested for NULL and is not translated here. NOTE(review): the local 'k' is declared but never used; index 0 is presumably the command header set elsewhere -- confirm. */ +static void r500SetupPixelShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + int i, k; + + if (!fp) /* should only happen once, just after context is created */ + return; + + /* emit the standard zero shader */ + R300_STATECHANGE(rmesa, r500fp); + i = 1; + rmesa->hw.r500fp.cmd[i++] = 0x7807; + rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A; + rmesa->hw.r500fp.cmd[i++] = R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R; + rmesa->hw.r500fp.cmd[i++] = 0x0; + rmesa->hw.r500fp.cmd[i++] = 0x0; + + rmesa->hw.r500fp.cmd[i++] = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + 
R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK; + + rmesa->hw.r500fp.cmd[i++] = R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0; + rmesa->hw.r500fp.cmd[i++] = R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0; + rmesa->hw.r500fp.cmd[i++] = R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1; + rmesa->hw.r500fp.cmd[i++] = R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1; + rmesa->hw.r500fp.cmd[i++] = R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + +} + void r300UpdateShaderStates(r300ContextPtr rmesa) { GLcontext *ctx; @@ -2170,12 +2367,19 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r300UpdateTextureState(ctx); - r300SetupPixelShader(rmesa); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupPixelShader(rmesa); + else + r300SetupPixelShader(rmesa); r300SetupTextures(ctx); if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) r300SetupVertexProgram(rmesa); - r300SetupRSUnit(ctx); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupRSUnit(ctx); + else + r300SetupRSUnit(ctx); } /** diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 6ad441bdd0..9e375474a0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -169,6 +169,8 @@ enum { CHIP_FAMILY_RV410, CHIP_FAMILY_RS400, CHIP_FAMILY_RS690, + CHIP_FAMILY_RV515, + CHIP_FAMILY_R520, CHIP_FAMILY_LAST }; -- cgit v1.2.3 From 
ed1584aed892e9004a96b915c12a1adbc6b419f0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 19 Mar 2008 16:29:11 +1000 Subject: more r500 vs r300 kickin --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 20 ++++---- src/mesa/drivers/dri/r300/r300_context.h | 2 + src/mesa/drivers/dri/r300/r300_ioctl.c | 79 ++++++++++++++++++-------------- 3 files changed, 56 insertions(+), 45 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 883b41a349..a92bb87d7d 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -379,7 +379,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); if (is_r500) { - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); @@ -393,23 +393,23 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); ALLOC_STATE(sc_screendoor, always, 2, 0); r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1); - ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); - r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); - r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); - ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); - r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); ALLOC_STATE(us_out_fmt, always, 6, 0); r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); - r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 1); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); } - if (0/*is_r500*/) { + if (is_r500) { + + } else { + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 
0); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); - }/* else*/ - { ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index ab45bf80b4..012c0fe6a5 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -330,6 +330,8 @@ struct r300_state_atom { #define R300_RI_INTERP_7 8 #define R300_RI_CMDSIZE 9 +#define R500_RI_CMDSIZE 17 + #define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */ #define R300_RR_INST_0 1 #define R300_RR_INST_1 2 diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 1b405889c3..07656b130c 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -186,10 +186,15 @@ static void r300EmitClearState(GLcontext * ctx) int cmd_written = 0; drm_radeon_cmd_header_t *cmd = NULL; int has_tcl = 1; + int is_r500 = 0; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + + /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are * quite complex; see the functions in r300_emit.c. 
@@ -271,49 +276,53 @@ static void r300EmitClearState(GLcontext * ctx) e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); - R300_STATECHANGE(r300, ri); - reg_start(R300_RS_IP_0, 8); - for (i = 0; i < 8; ++i) { - e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); - } + if (!is_r500) { + R300_STATECHANGE(r300, ri); + reg_start(R300_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { + e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. */ - reg_start(R300_RS_COUNT, 1); - e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - e32(0x0); + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); - R300_STATECHANGE(r300, rr); - reg_start(R300_RS_INST_0, 0); - e32(R300_RS_INST_COL_CN_WRITE); + R300_STATECHANGE(r300, rr); + reg_start(R300_RS_ROUTE_0, 0); + e32(R300_RS_ROUTE_0_COLOR); + } - R300_STATECHANGE(r300, fp); - reg_start(R300_PFS_CNTL_0, 2); - e32(0x0); - e32(0x0); - e32(0x0); - reg_start(R300_PFS_NODE_0, 3); - e32(0x0); - e32(0x0); - e32(0x0); - e32(R300_PFS_NODE_OUTPUT_COLOR); + if (!is_r500) { + R300_STATECHANGE(r300, fp); + reg_start(R300_PFS_CNTL_0, 2); + e32(0x0); + e32(0x0); + e32(0x0); + reg_start(R300_PFS_NODE_0, 3); + e32(0x0); + e32(0x0); + e32(0x0); + e32(R300_PFS_NODE_OUTPUT_COLOR); - R300_STATECHANGE(r300, fpi[0]); - R300_STATECHANGE(r300, fpi[1]); - R300_STATECHANGE(r300, fpi[2]); - R300_STATECHANGE(r300, fpi[3]); + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); - reg_start(R300_PFS_INSTR0_0, 0); - e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + reg_start(R300_PFS_INSTR0_0, 0); + e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - 
reg_start(R300_PFS_INSTR1_0, 0); - e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + reg_start(R300_PFS_INSTR1_0, 0); + e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - reg_start(R300_PFS_INSTR2_0, 0); - e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + reg_start(R300_PFS_INSTR2_0, 0); + e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - reg_start(R300_PFS_INSTR3_0, 0); - e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + reg_start(R300_PFS_INSTR3_0, 0); + e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + } if (has_tcl) { R300_STATECHANGE(r300, pvs); -- cgit v1.2.3 From a453b3154e063c3e934cb90a546e984a758dd14f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 13:55:56 +1000 Subject: r500 RS unit setup --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 ---- src/mesa/drivers/dri/r300/r300_reg.h | 22 +++++++++++----------- src/mesa/drivers/dri/r300/r300_state.c | 28 ++++++++++++++++++---------- 3 files changed, 29 insertions(+), 25 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index a92bb87d7d..3cfb7cf2cd 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -399,10 +399,6 @@ void r300InitCmdBuf(r300ContextPtr r300) if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); - } - - if (is_r500) { - } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index d640d8b7e7..2822b1d4c3 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -673,12 +673,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R500_RS_IP_13 0x40A8 #define R500_RS_IP_14 0x40AC #define R500_RS_IP_15 0x40B0 -#define R500_RS_IP_TEX_PTR_S_SHIFT 0 -#define R500_RS_IP_TEX_PTR_T_SHIFT 6 -#define R500_RS_IP_TEX_PTR_R_SHIFT 12 -#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 -#define R500_RS_IP_COL_PTR_SHIFT 24 -#define R500_RS_IP_COL_FMT_SHIFT 27 +#define R500_RS_IP_TEX_PTR_S_SHIFT 0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 +#define R500_RS_IP_COL_PTR_SHIFT 24 +#define R500_RS_IP_COL_FMT_SHIFT 27 #define R500_RS_IP_COL_FMT_RGBA (0 << 27) #define R500_RS_IP_COL_FMT_RGB0 (1 << 27) #define R500_RS_IP_COL_FMT_RGB1 (2 << 27) @@ -692,7 +692,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_COL_FMT_1111 (10 << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) -#define R500_RS_IP_OFFSET_EN (1 << 31) +#define R500_RS_IP_OFFSET_EN (1 << 31) /* gap */ @@ -1138,10 +1138,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_COL_FMT_111A 8 # define R300_RS_COL_FMT_1110 9 # define R300_RS_COL_FMT_1111 10 -# define R300_RS_SEL_S(x) (x << 13) -# define R300_RS_SEL_T(x) (x << 16) -# define R300_RS_SEL_R(x) (x << 19) -# define R300_RS_SEL_Q(x) (x << 22) +# define R300_RS_SEL_S(x) (x << 13) +# define R300_RS_SEL_T(x) (x << 16) +# define R300_RS_SEL_R(x) (x << 19) +# define R300_RS_SEL_Q(x) (x << 22) # define R300_RS_SEL_C0 0 # define R300_RS_SEL_C1 1 # define R300_RS_SEL_C2 2 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index d2fd04a550..04ee59da63 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1609,9 +1609,9 @@ static void r500SetupRSUnit(GLcontext * ctx) /* I'm still unsure if these are needed */ GLuint interp_magic[8] = { 0x00, - R300_RS_COL_PTR(1), - R300_RS_COL_PTR(2), - R300_RS_COL_PTR(3), + 1 << 24, + 2 << 24, + 3 << 24, 0x00, 0x00, 0x00, @@ -1658,14 +1658,20 @@ static void r500SetupRSUnit(GLcontext * ctx) } for 
(i = 0; i < ctx->Const.MaxTextureUnits; i++) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - | interp_magic[i]; + + // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_TEX_PTR_S_SHIFT) | + (1 << R500_TEX_PTR_T_SHIFT) | + (2 << R500_TEX_PTR_R_SHIFT) | + (3 << R500_TEX_PTR_Q_SHIFT) | + (in_texcoords << 0) | interp_magic[i]; r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R300_RS_ROUTE_ENABLE | i /* source INTERP */ - | (fp_reg << R300_RS_ROUTE_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); high_rr = fp_reg; /* Passing invalid data here can lock the GPU. 
*/ @@ -1684,7 +1690,8 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1694,7 +1701,8 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITER300_RS_ROUTE_1_UNKNOWN11 | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; @@ -1706,7 +1714,7 @@ static void r500SetupRSUnit(GLcontext * ctx) /* Need at least one. This might still lock as the values are undefined... 
*/ if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); col_interp_nr++; } -- cgit v1.2.3 From c0cb9bc84c1997d790d0b7efa8ed94fc601d7d19 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:21:10 +1000 Subject: mesa: cleanup state emission and rs for r500 trivial clear app now renders --- src/mesa/drivers/dri/r300/r300_emit.h | 13 +++++ src/mesa/drivers/dri/r300/r300_ioctl.c | 86 ++++++++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_state.c | 10 ++-- 3 files changed, 104 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index a4f6ab997e..50e7e4f149 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -178,6 +178,19 @@ static inline uint32_t cmdpacify(void) cmd[0].i = cmdvpu((dest), _n/4); \ } while (0); +#define r500fp_start_fragment(dest, length) \ + do { \ + int _n; \ + _n = (length); \ + cmd = (drm_radeon_cmd_header_t*) \ + r300AllocCmdBuf(rmesa, \ + (_n+1), \ + __FUNCTION__); \ + cmd_reserved = _n+1; \ + cmd_written =1; \ + cmd[0].i = cmdr500fp((dest), _n/6); \ + } while (0); + #define start_packet3(packet, count) \ { \ int _n; \ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 07656b130c..14258324bd 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -292,6 +292,26 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, rr); reg_start(R300_RS_ROUTE_0, 0); e32(R300_RS_ROUTE_0_COLOR); + } else { + + R300_STATECHANGE(r300, ri); + reg_start(R500_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { + e32((1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + 
(3 << R500_RS_IP_TEX_PTR_Q_SHIFT) ); + } + + R300_STATECHANGE(r300, rc); + /* The second constant is needed to get glxgears display anything .. */ + reg_start(R300_RS_COUNT, 1); + e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + e32(0x0); + + R300_STATECHANGE(r300, rr); + reg_start(R500_RS_INST_0, 0); + e32(R500_RS_INST_COL_CN_WRITE); + } if (!is_r500) { @@ -322,6 +342,72 @@ static void r300EmitClearState(GLcontext * ctx) reg_start(R300_PFS_INSTR3_0, 0); e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + } else { + R300_STATECHANGE(r300, r500fp); + r500fp_start_fragment(0, 12); + + e32(0x7808); + e32(R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED); + e32(R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | + R500_TEX_SRC_T_SWIZ_G | + R500_TEX_DST_ADDR(0) | + R500_TEX_DST_R_SWIZ_R | + R500_TEX_DST_G_SWIZ_G | + R500_TEX_DST_B_SWIZ_B | + R500_TEX_DST_A_SWIZ_A); + e32(R500_DX_ADDR(0) | + R500_DX_S_SWIZ_R | + R500_DX_T_SWIZ_R | + R500_DX_R_SWIZ_R | + R500_DX_Q_SWIZ_R | + R500_DY_ADDR(0) | + R500_DY_S_SWIZ_R | + R500_DY_T_SWIZ_R | + R500_DY_R_SWIZ_R | + R500_DY_Q_SWIZ_R); + e32(0x0); + e32(0x0); + + e32(R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK); + + e32(R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0); + + e32(R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0); + + e32(R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1); + + e32(R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1); + + e32(R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | 
+ R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0); } if (has_tcl) { diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 04ee59da63..f4ee307adb 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1661,10 +1661,10 @@ static void r500SetupRSUnit(GLcontext * ctx) // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_TEX_PTR_S_SHIFT) | - (1 << R500_TEX_PTR_T_SHIFT) | - (2 << R500_TEX_PTR_R_SHIFT) | - (3 << R500_TEX_PTR_Q_SHIFT) | + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | (in_texcoords << 0) | interp_magic[i]; r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; @@ -1702,7 +1702,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITER300_RS_ROUTE_1_UNKNOWN11 | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; -- cgit v1.2.3 From c07534dc719f53c0e59d59f3bd7dd25d1d32747c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:30:59 +1000 Subject: r300: this code really shouldn't be here. For R500 just ignore it for now while I do something interesting like run glxgears. 
--- src/mesa/drivers/dri/r300/r300_state.c | 55 ++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 26 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index f4ee307adb..ffcc311c05 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1436,40 +1436,43 @@ static void r300SetupTextures(GLcontext * ctx) if (!fp) /* should only happenen once, just after context is created */ return; - R300_STATECHANGE(r300, fpt); - for (i = 0; i < fp->tex.length; i++) { - int unit; - int opcode; - unsigned long val; + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + R300_STATECHANGE(r300, fpt); - unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; - unit &= 15; - - val = fp->tex.inst[i]; - val &= ~R300_FPITX_IMAGE_MASK; - - opcode = - (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; - if (opcode == R300_FPITX_OP_KIL) { - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - if (tmu_mappings[unit] >= 0) { - val |= - tmu_mappings[unit] << - R300_FPITX_IMAGE_SHIFT; + for (i = 0; i < fp->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; + unit &= 15; + + val = fp->tex.inst[i]; + val &= ~R300_FPITX_IMAGE_MASK; + + opcode = + (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; + if (opcode == R300_FPITX_OP_KIL) { r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; } else { - // We get here when the corresponding texture image is incomplete - // (e.g. incomplete mipmaps etc.) - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_FPITX_IMAGE_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) 
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } } } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); } - r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); - if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu); -- cgit v1.2.3 From 6443da0865a6ad8bdd7abc65a9621ba329fcb756 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2008 14:34:04 +1000 Subject: r300: add rv530 pci id for the t60p laptop --- src/mesa/drivers/dri/radeon/radeon_chipset.h | 3 +++ src/mesa/drivers/dri/radeon/radeon_screen.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 9e375474a0..dc1b8a9c8e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -146,6 +146,9 @@ #define PCI_CHIP_RV410_5E4C 0x5E4C #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F + +#define PCI_CHIP_RV530_71C4 0x71C4 + #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 #define PCI_CHIP_RS690_791E 0x791E diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 6107577e40..1a1666ccfe 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -686,6 +686,11 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) fprintf(stderr, "Warning, RS690 detected, 3D support is incomplete.\n"); break; + case PCI_CHIP_RV530_71C4: + screen->chip_family = CHIP_FAMILY_R520; + fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", dri_priv->deviceID); -- cgit v1.2.3 From 3b7c5bfb98cd4d3b675ac39ec62d0fa71a66a6dd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 21 Mar 2008 17:05:29 +1000 
Subject: r500: setup fragment program constant emission atom --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 +++- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_emit.h | 14 ++++++++------ 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3cfb7cf2cd..248de7e34a 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -398,7 +398,9 @@ void r300InitCmdBuf(r300ContextPtr r300) if (is_r500) { ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); - r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); + ALLOC_STATE(r500fp_const, variable, R300_FPI_CMDSIZE, 0); + r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 012c0fe6a5..45dafd6bcc 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -490,6 +490,7 @@ struct r300_hw_state { struct r300_state_atom fpt; /* texi - (4620) */ struct r300_state_atom us_out_fmt; /* (46A4) */ struct r300_state_atom r500fp; /* r500 fp instructions */ + struct r300_state_atom r500fp_const; /* r500 fp constants */ struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ struct r300_state_atom fogs; /* fog state (4BC0) */ struct r300_state_atom fogc; /* fog color (4BC8) */ diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 50e7e4f149..51302301f7 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -74,14 +74,16 @@ static inline uint32_t cmdvpu(int addr, int count) return cmd.u; } -static inline 
uint32_t cmdr500fp(int addr, int count) +static inline uint32_t cmdr500fp(int addr, int count, int type, int clamp) { drm_r300_cmd_header_t cmd; - cmd.vpu.cmd_type = R300_CMD_R500FP; - cmd.vpu.count = count; - cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; - cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + cmd.r500fp.cmd_type = R300_CMD_R500FP; + cmd.r500fp.count = count; + cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; + cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0; + cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0; + cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF); return cmd.u; } @@ -188,7 +190,7 @@ static inline uint32_t cmdpacify(void) __FUNCTION__); \ cmd_reserved = _n+1; \ cmd_written =1; \ - cmd[0].i = cmdr500fp((dest), _n/6); \ + cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \ } while (0); #define start_packet3(packet, count) \ -- cgit v1.2.3 From 6864220c28c95991cf9577892156d670bd184007 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 10:55:32 +1000 Subject: r500: fixup support for emitting fragment program to hardware. 
Also fixup the constant emission this breaks glxgears from what I can see but its another step to correctness --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 23 +++++++++++++++++++---- src/mesa/drivers/dri/r300/r300_state.c | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 248de7e34a..d965a95c0f 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -242,6 +242,7 @@ void r300EmitState(r300ContextPtr r300) #define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) { @@ -262,6 +263,20 @@ static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom) return cnt ? (cnt * 4) + 1 : 0; } +static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 6) + 1 : 0; +} + +static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) +{ + int cnt; + cnt = r500fp_count(atom->cmd); + return cnt ? 
(cnt * 4) + 1 : 0; +} + #define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ do { \ r300->hw.ATOM.cmd_size = (SZ); \ @@ -397,9 +412,9 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { - ALLOC_STATE(r500fp, variable, R300_FPI_CMDSIZE, 0); + ALLOC_STATE(r500fp, r500fp, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); - ALLOC_STATE(r500fp_const, variable, R300_FPI_CMDSIZE, 0); + ALLOC_STATE(r500fp_const, r500fp_const, R300_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); @@ -416,6 +431,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(FG_FOG_BLEND, 1); @@ -425,8 +442,6 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(FG_ALPHA_FUNC, 2); ALLOC_STATE(fg_depth_src, always, 2, 0); r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1); - ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); - r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); ALLOC_STATE(rb3d_cctl, always, 2, 0); r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1); ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ffcc311c05..eab39ec4b0 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2296,6 +2296,20 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } } +#define bump_r500fp_count(ptr, new_count) do{\ 
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/6; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +#define bump_r500fp_const_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + static void r500SetupPixelShader(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -2368,6 +2382,16 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; + bump_r500fp_count(rmesa->hw.r500fp.cmd, 12); + + R300_STATECHANGE(rmesa, r500fp_const); + for (i = 0; i < fp->const_nr; i++) { + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + } + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); } -- cgit v1.2.3 From fbab11e9b08d107b04d53833286da1c1487c2708 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 10:56:48 +1000 Subject: r500: enable gb tiling for r5xx --- src/mesa/drivers/dri/r300/r300_state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index eab39ec4b0..ac36b3b43c 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2046,6 +2046,8 @@ static void r300ResetHwState(r300ContextPtr r300) R300_GB_TILE_PIPE_COUNT_R300; break; case CHIP_FAMILY_R420: + case CHIP_FAMILY_RV515: + case CHIP_FAMILY_R520: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= R300_GB_TILE_PIPE_COUNT_R420; 
break; -- cgit v1.2.3 From de9c1c3627de778671c0fa1215a2c5e24d4374dc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 22 Mar 2008 11:00:25 +1000 Subject: r500: fixup fake shader to keep gears going --- src/mesa/drivers/dri/r300/r300_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ac36b3b43c..6dc76aed87 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2325,7 +2325,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) /* emit the standard zero shader */ R300_STATECHANGE(rmesa, r500fp); i = 1; - rmesa->hw.r500fp.cmd[i++] = 0x7807; + rmesa->hw.r500fp.cmd[i++] = 0x7808; rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | -- cgit v1.2.3 From 28904e54d2a1bae009cbb88088b81e8d5bbd15d5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Apr 2008 20:42:15 +1000 Subject: fixup r500 bits for renaming --- src/mesa/drivers/dri/r300/r300_state.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 6dc76aed87..8f12266a5f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1644,7 +1644,7 @@ static void r500SetupRSUnit(GLcontext * ctx) fp_reg = in_texcoords = col_interp_nr = high_rr = 0; - r300->hw.rr.cmd[R300_RR_ROUTE_1] = 0; + r300->hw.rr.cmd[R300_RR_INST_1] = 0; if (InputsRead & FRAG_BIT_WPOS) { for (i = 0; i < ctx->Const.MaxTextureUnits; i++) @@ -1670,10 +1670,10 @@ static void r500SetupRSUnit(GLcontext * ctx) (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | (in_texcoords << 0) | interp_magic[i]; - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0; + 
r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { //assert(r300->state.texture.tc_count != 0); - r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i /* source INTERP */ | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT); high_rr = fp_reg; @@ -1694,7 +1694,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1705,7 +1705,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_ROUTE_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) high_rr = 1; @@ -1717,7 +1717,7 @@ static void r500SetupRSUnit(GLcontext * ctx) /* Need at least one. This might still lock as the values are undefined... 
*/ if (in_texcoords == 0 && col_interp_nr == 0) { - r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); col_interp_nr++; } @@ -1726,7 +1726,7 @@ static void r500SetupRSUnit(GLcontext * ctx) | R300_HIRES_EN; assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, high_rr + 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); r300->hw.rc.cmd[2] = 0xC0 | high_rr; if (InputsRead) -- cgit v1.2.3 From 831fc138c1617f5cb49da589ea5126c8eda364a4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 May 2008 16:02:58 -0400 Subject: R5xx: Add R5xx pci ids --- src/mesa/drivers/dri/r300/r300_state.c | 4 + src/mesa/drivers/dri/radeon/radeon_chipset.h | 104 +++++++++++++++++++++++ src/mesa/drivers/dri/radeon/radeon_screen.c | 120 ++++++++++++++++++++++++++- 3 files changed, 227 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 8f12266a5f..7419b15a55 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2048,6 +2048,10 @@ static void r300ResetHwState(r300ContextPtr r300) case CHIP_FAMILY_R420: case CHIP_FAMILY_RV515: case CHIP_FAMILY_R520: + case CHIP_FAMILY_RV530: + case CHIP_FAMILY_R580: + case CHIP_FAMILY_RV560: + case CHIP_FAMILY_RV570: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= R300_GB_TILE_PIPE_COUNT_R420; break; diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index dc1b8a9c8e..2821ecc0c0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -147,12 +147,111 @@ #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F +#define PCI_CHIP_R520_7100 0x7100 +#define 
PCI_CHIP_R520_7101 0x7101 +#define PCI_CHIP_R520_7102 0x7102 +#define PCI_CHIP_R520_7103 0x7103 +#define PCI_CHIP_R520_7104 0x7104 +#define PCI_CHIP_R520_7105 0x7105 +#define PCI_CHIP_R520_7106 0x7106 +#define PCI_CHIP_R520_7108 0x7108 +#define PCI_CHIP_R520_7109 0x7109 +#define PCI_CHIP_R520_710A 0x710A +#define PCI_CHIP_R520_710B 0x710B +#define PCI_CHIP_R520_710C 0x710C +#define PCI_CHIP_R520_710E 0x710E +#define PCI_CHIP_R520_710F 0x710F +#define PCI_CHIP_RV515_7140 0x7140 +#define PCI_CHIP_RV515_7141 0x7141 +#define PCI_CHIP_RV515_7142 0x7142 +#define PCI_CHIP_RV515_7143 0x7143 +#define PCI_CHIP_RV515_7144 0x7144 +#define PCI_CHIP_RV515_7145 0x7145 +#define PCI_CHIP_RV515_7146 0x7146 +#define PCI_CHIP_RV515_7147 0x7147 +#define PCI_CHIP_RV515_7149 0x7149 +#define PCI_CHIP_RV515_714A 0x714A +#define PCI_CHIP_RV515_714B 0x714B +#define PCI_CHIP_RV515_714C 0x714C +#define PCI_CHIP_RV515_714D 0x714D +#define PCI_CHIP_RV515_714E 0x714E +#define PCI_CHIP_RV515_714F 0x714F +#define PCI_CHIP_RV515_7151 0x7151 +#define PCI_CHIP_RV515_7152 0x7152 +#define PCI_CHIP_RV515_7153 0x7153 +#define PCI_CHIP_RV515_715E 0x715E +#define PCI_CHIP_RV515_715F 0x715F +#define PCI_CHIP_RV515_7180 0x7180 +#define PCI_CHIP_RV515_7181 0x7181 +#define PCI_CHIP_RV515_7183 0x7183 +#define PCI_CHIP_RV515_7186 0x7186 +#define PCI_CHIP_RV515_7187 0x7187 +#define PCI_CHIP_RV515_7188 0x7188 +#define PCI_CHIP_RV515_718A 0x718A +#define PCI_CHIP_RV515_718B 0x718B +#define PCI_CHIP_RV515_718C 0x718C +#define PCI_CHIP_RV515_718D 0x718D +#define PCI_CHIP_RV515_718F 0x718F +#define PCI_CHIP_RV515_7193 0x7193 +#define PCI_CHIP_RV515_7196 0x7196 +#define PCI_CHIP_RV515_719B 0x719B +#define PCI_CHIP_RV515_719F 0x719F +#define PCI_CHIP_RV530_71C0 0x71C0 +#define PCI_CHIP_RV530_71C1 0x71C1 +#define PCI_CHIP_RV530_71C2 0x71C2 +#define PCI_CHIP_RV530_71C3 0x71C3 #define PCI_CHIP_RV530_71C4 0x71C4 +#define PCI_CHIP_RV530_71C5 0x71C5 +#define PCI_CHIP_RV530_71C6 0x71C6 +#define PCI_CHIP_RV530_71C7 0x71C7 
+#define PCI_CHIP_RV530_71CD 0x71CD +#define PCI_CHIP_RV530_71CE 0x71CE +#define PCI_CHIP_RV530_71D2 0x71D2 +#define PCI_CHIP_RV530_71D4 0x71D4 +#define PCI_CHIP_RV530_71D5 0x71D5 +#define PCI_CHIP_RV530_71D6 0x71D6 +#define PCI_CHIP_RV530_71DA 0x71DA +#define PCI_CHIP_RV530_71DE 0x71DE +#define PCI_CHIP_RV515_7200 0x7200 +#define PCI_CHIP_RV515_7210 0x7210 +#define PCI_CHIP_RV515_7211 0x7211 +#define PCI_CHIP_R580_7240 0x7240 +#define PCI_CHIP_R580_7243 0x7243 +#define PCI_CHIP_R580_7244 0x7244 +#define PCI_CHIP_R580_7245 0x7245 +#define PCI_CHIP_R580_7246 0x7246 +#define PCI_CHIP_R580_7247 0x7247 +#define PCI_CHIP_R580_7248 0x7248 +#define PCI_CHIP_R580_7249 0x7249 +#define PCI_CHIP_R580_724A 0x724A +#define PCI_CHIP_R580_724B 0x724B +#define PCI_CHIP_R580_724C 0x724C +#define PCI_CHIP_R580_724D 0x724D +#define PCI_CHIP_R580_724E 0x724E +#define PCI_CHIP_R580_724F 0x724F +#define PCI_CHIP_RV570_7280 0x7280 +#define PCI_CHIP_RV560_7281 0x7281 +#define PCI_CHIP_RV560_7283 0x7283 +#define PCI_CHIP_R580_7284 0x7284 +#define PCI_CHIP_RV560_7287 0x7287 +#define PCI_CHIP_RV570_7288 0x7288 +#define PCI_CHIP_RV570_7289 0x7289 +#define PCI_CHIP_RV570_728B 0x728B +#define PCI_CHIP_RV570_728C 0x728C +#define PCI_CHIP_RV560_7290 0x7290 +#define PCI_CHIP_RV560_7291 0x7291 +#define PCI_CHIP_RV560_7293 0x7293 +#define PCI_CHIP_RV560_7297 0x7297 #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 #define PCI_CHIP_RS690_791E 0x791E #define PCI_CHIP_RS690_791F 0x791F +#define PCI_CHIP_RS740_796C 0x796C +#define PCI_CHIP_RS740_796D 0x796D +#define PCI_CHIP_RS740_796E 0x796E +#define PCI_CHIP_RS740_796F 0x796F + enum { CHIP_FAMILY_R100, @@ -172,8 +271,13 @@ enum { CHIP_FAMILY_RV410, CHIP_FAMILY_RS400, CHIP_FAMILY_RS690, + CHIP_FAMILY_RS740, CHIP_FAMILY_RV515, CHIP_FAMILY_R520, + CHIP_FAMILY_RV530, + CHIP_FAMILY_R580, + CHIP_FAMILY_RV560, + CHIP_FAMILY_RV570, CHIP_FAMILY_LAST }; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c 
b/src/mesa/drivers/dri/radeon/radeon_screen.c index 1a1666ccfe..6f9d912442 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -682,15 +682,133 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) break; case PCI_CHIP_RS690_791E: + case PCI_CHIP_RS690_791F: screen->chip_family = CHIP_FAMILY_RS690; fprintf(stderr, "Warning, RS690 detected, 3D support is incomplete.\n"); break; + case PCI_CHIP_RS740_796C: + case PCI_CHIP_RS740_796D: + case PCI_CHIP_RS740_796E: + case PCI_CHIP_RS740_796F: + screen->chip_family = CHIP_FAMILY_RS740; + fprintf(stderr, "Warning, RS740 detected, 3D support is incomplete.\n"); + break; - case PCI_CHIP_RV530_71C4: + case PCI_CHIP_R520_7100: + case PCI_CHIP_R520_7101: + case PCI_CHIP_R520_7102: + case PCI_CHIP_R520_7103: + case PCI_CHIP_R520_7104: + case PCI_CHIP_R520_7105: + case PCI_CHIP_R520_7106: + case PCI_CHIP_R520_7108: + case PCI_CHIP_R520_7109: + case PCI_CHIP_R520_710A: + case PCI_CHIP_R520_710B: + case PCI_CHIP_R520_710C: + case PCI_CHIP_R520_710E: + case PCI_CHIP_R520_710F: screen->chip_family = CHIP_FAMILY_R520; fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; + case PCI_CHIP_RV515_7140: + case PCI_CHIP_RV515_7141: + case PCI_CHIP_RV515_7142: + case PCI_CHIP_RV515_7143: + case PCI_CHIP_RV515_7144: + case PCI_CHIP_RV515_7145: + case PCI_CHIP_RV515_7146: + case PCI_CHIP_RV515_7147: + case PCI_CHIP_RV515_7149: + case PCI_CHIP_RV515_714A: + case PCI_CHIP_RV515_714B: + case PCI_CHIP_RV515_714C: + case PCI_CHIP_RV515_714D: + case PCI_CHIP_RV515_714E: + case PCI_CHIP_RV515_714F: + case PCI_CHIP_RV515_7151: + case PCI_CHIP_RV515_7152: + case PCI_CHIP_RV515_7153: + case PCI_CHIP_RV515_715E: + case PCI_CHIP_RV515_715F: + case PCI_CHIP_RV515_7180: + case PCI_CHIP_RV515_7181: + case PCI_CHIP_RV515_7183: + case PCI_CHIP_RV515_7186: + case PCI_CHIP_RV515_7187: + case PCI_CHIP_RV515_7188: + case PCI_CHIP_RV515_718A: + case PCI_CHIP_RV515_718B: + case 
PCI_CHIP_RV515_718C: + case PCI_CHIP_RV515_718D: + case PCI_CHIP_RV515_718F: + case PCI_CHIP_RV515_7193: + case PCI_CHIP_RV515_7196: + case PCI_CHIP_RV515_719B: + case PCI_CHIP_RV515_719F: + case PCI_CHIP_RV515_7200: + case PCI_CHIP_RV515_7210: + case PCI_CHIP_RV515_7211: + screen->chip_family = CHIP_FAMILY_RV515; + fprintf(stderr, "Warning, RV515 detected, 3D HAHAHAHAHA!!.\n"); + break; + + case PCI_CHIP_RV530_71C0: + case PCI_CHIP_RV530_71C1: + case PCI_CHIP_RV530_71C2: + case PCI_CHIP_RV530_71C3: + case PCI_CHIP_RV530_71C4: + case PCI_CHIP_RV530_71C5: + case PCI_CHIP_RV530_71C6: + case PCI_CHIP_RV530_71C7: + case PCI_CHIP_RV530_71CD: + case PCI_CHIP_RV530_71CE: + case PCI_CHIP_RV530_71D2: + case PCI_CHIP_RV530_71D4: + case PCI_CHIP_RV530_71D5: + case PCI_CHIP_RV530_71D6: + case PCI_CHIP_RV530_71DA: + case PCI_CHIP_RV530_71DE: + screen->chip_family = CHIP_FAMILY_RV530; + fprintf(stderr, "Warning, RV530 detected, 3D HAHAHAHAHA!!.\n"); + break; + + case PCI_CHIP_R580_7240: + case PCI_CHIP_R580_7243: + case PCI_CHIP_R580_7244: + case PCI_CHIP_R580_7245: + case PCI_CHIP_R580_7246: + case PCI_CHIP_R580_7247: + case PCI_CHIP_R580_7248: + case PCI_CHIP_R580_7249: + case PCI_CHIP_R580_724A: + case PCI_CHIP_R580_724B: + case PCI_CHIP_R580_724C: + case PCI_CHIP_R580_724D: + case PCI_CHIP_R580_724E: + case PCI_CHIP_R580_724F: + case PCI_CHIP_R580_7284: + screen->chip_family = CHIP_FAMILY_R580; + fprintf(stderr, "Warning, R580 detected, 3D HAHAHAHAHA!!.\n"); + break; + + case PCI_CHIP_RV570_7280: + case PCI_CHIP_RV560_7281: + case PCI_CHIP_RV560_7283: + case PCI_CHIP_RV560_7287: + case PCI_CHIP_RV570_7288: + case PCI_CHIP_RV570_7289: + case PCI_CHIP_RV570_728B: + case PCI_CHIP_RV570_728C: + case PCI_CHIP_RV560_7290: + case PCI_CHIP_RV560_7291: + case PCI_CHIP_RV560_7293: + case PCI_CHIP_RV560_7297: + screen->chip_family = CHIP_FAMILY_RV560; + fprintf(stderr, "Warning, RV560 detected, 3D HAHAHAHAHA!!.\n"); + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't 
guess.\n", dri_priv->deviceID); -- cgit v1.2.3 From e3721a3b3fca5ad7d957ae95252405da0740fbf6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 10:51:40 -0400 Subject: R5xx: various updates - fixup VAP_CNTL setup - remove extra instruction in r5xx passthrough shader - add notes about pipe config --- src/mesa/drivers/dri/r300/r300_reg.h | 9 +++- src/mesa/drivers/dri/r300/r300_state.c | 71 ++++++++++++++++------------- src/mesa/drivers/dri/radeon/radeon_screen.c | 9 +++- 3 files changed, 56 insertions(+), 33 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 2822b1d4c3..fee21dae67 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -70,6 +70,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Stolen from r200 code from Christoph Brill (It's a guess!) */ #define R300_VAP_CNTL 0x2080 +# define R300_PVS_NUM_SLOTS_SHIFT 0 +# define R300_PVS_NUM_CNTLRS_SHIFT 4 +# define R300_PVS_NUM_FPUS_SHIFT 8 +# define R300_VF_MAX_VTX_NUM_SHIFT 18 +# define R300_GL_CLIP_SPACE_DEF (0 << 22) +# define R300_DX_CLIP_SPACE_DEF (1 << 22) +# define R500_TCL_STATE_OPTIMIZATION (1 << 23) /* This register is written directly and also starts data section * in many 3d CP_PACKET3's @@ -375,7 +382,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_CLIP_CNTL 0x221C # define R300_221C_NORMAL 0x00000000 # define R300_221C_CLEAR 0x0001C000 -#define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_0 (1 << 0) /* These seem to be per-pixel and per-vertex X and Y clipping planes. The first * plane is per-pixel and the second plane is per-vertex. 
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 7419b15a55..0740d7ea4a 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1981,10 +1981,34 @@ static void r300ResetHwState(r300ContextPtr r300) r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); - if (!has_tcl) - r300->hw.vap_cntl.cmd[1] = 0x0014045a; + /* setup the VAP */ + /* PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted + * dynamically. PVS_NUM_FPUS is fixed based on asic + */ + if (has_tcl) { + r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r300->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION; + } else + r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + r300->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) + r300->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) + r300->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + r300->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT); else - r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ + r300->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT); r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA @@ 
-2035,20 +2059,27 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; - /* XXX: Other families? */ + /* num pipes needs to be read back from the GB_PIPE_SELECT register + * on r4xx/r5xx/rs4xx/rs6xx + * should move this to the drm + */ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = - R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16; + R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/; switch (r300->radeon.radeonScreen->chip_family) { case CHIP_FAMILY_R300: case CHIP_FAMILY_R350: - case CHIP_FAMILY_RV410: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= R300_GB_TILE_PIPE_COUNT_R300; break; - case CHIP_FAMILY_R420: + case CHIP_FAMILY_RV350: case CHIP_FAMILY_RV515: - case CHIP_FAMILY_R520: case CHIP_FAMILY_RV530: + case CHIP_FAMILY_RV410: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_RV300; + break; + case CHIP_FAMILY_R420: + case CHIP_FAMILY_R520: case CHIP_FAMILY_R580: case CHIP_FAMILY_RV560: case CHIP_FAMILY_RV570: @@ -2329,28 +2360,6 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) /* emit the standard zero shader */ R300_STATECHANGE(rmesa, r500fp); i = 1; - rmesa->hw.r500fp.cmd[i++] = 0x7808; - rmesa->hw.r500fp.cmd[i++] = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - rmesa->hw.r500fp.cmd[i++] = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A; - rmesa->hw.r500fp.cmd[i++] = R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R; - rmesa->hw.r500fp.cmd[i++] = 0x0; - rmesa->hw.r500fp.cmd[i++] = 0x0; - rmesa->hw.r500fp.cmd[i++] = R500_INST_TYPE_OUT | 
R500_INST_TEX_SEM_WAIT | R500_INST_LAST | @@ -2388,7 +2397,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; - bump_r500fp_count(rmesa->hw.r500fp.cmd, 12); + bump_r500fp_count(rmesa->hw.r500fp.cmd, 6); R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < fp->const_nr; i++) { diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 6f9d912442..2f57d289fe 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -649,7 +649,9 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->chip_flags = RADEON_CHIPSET_TCL; break; - /* RV410 SE chips have half the pipes of regular RV410 */ + /* RV410 SE chips have half the pipes of regular RV410 + * Need to get num pipes from the GB_PIPE_SELECT register + */ case PCI_CHIP_RV410_5E4C: case PCI_CHIP_RV410_5E4F: screen->chip_family = CHIP_FAMILY_RV380; @@ -709,6 +711,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R520_710E: case PCI_CHIP_R520_710F: screen->chip_family = CHIP_FAMILY_R520; + //screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -751,6 +754,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV515_7210: case PCI_CHIP_RV515_7211: screen->chip_family = CHIP_FAMILY_RV515; + //screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV515 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -771,6 +775,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV530_71DA: case PCI_CHIP_RV530_71DE: screen->chip_family = CHIP_FAMILY_RV530; + //screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV530 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -790,6 +795,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R580_724F: case PCI_CHIP_R580_7284: screen->chip_family = CHIP_FAMILY_R580; + //screen->chip_flags = RADEON_CHIPSET_TCL; 
fprintf(stderr, "Warning, R580 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -806,6 +812,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV560_7293: case PCI_CHIP_RV560_7297: screen->chip_family = CHIP_FAMILY_RV560; + //screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV560 detected, 3D HAHAHAHAHA!!.\n"); break; -- cgit v1.2.3 From 2bd26f4afa4f87d3dd2a8b9715455fc3f5a05046 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 10:54:20 -0400 Subject: Update comment --- src/mesa/drivers/dri/r300/r300_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 0740d7ea4a..57ff9e9a73 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1982,7 +1982,7 @@ static void r300ResetHwState(r300ContextPtr r300) r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); /* setup the VAP */ - /* PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted + /* for tcl, PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted * dynamically. 
PVS_NUM_FPUS is fixed based on asic */ if (has_tcl) { -- cgit v1.2.3 From e61dadf3de3084157f25ce0fbcc07990bb44aae5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 12:29:40 -0400 Subject: R300: clean up VAP_PROG_STREAM_CNTL* register usage --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 +- src/mesa/drivers/dri/r300/r300_emit.c | 28 +++++++---- src/mesa/drivers/dri/r300/r300_ioctl.c | 25 ++++++++-- src/mesa/drivers/dri/r300/r300_reg.h | 88 ++++++++++++++++++++------------- src/mesa/drivers/dri/r300/r300_swtcl.c | 27 +++++----- 5 files changed, 109 insertions(+), 63 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index d965a95c0f..d0ce401ed6 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -328,10 +328,10 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); r300->hw.vir[0].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1); + cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1); ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); r300->hw.vir[1].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_INPUT_ROUTE_1_0, 1); + cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2); ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index e7371133d3..0eeb8bf98a 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -216,14 +216,18 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, for (i = 0; i < nr; i += 2) { /* make sure input is valid, would lockup the gpu */ assert(inputs[tab[i]] != -1); - dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[i]] << 8) | 
(attribptr[tab[i]]->size - 1); + dw = (R300_SIGNED | + (inputs[tab[i]] << R300_DST_VEC_LOC_SHIFT) | + (attribptr[tab[i]]->size - 1)) << R300_DATA_TYPE_0_SHIFT; if (i + 1 == nr) { - dw |= R300_VAP_INPUT_ROUTE_END; + dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; } else { assert(inputs[tab[i + 1]] != -1); - dw |= (R300_INPUT_ROUTE_FLOAT | (inputs[tab[i + 1]] << 8) | (attribptr[tab[i + 1]]->size - 1)) << 16; + dw |= (R300_SIGNED | + (inputs[tab[i + 1]] << R300_DST_VEC_LOC_SHIFT) | + (attribptr[tab[i + 1]]->size - 1)) << R300_DATA_TYPE_1_SHIFT; if (i + 2 == nr) { - dw |= (R300_VAP_INPUT_ROUTE_END << 16); + dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; } } dst[i >> 1] = dw; @@ -234,10 +238,10 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) { - return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) | - (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) | - (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) | - (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT); + return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT); } GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) @@ -245,9 +249,13 @@ GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) GLuint i, dw; for (i = 0; i < nr; i += 2) { - dw = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE; + dw = (r300VAPInputRoute1Swizzle(swizzle[i]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT; if (i + 1 < nr) { - dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16; + dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; } dst[i >> 1] = dw; } diff --git 
a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 14258324bd..0fef1c61a7 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -204,11 +204,15 @@ static void r300EmitClearState(GLcontext * ctx) * these registers, as well as the actual values used for rendering. */ R300_STATECHANGE(r300, vir[0]); - reg_start(R300_VAP_INPUT_ROUTE_0_0, 0); + reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); if (!has_tcl) - e32(0x22030003); + /*e32(0x22030003);*/ + e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); else - e32(0x21030003); + /*e32(0x21030003);*/ + e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); /* disable fog */ R300_STATECHANGE(r300, fogs); @@ -216,8 +220,19 @@ static void r300EmitClearState(GLcontext * ctx) e32(0x0); R300_STATECHANGE(r300, vir[1]); - reg_start(R300_VAP_INPUT_ROUTE_1_0, 0); - e32(0xF688F688); + reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); + e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE0_SHIFT) | + (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE1_SHIFT))); /* 
R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ R300_STATECHANGE(r300, vic); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index fee21dae67..f65aac3ca4 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -218,27 +218,31 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Always set COMPONENTS_4 in immediate mode. */ -#define R300_VAP_INPUT_ROUTE_0_0 0x2150 -# define R300_INPUT_ROUTE_COMPONENTS_1 (0 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_2 (1 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_3 (2 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_4 (3 << 0) -# define R300_INPUT_ROUTE_COMPONENTS_RGBA (4 << 0) /* GUESS */ -# define R300_VAP_INPUT_ROUTE_IDX_SHIFT 8 -# define R300_VAP_INPUT_ROUTE_IDX_MASK (31 << 8) /* GUESS */ -# define R300_VAP_INPUT_ROUTE_END (1 << 13) -# define R300_INPUT_ROUTE_IMMEDIATE_MODE (0 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_FLOAT (1 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_UNSIGNED_BYTE (2 << 14) /* GUESS */ -# define R300_INPUT_ROUTE_FLOAT_COLOR (3 << 14) /* GUESS */ -#define R300_VAP_INPUT_ROUTE_0_1 0x2154 -#define R300_VAP_INPUT_ROUTE_0_2 0x2158 -#define R300_VAP_INPUT_ROUTE_0_3 0x215C -#define R300_VAP_INPUT_ROUTE_0_4 0x2160 -#define R300_VAP_INPUT_ROUTE_0_5 0x2164 -#define R300_VAP_INPUT_ROUTE_0_6 0x2168 -#define R300_VAP_INPUT_ROUTE_0_7 0x216C - +#define R300_VAP_PROG_STREAM_CNTL_0 0x2150 +# define R300_DATA_TYPE_0_SHIFT 0 +# define R300_DATA_TYPE_FLOAT_1 0 +# define R300_DATA_TYPE_FLOAT_2 1 +# define R300_DATA_TYPE_FLOAT_3 2 +# define R300_DATA_TYPE_FLOAT_4 3 +# define R300_DATA_TYPE_BYTE 4 +# define R300_DATA_TYPE_D3DCOLOR 5 +# define R300_DATA_TYPE_SHORT_2 6 +# define R300_DATA_TYPE_SHORT_4 7 +# define R300_DATA_TYPE_VECTOR_3_TTT 8 +# define R300_DATA_TYPE_VECTOR_3_EET 9 +# define R300_SKIP_DWORDS_SHIFT 4 +# define R300_DST_VEC_LOC_SHIFT 8 +# define R300_LAST_VEC (1 << 13) +# define R300_SIGNED (1 << 14) +# define R300_NORMALIZE (1 << 15) +# define 
R300_DATA_TYPE_1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_1 0x2154 +#define R300_VAP_PROG_STREAM_CNTL_2 0x2158 +#define R300_VAP_PROG_STREAM_CNTL_3 0x215C +#define R300_VAP_PROG_STREAM_CNTL_4 0x2160 +#define R300_VAP_PROG_STREAM_CNTL_5 0x2164 +#define R300_VAP_PROG_STREAM_CNTL_6 0x2168 +#define R300_VAP_PROG_STREAM_CNTL_7 0x216C /* gap */ /* Notes: @@ -276,26 +280,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * mode, the swizzling pattern is e.g. used to set zw components in texture * coordinates with only tweo components. */ -#define R300_VAP_INPUT_ROUTE_1_0 0x21E0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 +# define R300_SWIZZLE0_SHIFT 0 +# define R300_SWIZZLE_SELECT_X_SHIFT 0 +# define R300_SWIZZLE_SELECT_Y_SHIFT 3 +# define R300_SWIZZLE_SELECT_Z_SHIFT 6 +# define R300_SWIZZLE_SELECT_W_SHIFT 9 + +# define R300_SWIZZLE_SELECT_X 0 +# define R300_SWIZZLE_SELECT_Y 1 +# define R300_SWIZZLE_SELECT_Z 2 +# define R300_SWIZZLE_SELECT_W 3 +# define R300_SWIZZLE_SELECT_FP_ZERO 4 +# define R300_SWIZZLE_SELECT_FP_ONE 5 +/* alternate forms for r300_emit.c */ # define R300_INPUT_ROUTE_SELECT_X 0 # define R300_INPUT_ROUTE_SELECT_Y 1 # define R300_INPUT_ROUTE_SELECT_Z 2 # define R300_INPUT_ROUTE_SELECT_W 3 # define R300_INPUT_ROUTE_SELECT_ZERO 4 # define R300_INPUT_ROUTE_SELECT_ONE 5 -# define R300_INPUT_ROUTE_SELECT_MASK 7 -# define R300_INPUT_ROUTE_X_SHIFT 0 -# define R300_INPUT_ROUTE_Y_SHIFT 3 -# define R300_INPUT_ROUTE_Z_SHIFT 6 -# define R300_INPUT_ROUTE_W_SHIFT 9 -# define R300_INPUT_ROUTE_ENABLE (15 << 12) -#define R300_VAP_INPUT_ROUTE_1_1 0x21E4 -#define R300_VAP_INPUT_ROUTE_1_2 0x21E8 -#define R300_VAP_INPUT_ROUTE_1_3 0x21EC -#define R300_VAP_INPUT_ROUTE_1_4 0x21F0 -#define R300_VAP_INPUT_ROUTE_1_5 0x21F4 -#define R300_VAP_INPUT_ROUTE_1_6 0x21F8 -#define R300_VAP_INPUT_ROUTE_1_7 0x21FC + +# define R300_WRITE_ENA_SHIFT 12 +# define R300_WRITE_ENA_X 1 +# define R300_WRITE_ENA_Y 2 +# define R300_WRITE_ENA_Z 4 +# define R300_WRITE_ENA_W 8 +# define 
R300_SWIZZLE1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec +#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc /* END: Vertex data assembly */ diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index a732bdb559..699499d7cc 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -85,21 +85,26 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, GLuint i, dw; /* type, inputs, stop bit, size */ - for (i = 0; i + 1 < nr; i += 2) { - dw = (inputs[tab[i]] << 8) | 0x3; - dw |= ((inputs[tab[i + 1]] << 8) | 0x3) << 16; - if (i + 2 == nr) { - dw |= (R300_VAP_INPUT_ROUTE_END << 16); + for (i = 0; i < nr; i += 2) { + /* make sure input is valid, would lockup the gpu */ + assert(inputs[tab[i]] != -1); + dw = (R300_SIGNED | + (inputs[tab[i]] << R300_DST_VEC_LOC_SHIFT) | + (attribptr[tab[i]]->size - 1)) << R300_DATA_TYPE_0_SHIFT; + if (i + 1 == nr) { + dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; + } else { + assert(inputs[tab[i + 1]] != -1); + dw |= (R300_SIGNED | + (inputs[tab[i + 1]] << R300_DST_VEC_LOC_SHIFT) | + (attribptr[tab[i + 1]]->size - 1)) << R300_DATA_TYPE_1_SHIFT; + if (i + 2 == nr) { + dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; + } } dst[i >> 1] = dw; } - if (nr & 1) { - dw = (inputs[tab[nr - 1]] << 8) | 0x3; - dw |= R300_VAP_INPUT_ROUTE_END; - dst[nr >> 1] = dw; - } - return (nr + 1) >> 1; } -- cgit v1.2.3 From d5448ceb956d1884bf7aac4667b79a0905fa4166 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 12:48:39 -0400 Subject: R300: cleanup VAP_CLIP_CNTL --- src/mesa/drivers/dri/r300/r300_ioctl.c | 2 +- src/mesa/drivers/dri/r300/r300_reg.h | 18 +++++++++++++++--- 
src/mesa/drivers/dri/r300/r300_state.c | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 0fef1c61a7..03449de934 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -283,7 +283,7 @@ static void r300EmitClearState(GLcontext * ctx) if (has_tcl) { R300_STATECHANGE(r300, vap_clip_cntl); reg_start(R300_VAP_CLIP_CNTL, 0); - e32(R300_221C_CLEAR); + e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); } R300_STATECHANGE(r300, ps); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f65aac3ca4..7b71eeab93 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -398,9 +398,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view */ #define R300_VAP_CLIP_CNTL 0x221C -# define R300_221C_NORMAL 0x00000000 -# define R300_221C_CLEAR 0x0001C000 -# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_1 (1 << 1) +# define R300_VAP_UCP_ENABLE_2 (1 << 2) +# define R300_VAP_UCP_ENABLE_3 (1 << 3) +# define R300_VAP_UCP_ENABLE_4 (1 << 4) +# define R300_VAP_UCP_ENABLE_5 (1 << 5) +# define R300_PS_UCP_MODE_DIST_COP (0 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14) +# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14) +# define R300_CLIP_DISABLE (1 << 16) +# define R300_UCP_CULL_ONLY_ENABLE (1 << 17) +# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18) +# define R500_COLOR2_IS_TEXTURE (1 << 20) +# define R500_COLOR3_IS_TEXTURE (1 << 21) /* These seem to be per-pixel and per-vertex X and Y clipping planes. The first * plane is per-pixel and the second plane is per-vertex. 
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 57ff9e9a73..27615fd568 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2035,7 +2035,7 @@ static void r300ResetHwState(r300ContextPtr r300) /* XXX: Other families? */ if (has_tcl) { - r300->hw.vap_clip_cntl.cmd[1] = R300_221C_NORMAL; + r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP; r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */ r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */ -- cgit v1.2.3 From a94cd0d77407a8cf5c151e1f5135eba5d11fdb2b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 13:03:48 -0400 Subject: R300: fix VAP_OUTPUT_VTX_FMT_1 defines --- src/mesa/drivers/dri/r300/r300_reg.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 7b71eeab93..81b5c3faf3 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -132,11 +132,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 # define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 -# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT (1<<0) -# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT (1<<1) -# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS (1<<2) -# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS (1<<3) -# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS (1<<4) +# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1 +# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 +# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 #define R300_SE_VTE_CNTL 0x20b0 # define R300_VPORT_X_SCALE_ENA 0x00000001 -- cgit v1.2.3 From 9e7ae34da5aeb9a38c3f4280f6d9648faad48df5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 13:56:56 -0400 Subject: R3/4/5: fix TCL on r5xx, cleanup PVS code --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 30 ++++++----- src/mesa/drivers/dri/r300/r300_ioctl.c | 4 +- src/mesa/drivers/dri/r300/r300_reg.h | 77 ++++++++++++++++------------- src/mesa/drivers/dri/r300/r300_state.c | 4 +- src/mesa/drivers/dri/radeon/radeon_screen.c | 10 ++-- 5 files changed, 72 insertions(+), 53 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index d0ce401ed6..5d6be48f76 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -333,7 +333,7 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vir[1].cmd[R300_VIR_CMD_0] = cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); - r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_INPUT_CNTL_0, 2); + r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2); ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); r300->hw.vap_psc_sgn_norm_cntl.cmd[0] 
= cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); @@ -481,27 +481,35 @@ void r300InitCmdBuf(r300ContextPtr r300) int i; ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); r300->hw.vpi.cmd[R300_VPI_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_PROGRAM, 0); + cmdvpu(R300_PVS_CODE_START, 0); - ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[R300_VPP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_PARAMETERS, 0); + if (is_r500) { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R500_PVS_CONST_START, 0); - ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[R300_VPS_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_POINTSIZE, 1); + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1); - if (is_r500) { for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R500_PVS_UPLOAD_CLIP_PLANE0+i, 1); + cmdvpu(R500_PVS_UCP_START + i, 1); } } else { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[R300_VPP_CMD_0] = + cmdvpu(R300_PVS_CONST_START, 0); + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[R300_VPS_CMD_0] = + cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1); + for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R300_PVS_UPLOAD_CLIP_PLANE0+i, 1); + cmdvpu(R300_PVS_UCP_START + i, 1); } } } diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 03449de934..530c3711d9 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -236,8 +236,8 @@ static void r300EmitClearState(GLcontext * ctx) /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ R300_STATECHANGE(r300, vic); - reg_start(R300_VAP_INPUT_CNTL_0, 1); - e32(R300_INPUT_CNTL_0_COLOR); + reg_start(R300_VAP_VTX_STATE_CNTL, 1); + e32((R300_SEL_USER_COLOR_0 << 
R300_COLOR_0_ASSEMBLY_SHIFT)); e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); R300_STATECHANGE(r300, vte); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 81b5c3faf3..a6719d6553 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -139,17 +139,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 #define R300_SE_VTE_CNTL 0x20b0 -# define R300_VPORT_X_SCALE_ENA 0x00000001 -# define R300_VPORT_X_OFFSET_ENA 0x00000002 -# define R300_VPORT_Y_SCALE_ENA 0x00000004 -# define R300_VPORT_Y_OFFSET_ENA 0x00000008 -# define R300_VPORT_Z_SCALE_ENA 0x00000010 -# define R300_VPORT_Z_OFFSET_ENA 0x00000020 -# define R300_VTX_XY_FMT 0x00000100 -# define R300_VTX_Z_FMT 0x00000200 -# define R300_VTX_W0_FMT 0x00000400 -# define R300_VTX_W0_NORMALIZE 0x00000800 -# define R300_VTX_ST_DENORMALIZED 0x00001000 +# define R300_VPORT_X_SCALE_ENA (1 << 0) +# define R300_VPORT_X_OFFSET_ENA (1 << 1) +# define R300_VPORT_Y_SCALE_ENA (1 << 2) +# define R300_VPORT_Y_OFFSET_ENA (1 << 3) +# define R300_VPORT_Z_SCALE_ENA (1 << 4) +# define R300_VPORT_Z_OFFSET_ENA (1 << 5) +# define R300_VTX_XY_FMT (1 << 8) +# define R300_VTX_Z_FMT (1 << 9) +# define R300_VTX_W0_FMT (1 << 10) +# define R300_SERIAL_PROC_ENA (1 << 11) /* BEGIN: Vertex data assembly - lots of uncertainties */ @@ -250,9 +249,26 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * if vertex program uses only position, fglrx will set normal, too * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. 
*/ -#define R300_VAP_INPUT_CNTL_0 0x2180 -# define R300_INPUT_CNTL_0_COLOR 0x00000001 -#define R300_VAP_INPUT_CNTL_1 0x2184 +#define R300_VAP_VTX_STATE_CNTL 0x2180 +# define R300_COLOR_0_ASSEMBLY_SHIFT 0 +# define R300_SEL_COLOR 0 +# define R300_SEL_USER_COLOR_0 1 +# define R300_SEL_USER_COLOR_1 2 +# define R300_COLOR_1_ASSEMBLY_SHIFT 2 +# define R300_COLOR_2_ASSEMBLY_SHIFT 4 +# define R300_COLOR_3_ASSEMBLY_SHIFT 6 +# define R300_COLOR_4_ASSEMBLY_SHIFT 8 +# define R300_COLOR_5_ASSEMBLY_SHIFT 10 +# define R300_COLOR_6_ASSEMBLY_SHIFT 12 +# define R300_COLOR_7_ASSEMBLY_SHIFT 14 +# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16) + +/* + * Each bit in this field applies to the corresponding vector in the VSM + * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit + * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. + */ +#define R300_VAP_VSM_VTX_ASSM 0x2184 # define R300_INPUT_CNTL_POS 0x00000001 # define R300_INPUT_CNTL_NORMAL 0x00000002 # define R300_INPUT_CNTL_COLOR 0x00000004 @@ -345,25 +361,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Multiple vertex programs and parameter sets can be loaded at once, * which could explain the size discrepancy. 
*/ -#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 -# define R300_PVS_UPLOAD_PROGRAM 0x00000000 -/* gap */ -# define R300_PVS_UPLOAD_PARAMETERS 0x00000200 -/* gap */ -# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400 -# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401 -# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402 -# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403 -# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404 -# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405 -# define R300_PVS_UPLOAD_POINTSIZE 0x00000406 - -# define R500_PVS_UPLOAD_CLIP_PLANE0 0x00000600 -# define R500_PVS_UPLOAD_CLIP_PLANE1 0x00000601 -# define R500_PVS_UPLOAD_CLIP_PLANE2 0x00000602 -# define R500_PVS_UPLOAD_CLIP_PLANE3 0x00000603 -# define R500_PVS_UPLOAD_CLIP_PLANE4 0x00000604 -# define R500_PVS_UPLOAD_CLIP_PLANE5 0x00000605 +#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 +# define R300_PVS_CODE_START 0 +# define R300_MAX_PVS_CODE_LINES 256 +# define R500_MAX_PVS_CODE_LINES 1024 +# define R300_PVS_CONST_START 512 +# define R500_PVS_CONST_START 1024 +# define R300_MAX_PVS_CONST_VECS 256 +# define R500_MAX_PVS_CONST_VECS 1024 +# define R300_PVS_UCP_START 1024 +# define R500_PVS_UCP_START 1536 +# define R300_POINT_VPORT_SCALE_OFFSET 1030 +# define R500_POINT_VPORT_SCALE_OFFSET 1542 +# define R300_POINT_GEN_TEX_OFFSET 1031 +# define R500_POINT_GEN_TEX_OFFSET 1543 /* * These are obsolete defines from r300_context.h, but they might give some diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 27615fd568..89a0827b2f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1802,7 +1802,7 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) prog->program.length = program_end; - r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); inst_count = (prog->program.length / 4) - 1; @@ -1837,7 +1837,7 @@ static void 
r300SetupRealVertexProgram(r300ContextPtr rmesa) bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, &(prog->program)); + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); inst_count = (prog->program.length / 4) - 1; R300_STATECHANGE(rmesa, pvs); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 2f57d289fe..0f716a0b70 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -711,7 +711,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R520_710E: case PCI_CHIP_R520_710F: screen->chip_family = CHIP_FAMILY_R520; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -754,7 +754,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV515_7210: case PCI_CHIP_RV515_7211: screen->chip_family = CHIP_FAMILY_RV515; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV515 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -775,7 +775,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV530_71DA: case PCI_CHIP_RV530_71DE: screen->chip_family = CHIP_FAMILY_RV530; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV530 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -795,7 +795,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R580_724F: case PCI_CHIP_R580_7284: screen->chip_family = CHIP_FAMILY_R580; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, R580 detected, 3D HAHAHAHAHA!!.\n"); break; @@ -812,7 +812,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV560_7293: case PCI_CHIP_RV560_7297: 
screen->chip_family = CHIP_FAMILY_RV560; - //screen->chip_flags = RADEON_CHIPSET_TCL; + screen->chip_flags = RADEON_CHIPSET_TCL; fprintf(stderr, "Warning, RV560 detected, 3D HAHAHAHAHA!!.\n"); break; -- cgit v1.2.3 From ae09292a6e659eabc566a0fc2b1f6fa5e5fd8b36 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 May 2008 15:20:28 -0400 Subject: R300: fix rebase conflicts --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 2 +- src/mesa/drivers/dri/r300/r300_ioctl.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 5d6be48f76..3f9d9da399 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -402,7 +402,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_ROUTE_0, 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1); } ALLOC_STATE(sc_hyperz, always, 3, 0); r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 530c3711d9..d38ee9003d 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -305,8 +305,8 @@ static void r300EmitClearState(GLcontext * ctx) e32(0x0); R300_STATECHANGE(r300, rr); - reg_start(R300_RS_ROUTE_0, 0); - e32(R300_RS_ROUTE_0_COLOR); + reg_start(R300_RS_INST_0, 0); + e32(R500_RS_INST_COL_CN_WRITE); } else { R300_STATECHANGE(r300, ri); -- cgit v1.2.3 From 92a0e93ac33ceb64a4e7e930223950d4529cef37 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 2 May 2008 15:33:02 -0700 Subject: Make radeon stuff build again. Yet more evidence that I am incompetent with git. 
--- src/mesa/drivers/dri/radeon/radeon_chipset.h | 4 ---- src/mesa/drivers/dri/radeon/radeon_screen.c | 3 --- 2 files changed, 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index f9e459de91..5ea8cff1bf 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -147,12 +147,8 @@ #define PCI_CHIP_RV410_5E4D 0x5E4D #define PCI_CHIP_RV410_5E4F 0x5E4F -<<<<<<< HEAD:src/mesa/drivers/dri/radeon/radeon_chipset.h #define PCI_CHIP_RV530_71C4 0x71C4 #define PCI_CHIP_RV530_71D5 0x71D5 -======= -#define PCI_CHIP_RV530_71C4 0x71C4 ->>>>>>> eff6f1203222a776c5990b5d104b57a7f69b9aab:src/mesa/drivers/dri/radeon/radeon_chipset.h #define PCI_CHIP_RS350_7834 0x7834 #define PCI_CHIP_RS350_7835 0x7835 diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 7d6b4a7577..661ffd3a3d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -691,14 +691,11 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; -<<<<<<< HEAD:src/mesa/drivers/dri/radeon/radeon_screen.c case PCI_CHIP_RV530_71D5: screen->chip_family = CHIP_FAMILY_R520; fprintf(stderr, "Warning, RV530 detected, all your base belong to us\n"); break; -======= ->>>>>>> eff6f1203222a776c5990b5d104b57a7f69b9aab:src/mesa/drivers/dri/radeon/radeon_screen.c default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", dri_priv->deviceID); -- cgit v1.2.3 From 026ef8111a94f6449dfa5e5cc0ae91fca4e68c0c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 3 May 2008 21:31:22 +1000 Subject: r300: add R300_NO_TCL to allow testing of non-tcl on tcl cards --- src/mesa/drivers/dri/radeon/radeon_screen.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c 
b/src/mesa/drivers/dri/radeon/radeon_screen.c index 0f716a0b70..d840e22742 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -827,6 +827,9 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) return NULL; } + if (getenv("R300_NO_TCL")) + screen->chip_flags &= ~RADEON_CHIPSET_TCL; + if (screen->chip_family <= CHIP_FAMILY_RS200) screen->chip_flags |= RADEON_CLASS_R100; else if (screen->chip_family <= CHIP_FAMILY_RV280) -- cgit v1.2.3 From cea4f1464b00e025859b3b5dc415ce135afebd92 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 3 May 2008 21:31:47 +1000 Subject: r300: recombine the vap input route 0 code and clean This gets non-tcl cards working again on this branch.. at least texrect and glxgears --- src/mesa/drivers/dri/r300/r300_emit.c | 12 +++---- src/mesa/drivers/dri/r300/r300_emit.h | 2 ++ src/mesa/drivers/dri/r300/r300_swtcl.c | 57 ++++++++++------------------------ 3 files changed, 24 insertions(+), 47 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 0eeb8bf98a..dc08b642c5 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -207,7 +207,10 @@ static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, } } -static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, +#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \ + (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT) + +GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, int *inputs, GLint * tab, GLuint nr) { GLuint i, dw; @@ -216,16 +219,13 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, for (i = 0; i < nr; i += 2) { /* make sure input is valid, would lockup the gpu */ assert(inputs[tab[i]] != -1); - dw = (R300_SIGNED | - (inputs[tab[i]] << R300_DST_VEC_LOC_SHIFT) | - (attribptr[tab[i]]->size - 1)) << R300_DATA_TYPE_0_SHIFT; + 
dw = (R300_SIGNED | DW_SIZE(i)); if (i + 1 == nr) { dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; } else { assert(inputs[tab[i + 1]] != -1); dw |= (R300_SIGNED | - (inputs[tab[i + 1]] << R300_DST_VEC_LOC_SHIFT) | - (attribptr[tab[i + 1]]->size - 1)) << R300_DATA_TYPE_1_SHIFT; + DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT; if (i + 2 == nr) { dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; } diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 51302301f7..e6a6df8c4c 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -257,6 +257,8 @@ extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); extern void r300EmitCacheFlush(r300ContextPtr rmesa); +extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr); extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 699499d7cc..15e66317a9 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -78,36 +78,6 @@ do { \ rmesa->swtcl.vertex_attr_count++; \ } while (0) -/* this differs from the VIR0 in emit.c - TODO merge them using another option */ -static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, - int *inputs, GLint * tab, GLuint nr) -{ - GLuint i, dw; - - /* type, inputs, stop bit, size */ - for (i = 0; i < nr; i += 2) { - /* make sure input is valid, would lockup the gpu */ - assert(inputs[tab[i]] != -1); - dw = (R300_SIGNED | - (inputs[tab[i]] << R300_DST_VEC_LOC_SHIFT) | - (attribptr[tab[i]]->size - 1)) << R300_DATA_TYPE_0_SHIFT; - if (i + 1 == nr) { - dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; - } else { - assert(inputs[tab[i + 1]] != -1); - 
dw |= (R300_SIGNED | - (inputs[tab[i + 1]] << R300_DST_VEC_LOC_SHIFT) | - (attribptr[tab[i + 1]]->size - 1)) << R300_DATA_TYPE_1_SHIFT; - if (i + 2 == nr) { - dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; - } - } - dst[i >> 1] = dw; - } - - return (nr + 1) >> 1; -} - static void r300SetVertexFormat( GLcontext *ctx ) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); @@ -123,6 +93,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) GLint tab[VERT_ATTRIB_MAX]; int swizzle[VERT_ATTRIB_MAX][4]; GLuint i, nr; + GLuint sz, vap_fmt_1 = 0; DECLARE_RENDERINPUTS(render_inputs_bitset); RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); @@ -145,14 +116,15 @@ static void r300SetVertexFormat( GLcontext *ctx ) * build up a hardware vertex. */ if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) { - vap_vte_cntl |= R300_VTX_W0_FMT; + sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; InputsRead |= 1 << VERT_ATTRIB_POS; OutputsWritten |= 1 << VERT_RESULT_HPOS; - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); - } else + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); + offset = sz; + } else { + offset = 4; EMIT_PAD(4 * sizeof(float)); - - offset = 4; + } if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); @@ -161,18 +133,19 @@ static void r300SetVertexFormat( GLcontext *ctx ) } if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { + sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size; rmesa->swtcl.coloroffset = offset; InputsRead |= 1 << VERT_ATTRIB_COLOR0; OutputsWritten |= 1 << VERT_RESULT_COL0; - EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4F ); + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 ); + offset += sz; } - offset += 4; - rmesa->swtcl.specoffset = 0; if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size; rmesa->swtcl.specoffset = offset; - EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4F ); + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 ); InputsRead |= 1 << 
VERT_ATTRIB_COLOR1; OutputsWritten |= 1 << VERT_RESULT_COL1; } @@ -182,9 +155,11 @@ static void r300SetVertexFormat( GLcontext *ctx ) for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + sz = VB->TexCoordPtr[i]->size; InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_4F ); + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 ); + vap_fmt_1 |= sz << (3 * i); } } } @@ -243,7 +218,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) R300_STATECHANGE(rmesa, vof); rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; rmesa->swtcl.vertex_size = _tnl_install_attrs( ctx, -- cgit v1.2.3 From 37924cf175b5f61ca85dab685ec5d7879519ebc4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 3 May 2008 22:08:11 +1000 Subject: r300: update r300 rs unit for swtcl need to fix r500 most likely --- src/mesa/drivers/dri/r300/r300_state.c | 74 ++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 22 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 89a0827b2f..4fc50b8494 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1491,21 +1491,17 @@ static void r300SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); /* I'm still unsure if these are needed */ - GLuint interp_magic[8] = { - 0x00, - R300_RS_COL_PTR(1), - R300_RS_COL_PTR(2), - R300_RS_COL_PTR(3), - 0x00, - 0x00, - 0x00, - 0x00 - }; + GLuint interp_col[8]; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; - int in_texcoords, col_interp_nr; - int i; + int col_interp_nr; + 
int rs_tex_count = 0, rs_col_count = 0; + int i, count; + + memset(interp_col, 0, 8); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; @@ -1523,7 +1519,7 @@ static void r300SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + fp_reg = col_interp_nr = high_rr = 0; r300->hw.rr.cmd[R300_RR_INST_1] = 0; @@ -1541,12 +1537,50 @@ static void r300SetupRSUnit(GLcontext * ctx) InputsRead &= ~FRAG_BIT_WPOS; } + if (InputsRead & FRAG_BIT_COL0) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + interp_col[0] |= R300_RS_COL_PTR(rs_col_count); + if (count == 3) + interp_col[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + else + interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + + if (InputsRead & FRAG_BIT_COL1) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 3) + interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + interp_col[1] |= R300_RS_COL_PTR(1); + rs_col_count += count; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - | interp_magic[i]; + int swiz; + + /* with TCL we always seem to route 4 components */ + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count; + switch(count) { + case 4:swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; + case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; + default: + case 1: + case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; + }; + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz; + r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; 
if (InputsRead & (FRAG_BIT_TEX0 << i)) { + + rs_tex_count += count; + //assert(r300->state.texture.tc_count != 0); r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i /* source INTERP */ | (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT); @@ -1560,10 +1594,6 @@ static void r300SetupRSUnit(GLcontext * ctx) WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); } } - /* Need to count all coords enabled at vof */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - in_texcoords++; - } } if (InputsRead & FRAG_BIT_COL0) { @@ -1589,18 +1619,18 @@ static void r300SetupRSUnit(GLcontext * ctx) } /* Need at least one. This might still lock as the values are undefined... */ - if (in_texcoords == 0 && col_interp_nr == 0) { + if (rs_tex_count == 0 && col_interp_nr == 0) { r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT); col_interp_nr++; } - r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT) + r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_interp_nr << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; assert(high_rr >= 0); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); - r300->hw.rc.cmd[2] = 0xC0 | high_rr; + r300->hw.rc.cmd[2] = high_rr; if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); -- cgit v1.2.3 From d3eb5df259698c6f4080f3e988fbdaaba9698636 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 4 May 2008 04:13:56 +1000 Subject: r300: setup vte according to inputs --- src/mesa/drivers/dri/r300/r300_swtcl.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index 15e66317a9..ed0a080cde 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -100,13 +100,19 
@@ static void r300SetVertexFormat( GLcontext *ctx ) RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); + vte = rmesa->hw.vte.cmd[1]; + vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); /* Important: */ if ( VB->NdcPtr != NULL ) { + fprintf(stderr,"NDC NDC\n"); VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; } else { + fprintf(stderr,"CLIPPY \n"); VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + vte |= R300_VTX_W0_FMT; } assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); @@ -230,7 +236,7 @@ static void r300SetVertexFormat( GLcontext *ctx ) RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); - vte = rmesa->hw.vte.cmd[1]; + R300_STATECHANGE(rmesa, vte); rmesa->hw.vte.cmd[1] = vte; rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size; -- cgit v1.2.3 From a03e261193bdee1ae1cf3e12af3455cbf085fcc7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 4 May 2008 04:17:15 +1000 Subject: r300: remove debugging code --- src/mesa/drivers/dri/r300/r300_swtcl.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index ed0a080cde..a41fa1023a 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -105,12 +105,10 @@ static void r300SetVertexFormat( GLcontext *ctx ) /* Important: */ if ( VB->NdcPtr != NULL ) { - fprintf(stderr,"NDC NDC\n"); VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; } else { - fprintf(stderr,"CLIPPY \n"); VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; vte |= R300_VTX_W0_FMT; } -- cgit v1.2.3 From 4ef195a36946c8d587d129abd54683c73eecc304 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 3 May 2008 09:08:07 -0700 Subject: r5xx: Fix dumb shader. For some reason, FGLRX doesn't actually set R500_US_INST_TEX. 
Let us not make that same mistake. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 52 +++++++++++++++++++------------ 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f94b244232..c5369b1dde 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -67,6 +67,8 @@ /* "Register" flags */ #define REG_CONSTANT (1 << 8) +#define REG_SRC_REL (1 << 9) +#define REG_DEST_REL (1 << 7) /* Swizzle tools */ #define R500_SWIZZLE_ZERO 4 @@ -187,26 +189,36 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist static void dumb_shader(struct r500_fragment_program *fp) { - /* R500_INST_TYPE_TEX? */ - fp->inst[0].inst0 = 0x7808; - fp->inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A; - fp->inst[0].inst3 = R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R; + fp->inst[0].inst0 = R500_INST_TYPE_TEX + | R500_INST_TEX_SEM_WAIT + | R500_INST_RGB_WMASK_R + | R500_INST_RGB_WMASK_G + | R500_INST_RGB_WMASK_B + | R500_INST_ALPHA_WMASK + | R500_INST_RGB_CLAMP + | R500_INST_ALPHA_CLAMP; + fp->inst[0].inst1 = R500_TEX_ID(0) + | R500_TEX_INST_LD + | R500_TEX_SEM_ACQUIRE + | R500_TEX_IGNORE_UNCOVERED; + fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) + | R500_TEX_SRC_S_SWIZ_R + | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_DST_ADDR(0) + | R500_TEX_DST_R_SWIZ_R + | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B + | R500_TEX_DST_A_SWIZ_A; + fp->inst[0].inst3 = R500_DX_ADDR(0) + | 
R500_DX_S_SWIZ_R + | R500_DX_T_SWIZ_R + | R500_DX_R_SWIZ_R + | R500_DX_Q_SWIZ_R + | R500_DY_ADDR(0) + | R500_DY_S_SWIZ_R + | R500_DY_T_SWIZ_R + | R500_DY_R_SWIZ_R + | R500_DY_Q_SWIZ_R; fp->inst[0].inst4 = 0x0; fp->inst[0].inst5 = 0x0; -- cgit v1.2.3 From b79a769b2d878d6e8e55f675209ffa7f3f2a6f68 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 3 May 2008 09:09:57 -0700 Subject: r5xx: Fix for loops. Thanks to dli in IRC for pointing this out. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index c5369b1dde..b08beb617f 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -93,7 +93,8 @@ static inline GLuint make_rgb_swizzle(struct prog_src_register src) { GLuint swiz = 0x0; GLuint temp; /* This could be optimized, but it should be plenty fast already. */ - for (int i = 0; i < 3; i++) { + int i; + for (i = 0; i < 3; i++) { temp = (src.Swizzle >> i*3) & 0x7; /* Fix SWIZZLE_ONE */ if (temp == 5) temp++; @@ -111,7 +112,8 @@ static inline GLuint make_alpha_swizzle(struct prog_src_register src) { static inline GLuint make_strq_swizzle(struct prog_src_register src) { GLuint swiz = 0x0; GLuint temp = src.Swizzle; - for (int i = 0; i < 4; i++) { + int i; + for (i = 0; i < 4; i++) { swiz += (temp & 0x3) << i*2; temp >>= 3; } -- cgit v1.2.3 From 63503f284863530d628f26bea27f2390aca518e5 Mon Sep 17 00:00:00 2001 From: Markus Amsler Date: Sat, 3 May 2008 12:55:45 -0400 Subject: r300: Set correct VAP_CNTL per vertex program. adapted from Markus' patch on bug 15386 with updates for non-TCL and R500. 
--- src/mesa/drivers/dri/r300/r300_ioctl.c | 32 +++++++++- src/mesa/drivers/dri/r300/r300_state.c | 111 ++++++++++++++++++++++----------- 2 files changed, 105 insertions(+), 38 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index d38ee9003d..ffcde7ff35 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -187,6 +187,7 @@ static void r300EmitClearState(GLcontext * ctx) drm_radeon_cmd_header_t *cmd = NULL; int has_tcl = 1; int is_r500 = 0; + GLuint vap_cntl; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; @@ -206,11 +207,9 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, vir[0]); reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); if (!has_tcl) - /*e32(0x22030003);*/ e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); else - /*e32(0x21030003);*/ e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); @@ -425,6 +424,35 @@ static void r300EmitClearState(GLcontext * ctx) R500_ALU_RGBA_A_SWIZ_0); } + if (has_tcl) { + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vap_cntl |= R500_TCL_STATE_OPTIMIZATION; + } else + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == 
CHIP_FAMILY_RV560)) + vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) + vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); + + R300_STATECHANGE(rmesa, vap_cntl); + reg_start(R300_VAP_CNTL, 0); + e32(vap_cntl); + if (has_tcl) { R300_STATECHANGE(r300, pvs); reg_start(R300_VAP_PVS_CNTL_1, 2); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 21b5ac969b..10002e3c4f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1811,10 +1811,70 @@ static inline void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, } } +#define MIN3(a,b,c) ((a)<(b) ? MIN2(a, c): MIN2(b, c)) + +static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count) +{ + int vtx_mem_size; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; + int pvs_num_slots; + int pvs_num_cntrls; + + /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS. 
+ * See r500 docs 6.5.2 */ + reg_start(R300_VAP_PVS_WAITIDLE, 0); + e32(0x00000000); + + /* avoid division by zero */ + if (input_count == 0) input_count = 1; + if (output_count == 0) output_count = 1; + if (temp_count == 0) temp_count = 1; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vtx_mem_size = 128; + else + vtx_mem_size = 72; + + pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count); + pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count); + + R300_STATECHANGE(rmesa, vap_cntl); + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->hw.vap_cntl.cmd[1] = + (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | + (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION; + } else + /* not sure about non-tcl */ + rmesa->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) + rmesa->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) + rmesa->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + rmesa->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + rmesa->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT); + +} + static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) { struct r300_vertex_shader_state 
*prog = &(rmesa->state.vertex_shader); GLuint o_reg = 0; + GLuint i_reg = 0; int i; int inst_count = 0; int param_count = 0; @@ -1827,6 +1887,7 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); program_end += 4; + i_reg++; } } @@ -1836,6 +1897,8 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) &(prog->program)); inst_count = (prog->program.length / 4) - 1; + r300VapCntl(rmesa, i_reg, o_reg, 0); + R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | @@ -1849,6 +1912,15 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); } +static int bit_count (int x) +{ + x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U); + x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U); + x = (x >> 16) + (x & 0xffff); + x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); + return (x >> 8) + (x & 0x00ff); +} + static void r300SetupRealVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -1870,6 +1942,9 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa) r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); inst_count = (prog->program.length / 4) - 1; + r300VapCntl(rmesa, bit_count(prog->key.InputsRead), + bit_count(prog->key.OutputsWritten), prog->num_temporaries); + R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | @@ -1903,13 +1978,6 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa) 
r300SetupDefaultVertexProgram(rmesa); } - - /* FIXME: This is done for vertex shader fragments, but also needs to be - * done for vap_pvs, so I leave it as a reminder. */ -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE, 0); - e32(0x00000000); -#endif } /** @@ -2011,35 +2079,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); - /* setup the VAP */ - /* for tcl, PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted - * dynamically. PVS_NUM_FPUS is fixed based on asic - */ - if (has_tcl) { - r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (12 << R300_VF_MAX_VTX_NUM_SHIFT)); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r300->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION; - } else - r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | - (5 << R300_PVS_NUM_CNTLRS_SHIFT) | - (5 << R300_VF_MAX_VTX_NUM_SHIFT)); - - if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) - r300->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) - r300->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT); - else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) - r300->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT); - else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || - (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) - r300->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT); - else - r300->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT); - r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA -- cgit v1.2.3 From 66a5562ce2906fbf5b96d1cee18f9a31a78c4360 
Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 23:49:50 +1000 Subject: r300: fix swtcl texrect path properly. We really need to update the shader state so the texrect parameters work. This should fix compiz looking crappy on rs480 and rs690 --- src/mesa/drivers/dri/r300/r300_state.c | 3 ++- src/mesa/drivers/dri/r300/r300_swtcl.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 10002e3c4f..dbe1f6952e 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2323,10 +2323,11 @@ void r300UpdateShaders(r300ContextPtr rmesa) hw_tcl_on = future_hw_tcl_on = 0; r300ResetHwState(rmesa); + r300UpdateStateParameters(ctx, _NEW_PROGRAM); return; } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); } + r300UpdateStateParameters(ctx, _NEW_PROGRAM); } static void r300SetupPixelShader(r300ContextPtr rmesa) diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index a41fa1023a..8aebd9be3e 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -575,6 +575,7 @@ static void r300RenderStart(GLcontext *ctx) r300ChooseRenderState(ctx); r300SetVertexFormat(ctx); + r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); -- cgit v1.2.3 From 1f420b008bd4bc7b5fe7809e7f7506ef5dcb7209 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 16:41:07 +1000 Subject: r500: make sure we emit max temp atom. 
We don't appear to update max_temp_idx yet anywhere though --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 ++++ src/mesa/drivers/dri/r300/r300_context.h | 5 +++++ src/mesa/drivers/dri/r300/r300_state.c | 3 +++ 3 files changed, 12 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3f9d9da399..75f8910c3e 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -412,6 +412,10 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); if (is_r500) { + ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; + ALLOC_STATE(r500fp, r500fp, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); ALLOC_STATE(r500fp_const, r500fp_const, R300_FPP_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index bb5f5c35f0..980a26ffdd 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -354,6 +354,11 @@ struct r300_state_atom { #define R300_FP_NODE3 8 #define R300_FP_CMDSIZE 9 +#define R500_FP_CMD_0 0 +#define R500_FP_CNTL 1 +#define R500_FP_PIXSIZE 2 +#define R500_FP_CMDSIZE 3 + #define R300_FPT_CMD_0 0 #define R300_FPT_INSTR_0 1 #define R300_FPT_CMDSIZE 65 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index dbe1f6952e..b79b5e99f6 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2434,6 +2434,9 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) return; } + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx; + R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... 
*/ for (i = 0; i < fp->cs->nrslots; i++) { -- cgit v1.2.3 From 3816ae9ce835691e690d68f37ff6b01207068870 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 18:05:59 +1000 Subject: r500: make tri-param work This makes constant work which are 32-bit on r500 unlike r300. Switch MOV to using MAD no idea if we might have negative things MAX 0,-5 is likely to do the wrong thing.. --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 2 +- src/mesa/drivers/dri/r300/r300_state.c | 8 ++++---- src/mesa/drivers/dri/r300/r500_fragprog.c | 26 ++++++++++++++++---------- 3 files changed, 21 insertions(+), 15 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 75f8910c3e..806e2755c5 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -54,7 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" // Set this to 1 for extremely verbose debugging of command buffers -#define DEBUG_CMDBUF 0 +#define DEBUG_CMDBUF 1 /** * Send the current command buffer via ioctl to the hardware. 
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index b79b5e99f6..a083db9bbc 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2453,10 +2453,10 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(fp->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(fp->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(fp->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(fp->constant[i][3]); } bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b08beb617f..5d4412bb1f 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -158,6 +158,8 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe * fragments don't get loaded right otherwise! */ reg = 0x0; break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> ParameterValues[src.Index]); @@ -440,8 +442,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_MOV: src[0] = make_src(fp, fpi->SrcReg[0]); - /* We use MAX, but MIN, CND, and CMP also work. 
- * Just remember to disable the OMOD! */ + + /* changed to use MAD - not sure if we + ever have negative things which max will fail on */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); @@ -449,14 +452,17 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | R500_ALU_RGB_SEL_B_SRC0 - | R500_ALU_RGB_R_SWIZ_B_R | R500_ALU_RGB_G_SWIZ_B_G | R500_ALU_RGB_B_SWIZ_B_B - | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALU_RGB_R_SWIZ_B_1 | R500_ALU_RGB_G_SWIZ_B_1 | R500_ALU_RGB_B_SWIZ_B_1; + + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 - | R500_ALPHA_OMOD_DISABLE; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 + | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; + + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_R_SWIZ_0 | R500_ALU_RGBA_G_SWIZ_0 + | R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -593,7 +599,7 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; - fp->max_temp_idx = 0; + fp->max_temp_idx = 64; fp->node[0].alu_end = -1; fp->node[0].tex_end = -1; -- cgit v1.2.3 From 697680d687544c4495f05d5baa83659fb877477b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 18:15:40 +1000 Subject: r500: mov cleanup macros --- src/mesa/drivers/dri/r300/r500_fragprog.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 
5d4412bb1f..ac6e306d20 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -76,6 +76,7 @@ #define R500_SWIZZLE_ONE 6 #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) +#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) /* Swizzles for inst2 */ #define MAKE_SWIZ_TEX_STRQ(x) (x << 8) #define MAKE_SWIZ_TEX_RGBA(x) (x << 24) @@ -450,10 +451,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B - | R500_ALU_RGB_SEL_B_SRC0 - | R500_ALU_RGB_R_SWIZ_B_1 | R500_ALU_RGB_G_SWIZ_B_1 | R500_ALU_RGB_B_SWIZ_B_1; - + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 @@ -461,8 +461,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_R_SWIZ_0 | R500_ALU_RGBA_G_SWIZ_0 - | R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0; + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From 66a49df9cba8f17059be420126346a4234e81cba Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 5 May 2008 18:42:27 +1000 Subject: r500: consolidate tex instructions you cannot change a tex into an output so this means we have to actually do another instruction after this one to mov if its an output --- src/mesa/drivers/dri/r300/r500_fragprog.c | 68 ++++++++++++++++++------------- 1 file changed, 39 insertions(+), 29 deletions(-) (limited to 'src/mesa') diff 
--git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index ac6e306d20..bbcbd2efd1 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -192,6 +192,43 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist return reg; } +static void emit_tex(struct r500_fragment_program *fp, + struct prog_instruction *fpi, int opcode, int dest, int counter) +{ + int hwsrc, hwdest; + GLuint mask; + + mask = fpi->DstReg.WriteMask << 11; + hwsrc = make_src(fp, fpi->SrcReg[0]); + + fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask + | R500_INST_TEX_SEM_WAIT; + + fp->inst[counter].inst1 = fpi->TexSrcUnit + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + switch (opcode) { + case OPCODE_TEX: + fp->inst[counter].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXP: + fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; + } + + fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) + /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ + | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A + | R500_TEX_DST_ADDR(dest) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + + + fp->inst[counter].inst3 = 0x0; + fp->inst[counter].inst4 = 0x0; + fp->inst[counter].inst5 = 0x0; +} + static void dumb_shader(struct r500_fragment_program *fp) { fp->inst[0].inst0 = R500_INST_TYPE_TEX @@ -515,37 +552,10 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_MOD_C_NEG; break; case OPCODE_TEX: - src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask - | R500_INST_TEX_SEM_WAIT; - fp->inst[counter].inst1 = fpi->TexSrcUnit - | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) - /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ - | 
R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A - | R500_TEX_DST_ADDR(dest) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; + emit_tex(fp, fpi, OPCODE_TEX, dest, counter); break; case OPCODE_TXP: - src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask; - fp->inst[counter].inst1 = fpi->TexSrcUnit - | R500_TEX_INST_PROJ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0]) - /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ - | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A - | R500_TEX_DST_ADDR(dest) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; + emit_tex(fp, fpi, OPCODE_TXP, dest, counter); break; default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); -- cgit v1.2.3 From 06e2e1b87ce7db9f48b9d198d71d46636f7e6fe3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 11:57:24 -0700 Subject: r5xx: Use max_temp_idx. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index bbcbd2efd1..0e2bda1c64 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -152,7 +152,10 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - reg = (src.Index << 0x1) | 0x1; + // reg = (src.Index << 0x1) | 0x1; + reg = src.Index; + if (src.Index > fp->max_temp_idx) + fp->max_temp_idx = src.Index; break; case PROGRAM_INPUT: /* Ugly hack needed to work around Mesa; @@ -177,7 +180,10 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - reg = (dest.Index << 0x1) | 0x1; + // reg = (dest.Index << 0x1) | 0x1; + reg = dest.Index; + if (dest.Index > fp->max_temp_idx) + fp->max_temp_idx = src.Index; break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -354,9 +360,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0); fp->inst[counter].inst3 = /* 1 */ MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); @@ -586,8 +592,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) { fp->inst[counter].inst0 |= R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; + } else { + /* We still need to put an output inst, right? 
*/ } + fp->max_temp_idx++; + return GL_TRUE; } -- cgit v1.2.3 From 171ba1d0d154f7fdeb712fd411f19e1ebddd3b55 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 12:18:07 -0700 Subject: r5xx: Fix typo. Gotta be more careful with my cut'n'paste, lawl. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 0e2bda1c64..9ad081e9b8 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -183,7 +183,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist // reg = (dest.Index << 0x1) | 0x1; reg = dest.Index; if (dest.Index > fp->max_temp_idx) - fp->max_temp_idx = src.Index; + fp->max_temp_idx = dest.Index; break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple -- cgit v1.2.3 From fa465fb2b1ce4119e4ae8f9b64721f385f361ad9 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 12:42:40 -0700 Subject: r5xx: We update max_temp_idx now, so no need to hard-code it. This roughly doubles the speed of glxgears (GINAB) by allowing more pixels to run concurrently. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 9ad081e9b8..b91cc273fd 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -619,7 +619,7 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; - fp->max_temp_idx = 64; + fp->max_temp_idx = 0; fp->node[0].alu_end = -1; fp->node[0].tex_end = -1; -- cgit v1.2.3 From 1562dd2c26d43bffa8c6bd08ec6128c750ad58ff Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 12:44:53 -0700 Subject: r5xx: Emit an OUT instruction at the end of execution. This should make TEX/TXP work right. (Note: "Should" is not "does.") --- src/mesa/drivers/dri/r300/r500_fragprog.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b91cc273fd..65fa805d81 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -585,17 +585,35 @@ static GLboolean parse_program(struct r500_fragment_program *fp) } - fp->cs->nrslots = counter; - - /* Finish him! (If it's an output instruction...) - * Yes, I know it's ugly... */ + /* Finish him! (If it's an ALU/OUT instruction...) */ if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) { fp->inst[counter].inst0 |= R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; + | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; } else { /* We still need to put an output inst, right? 
*/ + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | R500_INST_LAST + | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G + | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK; + fp->inst[counter].inst1 = R500_RGB_ADDR0(dest); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(0) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 + | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(0) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); } + fp->cs->nrslots = counter; + fp->max_temp_idx++; return GL_TRUE; -- cgit v1.2.3 From 20baf128ef39dca058636c1bff4c526a8879b3d5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 17:21:30 -0700 Subject: r5xx: FP: Make MOV/ABS look pretty. We can't really do anything like emit_alu, so we're doing emit_mov instead. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 71 ++++++++++++++----------------- 1 file changed, 32 insertions(+), 39 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 65fa805d81..e6f7e173f7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -310,7 +310,30 @@ static void dumb_shader(struct r500_fragment_program *fp) fp->translated = GL_TRUE; } -static void emit_alu(struct r500_fragment_program *fp) { +/* static void emit_alu(struct r500_fragment_program *fp) { + * } */ + +static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) { + /* The r3xx shader uses MAD to implement MOV. 
We are using CMP, since + * it is technically more accurate and recommended by ATI/AMD. */ + GLuint src_reg = make_src(fp, src); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src)) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src)) + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src)) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src)) + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); } static GLboolean parse_program(struct r500_fragment_program *fp) @@ -335,23 +358,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) switch (fpi->Opcode) { case OPCODE_ABS: - src[0] = make_src(fp, fpi->SrcReg[0]); - /* Variation on MOV */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 - | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_MOD_A_ABS - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); + emit_mov(fp, counter, fpi->SrcReg[0], dest); + fp->inst[counter].inst0 |= mask; + fp->inst[counter].inst3 |= 
R500_ALU_RGB_MOD_A_ABS + | R500_ALU_RGB_MOD_B_ABS; + fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS + | R500_ALPHA_MOD_B_ABS; break; case OPCODE_ADD: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -485,27 +497,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MOV: - src[0] = make_src(fp, fpi->SrcReg[0]); - - /* changed to use MAD - not sure if we - ever have negative things which max will fail on */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 - | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; - - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + emit_mov(fp, counter, fpi->SrcReg[0], dest); + fp->inst[counter].inst0 |= mask; break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From 40db59038cc62a5a8e4f94cb069eeb1d9b95a1a9 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 18:14:21 -0700 Subject: r5xx: FP: Add OPCODE_TXB. Tex lookup with biased LOD. Should magically work. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index e6f7e173f7..15bc5798e0 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -216,8 +216,14 @@ static void emit_tex(struct r500_fragment_program *fp, case OPCODE_TEX: fp->inst[counter].inst1 |= R500_TEX_INST_LD; break; + case OPCODE_TXB: + fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS; + break; case OPCODE_TXP: fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; + break; + default: + ERROR("emit_tex can't handle opcode %x\n", opcode); } fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) @@ -553,6 +559,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_TEX: emit_tex(fp, fpi, OPCODE_TEX, dest, counter); break; + case OPCODE_TXB: + emit_tex(fp, fpi, OPCODE_TXB, dest, counter); + break; case OPCODE_TXP: emit_tex(fp, fpi, OPCODE_TXP, dest, counter); break; -- cgit v1.2.3 From dc24fb51a31de8443e653655105d4e1c88847bcc Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 22:18:28 -0700 Subject: r5xx: Index inputs and temps. This is not the same as r3xx indexing. It only tries to protect inputs on the pixel stack from getting clobbered by temps or texs. Texs don't need special treatment since they read from special input regs and write to the same temp regs as ALU/FC instructions. 
--- src/mesa/drivers/dri/r300/r300_context.h | 5 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 131 +++++++++--------------------- 2 files changed, 41 insertions(+), 95 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 980a26ffdd..815a729969 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -807,10 +807,7 @@ struct r500_fragment_program { int cur_node; int first_node_has_tex; - int alu_offset; - int alu_end; - int tex_offset; - int tex_end; + int temp_reg_offset; /* Hardware constants. * Contains a pointer to the value. The destination of the pointer diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 15bc5798e0..c753c2b6f7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -65,6 +65,9 @@ #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs +#define R500_US_NUM_TEMP_REGS 128 +#define R500_US_NUM_CONST_REGS 256 + /* "Register" flags */ #define REG_CONSTANT (1 << 8) #define REG_SRC_REL (1 << 9) @@ -121,6 +124,30 @@ static inline GLuint make_strq_swizzle(struct prog_src_register src) { return swiz; } +static int get_temp(struct r500_fragment_program *fp, int slot) { + + COMPILE_STATE; + + int r = slot + fp->temp_reg_offset; + + while (cs->inputs[r].refcount != 0) { + /* Crap, taken. */ + r++; + } + + fp->temp_reg_offset = r - slot; + + if (r >= R500_US_NUM_TEMP_REGS) { + ERROR("Out of hardware temps!\n"); + return 0; + } + + if (r > fp->max_temp_idx) + fp->max_temp_idx = r; + + return r; +} + /* Borrowed verbatim from r300_fragprog since it hasn't changed. 
*/ static GLuint emit_const4fv(struct r500_fragment_program *fp, const GLfloat * cp) @@ -134,8 +161,7 @@ static GLuint emit_const4fv(struct r500_fragment_program *fp, } if (index >= fp->const_nr) { - /* TODO: This should be r5xx nums, not r300 */ - if (index >= PFS_NUM_CONST_REGS) { + if (index >= R500_US_NUM_CONST_REGS) { ERROR("Out of hw constants!\n"); return reg; } @@ -152,15 +178,12 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - // reg = (src.Index << 0x1) | 0x1; - reg = src.Index; - if (src.Index > fp->max_temp_idx) - fp->max_temp_idx = src.Index; + reg = get_temp(fp, src.Index); break; case PROGRAM_INPUT: /* Ugly hack needed to work around Mesa; * fragments don't get loaded right otherwise! */ - reg = 0x0; + reg = src.Index; break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: @@ -180,10 +203,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - // reg = (dest.Index << 0x1) | 0x1; - reg = dest.Index; - if (dest.Index > fp->max_temp_idx) - fp->max_temp_idx = dest.Index; + reg = get_temp(fp, dest.Index); break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -323,7 +343,7 @@ static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_ /* The r3xx shader uses MAD to implement MOV. We are using CMP, since * it is technically more accurate and recommended by ATI/AMD. 
*/ GLuint src_reg = make_src(fp, src); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT; fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -511,7 +531,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: src0*src1+0 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -639,7 +659,10 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->cur_node = 0; fp->first_node_has_tex = 0; fp->const_nr = 0; - fp->max_temp_idx = 0; + /* Size of pixel stack, plus 1. */ + fp->max_temp_idx = 1; + /* Temp register offset. */ + fp->temp_reg_offset = 0; fp->node[0].alu_end = -1; fp->node[0].tex_end = -1; @@ -659,49 +682,6 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) * starting from register 0. 
*/ -#if 0 - /* Texcoords come first */ - for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - get_hw_temp(fp, 0); - } - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) { - cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); - insert_wpos(&mp->Base); - } - InputsRead &= ~FRAG_BIT_WPOS; - - /* Then primary colour */ - if (InputsRead & FRAG_BIT_COL0) { - cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); - } - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) { - cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); - } - InputsRead &= ~FRAG_BIT_COL1; - - /* Anything else */ - if (InputsRead) { - WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); - /* force read from hwreg 0 for now */ - for (i = 0; i < 32; i++) - if (InputsRead & (1 << i)) - cs->inputs[i].reg = 0; - } -#endif - /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. 
* That way, we can free up the reg when it's no longer needed */ @@ -712,35 +692,14 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { int idx; - for (i = 0; i < 3; i++) { idx = fpi->SrcReg[i].Index; - switch (fpi->SrcReg[i].File) { - case PROGRAM_TEMPORARY: - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - break; - case PROGRAM_INPUT: + if (fpi->SrcReg[i].File == PROGRAM_INPUT) { cs->inputs[idx].refcount++; - break; - default: - break; + if (fp->max_temp_idx < idx) + fp->max_temp_idx = idx; } } - - idx = fpi->DstReg.Index; - if (fpi->DstReg.File == PROGRAM_TEMPORARY) { - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - } } cs->temp_in_use = temps_used; } @@ -777,16 +736,6 @@ void r500TranslateFragmentShader(r300ContextPtr r300, return; } - /* Finish off */ - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - if (fp->node[fp->cur_node].tex_end < 0) - fp->node[fp->cur_node].tex_end = 0; - fp->alu_offset = 0; - fp->alu_end = cs->nrslots - 1; - //assert(fp->node[fp->cur_node].alu_end >= 0); - //assert(fp->alu_end >= 0); - fp->translated = GL_TRUE; r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); } -- cgit v1.2.3 From 49c30ce958e5e95e9e6ab79d2308751705d0ff22 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 6 May 2008 23:36:50 -0700 Subject: r5xx: Fix false error with DP3/DP4. DP3/DP4 only takes two arguments, but tried to load three, causing a false fallback to the dumb shader. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index c753c2b6f7..f90f467cb7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -418,13 +418,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_DP3: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); @@ -433,23 +432,18 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_DP4: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); /* Based on DP3 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | R500_INST_TEX_SEM_WAIT | mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | 
R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); @@ -458,11 +452,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MAD: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From 1da094c9adf49c48a8b61ee7ab5336e8ba3f9e8d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 7 May 2008 00:06:26 -0700 Subject: r5xx: Fix FP inputs. (For good?) FP inputs are now counted and mapped correctly, and temps are allocated tightly and correctly. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 67 +++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f90f467cb7..ed14c93df7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -128,7 +128,7 @@ static int get_temp(struct r500_fragment_program *fp, int slot) { COMPILE_STATE; - int r = slot + fp->temp_reg_offset; + int r = slot; while (cs->inputs[r].refcount != 0) { /* Crap, taken. 
*/ @@ -175,15 +175,14 @@ static GLuint emit_const4fv(struct r500_fragment_program *fp, } static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) { + COMPILE_STATE; GLuint reg; switch (src.File) { case PROGRAM_TEMPORARY: - reg = get_temp(fp, src.Index); + reg = src.Index + fp->temp_reg_offset; break; case PROGRAM_INPUT: - /* Ugly hack needed to work around Mesa; - * fragments don't get loaded right otherwise! */ - reg = src.Index; + reg = cs->inputs[src.Index].reg; break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: @@ -203,7 +202,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist GLuint reg; switch (dest.File) { case PROGRAM_TEMPORARY: - reg = get_temp(fp, dest.Index); + reg = dest.Index + fp->temp_reg_offset; break; case PROGRAM_OUTPUT: /* Eventually we may need to handle multiple @@ -669,17 +668,65 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) * configures itself based on the fragprog's InputsRead * * NOTE: this depends on get_hw_temp() allocating registers in order, - * starting from register 0. + * starting from register 0, so we're just going to do that instead. 
*/ + /* Texcoords come first */ + for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; + cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) { + cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; + cs->inputs[FRAG_ATTRIB_WPOS].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL0].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) { + cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL1].reg = + fp->temp_reg_offset; + fp->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i = 0; i < 32; i++) + if (InputsRead & (1 << i)) + cs->inputs[i].reg = 0; + } + /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. 
* That way, we can free up the reg when it's no longer needed */ if (!mp->Base.Instructions) { - ERROR("No instructions found in program\n"); + ERROR("No instructions found in program, going to go die now.\n"); return; } +#if 0 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { int idx; for (i = 0; i < 3; i++) { @@ -691,6 +738,10 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) } } } +#endif + + fp->max_temp_idx = fp->temp_reg_offset + 1; + cs->temp_in_use = temps_used; } -- cgit v1.2.3 From 53a7ccc08b286a02f5a276f213cfae31c8e6bf7c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 7 May 2008 15:16:27 +1000 Subject: r500: for rectangular textures set to unscaled coordinates. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index ed14c93df7..f9ef582d0a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -231,6 +231,10 @@ static void emit_tex(struct r500_fragment_program *fp, fp->inst[counter].inst1 = fpi->TexSrcUnit | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) + fp->inst[counter].inst1 |= R500_TEX_UNSCALED; + switch (opcode) { case OPCODE_TEX: fp->inst[counter].inst1 |= R500_TEX_INST_LD; -- cgit v1.2.3 From 3d1528027889d67ca98002833dcb42b3f2f48067 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 7 May 2008 15:59:21 +1000 Subject: r500: cleanup r500 RS setup --- src/mesa/drivers/dri/r300/r300_reg.h | 33 ++++++-------- src/mesa/drivers/dri/r300/r300_state.c | 83 ++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 49 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index a6719d6553..c6d0d66c6f 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ 
b/src/mesa/drivers/dri/r300/r300_reg.h @@ -721,23 +721,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_RS_IP_13 0x40A8 #define R500_RS_IP_14 0x40AC #define R500_RS_IP_15 0x40B0 +#define R500_RS_IP_PTR_K0 62 +#define R500_RS_IP_PTR_K1 63 #define R500_RS_IP_TEX_PTR_S_SHIFT 0 #define R500_RS_IP_TEX_PTR_T_SHIFT 6 #define R500_RS_IP_TEX_PTR_R_SHIFT 12 #define R500_RS_IP_TEX_PTR_Q_SHIFT 18 #define R500_RS_IP_COL_PTR_SHIFT 24 #define R500_RS_IP_COL_FMT_SHIFT 27 -#define R500_RS_IP_COL_FMT_RGBA (0 << 27) -#define R500_RS_IP_COL_FMT_RGB0 (1 << 27) -#define R500_RS_IP_COL_FMT_RGB1 (2 << 27) -/* gap */ -#define R500_RS_IP_COL_FMT_000A (4 << 27) -#define R500_RS_IP_COL_FMT_0000 (5 << 27) -#define R500_RS_IP_COL_FMT_0001 (6 << 27) -/* gap */ -#define R500_RS_IP_COL_FMT_111A (8 << 27) -#define R500_RS_IP_COL_FMT_1110 (9 << 27) -#define R500_RS_IP_COL_FMT_1111 (10 << 27) +# define R500_RS_COL_PTR(x) (x << 24) +# define R500_RS_COL_FMT(x) (x << 27) /* gap */ #define R500_RS_IP_OFFSET_DIS (0 << 31) #define R500_RS_IP_OFFSET_EN (1 << 31) @@ -1177,15 +1170,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_RS_TEX_PTR(x) (x << 0) # define R300_RS_COL_PTR(x) (x << 6) # define R300_RS_COL_FMT(x) (x << 9) -# define R300_RS_COL_FMT_RGBA 0 -# define R300_RS_COL_FMT_RGB0 2 -# define R300_RS_COL_FMT_RGB1 3 -# define R300_RS_COL_FMT_000A 4 -# define R300_RS_COL_FMT_0000 5 -# define R300_RS_COL_FMT_0001 6 -# define R300_RS_COL_FMT_111A 8 -# define R300_RS_COL_FMT_1110 9 -# define R300_RS_COL_FMT_1111 10 +# define R300_RS_COL_FMT_RGBA 0 +# define R300_RS_COL_FMT_RGB0 2 +# define R300_RS_COL_FMT_RGB1 3 +# define R300_RS_COL_FMT_000A 4 +# define R300_RS_COL_FMT_0000 5 +# define R300_RS_COL_FMT_0001 6 +# define R300_RS_COL_FMT_111A 8 +# define R300_RS_COL_FMT_1110 9 +# define R300_RS_COL_FMT_1111 10 # define R300_RS_SEL_S(x) (x << 13) # define R300_RS_SEL_T(x) (x << 16) # define R300_RS_SEL_R(x) (x << 19) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index a083db9bbc..298de096fb 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1501,7 +1501,7 @@ static void r300SetupRSUnit(GLcontext * ctx) int rs_tex_count = 0, rs_col_count = 0; int i, count; - memset(interp_col, 0, 8); + memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; @@ -1640,22 +1640,17 @@ static void r500SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); /* I'm still unsure if these are needed */ - GLuint interp_magic[8] = { - 0x00, - 1 << 24, - 2 << 24, - 3 << 24, - 0x00, - 0x00, - 0x00, - 0x00 - }; + GLuint interp_col[8]; union r300_outputs_written OutputsWritten; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; GLuint InputsRead; int fp_reg, high_rr; + int rs_tex_count = 0, rs_col_count = 0; int in_texcoords, col_interp_nr; - int i; + int i, count; + memset(interp_col, 0, sizeof(interp_col)); if (hw_tcl_on) OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; 
else @@ -1672,7 +1667,7 @@ static void r500SetupRSUnit(GLcontext * ctx) R300_STATECHANGE(r300, rc); R300_STATECHANGE(r300, rr); - fp_reg = in_texcoords = col_interp_nr = high_rr = 0; + fp_reg = col_interp_nr = high_rr = in_texcoords = 0; r300->hw.rr.cmd[R300_RR_INST_1] = 0; @@ -1690,15 +1685,51 @@ static void r500SetupRSUnit(GLcontext * ctx) InputsRead &= ~FRAG_BIT_WPOS; } + if (InputsRead & FRAG_BIT_COL0) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + interp_col[0] |= R500_RS_COL_PTR(rs_col_count); + if (count == 3) + interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + else + interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + + if (InputsRead & FRAG_BIT_COL1) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + interp_col[1] |= R500_RS_COL_PTR(1); + if (count == 3) + interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + rs_col_count += count; + } + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - - // r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0 | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | (in_texcoords << R300_RS_INTERP_SRC_SHIFT) - - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = (0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (1 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) | - (in_texcoords << 0) | interp_magic[i]; + GLuint swiz; + + /* with TCL we always seem to route 4 components */ + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + swiz = 0; + if (count == 4) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT; + else + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + + if (count >= 3) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + + /* always have at least 2 tex coords */ + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= in_texcoords++ << 
R500_RS_IP_TEX_PTR_S_SHIFT; + } + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count | swiz; r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { @@ -1715,16 +1746,11 @@ static void r500SetupRSUnit(GLcontext * ctx) WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); } } - /* Need to count all coords enabled at vof */ - if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { - in_texcoords++; - } } if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - // r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0 | R300_RS_ROUTE_0_COLOR | (fp_reg++ << R300_RS_ROUTE_0_COLOR_DEST_SHIFT); - r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL0; col_interp_nr++; } else { @@ -1734,7 +1760,6 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - // r300->hw.rr.cmd[R300_RR_ROUTE_1] |= R300_RS_ROUTE_1_UNKNOWN11 | R300_RS_ROUTE_1_COLOR1 | (fp_reg++ << R300_RS_ROUTE_1_COLOR1_DEST_SHIFT); r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT); InputsRead &= ~FRAG_BIT_COL1; if (high_rr < 1) @@ -1751,7 +1776,7 @@ static void r500SetupRSUnit(GLcontext * ctx) col_interp_nr++; } - r300->hw.rc.cmd[1] = 0 | ((in_texcoords << 2) << R300_IT_COUNT_SHIFT) + r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_interp_nr << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; -- cgit v1.2.3 From 2a4d1085cb9d2d03e6aeb2c71a59888826c31afd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 12 May 2008 11:57:08 -0400 Subject: R500: add support for 4k textures --- 
src/mesa/drivers/dri/r300/r300_cmdbuf.c | 2 +- src/mesa/drivers/dri/r300/r300_context.c | 5 +++++ src/mesa/drivers/dri/r300/r300_reg.h | 2 +- src/mesa/drivers/dri/r300/r300_texstate.c | 14 +++++++++++--- 4 files changed, 18 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 3f9d9da399..df7f29a2ce 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -164,7 +164,7 @@ static inline void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) r300->cmdbuf.count_used++; /* Emit cache flush */ - *dest = cmdpacket0(R300_TX_CNTL, 1); + *dest = cmdpacket0(R300_TX_INVALTAGS, 1); dest++; r300->cmdbuf.count_used++; diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index c56a762289..31cc00a081 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -278,6 +278,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureCoordUnits); ctx->Const.MaxTextureMaxAnisotropy = 16.0; + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.MaxTextureLevels = 13; + ctx->Const.MaxTextureRectSize = 4096; + } + ctx->Const.MinPointSize = 1.0; ctx->Const.MinPointSizeAA = 1.0; ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index a6719d6553..8e3fe0c524 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -745,7 +745,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ /* Zero to flush caches. */ -#define R300_TX_CNTL 0x4100 +#define R300_TX_INVALTAGS 0x4100 #define R300_TX_FLUSH 0x0 /* The upper enable bits are guessed, based on fglrx reported limits. 
*/ diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 43d1406da3..f69a27671b 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -398,16 +398,24 @@ static void r300SetTexImages(r300ContextPtr rmesa, R300_TX_HEIGHTMASK_SHIFT)) | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); + t->pitch = 0; + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (tObj->Image[0][t->base.firstLevel]->Width > 2048) + t->pitch |= R500_TXWIDTH_BIT11; + if (tObj->Image[0][t->base.firstLevel]->Height > 2048) + t->pitch |= R500_TXHEIGHT_BIT11; + } + /* Only need to round to nearest 32 for textures, but the blitter * requires 64-byte aligned pitches, and we may/may not need the * blitter. NPOT only! */ if (baseImage->IsCompressed) { - t->pitch = + t->pitch |= (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { unsigned int align = blitWidth - 1; - t->pitch = ((tObj->Image[0][t->base.firstLevel]->Width * + t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); t->size |= R300_TX_SIZE_TXPITCH_EN; if (!t->image_override) @@ -415,7 +423,7 @@ static void r300SetTexImages(r300ContextPtr rmesa, (((tObj->Image[0][t->base.firstLevel]->Width) + align) & ~align) - 1; } else { - t->pitch = + t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); } -- cgit v1.2.3 From 0cc8ed5cccd3073670f3b77189177d44eae9b099 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 May 2008 06:09:55 -0400 Subject: R3xx: more PVS cleanup --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 2 +- src/mesa/drivers/dri/r300/r300_ioctl.c | 15 ++++++++------- src/mesa/drivers/dri/r300/r300_reg.h | 21 ++++++++++----------- src/mesa/drivers/dri/r300/r300_state.c | 28 +++++++++++++--------------- 4 files changed, 32 insertions(+), 34 deletions(-) (limited to 'src/mesa') diff --git 
a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 8668dba9f0..08a616c335 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -353,7 +353,7 @@ void r300InitCmdBuf(r300ContextPtr r300) if (has_tcl) { ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0); r300->hw.pvs.cmd[R300_PVS_CMD_0] = - cmdpacket0(R300_VAP_PVS_CNTL_1, 3); + cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3); } ALLOC_STATE(gb_enable, always, 2, 0); diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ffcde7ff35..68f2437b86 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -455,13 +455,14 @@ static void r300EmitClearState(GLcontext * ctx) if (has_tcl) { R300_STATECHANGE(r300, pvs); - reg_start(R300_VAP_PVS_CNTL_1, 2); - - e32((0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | - (0 << R300_PVS_CNTL_1_POS_END_SHIFT) | - (1 << R300_PVS_CNTL_1_PROGRAM_END_SHIFT)); - e32(0x0); - e32(1 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT); + reg_start(R300_VAP_PVS_CODE_CNTL_0, 2); + + e32((0 << R300_PVS_FIRST_INST_SHIFT) | + (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | + (1 << R300_PVS_LAST_INST_SHIFT)); + e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); + e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); R300_STATECHANGE(r300, vpi); vsf_start_fragment(0x0, 8); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index b404e515df..ff2fc15524 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -444,7 +444,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and * avoids bugs caused by still running shaders reading bad data from memory. 
*/ -#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */ +#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 /* This register is used to define the number of core clocks to wait for a * vertex to be received by the VAP input controller (while the primitive @@ -474,17 +474,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * is sometimes accepted other instruction that have no relationship with * position calculations. */ -#define R300_VAP_PVS_CNTL_1 0x22D0 -# define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 -# define R300_PVS_CNTL_1_POS_END_SHIFT 10 -# define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20 +#define R300_VAP_PVS_CODE_CNTL_0 0x22D0 +# define R300_PVS_FIRST_INST_SHIFT 0 +# define R300_PVS_XYZW_VALID_INST_SHIFT 10 +# define R300_PVS_LAST_INST_SHIFT 20 /* Addresses are relative the the vertex program parameters area. */ -#define R300_VAP_PVS_CNTL_2 0x22D4 -# define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 -# define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 -#define R300_VAP_PVS_CNTL_3 0x22D8 -# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT 10 -# define R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT 0 +#define R300_VAP_PVS_CONST_CNTL 0x22D4 +# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 +# define R300_PVS_MAX_CONST_ADDR_SHIFT 16 +#define R300_VAP_PVS_CODE_CNTL_1 0x22D8 +# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 298de096fb..012c8d5e3f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1849,7 +1849,7 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_ /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS. 
* See r500 docs 6.5.2 */ - reg_start(R300_VAP_PVS_WAITIDLE, 0); + reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); e32(0x00000000); /* avoid division by zero */ @@ -1926,15 +1926,14 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | - (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) | - (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + (0 << R300_PVS_FIRST_INST_SHIFT) | + (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | - (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) | - (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); } static int bit_count (int x) @@ -1972,15 +1971,14 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | - (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) | - (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + (0 << R300_PVS_FIRST_INST_SHIFT) | + (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | - (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) | - (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); } static void r300SetupVertexProgram(r300ContextPtr rmesa) -- cgit 
v1.2.3 From 9ef4126d48153d4754b29bd4231d29dfb15fa73f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 May 2008 08:37:58 -0400 Subject: R300: cleanup FS code and fill in missing details --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 16 +- src/mesa/drivers/dri/r300/r300_fragprog.c | 207 +++++++------- src/mesa/drivers/dri/r300/r300_fragprog.h | 64 ++--- src/mesa/drivers/dri/r300/r300_ioctl.c | 14 +- src/mesa/drivers/dri/r300/r300_reg.h | 455 ++++++++++++++++-------------- src/mesa/drivers/dri/r300/r300_state.c | 28 +- 6 files changed, 409 insertions(+), 375 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 08a616c335..82c7eb0935 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -409,7 +409,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(sc_screendoor, always, 2, 0); r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1); ALLOC_STATE(us_out_fmt, always, 6, 0); - r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R500_US_OUT_FMT, 5); + r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5); if (is_r500) { ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); @@ -422,19 +422,19 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); - r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_PFS_CNTL_0, 3); - r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_PFS_NODE_0, 4); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4); ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); - r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_PFS_TEXI_0, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0); ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); - r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, 1); + 
r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1); ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); - r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1); ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); - r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, 1); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1); ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); - r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, 1); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1); ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); } diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 5ba2971fb9..a28841dda8 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -172,19 +172,19 @@ static const struct { int s_op; } r300_fpop[] = { /* *INDENT-OFF* */ - {"MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD}, - {"DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4}, - {"DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4}, - {"MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN}, - {"MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX}, - {"CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP}, - {"FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC}, - {"EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2}, - {"LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2}, - {"RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP}, - {"RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ}, - {"REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL}, - {"CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL}, + {"MAD", 3, R300_ALU_OUTC_MAD, R300_ALU_OUTA_MAD}, + {"DP3", 2, R300_ALU_OUTC_DP3, R300_ALU_OUTA_DP4}, + {"DP4", 2, 
R300_ALU_OUTC_DP4, R300_ALU_OUTA_DP4}, + {"MIN", 2, R300_ALU_OUTC_MIN, R300_ALU_OUTA_MIN}, + {"MAX", 2, R300_ALU_OUTC_MAX, R300_ALU_OUTA_MAX}, + {"CMP", 3, R300_ALU_OUTC_CMP, R300_ALU_OUTA_CMP}, + {"FRC", 1, R300_ALU_OUTC_FRC, R300_ALU_OUTA_FRC}, + {"EX2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_EX2}, + {"LG2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_LG2}, + {"RCP", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RCP}, + {"RSQ", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RSQ}, + {"REPL_ALPHA", 1, R300_ALU_OUTC_REPL_ALPHA, PFS_INVAL}, + {"CMPH", 3, R300_ALU_OUTC_CMPH, PFS_INVAL}, /* *INDENT-ON* */ }; @@ -209,17 +209,17 @@ static const struct r300_pfs_swizzle { GLuint flags; } v_swiz[] = { /* *INDENT-OFF* */ - {MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, - {MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, - {MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, + 
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}, {PFS_INVAL, 0, 0, 0}, /* *INDENT-ON* */ }; @@ -252,13 +252,13 @@ static const struct { GLuint flags; } s_swiz[] = { /* *INDENT-OFF* */ - {R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, - {R300_FPI2_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, - {R300_FPI2_ARGA_ZERO, 0, 0}, - {R300_FPI2_ARGA_ONE, 0, 0}, - {R300_FPI2_ARGA_HALF, 0, 0} + {R300_ALU_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, + {R300_ALU_ARGA_ZERO, 0, 0}, + {R300_ALU_ARGA_ONE, 0, 0}, + {R300_ALU_ARGA_HALF, 0, 0} /* *INDENT-ON* */ }; @@ -859,11 +859,11 @@ static int t_hw_dst(struct r300_fragment_program *fp, switch (index) { case FRAG_RESULT_COLR: fp->node[fp->cur_node].flags |= - R300_PFS_NODE_OUTPUT_COLOR; + R300_RGBA_OUT; break; case FRAG_RESULT_DEPR: fp->node[fp->cur_node].flags |= - R300_PFS_NODE_OUTPUT_DEPTH; + R300_W_OUT; break; } return index; @@ -907,7 +907,7 @@ static void emit_tex(struct r300_fragment_program *fp, din = cs->dest_in_node; /* Resolve source/dest to hardware registers */ - if (opcode != R300_FPITX_OP_KIL) { + if (opcode != R300_TEX_OP_KIL) { if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { /** * Hardware uses [0..1]x[0..1] range for rectangle textures @@ -1007,11 +1007,10 @@ static void emit_tex(struct r300_fragment_program *fp, if (fp->cur_node == 0) fp->first_node_has_tex = 1; - fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT) - | (hwdest << R300_FPITX_DST_SHIFT) - | (unit << R300_FPITX_IMAGE_SHIFT) - /* not entirely sure about this */ - | (opcode << R300_FPITX_OPCODE_SHIFT); + fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_SRC_ADDR_SHIFT) + | (hwdest << R300_DST_ADDR_SHIFT) + | 
(unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT); cs->dest_in_node |= (1 << hwdest); if (REG_GET_TYPE(coord) != REG_TYPE_CONST) @@ -1228,17 +1227,17 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, } // Emit the source fetch code - fp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; + fp->alu.inst[pos].inst1 &= ~R300_ALU_SRC_MASK; fp->alu.inst[pos].inst1 |= - ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | - (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | - (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); + ((cs->slot[pos].vsrc[0] << R300_ALU_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_ALU_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_ALU_SRC2C_SHIFT)); - fp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; + fp->alu.inst[pos].inst3 &= ~R300_ALU_SRC_MASK; fp->alu.inst[pos].inst3 |= - ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | - (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | - (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); + ((cs->slot[pos].ssrc[0] << R300_ALU_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_ALU_SRC1A_SHIFT) | + (cs->slot[pos].ssrc[2] << R300_ALU_SRC2A_SHIFT)); // Emit the argument selection code if (emit_vop) { @@ -1257,17 +1256,17 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, ARG_ABS : 0); } else { - swz[i] = R300_FPI0_ARGC_ZERO; + swz[i] = R300_ALU_ARGC_ZERO; } } fp->alu.inst[pos].inst0 &= - ~(R300_FPI0_ARG0C_MASK | R300_FPI0_ARG1C_MASK | - R300_FPI0_ARG2C_MASK); + ~(R300_ALU_ARG0C_MASK | R300_ALU_ARG1C_MASK | + R300_ALU_ARG2C_MASK); fp->alu.inst[pos].inst0 |= - (swz[0] << R300_FPI0_ARG0C_SHIFT) | (swz[1] << - R300_FPI0_ARG1C_SHIFT) - | (swz[2] << R300_FPI0_ARG2C_SHIFT); + (swz[0] << R300_ALU_ARG0C_SHIFT) | (swz[1] << + R300_ALU_ARG1C_SHIFT) + | (swz[2] << R300_ALU_ARG2C_SHIFT); } if (emit_sop) { @@ -1286,17 +1285,17 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, ARG_ABS : 0); } else { - swz[i] = R300_FPI2_ARGA_ZERO; + swz[i] = 
R300_ALU_ARGA_ZERO; } } fp->alu.inst[pos].inst2 &= - ~(R300_FPI2_ARG0A_MASK | R300_FPI2_ARG1A_MASK | - R300_FPI2_ARG2A_MASK); + ~(R300_ALU_ARG0A_MASK | R300_ALU_ARG1A_MASK | + R300_ALU_ARG2A_MASK); fp->alu.inst[pos].inst2 |= - (swz[0] << R300_FPI2_ARG0A_SHIFT) | (swz[1] << - R300_FPI2_ARG1A_SHIFT) - | (swz[2] << R300_FPI2_ARG2A_SHIFT); + (swz[0] << R300_ALU_ARG0A_SHIFT) | (swz[1] << + R300_ALU_ARG1A_SHIFT) + | (swz[2] << R300_ALU_ARG2A_SHIFT); } return pos; @@ -1333,9 +1332,9 @@ static void emit_arith(struct r300_fragment_program *fp, emit_vop = GL_FALSE; emit_sop = GL_FALSE; - if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) + if ((mask & WRITEMASK_XYZ) || vop == R300_ALU_OUTC_DP3) emit_vop = GL_TRUE; - if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) + if ((mask & WRITEMASK_W) || vop == R300_ALU_OUTC_REPL_ALPHA) emit_sop = GL_TRUE; pos = @@ -1347,33 +1346,33 @@ static void emit_arith(struct r300_fragment_program *fp, hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ if (flags & PFS_FLAG_SAT) { - vop |= R300_FPI0_OUTC_SAT; - sop |= R300_FPI2_OUTA_SAT; + vop |= R300_ALU_OUTC_CLAMP; + sop |= R300_ALU_OUTA_CLAMP; } - /* Throw the pieces together and get FPI0/1 */ + /* Throw the pieces together and get ALU/1 */ if (emit_vop) { fp->alu.inst[pos].inst0 |= vop; - fp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; + fp->alu.inst[pos].inst1 |= hwdest << R300_ALU_DSTC_SHIFT; if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { fp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << - R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; + R300_ALU_DSTC_OUTPUT_MASK_SHIFT; } else assert(0); } else { fp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << - R300_FPI1_DSTC_REG_MASK_SHIFT; + R300_ALU_DSTC_REG_MASK_SHIFT; cs->hwtemps[hwdest].vector_valid = pos + 1; } } - /* And now FPI2/3 */ + /* And now ALU/3 */ if (emit_sop) { fp->alu.inst[pos].inst2 |= sop; @@ -1381,18 +1380,18 @@ static void 
emit_arith(struct r300_fragment_program *fp, if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { fp->alu.inst[pos].inst3 |= - (hwdest << R300_FPI3_DSTA_SHIFT) | - R300_FPI3_DSTA_OUTPUT; + (hwdest << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_OUTPUT; } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { fp->alu.inst[pos].inst3 |= - R300_FPI3_DSTA_DEPTH; + R300_ALU_DSTA_DEPTH; } else assert(0); } else { fp->alu.inst[pos].inst3 |= - (hwdest << R300_FPI3_DSTA_SHIFT) | - R300_FPI3_DSTA_REG; + (hwdest << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; cs->hwtemps[hwdest].scalar_valid = pos + 1; } @@ -1708,7 +1707,7 @@ static GLboolean parse_program(struct r300_fragment_program *fp) src[0], undef, undef, flags); break; case OPCODE_KIL: - emit_tex(fp, fpi, R300_FPITX_OP_KIL); + emit_tex(fp, fpi, R300_TEX_OP_KIL); break; case OPCODE_LG2: src[0] = t_scalar_src(fp, fpi->SrcReg[0]); @@ -1943,13 +1942,13 @@ static GLboolean parse_program(struct r300_fragment_program *fp) src[0], pfs_one, negate(src[1]), flags); break; case OPCODE_TEX: - emit_tex(fp, fpi, R300_FPITX_OP_TEX); + emit_tex(fp, fpi, R300_TEX_OP_LD); break; case OPCODE_TXB: - emit_tex(fp, fpi, R300_FPITX_OP_TXB); + emit_tex(fp, fpi, R300_TEX_OP_TXB); break; case OPCODE_TXP: - emit_tex(fp, fpi, R300_FPITX_OP_TXP); + emit_tex(fp, fpi, R300_TEX_OP_TXP); break; case OPCODE_XPD:{ src[0] = t_src(fp, fpi->SrcReg[0]); @@ -2282,18 +2281,18 @@ static void dump_program(struct r300_fragment_program *fp) const char *instr; switch ((fp->tex. 
- inst[i] >> R300_FPITX_OPCODE_SHIFT) & + inst[i] >> R300_TEX_INST_SHIFT) & 15) { - case R300_FPITX_OP_TEX: + case R300_TEX_OP_LD: instr = "TEX"; break; - case R300_FPITX_OP_KIL: + case R300_TEX_OP_KIL: instr = "KIL"; break; - case R300_FPITX_OP_TXP: + case R300_TEX_OP_TXP: instr = "TXP"; break; - case R300_FPITX_OP_TXB: + case R300_TEX_OP_TXB: instr = "TXB"; break; default: @@ -2304,15 +2303,13 @@ static void dump_program(struct r300_fragment_program *fp) " %s t%i, %c%i, texture[%i] (%08x)\n", instr, (fp->tex. - inst[i] >> R300_FPITX_DST_SHIFT) & 31, - (fp->tex. - inst[i] & R300_FPITX_SRC_CONST) ? 'c' : + inst[i] >> R300_DST_ADDR_SHIFT) & 31, 't', (fp->tex. - inst[i] >> R300_FPITX_SRC_SHIFT) & 31, + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, (fp->tex. - inst[i] & R300_FPITX_IMAGE_MASK) >> - R300_FPITX_IMAGE_SHIFT, + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, fp->tex.inst[i]); } } @@ -2338,45 +2335,45 @@ static void dump_program(struct r300_fragment_program *fp) dstc[0] = 0; sprintf(flags, "%s%s%s", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", + inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", + inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(dstc, "t%i.%s ", (fp->alu.inst[i]. - inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); } sprintf(flags, "%s%s%s", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", + inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", + inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", (fp->alu.inst[i]. - inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(tmp, "o%i.%s", (fp->alu.inst[i]. 
- inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); strcat(dstc, tmp); } dsta[0] = 0; - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { sprintf(dsta, "t%i.w ", (fp->alu.inst[i]. - inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + inst3 >> R300_ALU_DSTA_SHIFT) & 31); } - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", (fp->alu.inst[i]. - inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + inst3 >> R300_ALU_DSTA_SHIFT) & 31); strcat(dsta, tmp); } - if (fp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { strcat(dsta, "Z"); } @@ -2396,19 +2393,19 @@ static void dump_program(struct r300_fragment_program *fp) d = regc & 31; if (d < 12) { switch (d % 4) { - case R300_FPI0_ARGC_SRC0C_XYZ: + case R300_ALU_ARGC_SRC0C_XYZ: sprintf(buf, "%s.xyz", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_XXX: + case R300_ALU_ARGC_SRC0C_XXX: sprintf(buf, "%s.xxx", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_YYY: + case R300_ALU_ARGC_SRC0C_YYY: sprintf(buf, "%s.yyy", srcc[d / 4]); break; - case R300_FPI0_ARGC_SRC0C_ZZZ: + case R300_ALU_ARGC_SRC0C_ZZZ: sprintf(buf, "%s.zzz", srcc[d / 4]); break; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 73efe49fc1..573aacf19a 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -75,23 +75,23 @@ typedef struct r300_fragment_program_swizzle { #define SRC_STRIDE 6 #define NOP_INST0 ( \ - (R300_FPI0_OUTC_MAD) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)) + (R300_ALU_OUTC_MAD) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG0C_SHIFT) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG1C_SHIFT) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG2C_SHIFT)) #define 
NOP_INST1 ( \ - ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT)) + ((0 | SRC_CONST) << R300_ALU_SRC0C_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC1C_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC2C_SHIFT)) #define NOP_INST2 ( \ - (R300_FPI2_OUTA_MAD) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT)) + (R300_ALU_OUTA_MAD) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG0A_SHIFT) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG1A_SHIFT) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG2A_SHIFT)) #define NOP_INST3 ( \ - ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT)) + ((0 | SRC_CONST) << R300_ALU_SRC0A_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC1A_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC2A_SHIFT)) #define DRI_CONF_FP_OPTIMIZATION_SPEED 0 #define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 @@ -117,42 +117,42 @@ typedef struct r300_fragment_program_swizzle { #define FP_SELC_MASK_XYZ 7 #define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_FPI1_DSTC_SHIFT) | \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ (FP_SELC_MASK_##regmask << 23) | \ (FP_SELC_MASK_##outmask << 26) | \ - ((src0) << R300_FPI1_SRC0C_SHIFT) | \ - ((src1) << R300_FPI1_SRC1C_SHIFT) | \ - ((src2) << R300_FPI1_SRC2C_SHIFT)) + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) #define FP_SELA_MASK_NO 0 #define FP_SELA_MASK_W 1 #define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ - (((destidx) << R300_FPI3_DSTA_SHIFT) | \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ (FP_SELA_MASK_##regmask << 23) | \ (FP_SELA_MASK_##outmask << 24) | \ - ((src0) << R300_FPI3_SRC0A_SHIFT) | \ - ((src1) << R300_FPI3_SRC1A_SHIFT) | \ - ((src2) << R300_FPI3_SRC2A_SHIFT)) + 
((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) /* Produce unshifted argument selectors */ -#define FP_ARGC(source) R300_FPI0_ARGC_##source -#define FP_ARGA(source) R300_FPI2_ARGA_##source +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source #define FP_ABS(arg) ((arg) | (1 << 6)) #define FP_NEG(arg) ((arg) ^ (1 << 5)) /* Produce instruction dword */ #define FP_INSTRC(opcode,arg0,arg1,arg2) \ - (R300_FPI0_OUTC_##opcode | \ - ((arg0) << R300_FPI0_ARG0C_SHIFT) | \ - ((arg1) << R300_FPI0_ARG1C_SHIFT) | \ - ((arg2) << R300_FPI0_ARG2C_SHIFT)) + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) #define FP_INSTRA(opcode,arg0,arg1,arg2) \ - (R300_FPI2_OUTA_##opcode | \ - ((arg0) << R300_FPI2_ARG0A_SHIFT) | \ - ((arg1) << R300_FPI2_ARG1A_SHIFT) | \ - ((arg2) << R300_FPI2_ARG2A_SHIFT)) + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) #endif diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 68f2437b86..ede0bec566 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -330,31 +330,31 @@ static void r300EmitClearState(GLcontext * ctx) if (!is_r500) { R300_STATECHANGE(r300, fp); - reg_start(R300_PFS_CNTL_0, 2); + reg_start(R300_US_CONFIG, 2); e32(0x0); e32(0x0); e32(0x0); - reg_start(R300_PFS_NODE_0, 3); + reg_start(R300_US_CODE_ADDR_0, 3); e32(0x0); e32(0x0); e32(0x0); - e32(R300_PFS_NODE_OUTPUT_COLOR); + e32(R300_RGBA_OUT); R300_STATECHANGE(r300, fpi[0]); R300_STATECHANGE(r300, fpi[1]); R300_STATECHANGE(r300, fpi[2]); R300_STATECHANGE(r300, fpi[3]); - reg_start(R300_PFS_INSTR0_0, 0); + reg_start(R300_US_ALU_RGB_INST_0, 0); e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - 
reg_start(R300_PFS_INSTR1_0, 0); + reg_start(R300_US_ALU_RGB_ADDR_0, 0); e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - reg_start(R300_PFS_INSTR2_0, 0); + reg_start(R300_US_ALU_ALPHA_INST_0, 0); e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - reg_start(R300_PFS_INSTR3_0, 0); + reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); } else { R300_STATECHANGE(r300, r500fp); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index ff2fc15524..558d327028 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1596,23 +1596,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * offsets into the respective instruction streams, while *_END points to the * last instruction relative to this offset. */ -#define R300_PFS_CNTL_0 0x4600 +#define R300_US_CONFIG 0x4600 # define R300_PFS_CNTL_LAST_NODES_SHIFT 0 # define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) # define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) -#define R300_PFS_CNTL_1 0x4604 +#define R300_US_PIXSIZE 0x4604 /* There is an unshifted value here which has so far always been equal to the * index of the highest used temporary register. */ -#define R300_PFS_CNTL_2 0x4608 +#define R300_US_CODE_OFFSET 0x4608 # define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 # define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) # define R300_PFS_CNTL_ALU_END_SHIFT 6 # define R300_PFS_CNTL_ALU_END_MASK (63 << 6) -# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 12 -# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 12) /* GUESS */ +# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13 +# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) # define R300_PFS_CNTL_TEX_END_SHIFT 18 -# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* GUESS */ +# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) /* gap */ @@ -1623,70 +1623,65 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * Offsets are relative to the master offset from PFS_CNTL_2. 
*/ -#define R300_PFS_NODE_0 0x4610 -#define R300_PFS_NODE_1 0x4614 -#define R300_PFS_NODE_2 0x4618 -#define R300_PFS_NODE_3 0x461C -# define R300_PFS_NODE_ALU_OFFSET_SHIFT 0 -# define R300_PFS_NODE_ALU_OFFSET_MASK (63 << 0) -# define R300_PFS_NODE_ALU_END_SHIFT 6 -# define R300_PFS_NODE_ALU_END_MASK (63 << 6) -# define R300_PFS_NODE_TEX_OFFSET_SHIFT 12 -# define R300_PFS_NODE_TEX_OFFSET_MASK (31 << 12) -# define R300_PFS_NODE_TEX_END_SHIFT 17 -# define R300_PFS_NODE_TEX_END_MASK (31 << 17) -# define R300_PFS_NODE_OUTPUT_COLOR (1 << 22) -# define R300_PFS_NODE_OUTPUT_DEPTH (1 << 23) +#define R300_US_CODE_ADDR_0 0x4610 +#define R300_US_CODE_ADDR_1 0x4614 +#define R300_US_CODE_ADDR_2 0x4618 +#define R300_US_CODE_ADDR_3 0x461C +# define R300_ALU_START_SHIFT 0 +# define R300_ALU_START_MASK (63 << 0) +# define R300_ALU_SIZE_SHIFT 6 +# define R300_ALU_SIZE_MASK (63 << 6) +# define R300_TEX_START_SHIFT 12 +# define R300_TEX_START_MASK (31 << 12) +# define R300_TEX_SIZE_SHIFT 17 +# define R300_TEX_SIZE_MASK (31 << 17) +# define R300_RGBA_OUT (1 << 22) +# define R300_W_OUT (1 << 23) /* TEX * As far as I can tell, texture instructions cannot write into output * registers directly. 
A subsequent ALU instruction is always necessary, * even if it's just MAD o0, r0, 1, 0 */ -#define R300_PFS_TEXI_0 0x4620 -# define R300_FPITX_SRC_SHIFT 0 -# define R300_FPITX_SRC_MASK (31 << 0) - /* GUESS */ -# define R300_FPITX_SRC_CONST (1 << 5) -# define R300_FPITX_DST_SHIFT 6 -# define R300_FPITX_DST_MASK (31 << 6) -# define R300_FPITX_IMAGE_SHIFT 11 - /* GUESS based on layout and native limits */ -# define R300_FPITX_IMAGE_MASK (15 << 11) -/* Unsure if these are opcodes, or some kind of bitfield, but this is how - * they were set when I checked - */ -# define R300_FPITX_OPCODE_SHIFT 15 -# define R300_FPITX_OP_TEX 1 -# define R300_FPITX_OP_KIL 2 -# define R300_FPITX_OP_TXP 3 -# define R300_FPITX_OP_TXB 4 -# define R300_FPITX_OPCODE_MASK (7 << 15) +#define R300_US_TEX_INST_0 0x4620 +# define R300_SRC_ADDR_SHIFT 0 +# define R300_SRC_ADDR_MASK (31 << 0) +# define R300_DST_ADDR_SHIFT 6 +# define R300_DST_ADDR_MASK (31 << 6) +# define R300_TEX_ID_SHIFT 11 +# define R300_TEX_ID_MASK (15 << 11) +# define R300_TEX_INST_SHIFT 15 +# define R300_TEX_OP_NOP 0 +# define R300_TEX_OP_LD 1 +# define R300_TEX_OP_KIL 2 +# define R300_TEX_OP_TXP 3 +# define R300_TEX_OP_TXB 4 +# define R300_TEX_INST_MASK (7 << 15) /* Output format from the unfied shader */ -#define R500_US_OUT_FMT 0x46A4 -# define R500_US_OUT_FMT_C4_8 (0 << 0) -# define R500_US_OUT_FMT_C4_10 (1 << 0) -# define R500_US_OUT_FMT_C4_10_GAMMA (2 << 0) -# define R500_US_OUT_FMT_C_16 (3 << 0) -# define R500_US_OUT_FMT_C2_16 (4 << 0) -# define R500_US_OUT_FMT_C4_16 (5 << 0) -# define R500_US_OUT_FMT_C_16_MPEG (6 << 0) -# define R500_US_OUT_FMT_C2_16_MPEG (7 << 0) -# define R500_US_OUT_FMT_C2_4 (8 << 0) -# define R500_US_OUT_FMT_C_3_3_2 (9 << 0) -# define R500_US_OUT_FMT_C_6_5_6 (10 << 0) -# define R500_US_OUT_FMT_C_11_11_10 (11 << 0) -# define R500_US_OUT_FMT_C_10_11_11 (12 << 0) -# define R500_US_OUT_FMT_C_2_10_10_10 (13 << 0) +#define R300_US_OUT_FMT 0x46A4 +# define R300_US_OUT_FMT_C4_8 (0 << 0) +# define 
R300_US_OUT_FMT_C4_10 (1 << 0) +# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R300_US_OUT_FMT_C_16 (3 << 0) +# define R300_US_OUT_FMT_C2_16 (4 << 0) +# define R300_US_OUT_FMT_C4_16 (5 << 0) +# define R300_US_OUT_FMT_C_16_MPEG (6 << 0) +# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0) +# define R300_US_OUT_FMT_C2_4 (8 << 0) +# define R300_US_OUT_FMT_C_3_3_2 (9 << 0) +# define R300_US_OUT_FMT_C_6_5_6 (10 << 0) +# define R300_US_OUT_FMT_C_11_11_10 (11 << 0) +# define R300_US_OUT_FMT_C_10_11_11 (12 << 0) +# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0) /* reserved */ -# define R500_US_OUT_FMT_UNUSED (15 << 0) -# define R500_US_OUT_FMT_C_16_FP (16 << 0) -# define R500_US_OUT_FMT_C2_16_FP (17 << 0) -# define R500_US_OUT_FMT_C4_16_FP (18 << 0) -# define R500_US_OUT_FMT_C_32_FP (19 << 0) -# define R500_US_OUT_FMT_C2_32_FP (20 << 0) -# define R500_US_OUT_FMT_C4_32_FP (20 << 0) +# define R300_US_OUT_FMT_UNUSED (15 << 0) +# define R300_US_OUT_FMT_C_16_FP (16 << 0) +# define R300_US_OUT_FMT_C2_16_FP (17 << 0) +# define R300_US_OUT_FMT_C4_16_FP (18 << 0) +# define R300_US_OUT_FMT_C_32_FP (19 << 0) +# define R300_US_OUT_FMT_C2_32_FP (20 << 0) +# define R300_US_OUT_FMT_C4_32_FP (20 << 0) /* ALU * The ALU instructions register blocks are enumerated according to the order @@ -1752,147 +1747,189 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
* - Set FPI0/FPI2_SPECIAL_LRP * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD */ -#define R300_PFS_INSTR1_0 0x46C0 -# define R300_FPI1_SRC0C_SHIFT 0 -# define R300_FPI1_SRC0C_MASK (31 << 0) -# define R300_FPI1_SRC0C_CONST (1 << 5) -# define R300_FPI1_SRC1C_SHIFT 6 -# define R300_FPI1_SRC1C_MASK (31 << 6) -# define R300_FPI1_SRC1C_CONST (1 << 11) -# define R300_FPI1_SRC2C_SHIFT 12 -# define R300_FPI1_SRC2C_MASK (31 << 12) -# define R300_FPI1_SRC2C_CONST (1 << 17) -# define R300_FPI1_SRC_MASK 0x0003ffff -# define R300_FPI1_DSTC_SHIFT 18 -# define R300_FPI1_DSTC_MASK (31 << 18) -# define R300_FPI1_DSTC_REG_MASK_SHIFT 23 -# define R300_FPI1_DSTC_REG_X (1 << 23) -# define R300_FPI1_DSTC_REG_Y (1 << 24) -# define R300_FPI1_DSTC_REG_Z (1 << 25) -# define R300_FPI1_DSTC_OUTPUT_MASK_SHIFT 26 -# define R300_FPI1_DSTC_OUTPUT_X (1 << 26) -# define R300_FPI1_DSTC_OUTPUT_Y (1 << 27) -# define R300_FPI1_DSTC_OUTPUT_Z (1 << 28) - -#define R300_PFS_INSTR3_0 0x47C0 -# define R300_FPI3_SRC0A_SHIFT 0 -# define R300_FPI3_SRC0A_MASK (31 << 0) -# define R300_FPI3_SRC0A_CONST (1 << 5) -# define R300_FPI3_SRC1A_SHIFT 6 -# define R300_FPI3_SRC1A_MASK (31 << 6) -# define R300_FPI3_SRC1A_CONST (1 << 11) -# define R300_FPI3_SRC2A_SHIFT 12 -# define R300_FPI3_SRC2A_MASK (31 << 12) -# define R300_FPI3_SRC2A_CONST (1 << 17) -# define R300_FPI3_SRC_MASK 0x0003ffff -# define R300_FPI3_DSTA_SHIFT 18 -# define R300_FPI3_DSTA_MASK (31 << 18) -# define R300_FPI3_DSTA_REG (1 << 23) -# define R300_FPI3_DSTA_OUTPUT (1 << 24) -# define R300_FPI3_DSTA_DEPTH (1 << 27) - -#define R300_PFS_INSTR0_0 0x48C0 -# define R300_FPI0_ARGC_SRC0C_XYZ 0 -# define R300_FPI0_ARGC_SRC0C_XXX 1 -# define R300_FPI0_ARGC_SRC0C_YYY 2 -# define R300_FPI0_ARGC_SRC0C_ZZZ 3 -# define R300_FPI0_ARGC_SRC1C_XYZ 4 -# define R300_FPI0_ARGC_SRC1C_XXX 5 -# define R300_FPI0_ARGC_SRC1C_YYY 6 -# define R300_FPI0_ARGC_SRC1C_ZZZ 7 -# define R300_FPI0_ARGC_SRC2C_XYZ 8 -# define R300_FPI0_ARGC_SRC2C_XXX 9 -# define 
R300_FPI0_ARGC_SRC2C_YYY 10 -# define R300_FPI0_ARGC_SRC2C_ZZZ 11 -# define R300_FPI0_ARGC_SRC0A 12 -# define R300_FPI0_ARGC_SRC1A 13 -# define R300_FPI0_ARGC_SRC2A 14 -# define R300_FPI0_ARGC_SRC1C_LRP 15 -# define R300_FPI0_ARGC_ZERO 20 -# define R300_FPI0_ARGC_ONE 21 - /* GUESS */ -# define R300_FPI0_ARGC_HALF 22 -# define R300_FPI0_ARGC_SRC0C_YZX 23 -# define R300_FPI0_ARGC_SRC1C_YZX 24 -# define R300_FPI0_ARGC_SRC2C_YZX 25 -# define R300_FPI0_ARGC_SRC0C_ZXY 26 -# define R300_FPI0_ARGC_SRC1C_ZXY 27 -# define R300_FPI0_ARGC_SRC2C_ZXY 28 -# define R300_FPI0_ARGC_SRC0CA_WZY 29 -# define R300_FPI0_ARGC_SRC1CA_WZY 30 -# define R300_FPI0_ARGC_SRC2CA_WZY 31 - -# define R300_FPI0_ARG0C_SHIFT 0 -# define R300_FPI0_ARG0C_MASK (31 << 0) -# define R300_FPI0_ARG0C_NEG (1 << 5) -# define R300_FPI0_ARG0C_ABS (1 << 6) -# define R300_FPI0_ARG1C_SHIFT 7 -# define R300_FPI0_ARG1C_MASK (31 << 7) -# define R300_FPI0_ARG1C_NEG (1 << 12) -# define R300_FPI0_ARG1C_ABS (1 << 13) -# define R300_FPI0_ARG2C_SHIFT 14 -# define R300_FPI0_ARG2C_MASK (31 << 14) -# define R300_FPI0_ARG2C_NEG (1 << 19) -# define R300_FPI0_ARG2C_ABS (1 << 20) -# define R300_FPI0_SPECIAL_LRP (1 << 21) -# define R300_FPI0_OUTC_MAD (0 << 23) -# define R300_FPI0_OUTC_DP3 (1 << 23) -# define R300_FPI0_OUTC_DP4 (2 << 23) -# define R300_FPI0_OUTC_MIN (4 << 23) -# define R300_FPI0_OUTC_MAX (5 << 23) -# define R300_FPI0_OUTC_CMPH (7 << 23) -# define R300_FPI0_OUTC_CMP (8 << 23) -# define R300_FPI0_OUTC_FRC (9 << 23) -# define R300_FPI0_OUTC_REPL_ALPHA (10 << 23) -# define R300_FPI0_OUTC_SAT (1 << 30) -# define R300_FPI0_INSERT_NOP (1 << 31) - -#define R300_PFS_INSTR2_0 0x49C0 -# define R300_FPI2_ARGA_SRC0C_X 0 -# define R300_FPI2_ARGA_SRC0C_Y 1 -# define R300_FPI2_ARGA_SRC0C_Z 2 -# define R300_FPI2_ARGA_SRC1C_X 3 -# define R300_FPI2_ARGA_SRC1C_Y 4 -# define R300_FPI2_ARGA_SRC1C_Z 5 -# define R300_FPI2_ARGA_SRC2C_X 6 -# define R300_FPI2_ARGA_SRC2C_Y 7 -# define R300_FPI2_ARGA_SRC2C_Z 8 -# define R300_FPI2_ARGA_SRC0A 9 -# 
define R300_FPI2_ARGA_SRC1A 10 -# define R300_FPI2_ARGA_SRC2A 11 -# define R300_FPI2_ARGA_SRC1A_LRP 15 -# define R300_FPI2_ARGA_ZERO 16 -# define R300_FPI2_ARGA_ONE 17 - /* GUESS */ -# define R300_FPI2_ARGA_HALF 18 -# define R300_FPI2_ARG0A_SHIFT 0 -# define R300_FPI2_ARG0A_MASK (31 << 0) -# define R300_FPI2_ARG0A_NEG (1 << 5) - /* GUESS */ -# define R300_FPI2_ARG0A_ABS (1 << 6) -# define R300_FPI2_ARG1A_SHIFT 7 -# define R300_FPI2_ARG1A_MASK (31 << 7) -# define R300_FPI2_ARG1A_NEG (1 << 12) - /* GUESS */ -# define R300_FPI2_ARG1A_ABS (1 << 13) -# define R300_FPI2_ARG2A_SHIFT 14 -# define R300_FPI2_ARG2A_MASK (31 << 14) -# define R300_FPI2_ARG2A_NEG (1 << 19) - /* GUESS */ -# define R300_FPI2_ARG2A_ABS (1 << 20) -# define R300_FPI2_SPECIAL_LRP (1 << 21) -# define R300_FPI2_OUTA_MAD (0 << 23) -# define R300_FPI2_OUTA_DP4 (1 << 23) -# define R300_FPI2_OUTA_MIN (2 << 23) -# define R300_FPI2_OUTA_MAX (3 << 23) -# define R300_FPI2_OUTA_CMP (6 << 23) -# define R300_FPI2_OUTA_FRC (7 << 23) -# define R300_FPI2_OUTA_EX2 (8 << 23) -# define R300_FPI2_OUTA_LG2 (9 << 23) -# define R300_FPI2_OUTA_RCP (10 << 23) -# define R300_FPI2_OUTA_RSQ (11 << 23) -# define R300_FPI2_OUTA_SAT (1 << 30) -# define R300_FPI2_UNKNOWN_31 (1 << 31) +#define R300_US_ALU_RGB_ADDR_0 0x46C0 +# define R300_ALU_SRC0C_SHIFT 0 +# define R300_ALU_SRC0C_MASK (31 << 0) +# define R300_ALU_SRC0C_CONST (1 << 5) +# define R300_ALU_SRC1C_SHIFT 6 +# define R300_ALU_SRC1C_MASK (31 << 6) +# define R300_ALU_SRC1C_CONST (1 << 11) +# define R300_ALU_SRC2C_SHIFT 12 +# define R300_ALU_SRC2C_MASK (31 << 12) +# define R300_ALU_SRC2C_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTC_SHIFT 18 +# define R300_ALU_DSTC_MASK (31 << 18) +# define R300_ALU_DSTC_REG_MASK_SHIFT 23 +# define R300_ALU_DSTC_REG_X (1 << 23) +# define R300_ALU_DSTC_REG_Y (1 << 24) +# define R300_ALU_DSTC_REG_Z (1 << 25) +# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26 +# define R300_ALU_DSTC_OUTPUT_X (1 << 26) +# define 
R300_ALU_DSTC_OUTPUT_Y (1 << 27) +# define R300_ALU_DSTC_OUTPUT_Z (1 << 28) + +#define R300_US_ALU_ALPHA_ADDR_0 0x47C0 +# define R300_ALU_SRC0A_SHIFT 0 +# define R300_ALU_SRC0A_MASK (31 << 0) +# define R300_ALU_SRC0A_CONST (1 << 5) +# define R300_ALU_SRC1A_SHIFT 6 +# define R300_ALU_SRC1A_MASK (31 << 6) +# define R300_ALU_SRC1A_CONST (1 << 11) +# define R300_ALU_SRC2A_SHIFT 12 +# define R300_ALU_SRC2A_MASK (31 << 12) +# define R300_ALU_SRC2A_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTA_SHIFT 18 +# define R300_ALU_DSTA_MASK (31 << 18) +# define R300_ALU_DSTA_REG (1 << 23) +# define R300_ALU_DSTA_OUTPUT (1 << 24) +# define R300_ALU_DSTA_DEPTH (1 << 27) + +#define R300_US_ALU_RGB_INST_0 0x48C0 +# define R300_ALU_ARGC_SRC0C_XYZ 0 +# define R300_ALU_ARGC_SRC0C_XXX 1 +# define R300_ALU_ARGC_SRC0C_YYY 2 +# define R300_ALU_ARGC_SRC0C_ZZZ 3 +# define R300_ALU_ARGC_SRC1C_XYZ 4 +# define R300_ALU_ARGC_SRC1C_XXX 5 +# define R300_ALU_ARGC_SRC1C_YYY 6 +# define R300_ALU_ARGC_SRC1C_ZZZ 7 +# define R300_ALU_ARGC_SRC2C_XYZ 8 +# define R300_ALU_ARGC_SRC2C_XXX 9 +# define R300_ALU_ARGC_SRC2C_YYY 10 +# define R300_ALU_ARGC_SRC2C_ZZZ 11 +# define R300_ALU_ARGC_SRC0A 12 +# define R300_ALU_ARGC_SRC1A 13 +# define R300_ALU_ARGC_SRC2A 14 +# define R300_ALU_ARGC_SRCP_XYZ 15 +# define R300_ALU_ARGC_SRCP_XXX 16 +# define R300_ALU_ARGC_SRCP_YYY 17 +# define R300_ALU_ARGC_SRCP_ZZZ 18 +# define R300_ALU_ARGC_SRCP_WWW 19 +# define R300_ALU_ARGC_ZERO 20 +# define R300_ALU_ARGC_ONE 21 +# define R300_ALU_ARGC_HALF 22 +# define R300_ALU_ARGC_SRC0C_YZX 23 +# define R300_ALU_ARGC_SRC1C_YZX 24 +# define R300_ALU_ARGC_SRC2C_YZX 25 +# define R300_ALU_ARGC_SRC0C_ZXY 26 +# define R300_ALU_ARGC_SRC1C_ZXY 27 +# define R300_ALU_ARGC_SRC2C_ZXY 28 +# define R300_ALU_ARGC_SRC0CA_WZY 29 +# define R300_ALU_ARGC_SRC1CA_WZY 30 +# define R300_ALU_ARGC_SRC2CA_WZY 31 + +# define R300_ALU_ARG0C_SHIFT 0 +# define R300_ALU_ARG0C_MASK (31 << 0) +# define R300_ALU_ARG0C_NOP (0 << 5) +# 
define R300_ALU_ARG0C_NEG (1 << 5) +# define R300_ALU_ARG0C_ABS (2 << 5) +# define R300_ALU_ARG0C_NAB (3 << 5) +# define R300_ALU_ARG1C_SHIFT 7 +# define R300_ALU_ARG1C_MASK (31 << 7) +# define R300_ALU_ARG1C_NOP (0 << 12) +# define R300_ALU_ARG1C_NEG (1 << 12) +# define R300_ALU_ARG1C_ABS (2 << 12) +# define R300_ALU_ARG1C_NAB (3 << 12) +# define R300_ALU_ARG2C_SHIFT 14 +# define R300_ALU_ARG2C_MASK (31 << 14) +# define R300_ALU_ARG2C_NOP (0 << 19) +# define R300_ALU_ARG2C_NEG (1 << 19) +# define R300_ALU_ARG2C_ABS (2 << 19) +# define R300_ALU_ARG2C_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTC_MAD (0 << 23) +# define R300_ALU_OUTC_DP3 (1 << 23) +# define R300_ALU_OUTC_DP4 (2 << 23) +# define R300_ALU_OUTC_D2A (3 << 23) +# define R300_ALU_OUTC_MIN (4 << 23) +# define R300_ALU_OUTC_MAX (5 << 23) +# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CMP (8 << 23) +# define R300_ALU_OUTC_FRC (9 << 23) +# define R300_ALU_OUTC_REPL_ALPHA (10 << 23) + +# define R300_ALU_OUTC_MOD_NOP (0 << 27) +# define R300_ALU_OUTC_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTC_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTC_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTC_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTC_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTC_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTC_CLAMP (1 << 30) +# define R300_ALU_INSERT_NOP (1 << 31) + +#define R300_US_ALU_ALPHA_INST_0 0x49C0 +# define R300_ALU_ARGA_SRC0C_X 0 +# define R300_ALU_ARGA_SRC0C_Y 1 +# define R300_ALU_ARGA_SRC0C_Z 2 +# define R300_ALU_ARGA_SRC1C_X 3 +# define R300_ALU_ARGA_SRC1C_Y 4 +# define R300_ALU_ARGA_SRC1C_Z 5 +# define R300_ALU_ARGA_SRC2C_X 6 +# define R300_ALU_ARGA_SRC2C_Y 7 +# define R300_ALU_ARGA_SRC2C_Z 8 +# define R300_ALU_ARGA_SRC0A 9 +# define R300_ALU_ARGA_SRC1A 10 +# define R300_ALU_ARGA_SRC2A 11 +# define 
R300_ALU_ARGA_SRCP_X 12 +# define R300_ALU_ARGA_SRCP_Y 13 +# define R300_ALU_ARGA_SRCP_Z 14 +# define R300_ALU_ARGA_SRCP_W 15 + +# define R300_ALU_ARGA_ZERO 16 +# define R300_ALU_ARGA_ONE 17 +# define R300_ALU_ARGA_HALF 18 +# define R300_ALU_ARG0A_SHIFT 0 +# define R300_ALU_ARG0A_MASK (31 << 0) +# define R300_ALU_ARG0A_NOP (0 << 5) +# define R300_ALU_ARG0A_NEG (1 << 5) +# define R300_ALU_ARG0A_ABS (2 << 5) +# define R300_ALU_ARG0A_NAB (3 << 5) +# define R300_ALU_ARG1A_SHIFT 7 +# define R300_ALU_ARG1A_MASK (31 << 7) +# define R300_ALU_ARG1A_NOP (0 << 12) +# define R300_ALU_ARG1A_NEG (1 << 12) +# define R300_ALU_ARG1A_ABS (2 << 12) +# define R300_ALU_ARG1A_NAB (3 << 12) +# define R300_ALU_ARG2A_SHIFT 14 +# define R300_ALU_ARG2A_MASK (31 << 14) +# define R300_ALU_ARG2A_NOP (0 << 19) +# define R300_ALU_ARG2A_NEG (1 << 19) +# define R300_ALU_ARG2A_ABS (2 << 19) +# define R300_ALU_ARG2A_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTA_MAD (0 << 23) +# define R300_ALU_OUTA_DP4 (1 << 23) +# define R300_ALU_OUTA_MIN (2 << 23) +# define R300_ALU_OUTA_MAX (3 << 23) +# define R300_ALU_OUTA_CND (5 << 23) +# define R300_ALU_OUTA_CMP (6 << 23) +# define R300_ALU_OUTA_FRC (7 << 23) +# define R300_ALU_OUTA_EX2 (8 << 23) +# define R300_ALU_OUTA_LG2 (9 << 23) +# define R300_ALU_OUTA_RCP (10 << 23) +# define R300_ALU_OUTA_RSQ (11 << 23) + +# define R300_ALU_OUTA_MOD_NOP (0 << 27) +# define R300_ALU_OUTA_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTA_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTA_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTA_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTA_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTA_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTA_CLAMP (1 << 30) /* END: Fragment program instruction set */ /* Fog: Fog Blending Enable */ @@ -1967,7 +2004,7 @@ USE OR OTHER DEALINGS IN THE 
SOFTWARE. #define R300_PFS_PARAM_0_Y 0x4C04 #define R300_PFS_PARAM_0_Z 0x4C08 #define R300_PFS_PARAM_0_W 0x4C0C -/* GUESS: PARAM_31 is last, based on native limits reported by fglrx */ +/* last consts */ #define R300_PFS_PARAM_31_X 0x4DF0 #define R300_PFS_PARAM_31_Y 0x4DF4 #define R300_PFS_PARAM_31_Z 0x4DF8 @@ -3103,12 +3140,12 @@ enum { # define R500_TEX_SEM_ACQUIRE (1 << 25) # define R500_TEX_IGNORE_UNCOVERED (1 << 26) # define R500_TEX_UNSCALED (1 << 27) -#define R500_US_W_FMT 0x46b4 -# define R500_W_FMT_W0 (0 << 0) -# define R500_W_FMT_W24 (1 << 0) -# define R500_W_FMT_W24FP (2 << 0) -# define R500_W_SRC_US (0 << 2) -# define R500_W_SRC_RAS (1 << 2) +#define R300_US_W_FMT 0x46b4 +# define R300_W_FMT_W0 (0 << 0) +# define R300_W_FMT_W24 (1 << 0) +# define R300_W_FMT_W24FP (2 << 0) +# define R300_W_SRC_US (0 << 2) +# define R300_W_SRC_RAS (1 << 2) /* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 012c8d5e3f..e060523ff8 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1445,21 +1445,21 @@ static void r300SetupTextures(GLcontext * ctx) int opcode; unsigned long val; - unit = fp->tex.inst[i] >> R300_FPITX_IMAGE_SHIFT; + unit = fp->tex.inst[i] >> R300_TEX_ID_SHIFT; unit &= 15; val = fp->tex.inst[i]; - val &= ~R300_FPITX_IMAGE_MASK; + val &= ~R300_TEX_ID_MASK; opcode = - (val & R300_FPITX_OPCODE_MASK) >> R300_FPITX_OPCODE_SHIFT; - if (opcode == R300_FPITX_OP_KIL) { + (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT; + if (opcode == R300_TEX_OP_KIL) { r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; } else { if (tmu_mappings[unit] >= 0) { val |= tmu_mappings[unit] << - R300_FPITX_IMAGE_SHIFT; + R300_TEX_ID_SHIFT; r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; } else { // We get here when the corresponding texture image is incomplete @@ -1470,7 +1470,7 @@ static void r300SetupTextures(GLcontext * ctx) 
} r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_PFS_TEXI_0, fp->tex.length); + cmdpacket0(R300_US_TEX_INST_0, fp->tex.length); } if (RADEON_DEBUG & DEBUG_STATE) @@ -2371,25 +2371,25 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } R300_STATECHANGE(rmesa, fpi[0]); - rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, fp->alu_end + 1); + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst0; } R300_STATECHANGE(rmesa, fpi[1]); - rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, fp->alu_end + 1); + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst1; } R300_STATECHANGE(rmesa, fpi[2]); - rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, fp->alu_end + 1); + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst2; } R300_STATECHANGE(rmesa, fpi[3]); - rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, fp->alu_end + 1); + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst3; } @@ -2406,10 +2406,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) { if (i < (fp->cur_node + 1)) { rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (fp->node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) | - (fp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT) | - (fp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) | - (fp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT) | + 
(fp->node[i].alu_offset << R300_ALU_START_SHIFT) | + (fp->node[i].alu_end << R300_ALU_SIZE_SHIFT) | + (fp->node[i].tex_offset << R300_TEX_START_SHIFT) | + (fp->node[i].tex_end << R300_TEX_SIZE_SHIFT) | fp->node[i].flags; } else { rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; -- cgit v1.2.3 From d09aa2138bdedf32569844fa14cf88f28d41020a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 May 2008 13:38:30 -0400 Subject: R500: fixup r300EmitClearState() FP for r5xx --- src/mesa/drivers/dri/r300/r300_ioctl.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ede0bec566..e95c797fa2 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -360,9 +360,20 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, r500fp); r500fp_start_fragment(0, 12); - e32(0x7808); - e32(R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED); - e32(R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R | + e32((R500_INST_TYPE_TEX | + R500_INST_TEX_SEM_WAIT | + R500_INST_RGB_WMASK_R | + R500_INST_RGB_WMASK_G | + R500_INST_RGB_WMASK_B | + R500_INST_ALPHA_WMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP)); + e32(R500_TEX_ID(0) | + R500_TEX_INST_LD | + R500_TEX_SEM_ACQUIRE | + R500_TEX_IGNORE_UNCOVERED); + e32(R500_TEX_SRC_ADDR(0) | + R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | R500_TEX_DST_ADDR(0) | R500_TEX_DST_R_SWIZ_R | @@ -388,21 +399,21 @@ static void r300EmitClearState(GLcontext * ctx) R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G | R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK); + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP); e32(R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST | - R500_RGB_SRCP_OP_1_MINUS_2RGB0); + R500_RGB_ADDR2_CONST); 
e32(R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST | - R500_ALPHA_SRCP_OP_1_MINUS_2A0); + R500_ALPHA_ADDR2_CONST); e32(R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | -- cgit v1.2.3 From 8d70181b031d7557dd4083dc041cc7c658fddfc4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 May 2008 14:02:29 -0400 Subject: R300: clean up Fog registers --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 6 +- src/mesa/drivers/dri/r300/r300_ioctl.c | 4 +- src/mesa/drivers/dri/r300/r300_reg.h | 99 ++++++++++++++++----------------- src/mesa/drivers/dri/r300/r300_state.c | 34 +++++------ 4 files changed, 71 insertions(+), 72 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 82c7eb0935..07384eecdd 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -439,11 +439,11 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); - r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(FG_FOG_BLEND, 1); + r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1); ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); - r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(FG_FOG_COLOR_R, 3); + r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3); ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0); - r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(FG_ALPHA_FUNC, 2); + r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2); ALLOC_STATE(fg_depth_src, always, 2, 0); r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1); ALLOC_STATE(rb3d_cctl, always, 2, 0); diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index e95c797fa2..3863a54031 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ 
b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -215,7 +215,7 @@ static void r300EmitClearState(GLcontext * ctx) /* disable fog */ R300_STATECHANGE(r300, fogs); - reg_start(FG_FOG_BLEND, 0); + reg_start(R300_FG_FOG_BLEND, 0); e32(0x0); R300_STATECHANGE(r300, vir[1]); @@ -271,7 +271,7 @@ static void r300EmitClearState(GLcontext * ctx) efloat(0.0); R300_STATECHANGE(r300, at); - reg_start(FG_ALPHA_FUNC, 0); + reg_start(R300_FG_ALPHA_FUNC, 0); e32(0x0); R300_STATECHANGE(r300, bld); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 558d327028..5948c9b22c 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1933,60 +1933,61 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* END: Fragment program instruction set */ /* Fog: Fog Blending Enable */ -#define FG_FOG_BLEND 0x4bc0 -# define FG_FOG_BLEND_DISABLE (0 << 0) -# define FG_FOG_BLEND_ENABLE (1 << 0) -# define FG_FOG_BLEND_FN_LINEAR (0 << 1) -# define FG_FOG_BLEND_FN_EXP (1 << 1) -# define FG_FOG_BLEND_FN_EXP2 (2 << 1) -# define FG_FOG_BLEND_FN_CONSTANT (3 << 1) -# define FG_FOG_BLEND_FN_MASK 0x00000006 +#define R300_FG_FOG_BLEND 0x4bc0 +# define R300_FG_FOG_BLEND_DISABLE (0 << 0) +# define R300_FG_FOG_BLEND_ENABLE (1 << 0) +# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1) +# define R300_FG_FOG_BLEND_FN_EXP (1 << 1) +# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1) +# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1) +# define R300_FG_FOG_BLEND_FN_MASK (3 << 1) /* Fog: Red Component of Fog Color */ -#define FG_FOG_COLOR_R 0x4bc8 +#define R300_FG_FOG_COLOR_R 0x4bc8 /* Fog: Green Component of Fog Color */ -#define FG_FOG_COLOR_G 0x4bcc +#define R300_FG_FOG_COLOR_G 0x4bcc /* Fog: Blue Component of Fog Color */ -#define FG_FOG_COLOR_B 0x4bd0 -# define FG_FOG_COLOR_MASK 0x000001ff +#define R300_FG_FOG_COLOR_B 0x4bd0 +# define R300_FG_FOG_COLOR_MASK 0x000003ff /* Fog: Constant Factor for Fog Blending */ -#define FG_FOG_FACTOR 0x4bc4 -# define FG_FOG_FACTOR_MASK 
0x000001ff +#define R300_FG_FOG_FACTOR 0x4bc4 +# define FG_FOG_FACTOR_MASK 0x000003ff /* Fog: Alpha function */ -#define FG_ALPHA_FUNC 0x4bd4 -# define R300_REF_ALPHA_MASK 0x000000ff -# define FG_ALPHA_FUNC_NEVER (0 << 8) -# define FG_ALPHA_FUNC_LESS (1 << 8) -# define FG_ALPHA_FUNC_EQUAL (2 << 8) -# define FG_ALPHA_FUNC_LE (3 << 8) -# define FG_ALPHA_FUNC_GREATER (4 << 8) -# define FG_ALPHA_FUNC_NOTEQUAL (5 << 8) -# define FG_ALPHA_FUNC_GE (6 << 8) -# define FG_ALPHA_FUNC_ALWAYS (7 << 8) -# define R300_ALPHA_TEST_OP_MASK (7 << 8) -# define FG_ALPHA_FUNC_DISABLE (0 << 11) -# define FG_ALPHA_FUNC_ENABLE (1 << 11) -# define FG_ALPHA_FUNC_10BIT (0 << 12) -# define FG_ALPHA_FUNC_8BIT (1 << 12) -/* gap in AMD spec */ -# define FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) -# define FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) -# define FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) -# define FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) -/* gap in AMD spec */ -# define FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) -# define FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) -/* gap in AMD spec */ -# define FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) /* Not supported in R520. Default R300 and RV350 behaviour. 
*/ -# define FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ -# define FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) -# define FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) -/* gap in AMD spec */ -# define FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) -# define FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) -/* gap in AMD spec */ +#define R300_FG_ALPHA_FUNC 0x4bd4 +# define R300_FG_ALPHA_FUNC_VAL_MASK 0x0000000f +# define R300_FG_ALPHA_FUNC_NEVER (0 << 8) +# define R300_FG_ALPHA_FUNC_LESS (1 << 8) +# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8) +# define R300_FG_ALPHA_FUNC_LE (3 << 8) +# define R300_FG_ALPHA_FUNC_GREATER (4 << 8) +# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8) +# define R300_FG_ALPHA_FUNC_GE (6 << 8) +# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8) +# define R300_ALPHA_TEST_OP_MASK (7 << 8) +# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11) +# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11) + +# define R500_FG_ALPHA_FUNC_10BIT (0 << 12) +# define R500_FG_ALPHA_FUNC_8BIT (1 << 12) + +# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) +# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) +# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) +# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) + +# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) +# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) + +# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) +# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) + +# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) +# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) + /* Fog: Where does the depth come from? */ #define R300_FG_DEPTH_SRC 0x4bd8 @@ -1994,8 +1995,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_FG_DEPTH_SRC_SHADER (1 << 0) /* Fog: Alpha Compare Value */ -#define FG_ALPHA_VALUE 0x4be0 -# define FG_ALPHA_VALUE_MASK 0x0000ffff +#define R500_FG_ALPHA_VALUE 0x4be0 +# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff /* gap */ @@ -2012,14 +2013,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Unpipelined. */ #define R300_RB3D_CCTL 0x4e00 -/* gap in AMD docs */ # define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5) # define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5) # define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7) # define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7) -/* gap in AMD docs */ # define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9) # define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9) # define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e060523ff8..30e853b01f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -450,25 +450,25 @@ static void r300SetAlphaState(GLcontext * ctx) switch (ctx->Color.AlphaFunc) { case GL_NEVER: - pp_misc |= FG_ALPHA_FUNC_NEVER; + pp_misc |= R300_FG_ALPHA_FUNC_NEVER; break; case GL_LESS: - pp_misc |= FG_ALPHA_FUNC_LESS; + pp_misc |= R300_FG_ALPHA_FUNC_LESS; break; case GL_EQUAL: - pp_misc |= FG_ALPHA_FUNC_EQUAL; + pp_misc |= R300_FG_ALPHA_FUNC_EQUAL; break; case GL_LEQUAL: - pp_misc |= FG_ALPHA_FUNC_LE; + pp_misc |= R300_FG_ALPHA_FUNC_LE; break; case GL_GREATER: - pp_misc |= FG_ALPHA_FUNC_GREATER; + pp_misc |= R300_FG_ALPHA_FUNC_GREATER; break; case GL_NOTEQUAL: - pp_misc |= FG_ALPHA_FUNC_NOTEQUAL; + pp_misc |= R300_FG_ALPHA_FUNC_NOTEQUAL; break; case GL_GEQUAL: - pp_misc |= FG_ALPHA_FUNC_GE; + pp_misc |= R300_FG_ALPHA_FUNC_GE; break; case GL_ALWAYS: /*pp_misc |= FG_ALPHA_FUNC_ALWAYS; */ @@ -477,8 +477,8 @@ static void r300SetAlphaState(GLcontext * 
ctx) } if (really_enabled) { - pp_misc |= FG_ALPHA_FUNC_ENABLE; - pp_misc |= (refByte & R300_REF_ALPHA_MASK); + pp_misc |= R300_FG_ALPHA_FUNC_ENABLE; + pp_misc |= (refByte & R300_FG_ALPHA_FUNC_VAL_MASK); } else { pp_misc = 0x0; } @@ -716,8 +716,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) | - FG_FOG_BLEND_FN_LINEAR; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_LINEAR; if (ctx->Fog.Start == ctx->Fog.End) { fogScale.f = -1.0; @@ -734,8 +734,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. - cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) | - FG_FOG_BLEND_FN_EXP; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_EXP; fogScale.f = 0.0933 * ctx->Fog.Density; fogStart.f = 0.0; break; @@ -743,8 +743,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) R300_STATECHANGE(r300, fogs); r300->hw.fogs.cmd[R300_FOGS_STATE] = (r300->hw.fogs. 
- cmd[R300_FOGS_STATE] & ~FG_FOG_BLEND_FN_MASK) | - FG_FOG_BLEND_FN_EXP2; + cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) | + R300_FG_FOG_BLEND_FN_EXP2; fogScale.f = 0.3 * ctx->Fog.Density; fogStart.f = 0.0; default: @@ -808,7 +808,7 @@ static void r300SetFogState(GLcontext * ctx, GLboolean state) R300_STATECHANGE(r300, fogs); if (state) { - r300->hw.fogs.cmd[R300_FOGS_STATE] |= FG_FOG_BLEND_ENABLE; + r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FG_FOG_BLEND_ENABLE; r300Fogfv(ctx, GL_FOG_MODE, NULL); r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density); @@ -816,7 +816,7 @@ static void r300SetFogState(GLcontext * ctx, GLboolean state) r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End); r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); } else { - r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~FG_FOG_BLEND_ENABLE; + r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FG_FOG_BLEND_ENABLE; } } -- cgit v1.2.3 From c5b7a1ee3cae26854c6a489ee2977e4134efabfd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 May 2008 14:32:30 -0400 Subject: R300: clean up CB registers --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 +- src/mesa/drivers/dri/r300/r300_emit.c | 4 +- src/mesa/drivers/dri/r300/r300_reg.h | 129 +++++++++++++++++--------------- src/mesa/drivers/dri/r300/r300_state.c | 6 +- 4 files changed, 76 insertions(+), 67 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 07384eecdd..0de1190e9f 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -460,9 +460,9 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); - r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(RB3D_AARESOLVE_CTL, 1); + r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1); ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, 
always, 3, 0); - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3); diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index dc08b642c5..18d9ff2fac 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -550,8 +550,8 @@ void r300EmitCacheFlush(r300ContextPtr rmesa) drm_radeon_cmd_header_t *cmd = NULL; reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); reg_start(ZB_ZCACHE_CTLSTAT, 0); e32(ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 5948c9b22c..777b6225df 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -2043,9 +2043,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define R300_RB3D_CBLEND 0x4E04 #define R300_RB3D_ABLEND 0x4E08 /* the following only appear in CBLEND */ -# define R300_BLEND_ENABLE (1 << 0) -# define R300_BLEND_UNKNOWN (3 << 1) -# define R300_BLEND_NO_SEPARATE (1 << 3) +# define R300_ALPHA_BLEND_ENABLE (1 << 0) +# define R300_SEPARATE_ALPHA_ENABLE (1 << 1) +# define R300_READ_ENABLE (1 << 2) +# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) + /* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) # define R300_COMB_FCN_ADD_CLAMP (0 << 12) @@ -2120,7 +2128,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Color Buffer Address Offset of multibuffer 0. Unpipelined. */ #define R300_RB3D_COLOROFFSET0 0x4E28 -# define R300_COLOROFFSET_MASK 0xFFFFFFF0 /* GUESS */ +# define R300_COLOROFFSET_MASK 0xFFFFFFE0 /* Color Buffer Address Offset of multibuffer 1. Unpipelined. */ #define R300_RB3D_COLOROFFSET1 0x4E2C /* Color Buffer Address Offset of multibuffer 2. Unpipelined. */ @@ -2137,7 +2145,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Bit 18: Extremely weird tile like, but some pixels duplicated? */ #define R300_RB3D_COLORPITCH0 0x4E38 -# define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS, should be 13:1 */ +# define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) @@ -2147,12 +2155,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) # define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19) -# define R300_COLOR_FORMAT_ARGB10101010 (0 << 21) -# define R300_COLOR_FORMAT_UV1010 (1 << 21) -# define R300_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ +# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21) +# define R500_COLOR_FORMAT_UV1010 (1 << 21) +# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ # define R300_COLOR_FORMAT_ARGB1555 (3 << 21) # define R300_COLOR_FORMAT_RGB565 (4 << 21) -# define R300_COLOR_FORMAT_ARGB2101010 (5 << 21) +# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21) # define R300_COLOR_FORMAT_ARGB8888 (6 << 21) # define R300_COLOR_FORMAT_ARGB32323232 (7 << 21) /* reserved */ @@ -2161,7 +2169,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLOR_FORMAT_VYUY (11 << 21) # define R300_COLOR_FORMAT_YVYU (12 << 21) # define R300_COLOR_FORMAT_UV88 (13 << 21) -# define R300_COLOR_FORMAT_I10 (14 << 21) +# define R500_COLOR_FORMAT_I10 (14 << 21) # define R300_COLOR_FORMAT_ARGB4444 (15 << 21) #define R300_RB3D_COLORPITCH1 0x4E3C #define R300_RB3D_COLORPITCH2 0x4E40 @@ -2180,16 +2188,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Set to 0A before 3D operations, set to 02 afterwards. 
*/ #define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c -# define RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) -# define RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) -# define RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) -# define RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) -# define RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) -# define RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) -# define RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) -# define RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) -# define RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) -# define RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) #define R300_RB3D_DITHER_CTL 0x4E50 # define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0) @@ -2204,68 +2212,67 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Resolve buffer destination address. The cache must be empty before changing * this register if the cb is in resolve mode. 
Unpipelined */ -#define RB3D_AARESOLVE_OFFSET 0x4e80 -# define RB3D_AARESOLVE_OFFSET_SHIFT 5 -# define RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ +#define R300_RB3D_AARESOLVE_OFFSET 0x4e80 +# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5 +# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ /* Resolve Buffer Pitch and Tiling Control. The cache must be empty before * changing this register if the cb is in resolve mode. Unpipelined */ -#define RB3D_AARESOLVE_PITCH 0x4e84 -# define RB3D_AARESOLVE_PITCH_SHIFT 1 -# define RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ +#define R300_RB3D_AARESOLVE_PITCH 0x4e84 +# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1 +# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ /* Resolve Buffer Control. Unpipelined */ -#define RB3D_AARESOLVE_CTL 0x4e88 -# define RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) -# define RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) -# define RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) -# define RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) -# define RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) -# define RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) +#define R300_RB3D_AARESOLVE_CTL 0x4e88 +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) /* Discard src pixels less than or equal to threshold. 
*/ -#define RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 +#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 /* Discard src pixels greater than or equal to threshold. */ -#define RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 -# define RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 +#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 /* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */ -#define RB3D_ROPCNTL 0x4e18 +#define R300_RB3D_ROPCNTL 0x4e18 /* TODO: fill in content here */ /* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. 
*/ -#define RB3D_CLRCMP_FLIPE 0x4e1c +#define R300_RB3D_CLRCMP_FLIPE 0x4e1c /* Sets the fifo sizes */ -#define RB3D_FIFO_SIZE 0x4ef4 -# define RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) -# define RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) -# define RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) -# define RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) -/* gap in AMD spec */ +#define R500_RB3D_FIFO_SIZE 0x4ef4 +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) /* Constant color used by the blender. Pipelined through the blender. */ -#define RB3D_CONSTANT_COLOR_AR 0x4ef8 -# define RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff -# define RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 -# define RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 -# define RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 +#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8 +# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 /* Constant color used by the blender. Pipelined through the blender. 
*/ -#define RB3D_CONSTANT_COLOR_GB 0x4efc -# define RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff -# define RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 -# define RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 -# define RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 +#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 /* gap */ /* There seems to be no "write only" setting, so use Z-test = ALWAYS diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 30e853b01f..69c672e286 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -189,7 +189,7 @@ static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn, */ #if 0 if (new_ablend == new_cblend) { - new_cblend |= R300_BLEND_NO_SEPARATE; + new_cblend |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; } #endif new_cblend |= cbits; @@ -295,7 +295,9 @@ static void r300SetBlendState(GLcontext * ctx) r300SetBlendCntl(r300, func, eqn, - R300_BLEND_UNKNOWN | R300_BLEND_ENABLE, funcA, eqnA); + (R300_SEPARATE_ALPHA_ENABLE | + R300_READ_ENABLE | + R300_ALPHA_BLEND_ENABLE), funcA, eqnA); } static void r300BlendEquationSeparate(GLcontext * ctx, -- cgit v1.2.3 From de3fc8b1c47eaa87917e1886d4a0988327438038 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 May 2008 15:46:23 -0400 Subject: R3xx: clean up ZB registers --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 12 +- src/mesa/drivers/dri/r300/r300_emit.c | 6 +- src/mesa/drivers/dri/r300/r300_ioctl.c | 28 ++-- src/mesa/drivers/dri/r300/r300_reg.h | 219 ++++++++++++++++---------------- src/mesa/drivers/dri/r300/r300_state.c | 102 +++++++-------- 5 files changed, 181 insertions(+), 186 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c 
b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 0de1190e9f..a41dee50ff 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -465,20 +465,20 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = - cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3); + cmdpacket0(R300_ZB_CNTL, 3); ALLOC_STATE(zstencil_format, always, 5, 0); r300->hw.zstencil_format.cmd[0] = - cmdpacket0(ZB_FORMAT, 4); + cmdpacket0(R300_ZB_FORMAT, 4); ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); - r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(ZB_DEPTHOFFSET, 2); + r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2); ALLOC_STATE(zb_depthclearvalue, always, 2, 0); - r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(ZB_DEPTHCLEARVALUE, 1); + r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); ALLOC_STATE(unk4F30, always, 3, 0); r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2); ALLOC_STATE(zb_hiz_offset, always, 2, 0); - r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(ZB_HIZ_OFFSET, 1); + r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1); ALLOC_STATE(zb_hiz_pitch, always, 2, 0); - r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(ZB_HIZ_PITCH, 1); + r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1); /* VPU only on TCL */ if (has_tcl) { diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 18d9ff2fac..2ea17ad0a7 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -553,7 +553,7 @@ void r300EmitCacheFlush(r300ContextPtr rmesa) e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - reg_start(ZB_ZCACHE_CTLSTAT, 0); - e32(ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + 
reg_start(R300_ZB_ZCACHE_CTLSTAT, 0); + e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); } diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 3863a54031..bf12c2cfbf 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -118,7 +118,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) } R300_STATECHANGE(r300, zs); - reg_start(R300_RB3D_ZSTENCIL_CNTL_0, 2); + reg_start(R300_ZB_CNTL, 2); { uint32_t t1, t2; @@ -127,32 +127,32 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) t2 = 0x0; if (flags & CLEARBUFFER_DEPTH) { - t1 |= R300_RB3D_Z_WRITE_ONLY; + t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE; t2 |= - (R300_ZS_ALWAYS << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); - } else { - t1 |= R300_RB3D_Z_DISABLED_1; // disable + (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT); + } else { //XXX + t1 |= R300_STENCIL_FRONT_BACK; // disable } if (flags & CLEARBUFFER_STENCIL) { - t1 |= R300_RB3D_STENCIL_ENABLE; + t1 |= R300_STENCIL_ENABLE; t2 |= (R300_ZS_ALWAYS << - R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | + R300_S_FRONT_FUNC_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) | + R300_S_FRONT_SFAIL_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT) | + R300_S_FRONT_ZPASS_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) | + R300_S_FRONT_ZFAIL_OP_SHIFT) | (R300_ZS_ALWAYS << - R300_RB3D_ZS1_BACK_FUNC_SHIFT) | + R300_S_BACK_FUNC_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) | + R300_S_BACK_SFAIL_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT) | + R300_S_BACK_ZPASS_OP_SHIFT) | (R300_ZS_REPLACE << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT); + R300_S_BACK_ZFAIL_OP_SHIFT); } e32(t1); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 777b6225df..bf61cd4abf 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h 
+++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -2279,19 +2279,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * for this. * Bit (1<<8) is the "test" bit. so plain write is 6 - vd */ -#define R300_RB3D_ZSTENCIL_CNTL_0 0x4F00 -# define R300_RB3D_Z_DISABLED_1 0x00000010 -# define R300_RB3D_Z_DISABLED_2 0x00000014 -# define R300_RB3D_Z_TEST 0x00000012 -# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 -# define R300_RB3D_Z_WRITE_ONLY 0x00000006 - -# define R300_RB3D_Z_TEST 0x00000012 -# define R300_RB3D_Z_TEST_AND_WRITE 0x00000016 -# define R300_RB3D_Z_WRITE_ONLY 0x00000006 -# define R300_RB3D_STENCIL_ENABLE 0x00000001 - -#define R300_RB3D_ZSTENCIL_CNTL_1 0x4f04 +#define R300_ZB_CNTL 0x4F00 +# define R300_STENCIL_ENABLE (1 << 0) +# define R300_Z_ENABLE (1 << 1) +# define R300_Z_WRITE_ENABLE (1 << 2) +# define R300_Z_SIGNED_COMPARE (1 << 3) +# define R300_STENCIL_FRONT_BACK (1 << 4) + +#define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ # define R300_ZS_NEVER 0 # define R300_ZS_LESS 1 @@ -2311,51 +2306,49 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ZS_INVERT 5 # define R300_ZS_INCR_WRAP 6 # define R300_ZS_DECR_WRAP 7 +# define R300_Z_FUNC_SHIFT 0 /* front and back refer to operations done for front and back faces, i.e. 
separate stencil function support */ -# define R300_RB3D_ZS1_DEPTH_FUNC_SHIFT 0 -# define R300_RB3D_ZS1_FRONT_FUNC_SHIFT 3 -# define R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT 6 -# define R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT 9 -# define R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT 12 -# define R300_RB3D_ZS1_BACK_FUNC_SHIFT 15 -# define R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT 18 -# define R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT 21 -# define R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT 24 - -#define ZB_STENCILREFMASK 0x4f08 -# define ZB_STENCILREFMASK_STENCILREF_SHIFT 0 -# define ZB_STENCILREFMASK_STENCIL_MASK 0xff -# define ZB_STENCILREFMASK_STENCILREF_MASK 0x000000ff -# define ZB_STENCILREFMASK_STENCILMASK_SHIFT 8 -# define ZB_STENCILREFMASK_STENCILMASK_MASK 0x0000ff00 -# define ZB_STENCILREFMASK_STENCILWRITEMASK_SHIFT 16 -# define ZB_STENCILREFMASK_STENCILWRITEMASK_MASK 0xffff0000 +# define R300_S_FRONT_FUNC_SHIFT 3 +# define R300_S_FRONT_SFAIL_OP_SHIFT 6 +# define R300_S_FRONT_ZPASS_OP_SHIFT 9 +# define R300_S_FRONT_ZFAIL_OP_SHIFT 12 +# define R300_S_BACK_FUNC_SHIFT 15 +# define R300_S_BACK_SFAIL_OP_SHIFT 18 +# define R300_S_BACK_ZPASS_OP_SHIFT 21 +# define R300_S_BACK_ZFAIL_OP_SHIFT 24 + +#define R300_ZB_STENCILREFMASK 0x4f08 +# define R300_STENCILREF_SHIFT 0 +# define R300_STENCILREF_MASK 0x000000ff +# define R300_STENCILMASK_SHIFT 8 +# define R300_STENCILMASK_MASK 0x0000ff00 +# define R300_STENCILWRITEMASK_SHIFT 16 +# define R300_STENCILWRITEMASK_MASK 0x00ff0000 /* gap */ -#define ZB_FORMAT 0x4f10 -# define ZB_FORMAR_DEPTHFORMAT_16BIT_INT_Z (0 << 0) -# define ZB_FORMAR_DEPTHFORMAT_16BIT_13E3 (1 << 0) -# define ZB_FORMAR_DEPTHFORMAT_24BIT_INT_Z (2 << 0) +#define R300_ZB_FORMAT 0x4f10 +# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0) +# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0) +# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0) /* reserved up to (15 << 0) */ -# define ZB_FORMAR_INVERT_13E3_LEADING_ONES (0 << 4) -# define ZB_FORMAR_INVERT_13E3_LEADING_ZEROS (1 << 4) -# define ZB_FORMAR_PEQ8_UNUSED 
(1 << 5) +# define R300_INVERT_13E3_LEADING_ONES (0 << 4) +# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) -#define R300_RB3D_EARLY_Z 0x4F14 -# define R300_EARLY_Z_DISABLE (0 << 0) -# define R300_EARLY_Z_ENABLE (1 << 0) +#define R300_ZB_ZTOP 0x4F14 +# define R300_ZTOP_DISABLE (0 << 0) +# define R300_ZTOP_ENABLE (1 << 0) /* gap */ -#define ZB_ZCACHE_CTLSTAT 0x4f18 -# define ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) -# define ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) -# define ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) -# define ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) -# define ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) -# define ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) +#define R300_ZB_ZCACHE_CTLSTAT 0x4f18 +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) #define R300_ZB_BW_CNTL 0x4f1c # define R300_HIZ_DISABLE (0 << 0) @@ -2372,31 +2365,32 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) # define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) # define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) -# define R300_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) -# define R300_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) -# define R300_SEQUAL_OPTIMIZE_ENABLE (0 << 8) -# define R300_SEQUAL_OPTIMIZE_DISABLE (1 << 8) -/* gap in AMD docs */ -# define R300_BMASK_ENABLE (0 << 10) -# define R300_BMASK_DISABLE (1 << 10) -# define R300_HIZ_EQUAL_REJECT_DISABLE (0 << 11) -# define R300_HIZ_EQUAL_REJECT_ENABLE (1 << 11) -# define R300_HIZ_FP_EXP_BITS_DISABLE (0 << 12) -# define R300_HIZ_FP_EXP_BITS_1 (1 << 12) -# define R300_HIZ_FP_EXP_BITS_2 (2 << 12) -# define R300_HIZ_FP_EXP_BITS_3 (3 << 12) -# define R300_HIZ_FP_EXP_BITS_4 (4 << 12) -# define R300_HIZ_FP_EXP_BITS_5 (5 << 12) -# define R300_HIZ_FP_INVERT_LEADING_ONES (0 << 15) -# define R300_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) -# define R300_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) -# define R300_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) -# define R300_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) -# define R300_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) -# define R300_PEQ_PACKING_DISABLE (0 << 18) -# define R300_PEQ_PACKING_ENABLE (1 << 18) -# define R300_COVERED_PTR_MASKING_DISABLE (0 << 18) -# define R300_COVERED_PTR_MASKING_ENABLE (1 << 18) + +# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) +# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) +# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8) +# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8) + +# define R500_BMASK_ENABLE (0 << 10) +# define R500_BMASK_DISABLE (1 << 10) +# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11) +# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11) +# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12) +# define R500_HIZ_FP_EXP_BITS_1 (1 << 12) +# define R500_HIZ_FP_EXP_BITS_2 (2 << 12) +# define R500_HIZ_FP_EXP_BITS_3 (3 << 12) +# define R500_HIZ_FP_EXP_BITS_4 (4 << 12) +# define R500_HIZ_FP_EXP_BITS_5 (5 << 12) +# 
define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15) +# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) +# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) +# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) +# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) +# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) +# define R500_PEQ_PACKING_DISABLE (0 << 18) +# define R500_PEQ_PACKING_ENABLE (1 << 18) +# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18) +# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18) /* gap */ @@ -2404,67 +2398,68 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Z Buffer Address Offset. * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. */ -#define ZB_DEPTHOFFSET 0x4f20 +#define R300_ZB_DEPTHOFFSET 0x4f20 /* Z Buffer Pitch and Endian Control */ -#define ZB_DEPTHPITCH 0x4f24 -# define R300_DEPTHPITCH_MASK 0x00001FF8 /* TODO: should be (13:2) */ -# define ZB_DEPTHPITCH_DEPTHMACROTILE_DISABLE (0 << 16) -# define ZB_DEPTHPITCH_DEPTHMACROTILE_ENABLE (1 << 16) -# define ZB_DEPTHPITCH_DEPTHMICROTILE_LINEAR (0 << 17) -# define ZB_DEPTHPITCH_DEPTHMICROTILE_TILED (1 << 17) -# define ZB_DEPTHPITCH_DEPTHMICROTILE_TILED_SQUARE (2 << 17) -# define ZB_DEPTHPITCH_DEPTHENDIAN_NO_SWAP (0 << 18) -# define ZB_DEPTHPITCH_DEPTHENDIAN_WORD_SWAP (1 << 18) -# define ZB_DEPTHPITCH_DEPTHENDIAN_DWORD_SWAP (2 << 18) -# define ZB_DEPTHPITCH_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) +#define R300_ZB_DEPTHPITCH 0x4f24 +# define R300_DEPTHPITCH_MASK 0x00003FFC +# define R300_DEPTHMACROTILE_DISABLE (0 << 16) +# define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMICROTILE_LINEAR (0 << 17) +# define R300_DEPTHMICROTILE_TILED (1 << 17) +# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHENDIAN_NO_SWAP (0 << 18) +# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) +# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) +# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) /* Z Buffer Clear Value */ -#define 
ZB_DEPTHCLEARVALUE 0x4f28 +#define R300_ZB_DEPTHCLEARVALUE 0x4f28 /* Hierarchical Z Memory Offset */ -#define ZB_HIZ_OFFSET 0x4f44 +#define R300_ZB_HIZ_OFFSET 0x4f44 -/* Hierarchical Z Read Index */ -#define ZB_HIZ_RDINDEX 0x4f48 +/* Hierarchical Z Write Index */ +#define R300_ZB_HIZ_WRINDEX 0x4f48 /* Hierarchical Z Data */ -#define ZB_HIZ_DWORD 0x4f4c +#define R300_ZB_HIZ_DWORD 0x4f4c -/* Hierarchical Z Write Index */ -#define ZB_HIZ_WRINDEX 0x4f50 +/* Hierarchical Z Read Index */ +#define R300_ZB_HIZ_RDINDEX 0x4f50 /* Hierarchical Z Pitch */ -#define ZB_HIZ_PITCH 0x4f54 +#define R300_ZB_HIZ_PITCH 0x4f54 /* Z Buffer Z Pass Counter Data */ -#define ZB_ZPASS_DATA 0x4f58 +#define R300_ZB_ZPASS_DATA 0x4f58 /* Z Buffer Z Pass Counter Address */ -#define ZB_ZPASS_ADDR 0x4f5c +#define R300_ZB_ZPASS_ADDR 0x4f5c /* Depth buffer X and Y coordinate offset */ -#define ZB_DEPTHXY_OFFSET 0x4f60 -# define ZB_DEPTHX_OFFSET_SHIFT 1 -# define ZB_DEPTHX_OFFSET_MASK 0x000007FE -# define ZB_DEPTHY_OFFSET_SHIFT 17 -# define ZB_DEPTHY_OFFSET_MASK 0x07FE0000 +#define R300_ZB_DEPTHXY_OFFSET 0x4f60 +# define R300_DEPTHX_OFFSET_SHIFT 1 +# define R300_DEPTHX_OFFSET_MASK 0x000007FE +# define R300_DEPTHY_OFFSET_SHIFT 17 +# define R300_DEPTHY_OFFSET_MASK 0x07FE0000 /* Sets the fifo sizes */ -#define ZB_FIFO_SIZE 0x4fd0 -# define ZB_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) -# define ZB_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) -# define ZB_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) -# define ZB_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (4 << 0) +#define R500_ZB_FIFO_SIZE 0x4fd0 +# define R500_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0) /* Stencil Reference Value and Mask for backfacing quads */ -#define ZB_STENCILREFMASK_BF 0x4fd4 -# define ZB_STENCILREFMASK_BF_STENCILREF_SHIFT 0 -# define ZB_STENCILREFMASK_BF_STENCILREF_MASK 0x000000ff -# define ZB_STENCILREFMASK_BF_STENCILMASK_SHIFT 8 -# define 
ZB_STENCILREFMASK_BF_STENCILMASK_MASK 0x0000ff00 -# define ZB_STENCILREFMASK_BF_STENCILWRITEMASK_SHIFT 16 -# define ZB_STENCILREFMASK_BF_STENCILWRITEMASK_MASK 0xffff0000 +/* R300_ZB_STENCILREFMASK handles front face */ +#define R500_ZB_STENCILREFMASK_BF 0x4fd4 +# define R500_STENCILREF_SHIFT 0 +# define R500_STENCILREF_MASK 0x000000ff +# define R500_STENCILMASK_SHIFT 8 +# define R500_STENCILMASK_MASK 0x0000ff00 +# define R500_STENCILWRITEMASK_SHIFT 16 +# define R500_STENCILWRITEMASK_MASK 0x00ff0000 /** * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 69c672e286..178e4a7c00 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -415,10 +415,10 @@ static void r300SetEarlyZState(GLcontext * ctx) R300_STATECHANGE(r300, zstencil_format); switch (ctx->Visual.depthBits) { case 16: - r300->hw.zstencil_format.cmd[1] = ZB_FORMAR_DEPTHFORMAT_16BIT_INT_Z; + r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z; break; case 24: - r300->hw.zstencil_format.cmd[1] = ZB_FORMAR_DEPTHFORMAT_24BIT_INT_Z; + r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; break; default: fprintf(stderr, "Error: Unsupported depth %d... 
exiting\n", ctx->Visual.depthBits); @@ -427,14 +427,14 @@ static void r300SetEarlyZState(GLcontext * ctx) if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) /* disable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; + r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; else { if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) /* enable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE; + r300->hw.zstencil_format.cmd[2] = R300_ZTOP_ENABLE; else /* disable early Z */ - r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; + r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; } r300->hw.zstencil_format.cmd[3] = 0x00000003; @@ -527,24 +527,24 @@ static void r300SetDepthState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_RB3D_STENCIL_ENABLE; + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE; // XXX r300->hw.zs.cmd[R300_ZS_CNTL_1] &= - ~(R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT); + ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) { if (ctx->Depth.Mask) r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_RB3D_Z_TEST_AND_WRITE; + R300_Z_ENABLE | R300_Z_WRITE_ENABLE | R300_STENCIL_FRONT_BACK; // XXX else - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_TEST; + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE | R300_STENCIL_FRONT_BACK; // XXX r300->hw.zs.cmd[R300_ZS_CNTL_1] |= translate_func(ctx->Depth. 
- Func) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + Func) << R300_Z_FUNC_SHIFT; } else { - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_RB3D_Z_DISABLED_1; + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; // XXX r300->hw.zs.cmd[R300_ZS_CNTL_1] |= - translate_func(GL_NEVER) << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT; + translate_func(GL_NEVER) << R300_Z_FUNC_SHIFT; } r300SetEarlyZState(ctx); @@ -558,10 +558,10 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state) R300_STATECHANGE(r300, zs); if (state) { r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_RB3D_STENCIL_ENABLE; + R300_STENCIL_ENABLE; } else { r300->hw.zs.cmd[R300_ZS_CNTL_0] &= - ~R300_RB3D_STENCIL_ENABLE; + ~R300_STENCIL_ENABLE; } } else { #if R200_MERGED @@ -916,36 +916,36 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint refmask = (((ctx->Stencil. - Ref[0] & 0xff) << ZB_STENCILREFMASK_STENCILREF_SHIFT) | ((ctx-> - Stencil. - ValueMask - [0] & - 0xff) - << - ZB_STENCILREFMASK_STENCILMASK_SHIFT)); + Ref[0] & 0xff) << R300_STENCILREF_SHIFT) | ((ctx-> + Stencil. 
+ ValueMask + [0] & + 0xff) + << + R300_STENCILMASK_SHIFT)); GLuint flag; R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK << - R300_RB3D_ZS1_FRONT_FUNC_SHIFT) + R300_S_FRONT_FUNC_SHIFT) | (R300_ZS_MASK << - R300_RB3D_ZS1_BACK_FUNC_SHIFT)); + R300_S_BACK_FUNC_SHIFT)); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= - ~((ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILREF_SHIFT) | - (ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILMASK_SHIFT)); + ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); flag = translate_func(ctx->Stencil.Function[0]); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= - (flag << R300_RB3D_ZS1_FRONT_FUNC_SHIFT); + (flag << R300_S_FRONT_FUNC_SHIFT); if (ctx->Stencil._TestTwoSide) flag = translate_func(ctx->Stencil.Function[1]); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= - (flag << R300_RB3D_ZS1_BACK_FUNC_SHIFT); + (flag << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; } @@ -955,12 +955,12 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= - ~(ZB_STENCILREFMASK_STENCIL_MASK << - ZB_STENCILREFMASK_STENCILWRITEMASK_SHIFT); + ~(R300_STENCILREF_MASK << + R300_STENCILWRITEMASK_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= (ctx->Stencil. - WriteMask[0] & ZB_STENCILREFMASK_STENCIL_MASK) << - ZB_STENCILREFMASK_STENCILWRITEMASK_SHIFT; + WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; } static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, @@ -971,34 +971,34 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, R300_STATECHANGE(rmesa, zs); /* It is easier to mask what's left.. 
*/ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= - (R300_ZS_MASK << R300_RB3D_ZS1_DEPTH_FUNC_SHIFT) | - (R300_ZS_MASK << R300_RB3D_ZS1_FRONT_FUNC_SHIFT) | - (R300_ZS_MASK << R300_RB3D_ZS1_BACK_FUNC_SHIFT); + (R300_ZS_MASK << R300_Z_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT); rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[0]) << - R300_RB3D_ZS1_FRONT_FAIL_OP_SHIFT) + R300_S_FRONT_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << - R300_RB3D_ZS1_FRONT_ZFAIL_OP_SHIFT) + R300_S_FRONT_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << - R300_RB3D_ZS1_FRONT_ZPASS_OP_SHIFT); + R300_S_FRONT_ZPASS_OP_SHIFT); if (ctx->Stencil._TestTwoSide) { rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[1]) << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) + R300_S_BACK_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[1]) << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT) + R300_S_BACK_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[1]) << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT); + R300_S_BACK_ZPASS_OP_SHIFT); } else { rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= (translate_stencil_op(ctx->Stencil.FailFunc[0]) << - R300_RB3D_ZS1_BACK_FAIL_OP_SHIFT) + R300_S_BACK_SFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << - R300_RB3D_ZS1_BACK_ZFAIL_OP_SHIFT) + R300_S_BACK_ZFAIL_OP_SHIFT) | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << - R300_RB3D_ZS1_BACK_ZPASS_OP_SHIFT); + R300_S_BACK_ZPASS_OP_SHIFT); } } @@ -1007,10 +1007,10 @@ static void r300ClearStencil(GLcontext * ctx, GLint s) r300ContextPtr rmesa = R300_CONTEXT(ctx); rmesa->state.stencil.clear = - ((GLuint) (ctx->Stencil.Clear & ZB_STENCILREFMASK_STENCIL_MASK) | - (ZB_STENCILREFMASK_STENCIL_MASK << ZB_STENCILREFMASK_STENCILMASK_SHIFT) | - ((ctx->Stencil.WriteMask[0] & ZB_STENCILREFMASK_STENCIL_MASK) << - ZB_STENCILREFMASK_STENCILMASK_SHIFT)); + ((GLuint) 
(ctx->Stencil.Clear & R300_STENCILREF_MASK) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT) | + ((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILMASK_SHIFT)); } /* ============================================================= @@ -2287,11 +2287,11 @@ static void r300ResetHwState(r300ContextPtr r300) if (r300->radeon.sarea->tiling_enabled) { /* XXX: Turn off when clearing buffers ? */ - r300->hw.zb.cmd[R300_ZB_PITCH] |= ZB_DEPTHPITCH_DEPTHMACROTILE_ENABLE; + r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE; if (ctx->Visual.depthBits == 24) r300->hw.zb.cmd[R300_ZB_PITCH] |= - ZB_DEPTHPITCH_DEPTHMICROTILE_TILED; + R300_DEPTHMICROTILE_TILED; } r300->hw.zb_depthclearvalue.cmd[1] = 0; @@ -2546,12 +2546,12 @@ void r300InitState(r300ContextPtr r300) switch (ctx->Visual.depthBits) { case 16: r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; - depth_fmt = ZB_FORMAR_DEPTHFORMAT_16BIT_INT_Z; + depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z; r300->state.stencil.clear = 0x00000000; break; case 24: r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; - depth_fmt = ZB_FORMAR_DEPTHFORMAT_24BIT_INT_Z; + depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; r300->state.stencil.clear = 0x00ff0000; break; default: -- cgit v1.2.3 From f86baae1a7092533c7c3d56846e7bae97590951b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 13 May 2008 16:12:57 -0400 Subject: R300: clean up GA registers --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 10 +- src/mesa/drivers/dri/r300/r300_reg.h | 328 ++++++++++++++++---------------- src/mesa/drivers/dri/r300/r300_state.c | 16 +- 3 files changed, 177 insertions(+), 177 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index a41dee50ff..8596b46640 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -363,21 +363,21 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(txe, always, 
R300_TXE_CMDSIZE, 0); r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1); ALLOC_STATE(ga_point_s0, always, 5, 0); - r300->hw.ga_point_s0.cmd[0] = cmdpacket0(GA_POINT_S0, 4); + r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4); ALLOC_STATE(ga_triangle_stipple, always, 2, 0); - r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(GA_TRIANGLE_STIPPLE, 1); + r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1); ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0); r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1); ALLOC_STATE(ga_point_minmax, always, 4, 0); r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3); ALLOC_STATE(lcntl, always, 2, 0); - r300->hw.lcntl.cmd[0] = cmdpacket0(GA_LINE_CNTL, 1); + r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1); ALLOC_STATE(ga_line_stipple, always, 4, 0); r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3); ALLOC_STATE(shade, always, 5, 0); - r300->hw.shade.cmd[0] = cmdpacket0(GA_ENHANCE, 4); + r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4); ALLOC_STATE(polygon_mode, always, 4, 0); - r300->hw.polygon_mode.cmd[0] = cmdpacket0(GA_POLY_MODE, 3); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3); ALLOC_STATE(fogp, always, 3, 0); r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); ALLOC_STATE(zbias_cntl, always, 2, 0); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index bf61cd4abf..b23f587c50 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -768,25 +768,25 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R500_TX_DIRECTION_VERITCAL (1<<27) /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ -#define GA_POINT_S0 0x4200 +#define R300_GA_POINT_S0 0x4200 /* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ -#define GA_POINT_T0 0x4204 +#define R300_GA_POINT_T0 0x4204 /* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ -#define GA_POINT_S1 0x4208 +#define R300_GA_POINT_S1 0x4208 /* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ -#define GA_POINT_T1 0x420c +#define R300_GA_POINT_T1 0x420c /* Specifies amount to shift integer position of vertex (screen space) before * converting to float for triangle stipple. */ -#define GA_TRIANGLE_STIPPLE 0x4214 -# define GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 -# define GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f -# define GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 -# define GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 +#define R300_GA_TRIANGLE_STIPPLE 0x4214 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 /* The pointsize is given in multiples of 6. The pointsize can be enormous: * Clear() renders a single point that fills the entire framebuffer. @@ -801,16 +801,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) /* Blue fill color */ -#define GA_FILL_R 0x4220 +#define R500_GA_FILL_R 0x4220 /* Blue fill color */ -#define GA_FILL_G 0x4224 +#define R500_GA_FILL_G 0x4224 /* Blue fill color */ -#define GA_FILL_B 0x4228 +#define R500_GA_FILL_B 0x4228 /* Alpha fill color */ -#define GA_FILL_A 0x422c +#define R500_GA_FILL_A 0x422c /* Specifies maximum and minimum point & sprite sizes for per vertex size @@ -831,159 +831,159 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
* VE: vertical or horizontal * HO & VE: no classification */ -#define GA_LINE_CNTL 0x4234 -# define GA_LINE_CNTL_WIDTH_SHIFT 0 -# define GA_LINE_CNTL_WIDTH_MASK 0x0000ffff -# define GA_LINE_CNTL_END_TYPE_HOR (0 << 16) -# define GA_LINE_CNTL_END_TYPE_VER (1 << 16) -# define GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */ -# define GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */ -# define GA_LINE_CNTL_SORT_NO (0 << 18) -# define GA_LINE_CNTL_SORT_MINX_MINY (1 << 18) +#define R300_GA_LINE_CNTL 0x4234 +# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0 +# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff +# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */ +# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */ +# define R500_GA_LINE_CNTL_SORT_NO (0 << 18) +# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18) /** TODO: looks wrong */ -# define R300_LINESIZE_MAX (GA_LINE_CNTL_WIDTH_MASK / 6) +# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6) /** TODO: looks wrong */ # define R300_LINE_CNT_HO (1 << 16) /** TODO: looks wrong */ # define R300_LINE_CNT_VE (1 << 17) /* Line Stipple configuration information. 
*/ -#define GA_LINE_STIPPLE_CONFIG 0x4238 -# define GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) -# define GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) -# define GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) -# define GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 -# define GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc +#define R300_GA_LINE_STIPPLE_CONFIG 0x4238 +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc /* Used to load US instructions and constants */ #define R500_GA_US_VECTOR_INDEX 0x4250 -# define GA_US_VECTOR_INDEX_SHIFT 0 -# define GA_US_VECTOR_INDEX_MASK 0x000000ff -# define GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) -# define GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) -# define GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) -# define GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) +# define R500_GA_US_VECTOR_INDEX_SHIFT 0 +# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff +# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) +# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) +# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) +# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) /* Data register for loading US instructions and constants */ #define R500_GA_US_VECTOR_DATA 0x4254 /* Specifies color properties and mappings of textures. 
*/ -#define GA_COLOR_CONTROL_PS3 0x4258 -# define TEX0_SHADING_PS3_SOLID (0 << 0) -# define TEX0_SHADING_PS3_FLAT (1 << 0) -# define TEX0_SHADING_PS3_GOURAUD (2 << 0) -# define TEX1_SHADING_PS3_SOLID (0 << 2) -# define TEX1_SHADING_PS3_FLAT (1 << 2) -# define TEX1_SHADING_PS3_GOURAUD (2 << 2) -# define TEX2_SHADING_PS3_SOLID (0 << 4) -# define TEX2_SHADING_PS3_FLAT (1 << 4) -# define TEX2_SHADING_PS3_GOURAUD (2 << 4) -# define TEX3_SHADING_PS3_SOLID (0 << 6) -# define TEX3_SHADING_PS3_FLAT (1 << 6) -# define TEX3_SHADING_PS3_GOURAUD (2 << 6) -# define TEX4_SHADING_PS3_SOLID (0 << 8) -# define TEX4_SHADING_PS3_FLAT (1 << 8) -# define TEX4_SHADING_PS3_GOURAUD (2 << 8) -# define TEX5_SHADING_PS3_SOLID (0 << 10) -# define TEX5_SHADING_PS3_FLAT (1 << 10) -# define TEX5_SHADING_PS3_GOURAUD (2 << 10) -# define TEX6_SHADING_PS3_SOLID (0 << 12) -# define TEX6_SHADING_PS3_FLAT (1 << 12) -# define TEX6_SHADING_PS3_GOURAUD (2 << 12) -# define TEX7_SHADING_PS3_SOLID (0 << 14) -# define TEX7_SHADING_PS3_FLAT (1 << 14) -# define TEX7_SHADING_PS3_GOURAUD (2 << 14) -# define TEX8_SHADING_PS3_SOLID (0 << 16) -# define TEX8_SHADING_PS3_FLAT (1 << 16) -# define TEX8_SHADING_PS3_GOURAUD (2 << 16) -# define TEX9_SHADING_PS3_SOLID (0 << 18) -# define TEX9_SHADING_PS3_FLAT (1 << 18) -# define TEX9_SHADING_PS3_GOURAUD (2 << 18) -# define TEX10_SHADING_PS3_SOLID (0 << 20) -# define TEX10_SHADING_PS3_FLAT (1 << 20) -# define TEX10_SHADING_PS3_GOURAUD (2 << 20) -# define COLOR0_TEX_OVERRIDE_NO (0 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) -# define COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) -# define 
COLOR1_TEX_OVERRIDE_NO (0 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) -# define COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) +#define R500_GA_COLOR_CONTROL_PS3 0x4258 +# define R500_TEX0_SHADING_PS3_SOLID (0 << 0) +# define R500_TEX0_SHADING_PS3_FLAT (1 << 0) +# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0) +# define R500_TEX1_SHADING_PS3_SOLID (0 << 2) +# define R500_TEX1_SHADING_PS3_FLAT (1 << 2) +# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2) +# define R500_TEX2_SHADING_PS3_SOLID (0 << 4) +# define R500_TEX2_SHADING_PS3_FLAT (1 << 4) +# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4) +# define R500_TEX3_SHADING_PS3_SOLID (0 << 6) +# define R500_TEX3_SHADING_PS3_FLAT (1 << 6) +# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6) +# define R500_TEX4_SHADING_PS3_SOLID (0 << 8) +# define R500_TEX4_SHADING_PS3_FLAT (1 << 8) +# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8) +# define R500_TEX5_SHADING_PS3_SOLID (0 << 10) +# define R500_TEX5_SHADING_PS3_FLAT (1 << 10) +# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10) +# define R500_TEX6_SHADING_PS3_SOLID (0 << 12) +# define R500_TEX6_SHADING_PS3_FLAT (1 << 12) +# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12) +# define R500_TEX7_SHADING_PS3_SOLID (0 << 14) +# define R500_TEX7_SHADING_PS3_FLAT (1 << 14) +# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14) +# define R500_TEX8_SHADING_PS3_SOLID (0 << 16) +# define R500_TEX8_SHADING_PS3_FLAT (1 << 16) +# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16) +# define R500_TEX9_SHADING_PS3_SOLID (0 << 18) +# define R500_TEX9_SHADING_PS3_FLAT (1 << 18) +# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18) +# define 
R500_TEX10_SHADING_PS3_SOLID (0 << 20) +# define R500_TEX10_SHADING_PS3_FLAT (1 << 20) +# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20) +# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) +# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) /* Returns idle status of various G3D block, captured when GA_IDLE written or * when hard or soft reset asserted. 
*/ -#define GA_IDLE 0x425c -# define GA_IDLE_PIPE3_Z_IDLE (0 << 0) -# define GA_IDLE_PIPE2_Z_IDLE (0 << 1) -# define GA_IDLE_PIPE3_CD_IDLE (0 << 2) -# define GA_IDLE_PIPE2_CD_IDLE (0 << 3) -# define GA_IDLE_PIPE3_FG_IDLE (0 << 4) -# define GA_IDLE_PIPE2_FG_IDLE (0 << 5) -# define GA_IDLE_PIPE3_US_IDLE (0 << 6) -# define GA_IDLE_PIPE2_US_IDLE (0 << 7) -# define GA_IDLE_PIPE3_SC_IDLE (0 << 8) -# define GA_IDLE_PIPE2_SC_IDLE (0 << 9) -# define GA_IDLE_PIPE3_RS_IDLE (0 << 10) -# define GA_IDLE_PIPE2_RS_IDLE (0 << 11) -# define GA_IDLE_PIPE1_Z_IDLE (0 << 12) -# define GA_IDLE_PIPE0_Z_IDLE (0 << 13) -# define GA_IDLE_PIPE1_CD_IDLE (0 << 14) -# define GA_IDLE_PIPE0_CD_IDLE (0 << 15) -# define GA_IDLE_PIPE1_FG_IDLE (0 << 16) -# define GA_IDLE_PIPE0_FG_IDLE (0 << 17) -# define GA_IDLE_PIPE1_US_IDLE (0 << 18) -# define GA_IDLE_PIPE0_US_IDLE (0 << 19) -# define GA_IDLE_PIPE1_SC_IDLE (0 << 20) -# define GA_IDLE_PIPE0_SC_IDLE (0 << 21) -# define GA_IDLE_PIPE1_RS_IDLE (0 << 22) -# define GA_IDLE_PIPE0_RS_IDLE (0 << 23) -# define GA_IDLE_SU_IDLE (0 << 24) -# define GA_IDLE_GA_IDLE (0 << 25) -# define GA_IDLE_GA_UNIT2_IDLE (0 << 26) +#define R500_GA_IDLE 0x425c +# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0) +# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1) +# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2) +# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3) +# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4) +# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5) +# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6) +# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7) +# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8) +# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9) +# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10) +# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11) +# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12) +# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13) +# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14) +# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15) +# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16) +# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17) +# 
define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18) +# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19) +# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20) +# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21) +# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22) +# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23) +# define R500_GA_IDLE_SU_IDLE (0 << 24) +# define R500_GA_IDLE_GA_IDLE (0 << 25) +# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26) /* Current value of stipple accumulator. */ #define R300_GA_LINE_STIPPLE_VALUE 0x4260 /* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */ -#define GA_LINE_S0 0x4264 +#define R300_GA_LINE_S0 0x4264 /* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */ -#define GA_LINE_S1 0x4268 +#define R300_GA_LINE_S1 0x4268 /* GA Input fifo high water marks */ -#define GA_FIFO_CNTL 0x4270 -# define GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 -# define GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 -# define GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 -# define GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 -# define GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 -# define GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 - -/* Something shade related */ -#define GA_ENHANCE 0x4274 -# define GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) -# define GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ -# define GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) -# define GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ -# define GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ -# define GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ -# define GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) -# define GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. 
*/ +#define R500_GA_FIFO_CNTL 0x4270 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 +# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 +# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 + +/* GA enhance/tweaks */ +#define R300_GA_ENHANCE 0x4274 +# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) +# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ +# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) +# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ +# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ +# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ +# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) +# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */ #define R300_GA_COLOR_CONTROL 0x4278 # define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0) @@ -1046,41 +1046,41 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
/* Polygon Mode * Dangerous */ -#define GA_POLY_MODE 0x4288 -# define GA_POLY_MODE_DISABLE (0 << 0) -# define GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ +#define R300_GA_POLY_MODE 0x4288 +# define R300_GA_POLY_MODE_DISABLE (0 << 0) +# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ /* reserved */ -# define GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) -# define GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) -# define GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) /* reserved */ -# define GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) -# define GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) -# define GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) /* reserved */ /* Specifies the rouding mode for geometry & color SPFP to FP conversions. 
*/ -#define GA_ROUND_MODE 0x428c -# define GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0) -# define GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) -# define GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2) -# define GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2) -# define GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4) -# define GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4) -# define GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5) -# define GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5) -# define GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6 -# define GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0 +#define R300_GA_ROUND_MODE 0x428c +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0) +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) +# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2) +# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2) +# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4) +# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5) +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6 +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0 /* Specifies x & y offsets for vertex data after conversion to FP. * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b * subprecision). */ -#define GA_OFFSET 0x4290 -# define GA_OFFSET_X_OFFSET_SHIFT 0 -# define GA_OFFSET_X_OFFSET_MASK 0x0000ffff -# define GA_OFFSET_Y_OFFSET_SHIFT 16 -# define GA_OFFSET_Y_OFFSET_MASK 0xffff0000 +#define R300_GA_OFFSET 0x4290 +# define R300_GA_OFFSET_X_OFFSET_SHIFT 0 +# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff +# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16 +# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000 /* Specifies the scale to apply to fog. 
*/ #define R300_RE_FOG_SCALE 0x4294 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 178e4a7c00..82fa6579a5 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -573,7 +573,7 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state) static void r300UpdatePolygonMode(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - uint32_t hw_mode = GA_POLY_MODE_DISABLE; + uint32_t hw_mode = R300_GA_POLY_MODE_DISABLE; /* Only do something if a polygon mode is wanted, default is GL_FILL */ if (ctx->Polygon.FrontMode != GL_FILL || @@ -592,29 +592,29 @@ static void r300UpdatePolygonMode(GLcontext * ctx) } /* Enable polygon mode */ - hw_mode |= GA_POLY_MODE_DUAL; + hw_mode |= R300_GA_POLY_MODE_DUAL; switch (f) { case GL_LINE: - hw_mode |= GA_POLY_MODE_FRONT_PTYPE_LINE; + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE; break; case GL_POINT: - hw_mode |= GA_POLY_MODE_FRONT_PTYPE_POINT; + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT; break; case GL_FILL: - hw_mode |= GA_POLY_MODE_FRONT_PTYPE_TRI; + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI; break; } switch (b) { case GL_LINE: - hw_mode |= GA_POLY_MODE_BACK_PTYPE_LINE; + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_LINE; break; case GL_POINT: - hw_mode |= GA_POLY_MODE_BACK_PTYPE_POINT; + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_POINT; break; case GL_FILL: - hw_mode |= GA_POLY_MODE_BACK_PTYPE_TRI; + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_TRI; break; } } -- cgit v1.2.3 From 375656440bd03d229701a1c97ef3f2ac61ba6712 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 11:46:23 +1000 Subject: r500: we just need to emit a colour for clear drop tex instruction --- src/mesa/drivers/dri/r300/r300_ioctl.c | 35 +--------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 
bf12c2cfbf..9c46cc795f 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -358,40 +358,7 @@ static void r300EmitClearState(GLcontext * ctx) e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); } else { R300_STATECHANGE(r300, r500fp); - r500fp_start_fragment(0, 12); - - e32((R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_R | - R500_INST_RGB_WMASK_G | - R500_INST_RGB_WMASK_B | - R500_INST_ALPHA_WMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP)); - e32(R500_TEX_ID(0) | - R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | - R500_TEX_IGNORE_UNCOVERED); - e32(R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | - R500_TEX_SRC_T_SWIZ_G | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | - R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | - R500_TEX_DST_A_SWIZ_A); - e32(R500_DX_ADDR(0) | - R500_DX_S_SWIZ_R | - R500_DX_T_SWIZ_R | - R500_DX_R_SWIZ_R | - R500_DX_Q_SWIZ_R | - R500_DY_ADDR(0) | - R500_DY_S_SWIZ_R | - R500_DY_T_SWIZ_R | - R500_DY_R_SWIZ_R | - R500_DY_Q_SWIZ_R); - e32(0x0); - e32(0x0); + r500fp_start_fragment(0, 6); e32(R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | -- cgit v1.2.3 From 10e0a36a496a7032b15728343cf8ee2ca2df5cb3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 13:21:50 +1000 Subject: r500: some trivial fixups to get tri working. 
the counter was being used one instruction over the end --- src/mesa/drivers/dri/r300/r300_ioctl.c | 18 +++++++++++------- src/mesa/drivers/dri/r300/r500_fragprog.c | 6 +++--- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 9c46cc795f..b6d600d40a 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -305,7 +305,7 @@ static void r300EmitClearState(GLcontext * ctx) R300_STATECHANGE(r300, rr); reg_start(R300_RS_INST_0, 0); - e32(R500_RS_INST_COL_CN_WRITE); + e32(R300_RS_INST_COL_CN_WRITE); } else { R300_STATECHANGE(r300, ri); @@ -357,6 +357,10 @@ static void r300EmitClearState(GLcontext * ctx) reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); } else { + R300_STATECHANGE(r300, fp); + reg_start(R500_US_PIXSIZE, 0); + e32(0x2); + R300_STATECHANGE(r300, r500fp); r500fp_start_fragment(0, 6); @@ -387,15 +391,15 @@ static void r300EmitClearState(GLcontext * ctx) R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_1 | - R500_ALU_RGB_B_SWIZ_B_1 | - R500_ALU_RGB_G_SWIZ_B_1); + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B); - e32(R500_ALPHA_OP_MAD | + e32(R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_1); + R500_ALPHA_SWIZ_B_A); - e32(R500_ALU_RGBA_OP_MAD | + e32(R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f9ef582d0a..2ea77bcfad 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -601,12 +601,11 @@ static GLboolean parse_program(struct r500_fragment_program *fp) } /* Finish him! (If it's an ALU/OUT instruction...) 
*/ - if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) { - fp->inst[counter].inst0 |= R500_INST_TYPE_OUT + if ((fp->inst[counter-1].inst0 & 0x3) <= 1) { + fp->inst[counter-1].inst0 |= R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; } else { /* We still need to put an output inst, right? */ - counter++; fp->inst[counter].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G @@ -625,6 +624,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(0) | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; } fp->cs->nrslots = counter; -- cgit v1.2.3 From e1bffd03188479f85f09eba2b478d36d77a70264 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 13:24:01 +1000 Subject: r500: add cmp support in theory --- src/mesa/drivers/dri/r300/r500_fragprog.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 2ea77bcfad..eee9eb8d71 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -418,6 +418,30 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])); break; + case OPCODE_CMP: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + src[2] = make_src(fp, fpi->SrcReg[2]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | 
MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + break; case OPCODE_DP3: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); -- cgit v1.2.3 From 350c80fa99c759d3080d3c3876e525ae130d1b04 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 13:47:47 +1000 Subject: r300: set screen so that context init can find out chip ids --- src/mesa/drivers/dri/r300/radeon_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c index 9c0a5868b5..3fc724a553 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.c +++ b/src/mesa/drivers/dri/r300/radeon_context.c @@ -135,6 +135,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon, /* Fill in additional standard functions. 
*/ radeonInitDriverFuncs(functions); + radeon->radeonScreen = screen; /* Allocate and initialize the Mesa context */ if (sharedContextPrivate) shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx; @@ -158,7 +159,6 @@ GLboolean radeonInitContext(radeonContextPtr radeon, radeon->dri.fd = sPriv->fd; radeon->dri.drmMinor = sPriv->drm_version.minor; - radeon->radeonScreen = screen; radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + screen->sarea_priv_offset); -- cgit v1.2.3 From 412c850eaba82a06532fcb6300234a63e7ae037b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 13:48:31 +1000 Subject: r500: fixup the program allocations to be the correct sizes --- src/mesa/drivers/dri/r300/r300_shader.c | 41 +++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 77abf86a8e..5c8fd8a5e5 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -9,8 +9,10 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp; - struct r300_fragment_program *fp; + struct r300_fragment_program *r300_fp; + struct r500_fragment_program *r500_fp; switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: @@ -19,14 +21,28 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); case GL_FRAGMENT_PROGRAM_ARB: - fp = CALLOC_STRUCT(r300_fragment_program); - fp->ctx = ctx; - return _mesa_init_fragment_program(ctx, &fp->mesa_program, - target, id); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_fp = CALLOC_STRUCT(r500_fragment_program); + r500_fp->ctx = ctx; + return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, + target, id); + } else { + 
r300_fp = CALLOC_STRUCT(r300_fragment_program); + r300_fp->ctx = ctx; + return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, + target, id); + } + case GL_FRAGMENT_PROGRAM_NV: - fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &fp->mesa_program, - target, id); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_fp = CALLOC_STRUCT(r500_fragment_program); + return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, + target, id); + } else { + r300_fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, + target, id); + } default: _mesa_problem(ctx, "Bad target in r300NewProgram"); } @@ -42,15 +58,20 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp = (void *)prog; - struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; + struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: vp->progs = NULL; break; case GL_FRAGMENT_PROGRAM_ARB: - fp->translated = GL_FALSE; + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500_fp->translated = GL_FALSE; + else + r300_fp->translated = GL_FALSE; break; } -- cgit v1.2.3 From c9d5d11d2da77fada77dca1e239accd126fa3300 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 14:51:50 +1000 Subject: r3/500: emit RS state before VAP --- src/mesa/drivers/dri/r300/r300_state.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 82fa6579a5..25d1627074 100644 --- 
a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2500,13 +2500,14 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r300SetupPixelShader(rmesa); r300SetupTextures(ctx); - if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - r300SetupVertexProgram(rmesa); - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) r500SetupRSUnit(ctx); else r300SetupRSUnit(ctx); + + if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + r300SetupVertexProgram(rmesa); + } /** -- cgit v1.2.3 From 73af48fff5502bc156160ce74896ffd156ee08ee Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 14:52:39 +1000 Subject: r500: split output/pixel masks and emit in the correct places --- src/mesa/drivers/dri/r300/r500_fragprog.c | 39 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index eee9eb8d71..e97ce9fce2 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -371,7 +371,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; - int flags, mask, counter = 0; + int flags, pixel_mask, output_mask, counter = 0; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("The program is empty!\n"); @@ -382,13 +382,15 @@ static GLboolean parse_program(struct r500_fragment_program *fp) if (fpi->Opcode != OPCODE_KIL) { dest = make_dest(fp, fpi->DstReg); - mask = fpi->DstReg.WriteMask << 11; + + pixel_mask = fpi->DstReg.WriteMask << 11; + output_mask = fpi->DstReg.WriteMask << 14; } switch (fpi->Opcode) { case OPCODE_ABS: emit_mov(fp, counter, fpi->SrcReg[0], dest); - fp->inst[counter].inst0 |= mask; + fp->inst[counter].inst0 |= pixel_mask; fp->inst[counter].inst3 |= 
R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_MOD_B_ABS; fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS @@ -399,7 +401,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0+src1 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -423,7 +425,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[1] = make_src(fp, fpi->SrcReg[1]); src[2] = make_src(fp, fpi->SrcReg[2]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -446,7 +448,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | mask; + | R500_INST_TEX_SEM_WAIT | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -466,7 +468,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[1] = make_src(fp, fpi->SrcReg[1]); /* Based on DP3 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | mask; + | R500_INST_TEX_SEM_WAIT | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -486,7 +488,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[1] = make_src(fp, fpi->SrcReg[1]); src[2] = make_src(fp, fpi->SrcReg[2]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); 
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -508,7 +510,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_MAX: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -525,7 +527,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_MIN: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -541,14 +543,16 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_MOV: emit_mov(fp, counter, fpi->SrcReg[0], dest); - fp->inst[counter].inst0 |= mask; + fp->inst[counter].inst0 |= pixel_mask; + fprintf(stderr,"wm is %x\n", pixel_mask); + break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: src0*src1+0 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | mask; + | R500_INST_TEX_SEM_WAIT | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -572,7 +576,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0-src1 */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | mask; + | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) | 
R500_RGB_ADDR2(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) @@ -612,9 +616,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; } if (fpi->DstReg.File == PROGRAM_OUTPUT) { - fp->inst[counter].inst0 |= R500_INST_TYPE_OUT - | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G - | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK; + fp->inst[counter].inst0 |= R500_INST_TYPE_OUT | output_mask; } counter++; @@ -631,9 +633,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) } else { /* We still need to put an output inst, right? */ fp->inst[counter].inst0 = R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | R500_INST_LAST - | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G - | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK; + | R500_INST_TEX_SEM_WAIT | R500_INST_LAST | + output_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(dest); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 -- cgit v1.2.3 From a0bc6d2fb21e47c5e659bc113c0c47b2288a9898 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 14:54:51 +1000 Subject: r500: remove some debugging --- src/mesa/drivers/dri/r300/r500_fragprog.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index e97ce9fce2..f65300a1bc 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -371,7 +371,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; - int flags, pixel_mask, output_mask, counter = 0; + int flags, pixel_mask = 0, output_mask = 0, counter = 0; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("The program is empty!\n"); @@ -544,8 +544,6 @@ 
static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_MOV: emit_mov(fp, counter, fpi->SrcReg[0], dest); fp->inst[counter].inst0 |= pixel_mask; - fprintf(stderr,"wm is %x\n", pixel_mask); - break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From 76f32499d20ac9ee12a1b7aafbd2493749364a86 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 15:05:40 +1000 Subject: r500: fixup r500 rs unit texture coordinate counting --- src/mesa/drivers/dri/r300/r300_state.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 25d1627074..006c2808c5 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1569,7 +1569,7 @@ static void r300SetupRSUnit(GLcontext * ctx) r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count; switch(count) { - case 4:swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; + case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; default: case 1: @@ -1648,7 +1648,7 @@ static void r500SetupRSUnit(GLcontext * ctx) struct vertex_buffer *VB = &tnl->vb; GLuint InputsRead; int fp_reg, high_rr; - int rs_tex_count = 0, rs_col_count = 0; + int rs_col_count = 0; int in_texcoords, col_interp_nr; int i, count; @@ -1706,7 +1706,7 @@ static void r500SetupRSUnit(GLcontext * ctx) } for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - GLuint swiz; + GLuint swiz = 0; /* with TCL we always seem to route 4 components */ if (InputsRead & (FRAG_BIT_TEX0 << i)) { @@ -1717,21 +1717,23 @@ static void r500SetupRSUnit(GLcontext * ctx) count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; swiz = 0; + /* always have a least 2 tex coords */ 
+ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; + + if (count >= 3) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + if (count == 4) swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT; else swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - if (count >= 3) - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT; - else - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - /* always have a least 2 tex coords */ - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; } - r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count | swiz; + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz; r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; if (InputsRead & (FRAG_BIT_TEX0 << i)) { @@ -1778,7 +1780,7 @@ static void r500SetupRSUnit(GLcontext * ctx) col_interp_nr++; } - r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT) + r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT) | (col_interp_nr << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; @@ -2447,7 +2449,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) GLcontext *ctx = rmesa->radeon.glCtx; struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; - int i, k; + int i; if (!fp) /* should only happenen once, just after context is created */ return; -- cgit v1.2.3 From 9aa62c723807f569c55a8e0df069cb2eadad77ae Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 16:24:54 +1000 Subject: r500: shift tex src properly --- src/mesa/drivers/dri/r300/r500_fragprog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f65300a1bc..7569783f1f 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c 
+++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -229,11 +229,11 @@ static void emit_tex(struct r500_fragment_program *fp, fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask | R500_INST_TEX_SEM_WAIT; - fp->inst[counter].inst1 = fpi->TexSrcUnit + fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) - fp->inst[counter].inst1 |= R500_TEX_UNSCALED; + fp->inst[counter].inst1 |= R500_TEX_UNSCALED; switch (opcode) { case OPCODE_TEX: -- cgit v1.2.3 From d6333af7e9b01d0e878ddbb92b5f972c67f5350f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 May 2008 20:38:41 +1000 Subject: r500: default rsunit swizzle like fglrx --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 8 ++++++++ src/mesa/drivers/dri/r300/r300_state.c | 9 ++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 8596b46640..7ddb1a946e 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -297,6 +297,7 @@ void r300InitCmdBuf(r300ContextPtr r300) int size, mtu; int has_tcl = 1; int is_r500 = 0; + int i; if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) has_tcl = 0; @@ -396,6 +397,13 @@ void r300InitCmdBuf(r300ContextPtr r300) if (is_r500) { ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); + for (i = 0; i < 8; i++) { + r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] = + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + } ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); } else { diff --git a/src/mesa/drivers/dri/r300/r300_state.c 
b/src/mesa/drivers/dri/r300/r300_state.c index 006c2808c5..175c385915 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1716,7 +1716,6 @@ static void r500SetupRSUnit(GLcontext * ctx) else count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - swiz = 0; /* always have a least 2 tex coords */ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; @@ -1731,8 +1730,12 @@ static void r500SetupRSUnit(GLcontext * ctx) else swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - - } + } else + swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz; r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0; -- cgit v1.2.3 From 791c95230cd399eaa5892d6e13d3ce08e6167e0c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 17 May 2008 10:27:11 +1000 Subject: r500: write out the correct FP registers --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 10 +++++++++- src/mesa/drivers/dri/r300/r300_context.h | 14 ++++++++++++-- src/mesa/drivers/dri/r300/r300_ioctl.c | 11 ++++++++--- src/mesa/drivers/dri/r300/r300_reg.h | 2 ++ src/mesa/drivers/dri/r300/r300_state.c | 9 +++++++++ src/mesa/drivers/dri/r300/r500_fragprog.c | 3 +++ 6 files changed, 43 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 7ddb1a946e..873f8344fa 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -321,6 +321,11 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); ALLOC_STATE(vap_cntl, always, 2, 0); r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); + if (is_r500) { + 
ALLOC_STATE(vap_index_offset, always, 2, 0); + r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1); + r300->hw.vap_index_offset.cmd[1] = 0; + } ALLOC_STATE(vte, always, 3, 0); r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2); ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0); @@ -423,7 +428,10 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2); r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; - + r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3); + r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1); + r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */ + ALLOC_STATE(r500fp, r500fp, R300_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); ALLOC_STATE(r500fp_const, r500fp_const, R300_FPP_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 815a729969..06b81e6e63 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -357,7 +357,13 @@ struct r300_state_atom { #define R500_FP_CMD_0 0 #define R500_FP_CNTL 1 #define R500_FP_PIXSIZE 2 -#define R500_FP_CMDSIZE 3 +#define R500_FP_CMD_1 3 +#define R500_FP_CODE_ADDR 4 +#define R500_FP_CODE_RANGE 5 +#define R500_FP_CODE_OFFSET 6 +#define R500_FP_CMD_2 7 +#define R500_FP_FC_CNTL 8 +#define R500_FP_CMDSIZE 9 #define R300_FPT_CMD_0 0 #define R300_FPT_INSTR_0 1 @@ -458,6 +464,7 @@ struct r300_hw_state { struct r300_state_atom vpt; /* viewport (1D98) */ struct r300_state_atom vap_cntl; + struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */ struct r300_state_atom vof; /* VAP output format register 0x2090 */ struct r300_state_atom vte; /* (20B0) */ struct r300_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ @@ -480,7 +487,7 @@ struct r300_hw_state { struct r300_state_atom 
shade; struct r300_state_atom polygon_mode; struct r300_state_atom fogp; /* fog parameters (4294) */ - struct r300_state_atom unk429C; /* (429C) */ + struct r300_state_atom ga_soft_reset; /* (429C) */ struct r300_state_atom zbias_cntl; struct r300_state_atom zbs; /* zbias (42A4) */ struct r300_state_atom occlusion_cntl; @@ -809,6 +816,9 @@ struct r500_fragment_program { int temp_reg_offset; + int inst_offset; + int inst_end; + /* Hardware constants. * Contains a pointer to the value. The destination of the pointer * is supposed to be updated when GL state changes. diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index b6d600d40a..9aa1120966 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -357,9 +357,14 @@ static void r300EmitClearState(GLcontext * ctx) reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); } else { - R300_STATECHANGE(r300, fp); - reg_start(R500_US_PIXSIZE, 0); - e32(0x2); + R300_STATECHANGE(r300, fp); + reg_start(R500_US_CONFIG, 1); + e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + e32(0x0); + reg_start(R500_US_CODE_ADDR, 2); + e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); + e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); + e32(R500_US_CODE_OFFSET_ADDR(0)); R300_STATECHANGE(r300, r500fp); r500fp_start_fragment(0, 6); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index b23f587c50..15bc574c13 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -113,6 +113,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
/* number of vertices */ # define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 +#define R500_VAP_INDEX_OFFSET 0x208c + #define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 # define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) # define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 175c385915..063f39ab70 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2467,6 +2467,15 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx; + rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = + R500_US_CODE_START_ADDR(fp->inst_offset) | + R500_US_CODE_END_ADDR(fp->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = + R500_US_CODE_RANGE_ADDR(fp->inst_offset) | + R500_US_CODE_RANGE_SIZE(fp->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = + R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ + R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... */ for (i = 0; i < fp->cs->nrslots; i++) { diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 7569783f1f..4020edd6c8 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -798,6 +798,9 @@ void r500TranslateFragmentShader(r300ContextPtr r300, init_program(r300, fp); cs = fp->cs; + fp->inst_offset = 0; + fp->inst_end = cs->nrslots - 1; + if (parse_program(fp) == GL_FALSE) { ERROR("Huh. Couldn't parse program. 
There should be additional errors explaining why.\nUsing dumb shader...\n"); dumb_shader(fp); -- cgit v1.2.3 From ba50c3fed38283968f622d7f47437f56033f09b7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 17 May 2008 10:40:47 +1000 Subject: r300: SC register naming cleanup --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 12 +++---- src/mesa/drivers/dri/r300/r300_reg.h | 56 ++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 31 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 873f8344fa..995bfee42a 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -347,7 +347,7 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(vap_clip_cntl, always, 2, 0); r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1); ALLOC_STATE(vap_clip, always, 5, 0); - r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_CLIP_X_0, 4); + r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4); ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0); r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1); } @@ -385,16 +385,16 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE(polygon_mode, always, 4, 0); r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3); ALLOC_STATE(fogp, always, 3, 0); - r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); + r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2); ALLOC_STATE(zbias_cntl, always, 2, 0); - r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1); ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0); r300->hw.zbs.cmd[R300_ZBS_CMD_0] = - cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4); + cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); ALLOC_STATE(occlusion_cntl, always, 2, 0); - r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1); + r300->hw.occlusion_cntl.cmd[0] 
= cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1); ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0); - r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1); + r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1); ALLOC_STATE(su_depth_scale, always, 3, 0); r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2); ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 15bc574c13..b742fabbb5 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -434,10 +434,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. */ -#define R300_VAP_CLIP_X_0 0x2220 -#define R300_VAP_CLIP_X_1 0x2224 -#define R300_VAP_CLIP_Y_0 0x2228 -#define R300_VAP_CLIP_Y_1 0x222c +#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220 +#define R300_VAP_GB_VERT_DISC_ADJ 0x2224 +#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 +#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c /* gap */ @@ -486,6 +486,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 +#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -1085,9 +1086,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000 /* Specifies the scale to apply to fog. */ -#define R300_RE_FOG_SCALE 0x4294 +#define R300_GA_FOG_SCALE 0x4294 /* Specifies the offset to apply to fog. */ -#define R300_RE_FOG_START 0x4298 +#define R300_GA_FOG_OFFSET 0x4298 +/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */ +#define R300_GA_SOFT_RESET 0x429c /* Not sure why there are duplicate of factor and constant values. * My best guess so far is that there are seperate zbiases for test and write. 
@@ -1095,11 +1098,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Some of the tests indicate that fgl has a fallback implementation of zbias * via pixel shaders. */ -#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */ -#define R300_RE_ZBIAS_T_FACTOR 0x42A4 -#define R300_RE_ZBIAS_T_CONSTANT 0x42A8 -#define R300_RE_ZBIAS_W_FACTOR 0x42AC -#define R300_RE_ZBIAS_W_CONSTANT 0x42B0 +#define R300_SU_TEX_WRAP 0x42A0 +#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4 +#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8 +#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC +#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0 /* This register needs to be set to (1<<1) for RV350 to correctly * perform depth test (see --vb-triangles in r300_demo) @@ -1110,10 +1113,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * One to enable depth test and one for depth write. * Yet this doesnt explain why depth writes work ... */ -#define R300_RE_OCCLUSION_CNTL 0x42B4 -# define R300_OCCLUSION_ON (1<<1) +#define R300_SU_POLY_OFFSET_ENABLE 0x42B4 +# define R300_FRONT_ENABLE (1 << 0) +# define R300_BACK_ENABLE (1 << 1) +# define R300_PARA_ENABLE (1 << 2) -#define R300_RE_CULL_CNTL 0x42B8 +#define R300_SU_CULL_MODE 0x42B8 # define R300_CULL_FRONT (1 << 0) # define R300_CULL_BACK (1 << 1) # define R300_FRONT_FACE_CCW (0 << 2) @@ -1262,6 +1267,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6) # define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6) +#define R300_SC_EDGERULE 0x43a8 /* BEGIN: Scissors and cliprects */ @@ -1279,21 +1285,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * For some reason, the top-left corner of the framebuffer is at (1440, 1440) * for the purpose of clipping and scissors. 
*/ -#define R300_RE_CLIPRECT_TL_0 0x43B0 -#define R300_RE_CLIPRECT_BR_0 0x43B4 -#define R300_RE_CLIPRECT_TL_1 0x43B8 -#define R300_RE_CLIPRECT_BR_1 0x43BC -#define R300_RE_CLIPRECT_TL_2 0x43C0 -#define R300_RE_CLIPRECT_BR_2 0x43C4 -#define R300_RE_CLIPRECT_TL_3 0x43C8 -#define R300_RE_CLIPRECT_BR_3 0x43CC +#define R300_SC_CLIPRECT_TL_0 0x43B0 +#define R300_SC_CLIPRECT_BR_0 0x43B4 +#define R300_SC_CLIPRECT_TL_1 0x43B8 +#define R300_SC_CLIPRECT_BR_1 0x43BC +#define R300_SC_CLIPRECT_TL_2 0x43C0 +#define R300_SC_CLIPRECT_BR_2 0x43C4 +#define R300_SC_CLIPRECT_TL_3 0x43C8 +#define R300_SC_CLIPRECT_BR_3 0x43CC # define R300_CLIPRECT_OFFSET 1440 # define R300_CLIPRECT_MASK 0x1FFF # define R300_CLIPRECT_X_SHIFT 0 # define R300_CLIPRECT_X_MASK (0x1FFF << 0) # define R300_CLIPRECT_Y_SHIFT 13 # define R300_CLIPRECT_Y_MASK (0x1FFF << 13) -#define R300_RE_CLIPRECT_CNTL 0x43D0 +#define R300_SC_CLIP_RULE 0x43D0 # define R300_CLIP_OUT (1 << 0) # define R300_CLIP_0 (1 << 1) # define R300_CLIP_1 (1 << 2) @@ -1313,8 +1319,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
/* gap */ -#define R300_RE_SCISSORS_TL 0x43E0 -#define R300_RE_SCISSORS_BR 0x43E4 +#define R300_SC_SCISSORS_TL 0x43E0 +#define R300_SC_SCISSORS_BR 0x43E4 # define R300_SCISSORS_OFFSET 1440 # define R300_SCISSORS_X_SHIFT 0 # define R300_SCISSORS_X_MASK (0x1FFF << 0) -- cgit v1.2.3 From 5e075fb80968744c72dfaba062e0b591ac69fad0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 17 May 2008 13:31:14 +1000 Subject: r500: set fragprog end to correct place --- src/mesa/drivers/dri/r300/r500_fragprog.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 4020edd6c8..a986947e9d 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -798,14 +798,15 @@ void r500TranslateFragmentShader(r300ContextPtr r300, init_program(r300, fp); cs = fp->cs; - fp->inst_offset = 0; - fp->inst_end = cs->nrslots - 1; - if (parse_program(fp) == GL_FALSE) { ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n"); dumb_shader(fp); + fp->inst_offset = 0; + fp->inst_end = cs->nrslots - 1; return; } + fp->inst_offset = 0; + fp->inst_end = cs->nrslots - 1; fp->translated = GL_TRUE; r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); -- cgit v1.2.3 From d8529d9b008ede05165317b8ebb834525fd9835c Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 15 May 2008 00:11:10 -0700 Subject: r5xx: Unbreak MAX and MIN. Both of them had faulty copypasta. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index a986947e9d..2f63f3010e 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -509,7 +509,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_MAX: src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); @@ -526,7 +526,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_MIN: src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask; fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); -- cgit v1.2.3 From 0de02f1716edc16257339af78f198072da87539f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 15 May 2008 00:18:08 -0700 Subject: r5xx: First swing at OPCODE_COS. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 2f63f3010e..0aa6dc474a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -444,6 +444,20 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); break; + case OPCODE_COS: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_COS + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + break; case OPCODE_DP3: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); -- cgit v1.2.3 From 405ee871c54d78e63cef1a570578a755250102c8 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 15 May 2008 00:49:32 -0700 Subject: r5xx: Adding more opcodes. EX2, FRC, LG2, SIN, RCP, and RSQ, if you care. All of these except FRC are like COS. This pretty much rounds out the set of opcodes which can be done in one ALU inst. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 82 +++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 0aa6dc474a..550cc0dbdd 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -497,6 +497,47 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_EX2: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_FRC: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(dest); + case OPCODE_LG2: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + 
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + break; case OPCODE_MAD: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); @@ -583,6 +624,47 @@ static GLboolean parse_program(struct r500_fragment_program *fp) // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); break; + case OPCODE_RCP: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_RCP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_RSQ: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_SIN: + src[0] = make_src(fp, fpi->SrcReg[0]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | pixel_mask; + fp->inst[counter].inst1 
= R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_SIN + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); case OPCODE_SUB: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); -- cgit v1.2.3 From d5aa42166152c4817d4fb06f183552efc135304b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 15 May 2008 10:29:38 -0700 Subject: r5xx: Add OPCODE_SCS. It's disabled, though, because it doesn't work. I'll figure it out later... --- src/mesa/drivers/dri/r300/r500_fragprog.c | 52 ++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 550cc0dbdd..f6a4f2c25d 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -371,7 +371,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; - int flags, pixel_mask = 0, output_mask = 0, counter = 0; + int flags, pixel_mask = 0, output_mask = 0, counter = 0, temp_pixel_mask = 0; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("The program is empty!\n"); @@ -652,6 +652,56 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; +#if 0 + case OPCODE_SCS: + /* Do a cosine, then a sine, masking out the channels we want to protect. */ + src[0] = make_src(fp, fpi->SrcReg[0]); + /* Cosine only goes in R (x) channel. 
*/ + temp_pixel_mask = 0x1 << 11; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | temp_pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_COS + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + counter++; + /* Sine only goes in G (y) channel. */ + temp_pixel_mask = 0x2 << 11; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | temp_pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_SIN + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + counter++; + /* Put 0 into B,A (z,w) channels. 
*/ + temp_pixel_mask = 0xC << 11; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | temp_pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZ_ZERO) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZ_ZERO); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; +#endif case OPCODE_SIN: src[0] = make_src(fp, fpi->SrcReg[0]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU -- cgit v1.2.3 From c11a33fe76123abb19cfc1da7d3701a44fca2f23 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 15 May 2008 10:51:52 -0700 Subject: r5xx: Add OPCODE_SWZ. It's so easy! --- src/mesa/drivers/dri/r300/r500_fragprog.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f6a4f2c25d..f8334f4dc6 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -741,6 +741,10 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) | R500_ALU_RGBA_ALPHA_MOD_C_NEG; break; + case OPCODE_SWZ: + emit_mov(fp, counter, fpi->SrcReg[0], dest); + fp->inst[counter].inst0 |= pixel_mask; + break; case OPCODE_TEX: emit_tex(fp, fpi, OPCODE_TEX, dest, counter); break; -- cgit v1.2.3 From 16cc362f0ba9fb240f3d47f06e74ac215c4d6c27 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 16 May 2008 11:46:26 -0700 Subject: r5xx: Fix SCS. Output instructions need to be marked OUT so they can write to the fifo. 
Also, negation doesn't work with SWZ yet. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 32 ++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f8334f4dc6..01f6010544 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -652,14 +652,18 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; -#if 0 case OPCODE_SCS: + /* TODO: Make this elegant! */ /* Do a cosine, then a sine, masking out the channels we want to protect. */ src[0] = make_src(fp, fpi->SrcReg[0]); /* Cosine only goes in R (x) channel. */ - temp_pixel_mask = 0x1 << 11; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | temp_pixel_mask; + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + fp->inst[counter].inst0 = R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | 0x1 << 14; + } else { + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | 0x1 << 11; + } fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -671,8 +675,11 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); counter++; /* Sine only goes in G (y) channel. 
*/ - temp_pixel_mask = 0x2 << 11; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | temp_pixel_mask; + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + fp->inst[counter].inst0 = R500_INST_TYPE_OUT | 0x2 << 14; + } else { + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | 0x2 << 11; + } fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -684,8 +691,11 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); counter++; /* Put 0 into B,A (z,w) channels. */ - temp_pixel_mask = 0xC << 11; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | temp_pixel_mask; + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + fp->inst[counter].inst0 = R500_INST_TYPE_OUT | 0xC << 14; + } else { + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | 0xC << 11; + } fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -694,14 +704,13 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); fp->inst[counter].inst4 = R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZ_ZERO) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZ_ZERO); + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_ADDRD(dest) | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); break; -#endif case OPCODE_SIN: src[0] = make_src(fp, fpi->SrcReg[0]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU @@ -742,6 +751,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ALPHA_MOD_C_NEG; break; case OPCODE_SWZ: + /* TODO: Negation masks! 
*/ emit_mov(fp, counter, fpi->SrcReg[0], dest); fp->inst[counter].inst0 |= pixel_mask; break; -- cgit v1.2.3 From 6dd3c0ed962dd3c2d4db331d4c745b39b7dde8c3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 17 May 2008 09:27:35 -0700 Subject: r5xx: Fix FRC. This makes tri-frc work. (Remind me again why I'm allowed near a compiler, lawl.) --- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 01f6010544..8abdc26733 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -524,6 +524,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC | R500_ALU_RGBA_ADDRD(dest); + break; case OPCODE_LG2: src[0] = make_src(fp, fpi->SrcReg[0]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU @@ -724,6 +725,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); + break; case OPCODE_SUB: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); -- cgit v1.2.3 From c57b3b1d2c4344603763c8d200f111a132d3899f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 17 May 2008 12:45:46 -0700 Subject: r5xx: Added OPCODE_DPH. Like DP4, but with one swizzle change. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 8abdc26733..4f2ff514cc 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -497,6 +497,26 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_DPH: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + /* Based on DP3 */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + | R500_INST_TEX_SEM_WAIT | pixel_mask; + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 + | R500_ALU_RGBA_ADDRD(dest); + break; case OPCODE_EX2: src[0] = make_src(fp, fpi->SrcReg[0]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU -- cgit v1.2.3 From 0910d9d4d68a3757f8777974ead2e4e34f48433e Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 17 May 2008 13:38:35 -0700 Subject: r5xx: Add OPCODE_KIL. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 4f2ff514cc..3a055aeadb 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -236,6 +236,9 @@ static void emit_tex(struct r500_fragment_program *fp, fp->inst[counter].inst1 |= R500_TEX_UNSCALED; switch (opcode) { + case OPCODE_KIL: + fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL; + break; case OPCODE_TEX: fp->inst[counter].inst1 |= R500_TEX_INST_LD; break; @@ -545,6 +548,9 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_KIL: + emit_tex(fp, fpi, OPCODE_KIL, dest, counter); + break; case OPCODE_LG2: src[0] = make_src(fp, fpi->SrcReg[0]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU -- cgit v1.2.3 From 126673261de0dc5d64b05e3f76ced6801c28fffb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 18 May 2008 15:24:38 +1000 Subject: r500: you can have a single texcoord --- src/mesa/drivers/dri/r300/r300_state.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 063f39ab70..383a6fbae8 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1716,9 +1716,12 @@ static void r500SetupRSUnit(GLcontext * ctx) else count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - /* always have a least 2 tex coords */ + /* always have one texcoord */ swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; + if (count >= 2) + swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT; + else + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; if (count >= 3) swiz |= in_texcoords++ <<
R500_RS_IP_TEX_PTR_R_SHIFT; -- cgit v1.2.3 From bdfd5d95c5543154f7af17f0c001dc2b7044c1c4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 18 May 2008 17:58:29 +1000 Subject: r300: fixup US_OUT_FMT bits --- src/mesa/drivers/dri/r300/r300_state.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 383a6fbae8..74b95d107c 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2239,11 +2239,15 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF; - r300->hw.us_out_fmt.cmd[1] = 0x00001B01; - r300->hw.us_out_fmt.cmd[2] = 0x00001B0F; - r300->hw.us_out_fmt.cmd[3] = 0x00001B0F; - r300->hw.us_out_fmt.cmd[4] = 0x00001B0F; - r300->hw.us_out_fmt.cmd[5] = 0x00000001; + r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8 | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W24; r300Enable(ctx, GL_FOG, ctx->Fog.Enabled); r300Fogfv(ctx, GL_FOG_MODE, NULL); -- cgit v1.2.3 From 2225b9bdb08228fc824e9011341e8c0916fe2e07 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 18 May 2008 22:38:28 -0700 Subject: r5xx: ALU/OUT fixups. Lots of small changes. Intentionally breaks some tex stuffs. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 99 ++++++++++++++----------------- 1 file changed, 43 insertions(+), 56 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 3a055aeadb..65cb62f67b 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -342,14 +342,24 @@ static void dumb_shader(struct r500_fragment_program *fp) fp->translated = GL_TRUE; } -/* static void emit_alu(struct r500_fragment_program *fp) { - * } */ +static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) { + if (fpi->DstReg.Index == PROGRAM_OUTPUT) { + fp->inst[counter].inst0 = R500_INST_TYPE_OUT + /* output_mask */ + | (fpi->DstReg.WriteMask << 14); + } else { + fp->inst[counter].inst0 = R500_INST_TYPE_ALU + /* pixel_mask */ + | (fpi->DstReg.WriteMask << 11); + } + + fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT; +} static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) { /* The r3xx shader uses MAD to implement MOV. We are using CMP, since * it is technically more accurate and recommended by ATI/AMD. 
*/ GLuint src_reg = make_src(fp, src); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT; fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -374,7 +384,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; - int flags, pixel_mask = 0, output_mask = 0, counter = 0, temp_pixel_mask = 0; + int flags, pixel_mask = 0, output_mask = 0, counter = 0; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("The program is empty!\n"); @@ -392,8 +402,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) switch (fpi->Opcode) { case OPCODE_ABS: + emit_alu(fp, counter, fpi); emit_mov(fp, counter, fpi->SrcReg[0], dest); - fp->inst[counter].inst0 |= pixel_mask; fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_MOD_B_ABS; fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS @@ -403,8 +413,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0+src1 */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -427,8 +436,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); src[2] = make_src(fp, fpi->SrcReg[2]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -449,8 +457,7 @@ static GLboolean parse_program(struct 
r500_fragment_program *fp) break; case OPCODE_COS: src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -464,8 +471,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_DP3: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -484,8 +490,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); /* Based on DP3 */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -504,8 +509,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); /* Based on DP3 */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -522,8 +526,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_EX2: src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = 
R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -536,8 +539,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_FRC: src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -553,8 +555,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_LG2: src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -569,8 +570,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); src[2] = make_src(fp, fpi->SrcReg[2]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -592,7 +592,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_MAX: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -609,7 +609,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_MIN: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, 
fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -624,15 +624,14 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MOV: + emit_alu(fp, counter, fpi); emit_mov(fp, counter, fpi->SrcReg[0], dest); - fp->inst[counter].inst0 |= pixel_mask; break; case OPCODE_MUL: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: src0*src1+0 */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -653,8 +652,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_RCP: src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -667,8 +665,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_RSQ: src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -684,6 +681,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) /* Do a cosine, then a sine, masking out the channels we want to protect. 
*/ src[0] = make_src(fp, fpi->SrcReg[0]); /* Cosine only goes in R (x) channel. */ + fpi->DstReg.WriteMask = 0x1; + emit_alu(fp, counter, fpi); if (fpi->DstReg.File == PROGRAM_OUTPUT) { fp->inst[counter].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | 0x1 << 14; @@ -702,11 +701,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); counter++; /* Sine only goes in G (y) channel. */ - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - fp->inst[counter].inst0 = R500_INST_TYPE_OUT | 0x2 << 14; - } else { - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | 0x2 << 11; - } + fpi->DstReg.WriteMask = 0x2; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -718,11 +714,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); counter++; /* Put 0 into B,A (z,w) channels. */ - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - fp->inst[counter].inst0 = R500_INST_TYPE_OUT | 0xC << 14; - } else { - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | 0xC << 11; - } + fpi->DstReg.WriteMask = 0xC; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -740,8 +733,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_SIN: src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -756,8 +748,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0-src1 */ - 
fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | pixel_mask; + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) | R500_RGB_ADDR2(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) @@ -780,8 +771,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_SWZ: /* TODO: Negation masks! */ + emit_alu(fp, counter, fpi); emit_mov(fp, counter, fpi->SrcReg[0], dest); - fp->inst[counter].inst0 |= pixel_mask; break; case OPCODE_TEX: emit_tex(fp, fpi, OPCODE_TEX, dest, counter); @@ -801,9 +792,6 @@ static GLboolean parse_program(struct r500_fragment_program *fp) if (fpi->SaturateMode == SATURATE_ZERO_ONE) { fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; } - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - fp->inst[counter].inst0 |= R500_INST_TYPE_OUT | output_mask; - } counter++; @@ -813,9 +801,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) } /* Finish him! (If it's an ALU/OUT instruction...) */ - if ((fp->inst[counter-1].inst0 & 0x3) <= 1) { - fp->inst[counter-1].inst0 |= R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | R500_INST_LAST; + if ((fp->inst[counter-1].inst0 & 0x3) == 1) { + fp->inst[counter-1].inst0 |= R500_INST_LAST; } else { /* We still need to put an output inst, right? */ fp->inst[counter].inst0 = R500_INST_TYPE_OUT @@ -824,7 +811,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst1 = R500_RGB_ADDR0(dest); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 = R500_ALPHA_OP_MAD -- cgit v1.2.3 From a6c38f2f648f91f35594383666eec01abdc19632 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 18 May 2008 23:35:07 -0700 Subject: r5xx: Fix typo of epic proportions. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 65cb62f67b..07b000b488 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -343,7 +343,7 @@ static void dumb_shader(struct r500_fragment_program *fp) } static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) { - if (fpi->DstReg.Index == PROGRAM_OUTPUT) { + if (fpi->DstReg.File == PROGRAM_OUTPUT) { fp->inst[counter].inst0 = R500_INST_TYPE_OUT /* output_mask */ | (fpi->DstReg.WriteMask << 14); -- cgit v1.2.3 From 2708d7f7005c6a65980f5eb0377a9fd7917bce51 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 18 May 2008 23:52:54 -0700 Subject: r5xx: Swap sources for CMP. Follows the same pattern as the op on r3xx/r4xx. Thanks airlied. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 07b000b488..ef8717e387 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -433,27 +433,29 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])); break; case OPCODE_CMP: + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); src[2] = make_src(fp, fpi->SrcReg[2]); emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); + fp->inst[counter].inst1 = 
R500_RGB_ADDR0(src[2]) + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2]) + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); fp->inst[counter].inst4 = R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_ADDRD(dest) | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); break; case OPCODE_COS: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -805,6 +807,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter-1].inst0 |= R500_INST_LAST; } else { /* We still need to put an output inst, right? */ + WARN_ONCE("Final FP instruction is not an OUT.\n"); +#if 0 fp->inst[counter].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST | output_mask; @@ -823,6 +827,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); counter++; +#endif } fp->cs->nrslots = counter; -- cgit v1.2.3 From c60bdcf8a80b7307add8e09aca2356591c86fbcd Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 19 May 2008 00:00:08 -0700 Subject: r5xx: Fix magic offsets for output fifo write masks. Well, this sure explains a lot. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index ef8717e387..4fc5a676a5 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -346,7 +346,7 @@ static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_ if (fpi->DstReg.File == PROGRAM_OUTPUT) { fp->inst[counter].inst0 = R500_INST_TYPE_OUT /* output_mask */ - | (fpi->DstReg.WriteMask << 14); + | (fpi->DstReg.WriteMask << 15); } else { fp->inst[counter].inst0 = R500_INST_TYPE_ALU /* pixel_mask */ @@ -397,7 +397,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) dest = make_dest(fp, fpi->DstReg); pixel_mask = fpi->DstReg.WriteMask << 11; - output_mask = fpi->DstReg.WriteMask << 14; + output_mask = fpi->DstReg.WriteMask << 15; } switch (fpi->Opcode) { -- cgit v1.2.3 From cddab021e392ed78b5375ef6924bc7d4dbdd01c0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 19 May 2008 20:24:09 +1000 Subject: r500: add fragment program debug dumper --- src/mesa/drivers/dri/r300/r500_fragprog.c | 145 ++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 4fc5a676a5..252171c66b 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -93,6 +93,8 @@ #define MAKE_SWIZ_RGBA_C(x) (x << 14) #define MAKE_SWIZ_ALPHA_C(x) (x << 27) +static void dump_program(struct r500_fragment_program *fp); + static inline GLuint make_rgb_swizzle(struct prog_src_register src) { GLuint swiz = 0x0; GLuint temp; @@ -989,8 +991,151 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fp->inst_end = cs->nrslots - 1; fp->translated = GL_TRUE; + if (RADEON_DEBUG & DEBUG_PIXEL) + dump_program(fp); + 
r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); } update_params(fp); + +} + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "1/2"; + case 6: return "1"; + case 7: return "U"; + } +} + +static char *toop(int op_val) +{ + char *str; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static void dump_program(struct r500_fragment_program *fp) +{ + int pc = 0; + int n; + uint32_t inst; + uint32_t inst0; + char *str; + + for (n = 0; n < fp->inst_end+1; n++) { + inst0 = inst = fp->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? 
"LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"%x %x\n", (inst >> 11) & 0xf, (inst >> 15) & 0xf); + + switch(inst0 & 0x3) { + case 0: + case 1: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1); + inst = fp->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2); + inst = fp->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3); + inst = fp->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4); + inst = fp->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_b_src:%d %s %d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5); + inst = fp->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? 
"(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case 2: + break; + case 3: + fprintf(stderr,"1: TEX INST 0x%08x\n", fp->inst[n].inst1); + fprintf(stderr,"2: TEX ADDR 0x%08x\n", fp->inst[n].inst2); + fprintf(stderr,"2: TEX ADDR DXDY 0x%08x\n", fp->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + + } -- cgit v1.2.3 From 60b8e1f5243dfc3233d38700755a06c38b4967f1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 19 May 2008 21:11:55 +1000 Subject: r500: add mask debugging --- src/mesa/drivers/dri/r300/r500_fragprog.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 252171c66b..d0675f6eb3 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1059,6 +1059,30 @@ static char *to_alpha_op(int op_val) return str; } +static char *to_mask(int val) +{ + char *str; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + static void dump_program(struct r500_fragment_program *fp) { int pc = 0; @@ -1081,7 +1105,8 @@ static void dump_program(struct r500_fragment_program *fp) inst & R500_INST_LAST ? "LAST" : "", inst & R500_INST_NOP ? "NOP" : "", inst & R500_INST_ALU_WAIT ? 
"ALU WAIT" : ""); - fprintf(stderr,"%x %x\n", (inst >> 11) & 0xf, (inst >> 15) & 0xf); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); switch(inst0 & 0x3) { case 0: -- cgit v1.2.3 From ac315792bfccd547e6f84faabbb76c48af48a404 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 19 May 2008 21:40:40 +1000 Subject: r500: fix swz gets and some returns --- src/mesa/drivers/dri/r300/r500_fragprog.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index d0675f6eb3..e2a32d05c8 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -101,7 +101,7 @@ static inline GLuint make_rgb_swizzle(struct prog_src_register src) { /* This could be optimized, but it should be plenty fast already. */ int i; for (i = 0; i < 3; i++) { - temp = (src.Swizzle >> i*3) & 0x7; + temp = GET_SWZ(src.Swizzle, i); /* Fix SWIZZLE_ONE */ if (temp == 5) temp++; swiz += temp << i*3; @@ -110,7 +110,8 @@ static inline GLuint make_rgb_swizzle(struct prog_src_register src) { } static inline GLuint make_alpha_swizzle(struct prog_src_register src) { - GLuint swiz = (src.Swizzle >> 12) & 0x7; + GLuint swiz = GET_SWZ(src.Swizzle, 3); + if (swiz == 5) swiz++; return swiz; } @@ -1012,6 +1013,7 @@ static char *toswiz(int swiz_val) { case 6: return "1"; case 7: return "U"; } + return NULL; } static char *toop(int op_val) @@ -1037,7 +1039,7 @@ static char *toop(int op_val) static char *to_alpha_op(int op_val) { - char *str; + char *str = NULL; switch (op_val) { case 0: str = "MAD"; break; case 1: str = "DP"; break; @@ -1061,7 +1063,7 @@ static char *to_alpha_op(int op_val) static char *to_mask(int val) { - char *str; + char *str = NULL; switch(val) { case 0: str = "NONE"; break; case 1: str = "R"; break; @@ -1089,7 +1091,7 @@ static void dump_program(struct 
r500_fragment_program *fp) int n; uint32_t inst; uint32_t inst0; - char *str; + char *str = NULL; for (n = 0; n < fp->inst_end+1; n++) { inst0 = inst = fp->inst[n].inst0; @@ -1161,6 +1163,5 @@ static void dump_program(struct r500_fragment_program *fp) } fprintf(stderr,"\n"); } - } -- cgit v1.2.3 From 03b3fed8f1dcd5df5049b9236cfaa60a17e56e6f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 19 May 2008 21:58:28 +1000 Subject: r500: add more input srcs --- src/mesa/drivers/dri/r300/r500_fragprog.c | 42 +++++++++++++++++++------------ 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index e2a32d05c8..7f8139a533 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -181,22 +181,32 @@ static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_registe COMPILE_STATE; GLuint reg; switch (src.File) { - case PROGRAM_TEMPORARY: - reg = src.Index + fp->temp_reg_offset; - break; - case PROGRAM_INPUT: - reg = cs->inputs[src.Index].reg; - break; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> - ParameterValues[src.Index]); - break; - default: - ERROR("Can't handle src.File %x\n", src.File); - reg = 0x0; - break; + case PROGRAM_TEMPORARY: + reg = src.Index + fp->temp_reg_offset; + break; + case PROGRAM_INPUT: + reg = cs->inputs[src.Index].reg; + break; + case PROGRAM_LOCAL_PARAM: + reg = emit_const4fv(fp, + fp->mesa_program.Base.LocalParams[src. + Index]); + break; + case PROGRAM_ENV_PARAM: + reg = emit_const4fv(fp, + fp->ctx->FragmentProgram.Parameters[src. 
+ Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> + ParameterValues[src.Index]); + break; + default: + ERROR("Can't handle src.File %x\n", src.File); + reg = 0x0; + break; } return reg; } -- cgit v1.2.3 From 476248befe2bd04558ce53e937230c1a400a51b6 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 19 May 2008 11:01:00 -0700 Subject: r5xx: Fixup emit_tex, add debugging info, enable temp temps. emit_tex now chases itself with an OUT if needed. Added airlied's dump_program, with some fixes. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 170 ++++++++++++++++++------------ 1 file changed, 105 insertions(+), 65 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 7f8139a533..cc042a00ab 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -127,28 +127,8 @@ static inline GLuint make_strq_swizzle(struct prog_src_register src) { return swiz; } -static int get_temp(struct r500_fragment_program *fp, int slot) { - - COMPILE_STATE; - - int r = slot; - - while (cs->inputs[r].refcount != 0) { - /* Crap, taken. */ - r++; - } - - fp->temp_reg_offset = r - slot; - - if (r >= R500_US_NUM_TEMP_REGS) { - ERROR("Out of hardware temps!\n"); - return 0; - } - - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; - - return r; +static int get_temp(struct r500_fragment_program *fp) { + return fp->max_temp_idx + 1; } /* Borrowed verbatim from r300_fragprog since it hasn't changed. 
*/ @@ -239,6 +219,12 @@ static void emit_tex(struct r500_fragment_program *fp, mask = fpi->DstReg.WriteMask << 11; hwsrc = make_src(fp, fpi->SrcReg[0]); + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + hwdest = get_temp(fp); + } else { + hwdest = dest; + } + fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask | R500_INST_TEX_SEM_WAIT; @@ -269,15 +255,35 @@ static void emit_tex(struct r500_fragment_program *fp, /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A - | R500_TEX_DST_ADDR(dest) + | R500_TEX_DST_ADDR(hwdest) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - - fp->inst[counter].inst3 = 0x0; fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; + fp->inst[counter].inst5 = 0x0; + + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | (mask << 4); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A) + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + } } static void dumb_shader(struct r500_fragment_program *fp) @@ -727,8 +733,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | 
R500_ALU_RGBA_ADDRD(dest); + /* Put 0 into B,A (z,w) channels. counter++; - /* Put 0 into B,A (z,w) channels. */ fpi->DstReg.WriteMask = 0xC; emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); @@ -744,7 +750,36 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_ADDRD(dest) | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */ + break; + case OPCODE_SGE: + /* We use SRCP, so as a precaution we're + * going to set NOP in previous inst, if possible. */ + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) + | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) + | R500_ALPHA_SRCP_OP_A1_MINUS_A0; + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRCP + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRCP + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); break; case OPCODE_SIN: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -759,6 +794,35 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_SLT: + /* We use SRCP, so as a precaution we're + * going to set 
NOP in previous inst, if possible. */ + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) + | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) + | R500_ALPHA_SRCP_OP_A1_MINUS_A0; + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRCP + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRCP + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + break; case OPCODE_SUB: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); @@ -791,12 +855,18 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_TEX: emit_tex(fp, fpi, OPCODE_TEX, dest, counter); + if (fpi->DstReg.File == PROGRAM_OUTPUT) + counter++; break; case OPCODE_TXB: emit_tex(fp, fpi, OPCODE_TXB, dest, counter); + if (fpi->DstReg.File == PROGRAM_OUTPUT) + counter++; break; case OPCODE_TXP: emit_tex(fp, fpi, OPCODE_TXP, dest, counter); + if (fpi->DstReg.File == PROGRAM_OUTPUT) + counter++; break; default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); @@ -822,24 +892,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) /* We still need to put an output inst, right? 
*/ WARN_ONCE("Final FP instruction is not an OUT.\n"); #if 0 - fp->inst[counter].inst0 = R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - output_mask; - fp->inst[counter].inst1 = R500_RGB_ADDR0(dest); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(0) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 - | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(0) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; + #endif } @@ -946,20 +999,6 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) return; } -#if 0 - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - int idx; - for (i = 0; i < 3; i++) { - idx = fpi->SrcReg[i].Index; - if (fpi->SrcReg[i].File == PROGRAM_INPUT) { - cs->inputs[idx].refcount++; - if (fp->max_temp_idx < idx) - fp->max_temp_idx = idx; - } - } - } -#endif - fp->max_temp_idx = fp->temp_reg_offset + 1; cs->temp_in_use = temps_used; @@ -1005,6 +1044,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, if (RADEON_DEBUG & DEBUG_PIXEL) dump_program(fp); + r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); } @@ -1094,7 +1134,7 @@ static char *to_mask(int val) } return str; } - + static void dump_program(struct r500_fragment_program *fp) { int pc = 0; @@ -1142,9 +1182,9 @@ static void dump_program(struct r500_fragment_program *fp) fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3); inst = fp->inst[n].inst3; fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", - (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), 
toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, - (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3); @@ -1159,7 +1199,7 @@ static void dump_program(struct r500_fragment_program *fp) inst = fp->inst[n].inst5; fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), (inst >> 23) & 0x3, (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); break; @@ -1173,5 +1213,5 @@ static void dump_program(struct r500_fragment_program *fp) } fprintf(stderr,"\n"); } - + } -- cgit v1.2.3 From 282cdc8b5c2495195fab8ee4afa9a7903caa459e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 20 May 2008 15:59:56 +1000 Subject: r300/r500: fix RS col fmt bits --- src/mesa/drivers/dri/r300/r300_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index b742fabbb5..08e41820d6 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1177,8 +1177,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_RS_COL_PTR(x) (x << 6) # define R300_RS_COL_FMT(x) (x << 9) # define R300_RS_COL_FMT_RGBA 0 -# define R300_RS_COL_FMT_RGB0 2 -# define R300_RS_COL_FMT_RGB1 3 +# define R300_RS_COL_FMT_RGB0 1 +# define R300_RS_COL_FMT_RGB1 2 # define R300_RS_COL_FMT_000A 4 # define R300_RS_COL_FMT_0000 5 # define R300_RS_COL_FMT_0001 6 -- cgit v1.2.3 From 2005de48f9b87de6fffb792a6b16a880e92e3d82 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 20 May 2008 16:02:19 +1000 Subject: r300: some ctrl-m's weird. --- src/mesa/drivers/dri/r300/r300_reg.h | 40 ++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 08e41820d6..8b0da0db4e 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1173,9 +1173,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_RS_IP_3 0x431C # define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ # define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ -# define R300_RS_TEX_PTR(x) (x << 0) -# define R300_RS_COL_PTR(x) (x << 6) -# define R300_RS_COL_FMT(x) (x << 9) +# define R300_RS_TEX_PTR(x) (x << 0) +# define R300_RS_COL_PTR(x) (x << 6) +# define R300_RS_COL_FMT(x) (x << 9) # define R300_RS_COL_FMT_RGBA 0 # define R300_RS_COL_FMT_RGB0 1 # define R300_RS_COL_FMT_RGB1 2 @@ -1189,12 +1189,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RS_SEL_T(x) (x << 16) # define R300_RS_SEL_R(x) (x << 19) # define R300_RS_SEL_Q(x) (x << 22) -# define R300_RS_SEL_C0 0 -# define R300_RS_SEL_C1 1 -# define R300_RS_SEL_C2 2 -# define R300_RS_SEL_C3 3 -# define R300_RS_SEL_K0 4 -# define R300_RS_SEL_K1 5 +# define R300_RS_SEL_C0 0 +# define R300_RS_SEL_C1 1 +# define R300_RS_SEL_C2 2 +# define R300_RS_SEL_C3 3 +# define R300_RS_SEL_K0 4 +# define R300_RS_SEL_K1 5 /* */ @@ -1214,17 +1214,17 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R500_RS_INST_13 0x4354 #define R500_RS_INST_14 0x4358 #define R500_RS_INST_15 0x435c -#define R500_RS_INST_TEX_ID_SHIFT 0 -#define R500_RS_INST_TEX_CN_WRITE (1 << 4) -#define R500_RS_INST_TEX_ADDR_SHIFT 5 -#define R500_RS_INST_COL_ID_SHIFT 12 -#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) -#define R500_RS_INST_COL_CN_WRITE (1 << 16) -#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) -#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) -#define R500_RS_INST_COL_ADDR_SHIFT 18 -#define R500_RS_INST_TEX_ADJ (1 << 25) -#define R500_RS_INST_W_CN (1 << 26) +#define R500_RS_INST_TEX_ID_SHIFT 0 +#define R500_RS_INST_TEX_CN_WRITE (1 << 4) +#define R500_RS_INST_TEX_ADDR_SHIFT 5 +#define R500_RS_INST_COL_ID_SHIFT 12 +#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) +#define R500_RS_INST_COL_CN_WRITE (1 << 16) +#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) +#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) +#define R500_RS_INST_COL_ADDR_SHIFT 18 +#define R500_RS_INST_TEX_ADJ (1 << 25) +#define R500_RS_INST_W_CN (1 << 26) /* These DWORDs control how vertex data is routed into fragment program * registers, after interpolators. 
-- cgit v1.2.3 From f0d76d526b0d37f36085d58b0c5c8cb9d9d9d7c9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 20 May 2008 16:30:36 +1000 Subject: r300/r500: fixup some of the register write sizes --- src/mesa/drivers/dri/r300/r300_ioctl.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 9aa1120966..cc85d45efc 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -248,7 +248,7 @@ static void r300EmitClearState(GLcontext * ctx) R300_VPORT_Z_OFFSET_ENA); e32(0x8); - reg_start(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO); + reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0); e32(0xaaaaaaaa); R300_STATECHANGE(r300, vof); @@ -292,7 +292,7 @@ static void r300EmitClearState(GLcontext * ctx) if (!is_r500) { R300_STATECHANGE(r300, ri); - reg_start(R300_RS_IP_0, 8); + reg_start(R300_RS_IP_0, 7); for (i = 0; i < 8; ++i) { e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); } @@ -309,11 +309,12 @@ static void r300EmitClearState(GLcontext * ctx) } else { R300_STATECHANGE(r300, ri); - reg_start(R500_RS_IP_0, 8); + reg_start(R500_RS_IP_0, 7); for (i = 0; i < 8; ++i) { - e32((1 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (2 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (3 << R500_RS_IP_TEX_PTR_Q_SHIFT) ); + e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); } R300_STATECHANGE(r300, rc); -- cgit v1.2.3 From 78fa5060593b9a419281230a264eb1180c9ed2b2 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 19 May 2008 12:26:04 -0700 Subject: r5xx: New fix for COS/SIN/SCS. Not perfect yet, but getting better. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 45 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 23 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index cc042a00ab..3a9e181780 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -116,6 +116,13 @@ static inline GLuint make_alpha_swizzle(struct prog_src_register src) { return swiz; } +static inline GLuint make_sop_swizzle(struct prog_src_register src) { + GLuint swiz = GET_SWZ(src.Swizzle, 0); + + if (swiz == 5) swiz++; + return swiz; +} + static inline GLuint make_strq_swizzle(struct prog_src_register src) { GLuint swiz = 0x0; GLuint temp = src.Swizzle; @@ -481,11 +488,10 @@ static GLboolean parse_program(struct r500_fragment_program *fp) emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0; fp->inst[counter].inst4 = R500_ALPHA_OP_COS | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; @@ -704,20 +710,13 @@ static GLboolean parse_program(struct r500_fragment_program *fp) /* Cosine only goes in R (x) channel. 
*/ fpi->DstReg.WriteMask = 0x1; emit_alu(fp, counter, fpi); - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - fp->inst[counter].inst0 = R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | 0x1 << 14; - } else { - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - | R500_INST_TEX_SEM_WAIT | 0x1 << 11; - } fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_COS | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); counter++; @@ -730,7 +729,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_SIN | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); /* Put 0 into B,A (z,w) channels. 
@@ -786,11 +785,10 @@ static GLboolean parse_program(struct r500_fragment_program *fp) emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0; fp->inst[counter].inst4 = R500_ALPHA_OP_SIN | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; @@ -1021,11 +1019,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, if (!fp->translated) { - /* I need to see what I'm working with! */ - fprintf(stderr, "Mesa program:\n"); - fprintf(stderr, "-------------\n"); - _mesa_print_program(&fp->mesa_program.Base); - fflush(stdout); + init_program(r300, fp); cs = fp->cs; @@ -1041,8 +1035,13 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fp->inst_end = cs->nrslots - 1; fp->translated = GL_TRUE; - if (RADEON_DEBUG & DEBUG_PIXEL) - dump_program(fp); + if (RADEON_DEBUG & DEBUG_PIXEL) { + dump_program(fp); + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_print_program(&fp->mesa_program.Base); + fflush(stdout); + } r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); @@ -1190,7 +1189,7 @@ static void dump_program(struct r500_fragment_program *fp) fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4); inst = fp->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_b_src:%d %s %d\n", to_alpha_op(inst & 0xf), + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? 
"(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3); -- cgit v1.2.3 From 94994b13c51e076a4df069d550ec2f27550f6a28 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 19 May 2008 23:55:59 -0700 Subject: r5xx: Fixup SOP insts. Use the correct swizzle for alpha/SOP stuff. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 3a9e181780..f7c5efcee2 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -560,7 +560,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; @@ -589,7 +589,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; @@ -686,7 +686,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_RCP | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALPHA_SEL_A_SRC0 | 
MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; @@ -699,7 +699,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; -- cgit v1.2.3 From 2bda1a9502206ca2b7b35e39c82356f91de06914 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 20 May 2008 09:47:50 -0700 Subject: r5xx: Count refs so we don't have to guess on temp reg allocation. As a bonus, we can now have multiple temp temps, by slot. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 35 ++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f7c5efcee2..5869dca933 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -134,8 +134,17 @@ static inline GLuint make_strq_swizzle(struct prog_src_register src) { return swiz; } -static int get_temp(struct r500_fragment_program *fp) { - return fp->max_temp_idx + 1; +static int get_temp(struct r500_fragment_program *fp, int slot) { + + COMPILE_STATE; + + int r = cs->temp_in_use + 1 + slot; + + if (r > R500_US_NUM_TEMP_REGS) { + ERROR("Too many temporary registers requested, can't compile!\n"); + } + + return r; } /* Borrowed verbatim from r300_fragprog since it hasn't changed. 
*/ @@ -227,7 +236,7 @@ static void emit_tex(struct r500_fragment_program *fp, hwsrc = make_src(fp, fpi->SrcReg[0]); if (fpi->DstReg.File == PROGRAM_OUTPUT) { - hwdest = get_temp(fp); + hwdest = get_temp(fp, 0); } else { hwdest = dest; } @@ -274,8 +283,8 @@ static void emit_tex(struct r500_fragment_program *fp, counter++; fp->inst[counter].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | (mask << 4); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp)); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) | R500_ALU_RGB_SEL_B_SRC0 @@ -907,7 +916,7 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) struct gl_fragment_program *mp = &fp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; - GLuint temps_used = 0; /* for fp->temps[] */ + GLuint temps_used = 0; int i, j; /* New compile, reset tracking data */ @@ -989,17 +998,23 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) cs->inputs[i].reg = 0; } - /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. 
- * That way, we can free up the reg when it's no longer needed - */ if (!mp->Base.Instructions) { ERROR("No instructions found in program, going to go die now.\n"); return; } - fp->max_temp_idx = fp->temp_reg_offset + 1; + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + for (i = 0; i < 3; i++) { + if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { + if (fpi->SrcReg[i].Index > temps_used) + temps_used = fpi->SrcReg[i].Index; + } + } + } cs->temp_in_use = temps_used; + + fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1; } static void update_params(struct r500_fragment_program *fp) -- cgit v1.2.3 From 9ec2b1c83f3390dac2e5e7d3aa21ff4920f29243 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 21 May 2008 10:49:26 +1000 Subject: r500: finish main texture instruction decoding --- src/mesa/drivers/dri/r300/r500_fragprog.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 5869dca933..c83fa3befe 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1149,6 +1149,20 @@ static char *to_mask(int val) return str; } +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + static void dump_program(struct r500_fragment_program *fp) { int pc = 0; @@ -1220,9 +1234,20 @@ static void dump_program(struct r500_fragment_program *fp) case 2: break; case 3: - fprintf(stderr,"1: TEX INST 0x%08x\n", fp->inst[n].inst1); - fprintf(stderr,"2: TEX ADDR 0x%08x\n", fp->inst[n].inst2); - fprintf(stderr,"2: TEX ADDR DXDY 0x%08x\n", fp->inst[n].inst3); + inst = fp->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst 
>> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = fp->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3); break; } fprintf(stderr,"\n"); -- cgit v1.2.3 From b453b0e2e1c82e6d08180c341989d6d0c05f21fa Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 21 May 2008 12:14:42 +1000 Subject: r500: set the RS unit register for R500 not R300 dangnammit.. So this appears to be my BUG. damn it to hell. also fix sec color to be more like spec says. 
--- src/mesa/drivers/dri/r300/r300_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 74b95d107c..496b76dce4 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1552,7 +1552,7 @@ static void r300SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; if (count == 3) - interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0); interp_col[1] |= R300_RS_COL_PTR(1); rs_col_count += count; } @@ -1701,7 +1701,7 @@ static void r500SetupRSUnit(GLcontext * ctx) count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; interp_col[1] |= R500_RS_COL_PTR(1); if (count == 3) - interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1); + interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0); rs_col_count += count; } @@ -1791,7 +1791,7 @@ static void r500SetupRSUnit(GLcontext * ctx) | R300_HIRES_EN; assert(high_rr >= 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1); r300->hw.rc.cmd[2] = 0xC0 | high_rr; if (InputsRead) -- cgit v1.2.3 From bb57c30a537f2ae01a146dd697ca332f7667c5c5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 21 May 2008 16:00:18 +1000 Subject: r500: print out opcode string --- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index c83fa3befe..9e08b2757f 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -876,7 +876,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) counter++; break; default: - ERROR("unknown fpi->Opcode %d\n", 
fpi->Opcode); + ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode)); break; } -- cgit v1.2.3 From 1e2907f170116138b1ae304dc075ee52e377fd73 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 May 2008 08:24:28 -0700 Subject: r5xx: Add OPCODE_POW. Necessary for Google Earth, among other things. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 47 +++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 9e08b2757f..70940e8215 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -93,6 +93,9 @@ #define MAKE_SWIZ_RGBA_C(x) (x << 14) #define MAKE_SWIZ_ALPHA_C(x) (x << 27) +/* Writemasks */ +#define R500_WRITEMASK_ARGB 0xF + static void dump_program(struct r500_fragment_program *fp); static inline GLuint make_rgb_swizzle(struct prog_src_register src) { @@ -686,6 +689,50 @@ static GLboolean parse_program(struct r500_fragment_program *fp) // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); break; + case OPCODE_POW: + /* POW(a,b) = EX2(LN2(a)*b) */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) + | 
R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + break; case OPCODE_RCP: src[0] = make_src(fp, fpi->SrcReg[0]); emit_alu(fp, counter, fpi); -- cgit v1.2.3 From d06f4edb146cfb42fa2a3f654db141f88dcfe074 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 May 2008 13:51:32 -0700 Subject: r5xx: Initial (broken) OPCODE_LRP. Will compile, run, and not eat your kids, but the math is wrong. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 70940e8215..e67ff98a9a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -605,6 +605,49 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_LRP: + /* src0 * src1 + INV(src0) * src2 */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + src[2] = make_src(fp, fpi->SrcReg[2]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(get_temp(fp, 0)) + | R500_RGB_SRCP_OP_1_MINUS_RGB0; + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(get_temp(fp, 0)) + | R500_ALPHA_SRCP_OP_1_MINUS_A0; + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | 
R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC0 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + break; case OPCODE_MAD: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); -- cgit v1.2.3 From 0dfbe9cdd7e076fb23d90e99e225fd0e19b63dfb Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 May 2008 23:33:13 -0700 Subject: r5xx: Change debug info for readability. It's weird seeing the compiled program before the assembly, that's all. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index e67ff98a9a..f0c7fed7cb 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1141,11 +1141,11 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fp->translated = GL_TRUE; if (RADEON_DEBUG & DEBUG_PIXEL) { - dump_program(fp); fprintf(stderr, "Mesa program:\n"); fprintf(stderr, "-------------\n"); _mesa_print_program(&fp->mesa_program.Base); fflush(stdout); + dump_program(fp); } -- cgit v1.2.3 From 4f9dcdc35b277aa1ded60059a654da22d2075067 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Wed, 21 May 2008 23:35:43 -0700 Subject: r5xx: Fixed LRP. Works perfectly. It's a complex one, though, so it might fail in weird ways... 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f0c7fed7cb..7bde21a61e 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -606,12 +606,14 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_LRP: - /* src0 * src1 + INV(src0) * src2 */ + /* src0 * src1 + INV(src0) * src2 + * 1) MUL src0, src1, temp + * 2) PRE 1-src0; MAD srcp, src2, temp */ src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); src[2] = make_src(fp, fpi->SrcReg[2]); fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); + | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) @@ -630,10 +632,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) counter++; emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(get_temp(fp, 0)) + | R500_RGB_ADDR1(src[2]) + | R500_RGB_ADDR2(get_temp(fp, 0)) | R500_RGB_SRCP_OP_1_MINUS_RGB0; fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(get_temp(fp, 0)) + | R500_ALPHA_ADDR1(src[2]) + | R500_ALPHA_ADDR2(get_temp(fp, 0)) | R500_ALPHA_SRCP_OP_1_MINUS_A0; fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) @@ -641,12 +645,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + | R500_ALPHA_SEL_B_SRC1 
| R500_ALPHA_SWIZ_B_A; fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); break; case OPCODE_MAD: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From a01816da59cd7a18fca281ef94a822f08cec5c6e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 22 May 2008 17:09:30 +1000 Subject: r500: bump state atom size up for fp and fp constants --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 4 ++-- src/mesa/drivers/dri/r300/r300_context.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 995bfee42a..fc1b95b0ef 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -432,9 +432,9 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1); r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */ - ALLOC_STATE(r500fp, r500fp, R300_FPI_CMDSIZE, 0); + ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); - ALLOC_STATE(r500fp_const, r500fp_const, R300_FPP_CMDSIZE, 0); + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 06b81e6e63..eba93e4bfb 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -372,10 +372,14 @@ struct 
r300_state_atom { #define R300_FPI_CMD_0 0 #define R300_FPI_INSTR_0 1 #define R300_FPI_CMDSIZE 65 +/* R500 has space for 512 instructions - 6 dwords per instruction */ +#define R500_FPI_CMDSIZE (512*6+1) #define R300_FPP_CMD_0 0 #define R300_FPP_PARAM_0 1 #define R300_FPP_CMDSIZE (32*4+1) +/* R500 has space for 256 constants - 4 dwords per constant */ +#define R500_FPP_CMDSIZE (256*4+1) #define R300_FOGS_CMD_0 0 #define R300_FOGS_STATE 1 -- cgit v1.2.3 From d4e93864b8f05f8973d291ac287b27febbb5cb62 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 22 May 2008 02:34:57 -0700 Subject: r5xx: More trig work. SCS now works. COS/SIN have slight issues still. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 136 +++++++++++++++++++++++------- 1 file changed, 107 insertions(+), 29 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 7bde21a61e..35116a14e9 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -96,6 +96,13 @@ /* Writemasks */ #define R500_WRITEMASK_ARGB 0xF +/* 1/(2pi), needed for quick modulus in trig insts + * Thanks to glisse for pointing out how to do it!
*/ +static const GLfloat RCP_2PI[] = {0.15915494309189535, + 0.15915494309189535, + 0.15915494309189535, + 0.15915494309189535}; + static void dump_program(struct r500_fragment_program *fp); static inline GLuint make_rgb_swizzle(struct prog_src_register src) { @@ -497,9 +504,39 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_COS: src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = emit_const4fv(fp, RCP_2PI); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); + counter++; emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); + fp->inst[counter].inst2 = 
R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0; fp->inst[counter].inst4 = R500_ALPHA_OP_COS | R500_ALPHA_ADDRD(dest) @@ -807,14 +844,43 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_SCS: - /* TODO: Make this elegant! */ - /* Do a cosine, then a sine, masking out the channels we want to protect. */ src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = emit_const4fv(fp, RCP_2PI); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); + counter++; + /* Do a cosine, then a sine, masking out the channels we want to protect. */ /* Cosine only goes in R (x) channel. 
*/ fpi->DstReg.WriteMask = 0x1; emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_COS @@ -826,8 +892,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) /* Sine only goes in G (y) channel. */ fpi->DstReg.WriteMask = 0x2; emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst4 = R500_ALPHA_OP_SIN @@ -835,24 +901,6 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); - /* Put 0 into B,A (z,w) channels. 
- counter++; - fpi->DstReg.WriteMask = 0xC; - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 = R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */ break; case OPCODE_SGE: /* We use SRCP, so as a precaution we're @@ -885,9 +933,39 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_SIN: src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = emit_const4fv(fp, RCP_2PI); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = 
R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); + counter++; emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0; fp->inst[counter].inst4 = R500_ALPHA_OP_SIN | R500_ALPHA_ADDRD(dest) @@ -1144,7 +1222,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fp->inst_end = cs->nrslots - 1; fp->translated = GL_TRUE; - if (RADEON_DEBUG & DEBUG_PIXEL) { + if (1 || RADEON_DEBUG & DEBUG_PIXEL) { fprintf(stderr, "Mesa program:\n"); fprintf(stderr, "-------------\n"); _mesa_print_program(&fp->mesa_program.Base); -- cgit v1.2.3 From 34010bcc91bc2e8503e7b80187c1aea0e51e53b0 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 23 May 2008 00:12:37 -0700 Subject: r5xx: Add OPCODE_DST. Works completely, swizzles and everything. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 35116a14e9..dba7138374 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -429,7 +429,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; - int flags, pixel_mask = 0, output_mask = 0, counter = 0; + int temp_swiz, pixel_mask = 0, output_mask = 0, counter = 0; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("The program is empty!\n"); @@ -600,6 +600,33 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_DST: + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + /* [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. 
*/ + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | R500_ALU_RGB_SEL_B_SRC1; + /* Select [1, y, z, 1] */ + temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE; + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [1, y, 1, w] */ + temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6); + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; case OPCODE_EX2: src[0] = make_src(fp, fpi->SrcReg[0]); emit_alu(fp, counter, fpi); -- cgit v1.2.3 From 30e61500e162453d7affd855fe531ed2d1d6e80b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 23 May 2008 00:14:31 -0700 Subject: r5xx: Move dumb_shader. Was getting ticked having to scroll around it, lawl. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 152 +++++++++++++++--------------- 1 file changed, 75 insertions(+), 77 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index dba7138374..e6d684ee46 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -312,81 +312,6 @@ static void emit_tex(struct r500_fragment_program *fp, } } -static void dumb_shader(struct r500_fragment_program *fp) -{ - fp->inst[0].inst0 = R500_INST_TYPE_TEX - | R500_INST_TEX_SEM_WAIT - | R500_INST_RGB_WMASK_R - | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B - | R500_INST_ALPHA_WMASK - | R500_INST_RGB_CLAMP - | R500_INST_ALPHA_CLAMP; - fp->inst[0].inst1 = R500_TEX_ID(0) - | R500_TEX_INST_LD - | R500_TEX_SEM_ACQUIRE - | R500_TEX_IGNORE_UNCOVERED; - fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) - | R500_TEX_SRC_S_SWIZ_R - | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_DST_ADDR(0) - | R500_TEX_DST_R_SWIZ_R - | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B - | R500_TEX_DST_A_SWIZ_A; - fp->inst[0].inst3 = R500_DX_ADDR(0) - | R500_DX_S_SWIZ_R - | R500_DX_T_SWIZ_R - | R500_DX_R_SWIZ_R - | R500_DX_Q_SWIZ_R - | R500_DY_ADDR(0) - | R500_DY_S_SWIZ_R - | R500_DY_T_SWIZ_R - | R500_DY_R_SWIZ_R - | R500_DY_Q_SWIZ_R; - fp->inst[0].inst4 = 0x0; - fp->inst[0].inst5 = 0x0; - - fp->inst[1].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK; - fp->inst[1].inst1 = R500_RGB_ADDR0(0) | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST | - R500_RGB_SRCP_OP_1_MINUS_2RGB0; - fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST | - R500_ALPHA_SRCP_OP_1_MINUS_2A0; - fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - 
R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_1 | - R500_ALU_RGB_B_SWIZ_B_1 | - R500_ALU_RGB_G_SWIZ_B_1; - fp->inst[1].inst4 = R500_ALPHA_OP_MAD | - R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_1; - fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0; - - fp->cs->nrslots = 2; - fp->translated = GL_TRUE; -} - static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) { if (fpi->DstReg.File == PROGRAM_OUTPUT) { fp->inst[counter].inst0 = R500_INST_TYPE_OUT @@ -1225,6 +1150,81 @@ static void update_params(struct r500_fragment_program *fp) _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); } +static void dumb_shader(struct r500_fragment_program *fp) +{ + fp->inst[0].inst0 = R500_INST_TYPE_TEX + | R500_INST_TEX_SEM_WAIT + | R500_INST_RGB_WMASK_R + | R500_INST_RGB_WMASK_G + | R500_INST_RGB_WMASK_B + | R500_INST_ALPHA_WMASK + | R500_INST_RGB_CLAMP + | R500_INST_ALPHA_CLAMP; + fp->inst[0].inst1 = R500_TEX_ID(0) + | R500_TEX_INST_LD + | R500_TEX_SEM_ACQUIRE + | R500_TEX_IGNORE_UNCOVERED; + fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) + | R500_TEX_SRC_S_SWIZ_R + | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_DST_ADDR(0) + | R500_TEX_DST_R_SWIZ_R + | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B + | R500_TEX_DST_A_SWIZ_A; + fp->inst[0].inst3 = R500_DX_ADDR(0) + | R500_DX_S_SWIZ_R + | R500_DX_T_SWIZ_R + | R500_DX_R_SWIZ_R + | R500_DX_Q_SWIZ_R + | R500_DY_ADDR(0) + | R500_DY_S_SWIZ_R + | R500_DY_T_SWIZ_R + | R500_DY_R_SWIZ_R + | R500_DY_Q_SWIZ_R; + fp->inst[0].inst4 = 0x0; + fp->inst[0].inst5 = 0x0; + + fp->inst[1].inst0 = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK; + fp->inst[1].inst1 = R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + 
R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0; + fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0; + fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1; + fp->inst[1].inst4 = R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1; + fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + fp->cs->nrslots = 2; + fp->translated = GL_TRUE; +} + void r500TranslateFragmentShader(r300ContextPtr r300, struct r500_fragment_program *fp) { @@ -1233,8 +1233,6 @@ void r500TranslateFragmentShader(r300ContextPtr r300, if (!fp->translated) { - - init_program(r300, fp); cs = fp->cs; -- cgit v1.2.3 From 9ab7a2df030fe3eb7b82a99d9f17093c0036bc06 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 23 May 2008 00:16:49 -0700 Subject: r5xx: Clean up some compiler warnings. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index e6d684ee46..061f53a697 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -353,7 +353,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) struct gl_fragment_program *mp = &fp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; - GLuint src[3], dest, temp[2]; + GLuint src[3], dest = 0; int temp_swiz, pixel_mask = 0, output_mask = 0, counter = 0; if (!inst || inst[0].Opcode == OPCODE_END) { -- cgit v1.2.3 From e9031d6f63947963e2105e5aaf89cb57dcd9a122 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 24 May 2008 18:12:26 +1000 Subject: r500: add depth output write Not sure how well this works yet, but we need to set the alpha to w_omask --- src/mesa/drivers/dri/r300/r500_fragprog.c | 69 ++++++++++++++++--------------- 1 file changed, 36 insertions(+), 33 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 061f53a697..a0ed496e9a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -314,9 +314,13 @@ static void emit_tex(struct r500_fragment_program *fp, static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) { if (fpi->DstReg.File == PROGRAM_OUTPUT) { - fp->inst[counter].inst0 = R500_INST_TYPE_OUT - /* output_mask */ - | (fpi->DstReg.WriteMask << 15); + fp->inst[counter].inst0 = R500_INST_TYPE_OUT; + + if (fpi->DstReg.Index == FRAG_RESULT_COLR) + fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15); + + if (fpi->DstReg.Index == FRAG_RESULT_DEPR) + fp->inst[counter].inst4 = R500_ALPHA_W_OMASK; } else { fp->inst[counter].inst0 = 
R500_INST_TYPE_ALU /* pixel_mask */ @@ -354,7 +358,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest = 0; - int temp_swiz, pixel_mask = 0, output_mask = 0, counter = 0; + int temp_swiz, counter = 0; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("The program is empty!\n"); @@ -365,9 +369,6 @@ static GLboolean parse_program(struct r500_fragment_program *fp) if (fpi->Opcode != OPCODE_KIL) { dest = make_dest(fp, fpi->DstReg); - - pixel_mask = fpi->DstReg.WriteMask << 11; - output_mask = fpi->DstReg.WriteMask << 15; } switch (fpi->Opcode) { @@ -382,6 +383,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_ADD: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); + fp->inst[counter].inst4 = 0; /* Variation on MAD: 1*src0+src1 */ emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) @@ -391,7 +393,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = /* 1 */ MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); @@ -416,7 +418,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2])) | R500_ALPHA_SEL_B_SRC1 | 
MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -463,7 +465,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0; - fp->inst[counter].inst4 = R500_ALPHA_OP_COS + fp->inst[counter].inst4 |= R500_ALPHA_OP_COS | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -480,7 +482,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_DP + fp->inst[counter].inst4 |= R500_ALPHA_OP_DP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -499,7 +501,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_DP + fp->inst[counter].inst4 |= R500_ALPHA_OP_DP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -518,7 +520,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_DP + fp->inst[counter].inst4 |= R500_ALPHA_OP_DP | 
R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -543,7 +545,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) /* Select [1, y, 1, w] */ temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6); fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -559,7 +561,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 + fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2 | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -572,7 +574,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_FRC + fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC @@ -588,7 +590,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 + fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2 | 
R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -631,7 +633,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; @@ -653,7 +655,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -674,7 +676,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -691,7 +693,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MIN + fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN | 
R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -714,7 +716,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); @@ -763,7 +765,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 + fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2 | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -776,7 +778,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_RCP + fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -789,7 +791,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ + 
fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -835,7 +837,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_COS + fp->inst[counter].inst4 |= R500_ALPHA_OP_COS | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -848,7 +850,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_SIN + fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -872,7 +874,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); @@ -919,7 +921,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0; - fp->inst[counter].inst4 = R500_ALPHA_OP_SIN + fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN | R500_ALPHA_ADDRD(dest) | 
R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP @@ -943,7 +945,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); @@ -966,7 +968,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst3 = /* 1 */ MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); @@ -1415,10 +1417,11 @@ static void dump_program(struct r500_fragment_program *fp) fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4); inst = fp->inst[n].inst4; - fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst & 0xf), + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, - (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3); + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 31) & 0x1); fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5); inst = fp->inst[n].inst5; -- cgit v1.2.3 From af77de66d9e97a1f37849a51f7b48ae36a0c8127 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 24 May 2008 18:18:18 +1000 Subject: r500: missed a couple of inst4s. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index a0ed496e9a..ef4c6119b3 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -341,7 +341,7 @@ static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_ | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src)) | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src)) | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src)) @@ -383,7 +383,6 @@ static GLboolean parse_program(struct r500_fragment_program *fp) case OPCODE_ADD: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst4 = 0; /* Variation on MAD: 1*src0+src1 */ emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) -- cgit v1.2.3 From 6f918a9fda91321b50ae327791787f21417226c8 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 23 May 2008 02:05:24 -0700 Subject: r5xx: Remove some debugging cruft. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index ef4c6119b3..657d6340ba 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1023,9 +1023,6 @@ static GLboolean parse_program(struct r500_fragment_program *fp) } else { /* We still need to put an output inst, right? 
*/ WARN_ONCE("Final FP instruction is not an OUT.\n"); -#if 0 - -#endif } fp->cs->nrslots = counter; @@ -1248,7 +1245,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fp->inst_end = cs->nrslots - 1; fp->translated = GL_TRUE; - if (1 || RADEON_DEBUG & DEBUG_PIXEL) { + if (RADEON_DEBUG & DEBUG_PIXEL) { fprintf(stderr, "Mesa program:\n"); fprintf(stderr, "-------------\n"); _mesa_print_program(&fp->mesa_program.Base); -- cgit v1.2.3 From b6b51906824bbf02769eeaf42646ff709877ae42 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 24 May 2008 09:17:28 -0700 Subject: r5xx: Fix SGE/SLT. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 96 ++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 32 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 657d6340ba..5dc674c577 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -856,33 +856,49 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_SGE: - /* We use SRCP, so as a precaution we're - * going to set NOP in previous inst, if possible. 
*/ - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) + | R500_RGB_ADDR2(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) + | R500_ALPHA_ADDR2(src[1]); + fp->inst[counter].inst3 = /* 1 */ + MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_ALPHA_MOD_C_NEG; + counter++; + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) - | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) - | R500_ALPHA_SRCP_OP_A1_MINUS_A0; + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC1 + | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | 
MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRCP - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRCP - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALU_RGBA_SEL_C_SRC0 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | R500_ALU_RGBA_A_SWIZ_A; break; case OPCODE_SIN: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -927,33 +943,49 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_SLT: - /* We use SRCP, so as a precaution we're - * going to set NOP in previous inst, if possible. */ - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) + | R500_RGB_ADDR2(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) + | R500_ALPHA_ADDR2(src[1]); + fp->inst[counter].inst3 = /* 1 */ + MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_ALPHA_MOD_C_NEG; + counter++; + /* This inst's selects need to be swapped as 
follows: + * 0 -> C ; 1 -> B ; 2 -> A */ emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) - | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) - | R500_ALPHA_SRCP_OP_A1_MINUS_A0; + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) - | R500_ALU_RGB_SEL_B_SRC1 + | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRCP - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRCP - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + | R500_ALU_RGBA_SEL_C_SRC0 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | R500_ALU_RGBA_A_SWIZ_A; break; case OPCODE_SUB: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From f1d04cd76681a3b8d37bc1a06b7ab36350087135 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 24 May 2008 11:30:57 -0700 Subject: r5xx: Consolidate FP tex insts. They're all the same, really. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 5dc674c577..482f9d55e7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -237,7 +237,7 @@ static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_regist } static void emit_tex(struct r500_fragment_program *fp, - struct prog_instruction *fpi, int opcode, int dest, int counter) + struct prog_instruction *fpi, int dest, int counter) { int hwsrc, hwdest; GLuint mask; @@ -260,7 +260,7 @@ static void emit_tex(struct r500_fragment_program *fp, if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) fp->inst[counter].inst1 |= R500_TEX_UNSCALED; - switch (opcode) { + switch (fpi->Opcode) { case OPCODE_KIL: fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL; break; @@ -274,7 +274,7 @@ static void emit_tex(struct r500_fragment_program *fp, fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; break; default: - ERROR("emit_tex can't handle opcode %x\n", opcode); + ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode); } fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) @@ -579,9 +579,6 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC | R500_ALU_RGBA_ADDRD(dest); break; - case OPCODE_KIL: - emit_tex(fp, fpi, OPCODE_KIL, dest, counter); - break; case OPCODE_LG2: src[0] = make_src(fp, fpi->SrcReg[0]); emit_alu(fp, counter, fpi); @@ -1017,18 +1014,11 @@ static GLboolean parse_program(struct r500_fragment_program *fp) emit_alu(fp, counter, fpi); emit_mov(fp, counter, fpi->SrcReg[0], dest); break; + case OPCODE_KIL: case OPCODE_TEX: - emit_tex(fp, fpi, OPCODE_TEX, dest, counter); - if (fpi->DstReg.File == PROGRAM_OUTPUT) - counter++; - break; case OPCODE_TXB: - emit_tex(fp, fpi, OPCODE_TXB, dest, counter); - if 
(fpi->DstReg.File == PROGRAM_OUTPUT) - counter++; - break; case OPCODE_TXP: - emit_tex(fp, fpi, OPCODE_TXP, dest, counter); + emit_tex(fp, fpi, dest, counter); if (fpi->DstReg.File == PROGRAM_OUTPUT) counter++; break; -- cgit v1.2.3 From 810270ad11d51c65e33bbe9337c2db9dd4cebb98 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 25 May 2008 11:07:51 -0700 Subject: r5xx: Add emit_mad() for FP. If it uses MAD, emit it with emit_mad()! (Now available at your local grocer's. Multiply and add responsibly.) --- src/mesa/drivers/dri/r300/r500_fragprog.c | 67 ++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 482f9d55e7..e8612255c9 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -328,6 +328,12 @@ static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_ } fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT; + + /* Ideally, we shouldn't have to explicitly clear memory here! */ + fp->inst[counter].inst1 = 0x0; + fp->inst[counter].inst2 = 0x0; + fp->inst[counter].inst3 = 0x0; + fp->inst[counter].inst5 = 0x0; } static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) { @@ -352,6 +358,62 @@ static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_ | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); } +static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) { + /* Note: This code was all Corbin's. Corbin is a rather hackish coder. + * If you can make it pretty or fast, please do so! 
*/ + emit_alu(fp, counter, fpi); + /* Common MAD stuff */ + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg)); + fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg)); + switch (one) { + case 0: + case 1: + case 2: + fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one])); + fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one])); + fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one])); + fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 + | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); + break; + default: + WARN_ONCE("Bad src index in emit_mad: %d\n", one); + break; + } + switch (two) { + case 0: + case 1: + case 2: + fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two])); + fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two])); + fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two])); + fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 + | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); + break; + default: + WARN_ONCE("Bad src index in emit_mad: %d\n", one); + break; + } + switch (three) { + case 0: + case 1: + case 2: + fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three])); + fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three])); + fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); + break; + default: + WARN_ONCE("Bad src index in emit_mad: %d\n", one); + break; + } +} + static GLboolean parse_program(struct r500_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; @@ -640,7 +702,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | 
MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); break; case OPCODE_MAD: - src[0] = make_src(fp, fpi->SrcReg[0]); + /* src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); src[2] = make_src(fp, fpi->SrcReg[2]); emit_alu(fp, counter, fpi); @@ -660,7 +722,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); */ + emit_mad(fp, counter, fpi, 0, 1, 2); break; case OPCODE_MAX: src[0] = make_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From bd74d2aa26a2b87b05e8d086c020a6bdde9e06a7 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 25 May 2008 11:35:20 -0700 Subject: r5xx: More emit_alu(). Converted ADD. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index e8612255c9..2c18c3f6fd 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -80,6 +80,9 @@ #define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) #define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) #define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) +#define R500_SWIZ_MOD_NEG 1 +#define R500_SWIZ_MOD_ABS 2 +#define R500_SWIZ_MOD_NEG_ABS 3 /* Swizzles for inst2 */ #define MAKE_SWIZ_TEX_STRQ(x) (x << 8) #define MAKE_SWIZ_TEX_RGBA(x) (x << 24) @@ -378,8 +381,12 @@ static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_ fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); break; + case R500_SWIZZLE_ONE: + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 |= 
MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE); + break; default: - WARN_ONCE("Bad src index in emit_mad: %d\n", one); + ERROR("Bad src index in emit_mad: %d\n", one); break; } switch (two) { @@ -393,8 +400,12 @@ static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_ fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); break; + case R500_SWIZZLE_ONE: + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); + break; default: - WARN_ONCE("Bad src index in emit_mad: %d\n", one); + ERROR("Bad src index in emit_mad: %d\n", two); break; } switch (three) { @@ -408,8 +419,12 @@ static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_ | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); break; + case R500_SWIZZLE_ONE: + fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE); + break; default: - WARN_ONCE("Bad src index in emit_mad: %d\n", one); + ERROR("Bad src index in emit_mad: %d\n", three); break; } } @@ -446,6 +461,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0+src1 */ +#if 0 emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0); @@ -464,6 +480,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])); +#endif + emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); break; case OPCODE_CMP: /* This inst's selects need to be swapped as follows: -- cgit v1.2.3 From 594760148cb42cdaf568eef63357fac1c1b7f124 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 25 May 2008 
11:35:54 -0700 Subject: r5xx: Negation masks for every inst except SWZ. Yay? --- src/mesa/drivers/dri/r300/r500_fragprog.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 2c18c3f6fd..c7b551d4b8 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -117,8 +117,10 @@ static inline GLuint make_rgb_swizzle(struct prog_src_register src) { temp = GET_SWZ(src.Swizzle, i); /* Fix SWIZZLE_ONE */ if (temp == 5) temp++; - swiz += temp << i*3; + swiz |= temp << i*3; } + if (src.NegateBase) + swiz |= (R500_SWIZ_MOD_NEG << 10); return swiz; } @@ -126,6 +128,10 @@ static inline GLuint make_alpha_swizzle(struct prog_src_register src) { GLuint swiz = GET_SWZ(src.Swizzle, 3); if (swiz == 5) swiz++; + + if (src.NegateBase) + swiz |= (R500_SWIZ_MOD_NEG << 4); + return swiz; } -- cgit v1.2.3 From f776f693c0aca4d01cc2bfdaedbb527062189e6d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 25 May 2008 11:46:16 -0700 Subject: r5xx: Massive MAD cleanup. Common uses of MAD now use emit_mad(), the two common negation masks work, and fixed a few off-by-one errors. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 106 +++++------------------------- 1 file changed, 18 insertions(+), 88 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index c7b551d4b8..832763c554 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -120,7 +120,7 @@ static inline GLuint make_rgb_swizzle(struct prog_src_register src) { swiz |= temp << i*3; } if (src.NegateBase) - swiz |= (R500_SWIZ_MOD_NEG << 10); + swiz |= (R500_SWIZ_MOD_NEG << 9); return swiz; } @@ -130,7 +130,7 @@ static inline GLuint make_alpha_swizzle(struct prog_src_register src) { if (swiz == 5) swiz++; if (src.NegateBase) - swiz |= (R500_SWIZ_MOD_NEG << 4); + swiz |= (R500_SWIZ_MOD_NEG << 3); return swiz; } @@ -387,6 +387,10 @@ static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_ fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); break; + case R500_SWIZZLE_ZERO: + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO); + break; case R500_SWIZZLE_ONE: fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE); @@ -406,6 +410,10 @@ static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_ fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); break; + case R500_SWIZZLE_ZERO: + fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); + break; case R500_SWIZZLE_ONE: fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); @@ -425,6 +433,10 @@ static void emit_mad(struct r500_fragment_program *fp, int counter, struct 
prog_ | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); break; + case R500_SWIZZLE_ZERO: + fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; case R500_SWIZZLE_ONE: fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE); @@ -464,29 +476,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_MOD_B_ABS; break; case OPCODE_ADD: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0+src1 */ -#if 0 - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0); - fp->inst[counter].inst3 = /* 1 */ - MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC1 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])); -#endif emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); break; case OPCODE_CMP: @@ -726,27 +716,6 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); break; case OPCODE_MAD: - /* src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]); - 
fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); */ emit_mad(fp, counter, fpi, 0, 1, 2); break; case OPCODE_MAX: @@ -788,27 +757,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) emit_mov(fp, counter, fpi->SrcReg[0], dest); break; case OPCODE_MUL: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: src0*src1+0 */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - // | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + emit_mad(fp, 
counter, fpi, 0, 1, R500_SWIZZLE_ZERO); break; case OPCODE_POW: /* POW(a,b) = EX2(LN2(a)*b) */ @@ -1072,32 +1022,12 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_A_SWIZ_A; break; case OPCODE_SUB: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); /* Variation on MAD: 1*src0-src1 */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) - | R500_RGB_ADDR2(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) - | R500_ALPHA_ADDR2(src[1]); - fp->inst[counter].inst3 = /* 1 */ - MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_ALPHA_MOD_C_NEG; + fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */ + emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); break; case OPCODE_SWZ: - /* TODO: Negation masks! */ + /* TODO: The rarer negation masks! */ emit_alu(fp, counter, fpi); emit_mov(fp, counter, fpi->SrcReg[0], dest); break; -- cgit v1.2.3 From 27d8fcd506942b115e480cfe9ca811194736579a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 25 May 2008 19:53:48 -0700 Subject: r5xx: Unbreak texture swizzling. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 832763c554..5abb6e7399 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -143,12 +143,11 @@ static inline GLuint make_sop_swizzle(struct prog_src_register src) { } static inline GLuint make_strq_swizzle(struct prog_src_register src) { - GLuint swiz = 0x0; - GLuint temp = src.Swizzle; + GLuint swiz = 0x0, temp = 0x0; int i; for (i = 0; i < 4; i++) { - swiz += (temp & 0x3) << i*2; - temp >>= 3; + temp = GET_SWZ(src.Swizzle, i) & 0x3; + swiz |= temp << i*2; } return swiz; } @@ -287,9 +286,9 @@ static void emit_tex(struct r500_fragment_program *fp, } fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) - /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ - | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A + | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) + /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */ | R500_TEX_DST_ADDR(hwdest) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; -- cgit v1.2.3 From 21b352bb146e1b35050c1315b0d44689dcfdc8d6 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 25 May 2008 22:50:00 -0700 Subject: Replace copyright on r500_fragprog.c Huh, could have sworn I already did this once before... Maybe I forgot to commit it? 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 5abb6e7399..428b8fd1c3 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1,6 +1,9 @@ /* * Copyright (C) 2005 Ben Skeggs. * + * Copyright 2008 Corbin Simpson + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining -- cgit v1.2.3 From a2db33219debbc1a2a64a8b096b321d263cfacb5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 26 May 2008 12:35:39 -0700 Subject: r5xx: First stab at LIT. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 106 ++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 428b8fd1c3..81976573c6 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -100,6 +100,8 @@ #define MAKE_SWIZ_ALPHA_C(x) (x << 27) /* Writemasks */ +#define R500_WRITEMASK_G 0x2 +#define R500_WRITEMASK_A 0x8 #define R500_WRITEMASK_ARGB 0xF /* 1/(2pi), needed for quick modulus in trig insts @@ -109,6 +111,11 @@ static const GLfloat RCP_2PI[] = {0.15915494309189535, 0.15915494309189535, 0.15915494309189535}; +static const GLfloat LIT[] = {127.999999, + 127.999999, + 127.999999, + -127.999999}; + static void dump_program(struct r500_fragment_program *fp); static inline GLuint make_rgb_swizzle(struct prog_src_register src) { @@ -670,6 +677,105 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_LIT: + /* I think I've got a pretty good path through this. 
+ * MAX temp1, tmp, [0, 0, 0, -128]; + * MIN temp1.w, temp1.w, [128]; + * POW temp1.z, temp1.y, temp1.w; (3 insts) + * MOV result.xyzw, [1, temp1.y, temp1.z, 1]; */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = emit_const4fv(fp, LIT); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A | R500_ALPHA_MOD_B_NEG; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + 
fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) + | R500_RGB_ADDR1(get_temp(fp, 1)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) + | R500_ALPHA_ADDR1(get_temp(fp, 1)); + /* Select [w, w, w, w] */ + temp_swiz = 3 | (3 << 3) | (3 << 6); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_G << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + counter++; + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); + /* Select [1, y, z, 1] */ + temp_swiz = R500_SWIZZLE_ONE | (2 << 3) | (3 << 6); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz) + | R500_ALU_RGB_SEL_B_SRC0 + | 
MAKE_SWIZ_RGB_B(temp_swiz) + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1 + | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1 + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; case OPCODE_LRP: /* src0 * src1 + INV(src0) * src2 * 1) MUL src0, src1, temp -- cgit v1.2.3 From 5499685931cac382bffb053ab527d882a7d0e109 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 26 May 2008 15:18:41 -0700 Subject: r5xx: Moar LIT. Still not working, but getting closer. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 112 +++++++++++++++--------------- 1 file changed, 57 insertions(+), 55 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 81976573c6..f55c8560c7 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -102,6 +102,10 @@ /* Writemasks */ #define R500_WRITEMASK_G 0x2 #define R500_WRITEMASK_A 0x8 +#define R500_WRITEMASK_AR 0x9 +#define R500_WRITEMASK_AG 0xA +#define R500_WRITEMASK_ARG 0xB +#define R500_WRITEMASK_AB 0xC #define R500_WRITEMASK_ARGB 0xF /* 1/(2pi), needed for quick modulus in trig insts @@ -678,20 +682,19 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_LIT: - /* I think I've got a pretty good path through this. - * MAX temp1, tmp, [0, 0, 0, -128]; - * MIN temp1.w, temp1.w, [128]; - * POW temp1.z, temp1.y, temp1.w; (3 insts) - * MOV result.xyzw, [1, temp1.y, temp1.z, 1]; */ + /* To be honest, I have no idea how I came up with the following. + * All I know is that it's based on the r3xx stuff, and was + * concieved with the help of NyQuil. Mmm, MyQuil. 
*/ + + /* First instruction */ src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = emit_const4fv(fp, LIT); fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); + | (R500_WRITEMASK_ARG << 11); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); fp->inst[counter].inst4 = R500_ALPHA_OP_MAX | R500_ALPHA_ADDRD(get_temp(fp, 0)) @@ -700,81 +703,80 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11); + /* Second instruction */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11); fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]); + /* Select [z, z, z, y] */ + temp_swiz = 2 | (2 << 3) | (2 << 6); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | MAKE_SWIZ_RGB_A(temp_swiz) | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A | R500_ALPHA_MOD_B_NEG; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + /* Third instruction */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU 
| (R500_WRITEMASK_AG << 11); fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) - | R500_RGB_ADDR1(get_temp(fp, 1)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) - | R500_ALPHA_ADDR1(get_temp(fp, 1)); - /* Select [w, w, w, w] */ - temp_swiz = 3 | (3 << 3) | (3 << 6); + /* Select [x, x, x, z] */ + temp_swiz = 0; fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A(temp_swiz) - | R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_B + | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_A; fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + | R500_ALU_RGBA_A_SWIZ_0; counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_G << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + /* Fourth instruction */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AR << 11); + fp->inst[counter].inst2 = 
R500_ALPHA_ADDR0(get_temp(fp, 0)); + fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 | R500_ALPHA_ADDRD(get_temp(fp, 0)) | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); counter++; - emit_alu(fp, counter, fpi); + /* Fifth instruction */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11); fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - /* Select [1, y, z, 1] */ - temp_swiz = R500_SWIZZLE_ONE | (2 << 3) | (3 << 6); + /* Select [w, w, w] */ + temp_swiz = 3 | (3 << 3) | (3 << 6); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz) + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(temp_swiz) - | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1 - | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1 - | R500_ALPHA_OMOD_DISABLE; + | MAKE_SWIZ_RGB_B(temp_swiz); + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SWIZ_A_1 + | R500_ALPHA_SWIZ_B_1; + /* Select [-y, -y, -y] */ + temp_swiz = 1 | (1 << 3) | (1 << 6); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(temp_swiz) + | R500_ALU_RGBA_MOD_C_NEG | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + /* Final instruction */ + emit_alu(fp, counter, fpi); + fpi->SrcReg[0].Index = get_temp(fp, 0); + fpi->SrcReg[0].Swizzle = 1672; + emit_mov(fp, 
counter, fpi->SrcReg[0], dest); break; case OPCODE_LRP: /* src0 * src1 + INV(src0) * src2 -- cgit v1.2.3 From b57ba7c5b0205ad6885530f63cef85401386565b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 26 May 2008 16:00:05 -0700 Subject: r5xx: Enhance emit_mov(). Now we can add arbitrary sources and swizzles. Will make many things smoother. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f55c8560c7..d331ac1036 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -358,21 +358,22 @@ static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_ fp->inst[counter].inst5 = 0x0; } -static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) { +static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) { /* The r3xx shader uses MAD to implement MOV. We are using CMP, since * it is technically more accurate and recommended by ATI/AMD. */ - GLuint src_reg = make_src(fp, src); + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); + /* 0x1FF is 9 bits, size of an RGB swizzle. 
*/ fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src)) + | MAKE_SWIZ_RGB_A((swizzle & 0x1ff)) | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src)) + | MAKE_SWIZ_RGB_B((swizzle & 0x1ff)) | R500_ALU_RGB_OMOD_DISABLE; fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src)) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3)) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3)) | R500_ALPHA_OMOD_DISABLE; fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_ADDRD(dest) @@ -481,8 +482,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) switch (fpi->Opcode) { case OPCODE_ABS: - emit_alu(fp, counter, fpi); - emit_mov(fp, counter, fpi->SrcReg[0], dest); + emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_MOD_B_ABS; fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS @@ -773,10 +773,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); counter++; /* Final instruction */ - emit_alu(fp, counter, fpi); - fpi->SrcReg[0].Index = get_temp(fp, 0); - fpi->SrcReg[0].Swizzle = 1672; - emit_mov(fp, counter, fpi->SrcReg[0], dest); + emit_mov(fp, counter, fpi, get_temp(fp, 0), 1672, dest); break; case OPCODE_LRP: /* src0 * src1 + INV(src0) * src2 @@ -863,8 +860,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALU_RGBA_ADDRD(dest); break; case OPCODE_MOV: - emit_alu(fp, counter, fpi); - emit_mov(fp, counter, fpi->SrcReg[0], dest); + emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); break; case OPCODE_MUL: /* Variation on MAD: src0*src1+0 */ @@ -1138,8 +1134,7 @@ static GLboolean parse_program(struct 
r500_fragment_program *fp) break; case OPCODE_SWZ: /* TODO: The rarer negation masks! */ - emit_alu(fp, counter, fpi); - emit_mov(fp, counter, fpi->SrcReg[0], dest); + emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); break; case OPCODE_KIL: case OPCODE_TEX: -- cgit v1.2.3 From 5a5ba350696e6b753a9e49da010513670b697db5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 27 May 2008 10:59:42 +1000 Subject: r500: initial support for tmu mappings --- src/mesa/drivers/dri/r300/r300_state.c | 118 +++++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 35 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 496b76dce4..86607478a3 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1324,6 +1324,85 @@ static unsigned long gen_fixed_filter(unsigned long f) return f; } +static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + + R300_STATECHANGE(r300, fpt); + + for (i = 0; i < fp->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = fp->tex.inst[i] >> R300_TEX_ID_SHIFT; + unit &= 15; + + val = fp->tex.inst[i]; + val &= ~R300_TEX_ID_MASK; + + opcode = + (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT; + if (opcode == R300_TEX_OP_KIL) { + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_TEX_ID_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) 
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } + } + } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(R300_US_TEX_INST_0, fp->tex.length); + +} + +static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i; + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + + /* find all the texture instructions and relocate the texture units */ + for (i = 0; i < fp->inst_end + 1; i++) { + if ((fp->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { + uint32_t val; + int unit, opcode, new_unit; + + val = fp->inst[i].inst1; + + unit = (val >> 16) & 0xf; + + val &= ~(0xf << 16); + + opcode = val & (0x7 << 22); + if (opcode == R500_TEX_INST_TEXKILL) { + new_unit = 0; + } else { + if (tmu_mappings[unit] >= 0) { + new_unit = tmu_mappings[unit]; + } else { + new_unit = 0; + } + } + fprintf(stderr,"unit translate %d to %d\n", unit, new_unit); + val |= R500_TEX_ID(new_unit); + fp->inst[i].inst1 = val; + } + } +} + static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -1439,41 +1518,10 @@ static void r300SetupTextures(GLcontext * ctx) return; - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - R300_STATECHANGE(r300, fpt); - - for (i = 0; i < fp->tex.length; i++) { - int unit; - int opcode; - unsigned long val; - - unit = fp->tex.inst[i] >> R300_TEX_ID_SHIFT; - unit &= 15; - - val = fp->tex.inst[i]; - val &= ~R300_TEX_ID_MASK; - - opcode = - (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT; - if (opcode == R300_TEX_OP_KIL) { - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - if (tmu_mappings[unit] >= 0) { - val |= - tmu_mappings[unit] << - R300_TEX_ID_SHIFT; - r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } else { - // We get here when the corresponding texture image is incomplete - // (e.g. incomplete mipmaps etc.) 
- r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; - } - } - } - - r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_US_TEX_INST_0, fp->tex.length); - } + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) + r300SetupFragmentShaderTextures(ctx, tmu_mappings); + else + r500SetupFragmentShaderTextures(ctx, tmu_mappings); if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", -- cgit v1.2.3 From 7b88f40116cc6ccff5c0f7c923a0dca31187480e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 27 May 2008 12:45:15 +1000 Subject: r300/r500: fix r500 fragment program texture unit references --- src/mesa/drivers/dri/r300/r300_state.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 86607478a3..076d4f092f 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2432,6 +2432,8 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) return; } + r300SetupTextures(ctx); + R300_STATECHANGE(rmesa, fpi[0]); rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, fp->alu_end + 1); for (i = 0; i <= fp->alu_end; i++) { @@ -2519,6 +2521,8 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) return; } + r300SetupTextures(ctx); + R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx; @@ -2567,7 +2571,6 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r500SetupPixelShader(rmesa); else r300SetupPixelShader(rmesa); - r300SetupTextures(ctx); if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) r500SetupRSUnit(ctx); -- cgit v1.2.3 From 9f03e93de9a0b75485d1de8a990513b0c2582385 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 27 May 2008 12:46:44 +1000 Subject: r500: remove debugging --- src/mesa/drivers/dri/r300/r300_state.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa') diff --git 
a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 076d4f092f..4092fcc9ad 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1396,7 +1396,6 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) new_unit = 0; } } - fprintf(stderr,"unit translate %d to %d\n", unit, new_unit); val |= R500_TEX_ID(new_unit); fp->inst[i].inst1 = val; } -- cgit v1.2.3 From b5372746ffcaab4ce158c1ca205e039a561ca01f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 26 May 2008 22:12:24 -0700 Subject: r5xx: Fix FP temp counting. One of the ref counters wasn't being added to the temp counter. Yet another product of late-night coding... --- src/mesa/drivers/dri/r300/r500_fragprog.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index d331ac1036..f76a3d9560 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -170,7 +170,7 @@ static int get_temp(struct r500_fragment_program *fp, int slot) { COMPILE_STATE; - int r = cs->temp_in_use + 1 + slot; + int r = fp->temp_reg_offset + cs->temp_in_use + slot; if (r > R500_US_NUM_TEMP_REGS) { ERROR("Too many temporary registers requested, can't compile!\n"); @@ -1272,15 +1272,18 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { for (i = 0; i < 3; i++) { if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { - if (fpi->SrcReg[i].Index > temps_used) - temps_used = fpi->SrcReg[i].Index; + if (fpi->SrcReg[i].Index >= temps_used) + temps_used = fpi->SrcReg[i].Index + 1; } } } - cs->temp_in_use = temps_used; + cs->temp_in_use = temps_used + 1; - fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1; + fp->max_temp_idx = fp->temp_reg_offset + 
cs->temp_in_use; + + if (RADEON_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "FP temp indices: fp->max_temp_idx: %d cs->temp_in_use: %d\n", fp->max_temp_idx, cs->temp_in_use); } static void update_params(struct r500_fragment_program *fp) -- cgit v1.2.3 From 8eb7df63029ebc7c30c67c0266d727f9c240b402 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 27 May 2008 15:29:39 +1000 Subject: r500: hopefully fix 4096 texture harder --- src/mesa/drivers/dri/r300/r300_texstate.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index f69a27671b..78fa75228e 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -399,12 +399,6 @@ static void r300SetTexImages(r300ContextPtr rmesa, | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); t->pitch = 0; - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - if (tObj->Image[0][t->base.firstLevel]->Width > 2048) - t->pitch |= R500_TXWIDTH_BIT11; - if (tObj->Image[0][t->base.firstLevel]->Height > 2048) - t->pitch |= R500_TXHEIGHT_BIT11; - } /* Only need to round to nearest 32 for textures, but the blitter * requires 64-byte aligned pitches, and we may/may not need the @@ -428,6 +422,13 @@ static void r300SetTexImages(r300ContextPtr rmesa, texelBytes) + 63) & ~(63); } + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (tObj->Image[0][t->base.firstLevel]->Width > 2048) + t->pitch_reg |= R500_TXWIDTH_BIT11; + if (tObj->Image[0][t->base.firstLevel]->Height > 2048) + t->pitch_reg |= R500_TXHEIGHT_BIT11; + } + t->dirty_state = TEX_ALL; /* FYI: r300UploadTexImages( rmesa, t ) used to be called here */ @@ -581,6 +582,7 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); r300TexObjPtr t; + uint32_t pitch_val; if 
(!tObj) return; @@ -593,28 +595,30 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, return; t->offset = offset; - t->pitch_reg = pitch; + t->pitch_reg &= (1 << 13) -1; + pitch_val = pitch; switch (depth) { case 32: t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); t->filter |= tx_table[2].filter; - t->pitch_reg /= 4; + pitch_val /= 4; break; case 24: default: t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); t->filter |= tx_table[4].filter; - t->pitch_reg /= 4; + pitch_val /= 4; break; case 16: t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); t->filter |= tx_table[5].filter; - t->pitch_reg /= 2; + pitch_val /= 2; break; } + pitch_val--; - t->pitch_reg--; + t->pitch_reg |= pitch_val; } static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit) -- cgit v1.2.3 From 4af22c9076954d544417e615561695695773708d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 27 May 2008 17:08:03 +1000 Subject: r300/r500: emit flush inside vap_cntl state atom Not sure if this is a good or bad plan, it certainly doesn't make things worse here. 
--- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 6 ++++-- src/mesa/drivers/dri/r300/r300_context.h | 6 ++++++ src/mesa/drivers/dri/r300/r300_ioctl.c | 2 ++ src/mesa/drivers/dri/r300/r300_state.c | 21 ++++++++++----------- 4 files changed, 22 insertions(+), 13 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index fc1b95b0ef..535866ee17 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -319,8 +319,10 @@ void r300InitCmdBuf(r300ContextPtr r300) /* Initialize state atoms */ ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0); r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); - ALLOC_STATE(vap_cntl, always, 2, 0); - r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); + ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0; + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1); if (is_r500) { ALLOC_STATE(vap_index_offset, always, 2, 0); r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index eba93e4bfb..53882b6750 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -427,6 +427,12 @@ struct r300_state_atom { #define R300_ZB_PITCH 2 #define R300_ZB_CMDSIZE 3 +#define R300_VAP_CNTL_FLUSH 0 +#define R300_VAP_CNTL_FLUSH_1 1 +#define R300_VAP_CNTL_CMD 2 +#define R300_VAP_CNTL_INSTR 3 +#define R300_VAP_CNTL_SIZE 4 + #define R300_VPI_CMD_0 0 #define R300_VPI_INSTR_0 1 #define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index cc85d45efc..b0225453d3 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c 
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -412,6 +412,8 @@ static void r300EmitClearState(GLcontext * ctx) R500_ALU_RGBA_A_SWIZ_0); } + reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); + e32(0x00000000); if (has_tcl) { vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | (5 << R300_PVS_NUM_CNTLRS_SHIFT) | diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 4092fcc9ad..8857673831 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1905,9 +1905,7 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_ int pvs_num_cntrls; /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS. - * See r500 docs 6.5.2 */ - reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); - e32(0x00000000); + * See r500 docs 6.5.2 - done in emit */ /* avoid division by zero */ if (input_count == 0) input_count = 1; @@ -1924,31 +1922,31 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_ R300_STATECHANGE(rmesa, vap_cntl); if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { - rmesa->hw.vap_cntl.cmd[1] = + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | (12 << R300_VF_MAX_VTX_NUM_SHIFT); if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - rmesa->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION; + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION; } else /* not sure about non-tcl */ - rmesa->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | (5 << R300_PVS_NUM_CNTLRS_SHIFT) | (5 << R300_VF_MAX_VTX_NUM_SHIFT)); if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) - rmesa->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT); 
else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560)) - rmesa->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT); else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) - rmesa->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT); else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) || (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) - rmesa->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT); + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT); else - rmesa->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT); + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT); } @@ -2362,6 +2360,7 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.zb_hiz_pitch.cmd[1] = 0; + r300VapCntl(r300, 0, 0, 0); if (has_tcl) { r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0; r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0; -- cgit v1.2.3 From 774b3bc5a57dc768ed09516a6b91358783c63f72 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 27 May 2008 17:11:04 +1000 Subject: r500: reset fp/fp_const counts --- src/mesa/drivers/dri/r300/r300_state.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 8857673831..32ea7c9f5a 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2512,6 +2512,9 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) if (!fp) /* should only happenen once, just after context is created */ return; + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; + ((drm_r300_cmd_header_t *) 
rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; + r500TranslateFragmentShader(rmesa, fp); if (!fp->translated) { fprintf(stderr, "%s: No valid fragment shader, exiting\n", -- cgit v1.2.3 From 7278266612fe6be91b30b084de666a1ac4f2c20b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 27 May 2008 17:39:35 +1000 Subject: r500: need to re-setup inst offset/end for translated programs this fixes texenv --- src/mesa/drivers/dri/r300/r500_fragprog.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index f76a3d9560..0e85a2d7fb 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1402,6 +1402,9 @@ void r500TranslateFragmentShader(r300ContextPtr r300, r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + } else { + fp->inst_offset = 0; + fp->inst_end = fp->cs->nrslots - 1; } update_params(fp); -- cgit v1.2.3 From eee53dfb22e0c950b11a466ebcd5d764864229cd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 27 May 2008 18:15:14 +1000 Subject: r500: the cs is shared per context - doh so don't use for program upload. Also remove some unused bits of the r500 fragprog struct --- src/mesa/drivers/dri/r300/r300_context.h | 10 ---------- src/mesa/drivers/dri/r300/r300_state.c | 5 ++--- src/mesa/drivers/dri/r300/r500_fragprog.c | 7 ------- 3 files changed, 2 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 53882b6750..4cca4a8093 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -814,16 +814,6 @@ struct r500_fragment_program { } inst[512]; /* TODO: This is magic! 
*/ - struct { - int tex_offset; - int tex_end; - int alu_offset; - int alu_end; - int flags; - } node[4]; - int cur_node; - int first_node_has_tex; - int temp_reg_offset; int inst_offset; diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 32ea7c9f5a..df63f32d1d 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2538,7 +2538,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... */ - for (i = 0; i < fp->cs->nrslots; i++) { + for (i = 0; i < fp->inst_end+1; i++) { rmesa->hw.r500fp.cmd[i*6+1] = fp->inst[i].inst0; rmesa->hw.r500fp.cmd[i*6+2] = fp->inst[i].inst1; rmesa->hw.r500fp.cmd[i*6+3] = fp->inst[i].inst2; @@ -2547,8 +2547,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) rmesa->hw.r500fp.cmd[i*6+6] = fp->inst[i].inst5; } - bump_r500fp_count(rmesa->hw.r500fp.cmd, fp->cs->nrslots * 6); - + bump_r500fp_count(rmesa->hw.r500fp.cmd, (fp->inst_end + 1) * 6); R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < fp->const_nr; i++) { diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 0e85a2d7fb..911e6ae81e 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1191,15 +1191,11 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) fp->translated = GL_FALSE; fp->error = GL_FALSE; fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); - fp->cur_node = 0; - fp->first_node_has_tex = 0; fp->const_nr = 0; /* Size of pixel stack, plus 1. */ fp->max_temp_idx = 1; /* Temp register offset. 
*/ fp->temp_reg_offset = 0; - fp->node[0].alu_end = -1; - fp->node[0].tex_end = -1; _mesa_memset(cs, 0, sizeof(*fp->cs)); for (i = 0; i < PFS_MAX_ALU_INST; i++) { @@ -1402,9 +1398,6 @@ void r500TranslateFragmentShader(r300ContextPtr r300, r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); - } else { - fp->inst_offset = 0; - fp->inst_end = fp->cs->nrslots - 1; } update_params(fp); -- cgit v1.2.3 From 9412aee4dc6a94ffc3d4043e8c843ba051f5507b Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 26 May 2008 22:34:32 -0700 Subject: r5xx: Fix emit_mov() regression. Specifically, fix improper swizzling. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 911e6ae81e..2315830a59 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -138,6 +138,19 @@ static inline GLuint make_rgb_swizzle(struct prog_src_register src) { return swiz; } +static inline GLuint make_rgba_swizzle(GLuint src) { + GLuint swiz = 0x0; + GLuint temp; + int i; + for (i = 0; i < 4; i++) { + temp = GET_SWZ(src, i); + /* Fix SWIZZLE_ONE */ + if (temp == 5) temp++; + swiz |= temp << i*3; + } + return swiz; +} + static inline GLuint make_alpha_swizzle(struct prog_src_register src) { GLuint swiz = GET_SWZ(src.Swizzle, 3); @@ -364,6 +377,8 @@ static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_ emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); + /* (De)mangle the swizzle from Mesa to R500. */ + swizzle = make_rgba_swizzle(swizzle); /* 0x1FF is 9 bits, size of an RGB swizzle. 
*/ fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 | MAKE_SWIZ_RGB_A((swizzle & 0x1ff)) -- cgit v1.2.3 From a242b331c6567af20d3cad804664bda30e1e9586 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 27 May 2008 01:01:46 -0700 Subject: r5xx: Just a few small LIT fixes. Still broken; will fix tomorrow. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 2315830a59..39f035bf1c 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -720,8 +720,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) counter++; /* Second instruction */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); /* Select [z, z, z, y] */ temp_swiz = 2 | (2 << 3) | (2 << 6); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 @@ -746,8 +746,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_B - | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_A; + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_B; fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) @@ -788,7 +788,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); counter++; /* Final instruction */ - emit_mov(fp, counter, fpi, 
get_temp(fp, 0), 1672, dest); + emit_mov(fp, counter, fpi, get_temp(fp, 0), SWIZZLE_NOOP, dest); break; case OPCODE_LRP: /* src0 * src1 + INV(src0) * src2 -- cgit v1.2.3 From 8c39e24ec397200420146faa4f48672eadeac9b2 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Tue, 27 May 2008 02:12:10 -0700 Subject: r5xx: Add OPCODE_XPD. In working condition, I might add. And we're officially finished with the ARB_fragment_program instruction set. It's worth noting that LIT is still not reliable. SIN and COS were fixed a few commits ago. We're finished with stage 1! Whoohoo! --- src/mesa/drivers/dri/r300/r500_fragprog.c | 60 +++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 39f035bf1c..c7ece029c0 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -101,6 +101,7 @@ /* Writemasks */ #define R500_WRITEMASK_G 0x2 +#define R500_WRITEMASK_RGB 0x7 #define R500_WRITEMASK_A 0x8 #define R500_WRITEMASK_AR 0x9 #define R500_WRITEMASK_AG 0xA @@ -1151,6 +1152,65 @@ static GLboolean parse_program(struct r500_fragment_program *fp) /* TODO: The rarer negation masks! 
*/ emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); break; + case OPCODE_XPD: + /* src0 * src1 - src1 * src0 + * 1) MUL temp.xyz, src0.yzx, src1.zxy + * 2) MAD src0.zxy, src1.yzx, -temp.xyz */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_RGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + /* Select [y, z, x] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); + temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [z, x, y] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); + temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); + fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(temp_swiz); + fp->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(fp, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) + | R500_RGB_ADDR2(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) + | R500_ALPHA_ADDR2(get_temp(fp, 0)); + /* Select [z, x, y] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); + temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); + 
fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [y, z, x] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); + temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); + fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(temp_swiz); + fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SWIZ_A_1 + | R500_ALPHA_SWIZ_B_1; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_A_SWIZ_0; + break; case OPCODE_KIL: case OPCODE_TEX: case OPCODE_TXB: -- cgit v1.2.3 From 5552500cdfc4b97f5c824f6af1f8213c785693f9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 28 May 2008 10:03:10 +1000 Subject: r500: cleanup warnings and include files --- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_state.c | 5 ----- src/mesa/drivers/dri/r300/r500_fragprog.c | 3 +-- src/mesa/drivers/dri/r300/r500_fragprog.h | 32 ++----------------------------- 4 files changed, 4 insertions(+), 37 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 4cca4a8093..53e5d181a4 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -74,6 +74,7 @@ typedef struct r300_context *r300ContextPtr; #include "r300_vertprog.h" #include "r300_fragprog.h" +#include "r500_fragprog.h" /** * This function takes a float and packs it into a uint32_t diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index df63f32d1d..c0896acc23 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -60,7 +60,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_tex.h" #include "drirenderbuffer.h" @@ -1369,7 +1368,6 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { - r300ContextPtr r300 = R300_CONTEXT(ctx); int i; struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; @@ -1898,9 +1896,6 @@ static inline void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count) { int vtx_mem_size; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; int pvs_num_slots; int pvs_num_cntrls; diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index c7ece029c0..cdbec35da5 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1495,7 +1495,7 @@ static char *toswiz(int swiz_val) { static char *toop(int op_val) { - char *str; + char *str = NULL; switch (op_val) { case 0: str = "MAD"; break; case 1: str = "DP3"; break; @@ -1578,7 +1578,6 @@ static char *to_texop(int val) static void dump_program(struct r500_fragment_program *fp) { - int pc = 0; int n; uint32_t inst; uint32_t inst0; diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 404dbf3b7c..5dd2def1c4 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -30,8 +30,8 @@ * Ben Skeggs * Jerome Glisse */ -#ifndef __R300_FRAGPROG_H_ -#define __R300_FRAGPROG_H_ +#ifndef __R500_FRAGPROG_H_ +#define __R500_FRAGPROG_H_ #include "glheader.h" #include "macros.h" @@ -41,12 +41,6 @@ #include "r300_context.h" -typedef struct r300_fragment_program_swizzle { - GLuint length; - GLuint src[4]; - 
GLuint inst[8]; -} r300_fragment_program_swizzle_t; - /* supported hw opcodes */ #define PFS_OP_MAD 0 #define PFS_OP_DP3 1 @@ -74,25 +68,6 @@ typedef struct r300_fragment_program_swizzle { #define SRC_MASK (63 << 0) #define SRC_STRIDE 6 -#define NOP_INST0 ( \ - (R300_FPI0_OUTC_MAD) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG0C_SHIFT) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG1C_SHIFT) | \ - (R300_FPI0_ARGC_ZERO << R300_FPI0_ARG2C_SHIFT)) -#define NOP_INST1 ( \ - ((0 | SRC_CONST) << R300_FPI1_SRC0C_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI1_SRC1C_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI1_SRC2C_SHIFT)) -#define NOP_INST2 ( \ - (R300_FPI2_OUTA_MAD) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG0A_SHIFT) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG1A_SHIFT) | \ - (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT)) -#define NOP_INST3 ( \ - ((0 | SRC_CONST) << R300_FPI3_SRC0A_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI3_SRC1A_SHIFT) | \ - ((0 | SRC_CONST) << R300_FPI3_SRC2A_SHIFT)) - #define DRI_CONF_FP_OPTIMIZATION_SPEED 0 #define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 @@ -101,7 +76,4 @@ struct r500_fragment_program; extern void r500TranslateFragmentShader(r300ContextPtr r300, struct r500_fragment_program *fp); -extern void r300TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp); - #endif -- cgit v1.2.3 From 867f5aac5361eda657491a98feca33c91eae3218 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 May 2008 10:11:14 +1000 Subject: R3/4/5xx: update to use drm get_param for num gb pipes --- src/mesa/drivers/dri/r300/r300_state.c | 31 ++++++++---------------- src/mesa/drivers/dri/radeon/radeon_screen.c | 37 +++++++++++++++++++++++------ src/mesa/drivers/dri/radeon/radeon_screen.h | 2 ++ 3 files changed, 42 insertions(+), 28 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index c0896acc23..6f1f4aac74 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ 
b/src/mesa/drivers/dri/r300/r300_state.c @@ -2201,36 +2201,25 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; - /* num pipes needs to be read back from the GB_PIPE_SELECT register - * on r4xx/r5xx/rs4xx/rs6xx - * should move this to the drm - */ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/; - switch (r300->radeon.radeonScreen->chip_family) { - case CHIP_FAMILY_R300: - case CHIP_FAMILY_R350: + switch (r300->radeon.radeonScreen->num_gb_pipes) { + case 1: + default: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= - R300_GB_TILE_PIPE_COUNT_R300; + R300_GB_TILE_PIPE_COUNT_RV300; break; - case CHIP_FAMILY_RV350: - case CHIP_FAMILY_RV515: - case CHIP_FAMILY_RV530: - case CHIP_FAMILY_RV410: + case 2: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= - R300_GB_TILE_PIPE_COUNT_RV300; + R300_GB_TILE_PIPE_COUNT_R300; break; - case CHIP_FAMILY_R420: - case CHIP_FAMILY_R520: - case CHIP_FAMILY_R580: - case CHIP_FAMILY_RV560: - case CHIP_FAMILY_RV570: + case 3: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= - R300_GB_TILE_PIPE_COUNT_R420; + R300_GB_TILE_PIPE_COUNT_R420_3P; break; - default: + case 4: r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= - R300_GB_TILE_DISABLE; /* TODO: This disables tiling totally. I guess it happened accidentially. 
*/ + R300_GB_TILE_PIPE_COUNT_R420; break; } diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index d840e22742..c962d23da1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -649,15 +649,8 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->chip_flags = RADEON_CHIPSET_TCL; break; - /* RV410 SE chips have half the pipes of regular RV410 - * Need to get num pipes form the GB_PIPE_SELECT register - */ case PCI_CHIP_RV410_5E4C: case PCI_CHIP_RV410_5E4F: - screen->chip_family = CHIP_FAMILY_RV380; - screen->chip_flags = RADEON_CHIPSET_TCL; - break; - case PCI_CHIP_RV410_564A: case PCI_CHIP_RV410_564B: case PCI_CHIP_RV410_564F: @@ -854,6 +847,36 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->fbLocation = (temp & 0xffff) << 16; } + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES, + &temp); + if (ret) { + fprintf(stderr, "Unable to get num_pipes, need newer drm\n"); + switch (screen->chip_family) { + case CHIP_FAMILY_R300: + case CHIP_FAMILY_R350: + screen->num_gb_pipes = 2; + break; + case CHIP_FAMILY_R420: + case CHIP_FAMILY_R520: + case CHIP_FAMILY_R580: + case CHIP_FAMILY_RV560: + case CHIP_FAMILY_RV570: + screen->num_gb_pipes = 4; + break; + case CHIP_FAMILY_RV350: + case CHIP_FAMILY_RV515: + case CHIP_FAMILY_RV530: + case CHIP_FAMILY_RV410: + default: + screen->num_gb_pipes = 1; + break; + } + } else { + screen->num_gb_pipes = temp; + } + } + if ( sPriv->drm_version.minor >= 10 ) { drm_radeon_setparam_t sp; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 184b0d225e..ab859d55bd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -105,6 +105,8 @@ typedef struct { driOptionCache optionCache; const __DRIextension *extensions[8]; + + int num_gb_pipes; } 
radeonScreenRec, *radeonScreenPtr; #define IS_R100_CLASS(screen) \ -- cgit v1.2.3 From 85af4fde7fd5c7f6a6976fbd9d6529a9082f42b7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 28 May 2008 10:07:30 +1000 Subject: r500: remove warnings and only start on newer drms. This removes lots of warnings to the user, and only allows the driver to run on > .29 drms for r500 cards. --- src/mesa/drivers/dri/radeon/radeon_screen.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index c962d23da1..9ad95c375c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -673,20 +673,17 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RC410_5A61: case PCI_CHIP_RC410_5A62: screen->chip_family = CHIP_FAMILY_RS400; - fprintf(stderr, "Warning, xpress200 detected.\n"); break; case PCI_CHIP_RS690_791E: case PCI_CHIP_RS690_791F: screen->chip_family = CHIP_FAMILY_RS690; - fprintf(stderr, "Warning, RS690 detected, 3D support is incomplete.\n"); break; case PCI_CHIP_RS740_796C: case PCI_CHIP_RS740_796D: case PCI_CHIP_RS740_796E: case PCI_CHIP_RS740_796F: screen->chip_family = CHIP_FAMILY_RS740; - fprintf(stderr, "Warning, RS740 detected, 3D support is incomplete.\n"); break; case PCI_CHIP_R520_7100: @@ -705,7 +702,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R520_710F: screen->chip_family = CHIP_FAMILY_R520; screen->chip_flags = RADEON_CHIPSET_TCL; - fprintf(stderr, "Warning, R520 detected, 3D HAHAHAHAHA!!.\n"); break; case PCI_CHIP_RV515_7140: @@ -748,7 +744,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV515_7211: screen->chip_family = CHIP_FAMILY_RV515; screen->chip_flags = RADEON_CHIPSET_TCL; - fprintf(stderr, "Warning, RV515 detected, 3D HAHAHAHAHA!!.\n"); break; case PCI_CHIP_RV530_71C0: @@ -769,7 +764,6 @@ radeonCreateScreen( 
__DRIscreenPrivate *sPriv ) case PCI_CHIP_RV530_71DE: screen->chip_family = CHIP_FAMILY_RV530; screen->chip_flags = RADEON_CHIPSET_TCL; - fprintf(stderr, "Warning, RV530 detected, 3D HAHAHAHAHA!!.\n"); break; case PCI_CHIP_R580_7240: @@ -789,7 +783,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_R580_7284: screen->chip_family = CHIP_FAMILY_R580; screen->chip_flags = RADEON_CHIPSET_TCL; - fprintf(stderr, "Warning, R580 detected, 3D HAHAHAHAHA!!.\n"); break; case PCI_CHIP_RV570_7280: @@ -806,7 +799,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) case PCI_CHIP_RV560_7297: screen->chip_family = CHIP_FAMILY_RV560; screen->chip_flags = RADEON_CHIPSET_TCL; - fprintf(stderr, "Warning, RV560 detected, 3D HAHAHAHAHA!!.\n"); break; default: @@ -820,6 +812,11 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) return NULL; } + if ((sPriv->drm_version.minor < 29) && (screen->chip_family >= CHIP_FAMILY_RV515)) { + fprintf(stderr, "R500 support requires a newer drm.\n"); + return NULL; + } + if (getenv("R300_NO_TCL")) screen->chip_flags &= ~RADEON_CHIPSET_TCL; -- cgit v1.2.3