From 4f800c5bacb9734d3030fe11144c8003ec4f169a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 21 Jan 2007 09:48:33 +1100 Subject: nouveau: rename pass0_arb to pass0. I was expecting to have 2 frontends for the shader code (asm, glsl). With Brian's work on GLSL this is unnecessary :) --- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 710 ++++++++++++++++++++++++ 1 file changed, 710 insertions(+) create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_shader_0.c (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c new file mode 100644 index 0000000000..34475cad03 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -0,0 +1,710 @@ +/* + * Copyright (C) 2006 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "program.h" +#include "programopt.h" +#include "program_instruction.h" + +#include "nouveau_context.h" +#include "nouveau_shader.h" + +static nvsFixedReg _tx_mesa_vp_dst_reg[VERT_RESULT_MAX] = { + NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD, + NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7, + NVS_FR_POINTSZ, NVS_FR_BFC0, NVS_FR_BFC1, NVS_FR_UNKNOWN /* EDGE */ +}; + +static nvsFixedReg _tx_mesa_fp_dst_reg[FRAG_RESULT_MAX] = { + NVS_FR_FRAGDATA0 /* COLR */, NVS_FR_FRAGDATA0 /* COLH */, + NVS_FR_UNKNOWN /* DEPR */ +}; + +static nvsFixedReg _tx_mesa_vp_src_reg[VERT_ATTRIB_MAX] = { + NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1, + NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN, + NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7, +/* Generic attribs 0-15, aliased to the above */ + NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1, + NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN, + NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7 +}; + +static nvsFixedReg _tx_mesa_fp_src_reg[FRAG_ATTRIB_MAX] = { + NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD, + NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7 +}; + +static nvsSwzComp _tx_mesa_swizzle[4] = { + NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W +}; + +static nvsOpcode _tx_mesa_opcode[] = { + [OPCODE_ABS] = NVS_OP_ABS, [OPCODE_ADD] = NVS_OP_ADD, + [OPCODE_ARA] = NVS_OP_ARA, [OPCODE_ARL] = NVS_OP_ARL, + [OPCODE_ARL_NV] = NVS_OP_ARL, [OPCODE_ARR] = NVS_OP_ARR, + [OPCODE_CMP] = NVS_OP_CMP, [OPCODE_COS] = NVS_OP_COS, + [OPCODE_DDX] = NVS_OP_DDX, [OPCODE_DDY] = NVS_OP_DDY, + [OPCODE_DP3] = NVS_OP_DP3, [OPCODE_DP4] = NVS_OP_DP4, + [OPCODE_DPH] = NVS_OP_DPH, [OPCODE_DST] = NVS_OP_DST, + [OPCODE_EX2] = NVS_OP_EX2, [OPCODE_EXP] = NVS_OP_EXP, + [OPCODE_FLR] = NVS_OP_FLR, [OPCODE_FRC] = NVS_OP_FRC, + [OPCODE_KIL] = NVS_OP_EMUL, [OPCODE_KIL_NV] = NVS_OP_KIL, + [OPCODE_LG2] = NVS_OP_LG2, [OPCODE_LIT] = NVS_OP_LIT, + [OPCODE_LOG] = NVS_OP_LOG, + [OPCODE_LRP] = NVS_OP_LRP, + [OPCODE_MAD] = NVS_OP_MAD, [OPCODE_MAX] = NVS_OP_MAX, + [OPCODE_MIN] = NVS_OP_MIN, [OPCODE_MOV] = NVS_OP_MOV, + [OPCODE_MUL] = NVS_OP_MUL, + [OPCODE_PK2H] = NVS_OP_PK2H, [OPCODE_PK2US] = NVS_OP_PK2US, + [OPCODE_PK4B] = NVS_OP_PK4B, [OPCODE_PK4UB] = NVS_OP_PK4UB, + [OPCODE_POW] = NVS_OP_POW, [OPCODE_POPA] = NVS_OP_POPA, + [OPCODE_PUSHA] = NVS_OP_PUSHA, + [OPCODE_RCC] = NVS_OP_RCC, [OPCODE_RCP] = NVS_OP_RCP, + [OPCODE_RFL] = NVS_OP_RFL, [OPCODE_RSQ] = NVS_OP_RSQ, + [OPCODE_SCS] = NVS_OP_SCS, [OPCODE_SEQ] = NVS_OP_SEQ, + [OPCODE_SFL] = NVS_OP_SFL, [OPCODE_SGE] = NVS_OP_SGE, + [OPCODE_SGT] = NVS_OP_SGT, [OPCODE_SIN] = NVS_OP_SIN, + [OPCODE_SLE] = NVS_OP_SLE, [OPCODE_SLT] = NVS_OP_SLT, + [OPCODE_SNE] = NVS_OP_SNE, [OPCODE_SSG] = NVS_OP_SSG, + [OPCODE_STR] = NVS_OP_STR, [OPCODE_SUB] = NVS_OP_SUB, + [OPCODE_SWZ] = NVS_OP_MOV, + [OPCODE_TEX] = NVS_OP_TEX, [OPCODE_TXB] = NVS_OP_TXB, + [OPCODE_TXD] = NVS_OP_TXD, + [OPCODE_TXL] = NVS_OP_TXL, [OPCODE_TXP] = NVS_OP_TXP, + [OPCODE_TXP_NV] = NVS_OP_TXP, + [OPCODE_UP2H] = NVS_OP_UP2H, [OPCODE_UP2US] = NVS_OP_UP2US, + [OPCODE_UP4B] = NVS_OP_UP4B, [OPCODE_UP4UB] = NVS_OP_UP4UB, + [OPCODE_X2D] = NVS_OP_X2D, + [OPCODE_XPD] = NVS_OP_XPD +}; + +static nvsCond _tx_mesa_condmask[] = { + NVS_COND_UNKNOWN, NVS_COND_GT, NVS_COND_LT, NVS_COND_UN, NVS_COND_GE, + NVS_COND_LE, NVS_COND_NE, NVS_COND_NE, NVS_COND_TR, NVS_COND_FL +}; + +struct pass0_rec { + int nvs_ipos; + int next_temp; + int swzconst_done; + int swzconst_id; + nvsRegister const_half; +}; + +#define X NVS_SWZ_X +#define Y NVS_SWZ_Y +#define Z NVS_SWZ_Z +#define W NVS_SWZ_W + +static void +pass0_append_fragment(nouveauShader *nvs, nvsFragmentHeader *fragment) +{ + nvsFragmentList *list = calloc(1, sizeof(nvsFragmentList)); + if (!list) + return; + + list->fragment = fragment; + list->prev = nvs->list_tail; + if ( nvs->list_tail) + nvs->list_tail->next = list; + if (!nvs->list_head) + nvs->list_head = list; + nvs->list_tail = list; + + nvs->inst_count++; +} + +static void +pass0_make_reg(nouveauShader *nvs, nvsRegister *reg, + nvsRegFile file, unsigned int index) +{ + struct pass0_rec *rec = nvs->pass_rec; + + /* defaults */ + *reg = nvr_unused; + /* -1 == quick-and-dirty temp alloc */ + if (file == NVS_FILE_TEMP && index == -1) { + index = rec->next_temp++; + assert(index < NVS_MAX_TEMPS); + } + reg->file = file; + reg->index = index; +} + +static void +pass0_make_swizzle(nvsSwzComp *swz, unsigned int mesa) +{ + int i; + + for (i=0;i<4;i++) + swz[i] = _tx_mesa_swizzle[GET_SWZ(mesa, i)]; +} + +static nvsOpcode +pass0_make_opcode(enum prog_opcode op) +{ + if (op > MAX_OPCODE) + return NVS_OP_UNKNOWN; + return _tx_mesa_opcode[op]; +} + +static nvsCond +pass0_make_condmask(GLuint mesa) +{ + if (mesa > COND_FL) + return NVS_COND_UNKNOWN; + return _tx_mesa_condmask[mesa]; +} + +static unsigned int +pass0_make_mask(GLuint mesa_mask) +{ + unsigned int mask = 0; + + if (mesa_mask & WRITEMASK_X) mask |= SMASK_X; + if (mesa_mask & WRITEMASK_Y) mask |= SMASK_Y; + if (mesa_mask & WRITEMASK_Z) mask |= SMASK_Z; + if (mesa_mask & WRITEMASK_W) mask |= SMASK_W; + + return mask; +} + +static nvsTexTarget +pass0_make_tex_target(GLuint mesa) +{ + switch (mesa) { + case TEXTURE_1D_INDEX: return NVS_TEX_TARGET_1D; + case TEXTURE_2D_INDEX: return NVS_TEX_TARGET_2D; + case TEXTURE_3D_INDEX: return NVS_TEX_TARGET_3D; + case TEXTURE_CUBE_INDEX: return NVS_TEX_TARGET_CUBE; + case TEXTURE_RECT_INDEX: return NVS_TEX_TARGET_RECT; + default: + return NVS_TEX_TARGET_UNKNOWN; + } +} + +static void +pass0_make_dst_reg(nvsPtr nvs, nvsRegister *reg, + struct prog_dst_register *dst) +{ + struct gl_program *mesa = (struct gl_program*)&nvs->mesa.vp; + nvsFixedReg sfr; + + switch (dst->File) { + case PROGRAM_OUTPUT: + if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { + sfr = (dst->Index < VERT_RESULT_MAX) ? + _tx_mesa_vp_dst_reg[dst->Index] : NVS_FR_UNKNOWN; + } else { + sfr = (dst->Index < FRAG_RESULT_MAX) ? + _tx_mesa_fp_dst_reg[dst->Index] : NVS_FR_UNKNOWN; + } + pass0_make_reg(nvs, reg, NVS_FILE_RESULT, sfr); + break; + case PROGRAM_TEMPORARY: + pass0_make_reg(nvs, reg, NVS_FILE_TEMP, dst->Index); + break; + case PROGRAM_ADDRESS: + pass0_make_reg(nvs, reg, NVS_FILE_ADDRESS, dst->Index); + break; + default: + fprintf(stderr, "Unknown dest file %d\n", dst->File); + assert(0); + } +} + +static void +pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src) +{ + struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base; + struct gl_program_parameter_list *p = mesa->Parameters; + + *reg = nvr_unused; + + switch (src->File) { + case PROGRAM_INPUT: + reg->file = NVS_FILE_ATTRIB; + if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { + reg->index = (src->Index < VERT_ATTRIB_MAX) ? + _tx_mesa_vp_src_reg[src->Index] : NVS_FR_UNKNOWN; + } else { + reg->index = (src->Index < FRAG_ATTRIB_MAX) ? + _tx_mesa_fp_src_reg[src->Index] : NVS_FR_UNKNOWN; + } + break; + /* All const types seem to get shoved into here, not really sure why */ + case PROGRAM_STATE_VAR: + switch (p->Parameters[src->Index].Type) { + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + nvs->params[src->Index].source_val = NULL; + COPY_4V(nvs->params[src->Index].val, p->ParameterValues[src->Index]); + break; + case PROGRAM_STATE_VAR: + nvs->params[src->Index].source_val = p->ParameterValues[src->Index]; + break; + default: + fprintf(stderr, "Unknown parameter type %d\n", + p->Parameters[src->Index].Type); + assert(0); + break; + } + + if (src->RelAddr) { + reg->indexed = 1; + reg->addr_reg = 0; + reg->addr_comp = NVS_SWZ_X; + } else + reg->indexed = 0; + reg->file = NVS_FILE_CONST; + reg->index = src->Index; + break; + case PROGRAM_TEMPORARY: + reg->file = NVS_FILE_TEMP; + reg->index = src->Index; + break; + default: + fprintf(stderr, "Unknown source type %d\n", src->File); + assert(0); + } + + /* per-component negate handled elsewhere */ + reg->negate = src->NegateBase != 0; + reg->abs = src->Abs; + pass0_make_swizzle(reg->swizzle, src->Swizzle); +} + +static nvsInstruction * +pass0_emit(nouveauShader *nvs, nvsOpcode op, nvsRegister dst, + unsigned int mask, int saturate, + nvsRegister src0, nvsRegister src1, nvsRegister src2) +{ + struct pass0_rec *rec = nvs->pass_rec; + nvsInstruction *sif = NULL; + + /* Seems mesa doesn't explicitly 0 this.. */ + if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB) + saturate = 0; + + sif = calloc(1, sizeof(nvsInstruction)); + if (sif) { + sif->header.type = NVS_INSTRUCTION; + sif->header.position = rec->nvs_ipos++; + sif->op = op; + sif->saturate = saturate; + sif->dest = dst; + sif->mask = mask; + sif->src[0] = src0; + sif->src[1] = src1; + sif->src[2] = src2; + sif->cond = COND_TR; + sif->cond_reg = 0; + sif->cond_test = 0; + sif->cond_update = 0; + pass0_make_swizzle(sif->cond_swizzle, SWIZZLE_NOOP); + pass0_append_fragment(nvs, (nvsFragmentHeader *)sif); + } + + return sif; +} + +static void +pass0_fixup_swizzle(nvsPtr nvs, + struct prog_src_register *src, + unsigned int sm1, + unsigned int sm2) +{ + static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 }; + struct pass0_rec *rec = nvs->pass_rec; + int fixup_1, fixup_2; + nvsRegister sr, dr = nvr_unused; + nvsRegister sm1const, sm2const; + + if (!rec->swzconst_done) { + struct gl_program *prog = &nvs->mesa.vp.Base; + rec->swzconst_id = _mesa_add_unnamed_constant(prog->Parameters, sc, 4); + rec->swzconst_done = 1; + COPY_4V(nvs->params[rec->swzconst_id].val, sc); + } + + fixup_1 = (sm1 != MAKE_SWIZZLE4(0,0,0,0) && sm2 != MAKE_SWIZZLE4(2,2,2,2)); + fixup_2 = (sm2 != MAKE_SWIZZLE4(2,2,2,2)); + + if (src->File != PROGRAM_TEMPORARY && src->File != PROGRAM_INPUT) { + /* We can't use more than one const in an instruction, so move the const + * into a temp, and swizzle from there. + *TODO: should just emit the swizzled const, instead of swizzling it + * in the shader.. would need to reswizzle any state params when they + * change however.. + */ + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + pass0_make_src_reg(nvs, &sr, src); + pass0_emit(nvs, NVS_OP_MOV, dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused); + pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index); + } else { + if (fixup_1) + src->NegateBase = 0; + pass0_make_src_reg(nvs, &sr, src); + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + } + + pass0_make_reg(nvs, &sm1const, NVS_FILE_CONST, rec->swzconst_id); + pass0_make_swizzle(sm1const.swizzle, sm1); + if (fixup_1 && fixup_2) { + /* Any combination with SWIZZLE_ONE */ + pass0_make_reg(nvs, &sm2const, NVS_FILE_CONST, rec->swzconst_id); + pass0_make_swizzle(sm2const.swizzle, sm2); + pass0_emit(nvs, NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const); + } else { + /* SWIZZLE_ZERO || arbitrary negate */ + pass0_emit(nvs, NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused); + } + + src->File = PROGRAM_TEMPORARY; + src->Index = dr.index; + src->Swizzle = SWIZZLE_NOOP; +} + +#define SET_SWZ(fs, cp, c) fs = (fs & ~(0x7<<(cp*3))) | (c<<(cp*3)) +static void +pass0_check_sources(nvsPtr nvs, struct prog_instruction *inst) +{ + unsigned int insrc = -1, constsrc = -1; + int i; + + for (i=0;i<_mesa_num_inst_src_regs(inst->Opcode);i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + unsigned int sm_1 = 0, sm_2 = 0; + nvsRegister sr, dr; + int do_mov = 0, c; + + /* Build up swizzle masks as if we were going to use + * "MAD new, src, const1, const2" to support arbitrary negation + * and SWIZZLE_ZERO/SWIZZLE_ONE. + */ + for (c=0;c<4;c++) { + if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ZERO) { + SET_SWZ(sm_1, c, SWIZZLE_Y); /* 0.0 */ + SET_SWZ(sm_2, c, SWIZZLE_Y); + SET_SWZ(src->Swizzle, c, SWIZZLE_X); + } else if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ONE) { + SET_SWZ(sm_1, c, SWIZZLE_Y); + if (src->NegateBase & (1<Swizzle, c, SWIZZLE_X); + } else { + if (src->NegateBase & (1<File) { + case PROGRAM_INPUT: + if (insrc != -1 && insrc != src->Index) + do_mov = 1; + else insrc = src->Index; + break; + case PROGRAM_STATE_VAR: + if (constsrc != -1 && constsrc != src->Index) + do_mov = 1; + else constsrc = src->Index; + break; + default: + break; + } + + /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa instruction + * to point at the temp. + */ + if (do_mov) { + pass0_make_src_reg(nvs, &sr, src); + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + pass0_emit(nvs, NVS_OP_MOV, dr, SMASK_ALL, 0, + sr, nvr_unused, nvr_unused); + + src->File = PROGRAM_TEMPORARY; + src->Index = dr.index; + src->Swizzle= SWIZZLE_NOOP; + } + } +} + +static GLboolean +pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) +{ + nvsFunc *shader = nvs->func; + nvsRegister src[3], dest, temp; + nvsInstruction *nvsinst; + struct pass0_rec *rec = nvs->pass_rec; + unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask); + int i, sat; + + sat = (inst->SaturateMode == SATURATE_ZERO_ONE); + + /* Build all the "real" regs for the instruction */ + for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) + pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); + if (inst->Opcode != OPCODE_KIL) + pass0_make_dst_reg(nvs, &dest, &inst->DstReg); + + switch (inst->Opcode) { + case OPCODE_ABS: + if (shader->caps & SCAP_SRC_ABS) + pass0_emit(nvs, NVS_OP_MOV, dest, mask, sat, + nvsAbs(src[0]), nvr_unused, nvr_unused); + else + pass0_emit(nvs, NVS_OP_MAX, dest, mask, sat, + src[0], nvsNegate(src[0]), nvr_unused); + break; + case OPCODE_KIL: + /* This is only in ARB shaders, so we don't have to worry + * about clobbering a CC reg as they aren't supported anyway. + */ + /* MOVC0 temp, src */ + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + nvsinst = pass0_emit(nvs, NVS_OP_MOV, temp, SMASK_ALL, 0, + src[0], nvr_unused, nvr_unused); + nvsinst->cond_update = 1; + nvsinst->cond_reg = 0; + /* KIL_NV (LT0.xyzw) temp */ + nvsinst = pass0_emit(nvs, NVS_OP_KIL, nvr_unused, 0, 0, + nvr_unused, nvr_unused, nvr_unused); + nvsinst->cond = COND_LT; + nvsinst->cond_reg = 0; + nvsinst->cond_test = 1; + pass0_make_swizzle(nvsinst->cond_swizzle, MAKE_SWIZZLE4(0,1,2,3)); + break; + case OPCODE_LIT: + break; + case OPCODE_LRP: + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, NVS_OP_MAD, temp, mask, 0, + nvsNegate(src[0]), src[2], src[2]); + pass0_emit(nvs, NVS_OP_MAD, dest, mask, sat, + src[0], src[1], temp); + break; + case OPCODE_POW: + if (shader->SupportsOpcode(shader, NVS_OP_LG2) && + shader->SupportsOpcode(shader, NVS_OP_EX2)) { + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + /* LG2 temp.x, src0.c */ + pass0_emit(nvs, NVS_OP_LG2, temp, SMASK_X, 0, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + /* MUL temp.x, temp.x, src1.c */ + pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_X, 0, + nvsSwizzle(temp, X, X, X, X), + nvsSwizzle(src[1], X, X, X, X), + nvr_unused); + /* EX2 dest, temp.x */ + pass0_emit(nvs, NVS_OP_EX2, dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), + nvr_unused, + nvr_unused); + } else { + /* can we use EXP/LOG instead of EX2/LG2?? */ + fprintf(stderr, "Implement POW for NV20 vtxprog!\n"); + return GL_FALSE; + } + break; + case OPCODE_RSQ: + if (rec->const_half.file != NVS_FILE_CONST) { + GLfloat const_half[4] = { 0.5, 0.0, 0.0, 0.0 }; + pass0_make_reg(nvs, &rec->const_half, NVS_FILE_CONST, + _mesa_add_unnamed_constant(nvs->mesa.vp.Base.Parameters, + const_half, 4)); + COPY_4V(nvs->params[rec->const_half.index].val, const_half); + } + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, NVS_OP_LG2, temp, SMASK_X, 0, + nvsAbs(nvsSwizzle(src[0], X, X, X, X)), + nvr_unused, + nvr_unused); + pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_X, 0, + nvsSwizzle(temp, X, X, X, X), + nvsNegate(rec->const_half), + nvr_unused); + pass0_emit(nvs, NVS_OP_EX2, dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), + nvr_unused, + nvr_unused); + break; + case OPCODE_SCS: + if (mask & SMASK_X) + pass0_emit(nvs, NVS_OP_COS, dest, SMASK_X, sat, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + if (mask & SMASK_Y) + pass0_emit(nvs, NVS_OP_SIN, dest, SMASK_Y, sat, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + break; + case OPCODE_SUB: + pass0_emit(nvs, NVS_OP_ADD, dest, mask, sat, + src[0], nvsNegate(src[1]), nvr_unused); + break; + case OPCODE_XPD: + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_ALL, 0, + nvsSwizzle(src[0], Z, X, Y, Y), + nvsSwizzle(src[1], Y, Z, X, X), + nvr_unused); + pass0_emit(nvs, NVS_OP_MAD, dest, (mask & ~SMASK_W), sat, + nvsSwizzle(src[0], Y, Z, X, X), + nvsSwizzle(src[1], Z, X, Y, Y), + nvsNegate(temp)); + break; + default: + fprintf(stderr, "hw doesn't support opcode \"%s\", and no emulation found\n", + _mesa_opcode_string(inst->Opcode)); + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean +pass0_translate_instructions(nouveauShader *nvs) +{ + struct gl_program *prog = (struct gl_program *)&nvs->mesa.vp; + nvsFunc *shader = nvs->func; + int ipos; + + for (ipos=0; iposNumInstructions; ipos++) { + struct prog_instruction *inst = &prog->Instructions[ipos]; + + if (inst->Opcode == OPCODE_END) + break; + + /* Deal with multiple ATTRIB/PARAM in a single instruction */ + pass0_check_sources(nvs, inst); + + /* Now it's safe to do the prog_instruction->nvsInstruction conversion */ + if (shader->SupportsOpcode(shader, pass0_make_opcode(inst->Opcode))) { + nvsInstruction *nvsinst; + nvsRegister src[3], dest; + int i; + + for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) + pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); + pass0_make_dst_reg(nvs, &dest, &inst->DstReg); + + nvsinst = pass0_emit(nvs, + pass0_make_opcode(inst->Opcode), + dest, + pass0_make_mask(inst->DstReg.WriteMask), + (inst->SaturateMode != SATURATE_OFF), + src[0], src[1], src[2]); + nvsinst->tex_unit = inst->TexSrcUnit; + nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget); + /* TODO when NV_fp/vp is implemented */ + nvsinst->cond = COND_TR; + } else { + if (!pass0_emulate_instruction(nvs, inst)) + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean +nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + struct gl_program *prog = (struct gl_program*)nvs; + struct gl_vertex_program *vp = (struct gl_vertex_program *)prog; + struct gl_fragment_program *fp = (struct gl_fragment_program *)prog; + struct pass0_rec *rec; + int ret; + + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + nvs->func = &nmesa->VPfunc; + if (vp->IsPositionInvariant) + _mesa_insert_mvp_code(ctx, vp); +#if 0 + if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED) + pass0_insert_ff_clip_planes(); +#endif + break; + case GL_FRAGMENT_PROGRAM_ARB: + nvs->func = &nmesa->FPfunc; + if (fp->FogOption != GL_NONE) + _mesa_append_fog_code(ctx, fp); + break; + default: + fprintf(stderr, "Unknown program type %d", prog->Target); + return GL_FALSE; + } + + rec = calloc(1, sizeof(struct pass0_rec)); + rec->next_temp = prog->NumTemporaries; + nvs->pass_rec = rec; + + ret = pass0_translate_instructions(nvs); + if (!ret) { + /* DESTROY list */ + } + + free(nvs->pass_rec); + return ret; +} + -- cgit v1.2.3 From c3ac2709967299481928dee175a124bf8a72fecd Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 21 Jan 2007 16:45:40 +1100 Subject: nouveau: start converting shaders into a tree format again.. No branching stuff implemented yet. Works enough for gears, probably other stuff broken. --- src/mesa/drivers/dri/nouveau/nouveau_shader.c | 6 +- src/mesa/drivers/dri/nouveau/nouveau_shader.h | 60 +++- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 375 +++++++++++++++--------- src/mesa/drivers/dri/nouveau/nouveau_shader_2.c | 45 ++- 4 files changed, 315 insertions(+), 171 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c index e4db115362..f911347d62 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c @@ -563,12 +563,12 @@ nvsDumpInstruction(nvsInstruction * inst, int slot, int lvl) } void -nvsDumpFragmentList(nvsFragmentList *f, int lvl) +nvsDumpFragmentList(nvsFragmentHeader *f, int lvl) { while (f) { - switch (f->fragment->type) { + switch (f->type) { case NVS_INSTRUCTION: - nvsDumpInstruction((nvsInstruction*)f->fragment, 0, lvl); + nvsDumpInstruction((nvsInstruction*)f, 0, lvl); break; default: fprintf(stderr, "%s: Only NVS_INSTRUCTION fragments can be in" diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index 1a971e56bf..a3ab027142 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -12,19 +12,18 @@ typedef struct _nvsFunc nvsFunc; #define NVS_MAX_ADDRESS 2 #define NVS_MAX_INSNS 4096 -typedef struct { +typedef struct _nvs_fragment_header { + struct _nvs_fragment_header *parent; + struct _nvs_fragment_header *prev; + struct _nvs_fragment_header *next; enum { NVS_INSTRUCTION, + NVS_BRANCH, + NVS_LOOP, + NVS_SUBROUTINE } type; - int position; } nvsFragmentHeader; -typedef struct _nvs_fragment_list { - struct _nvs_fragment_list *prev; - struct _nvs_fragment_list *next; - nvsFragmentHeader *fragment; -} nvsFragmentList; - typedef struct _nouveauShader { union { struct gl_vertex_program vp; @@ -59,8 +58,7 @@ typedef struct _nouveauShader { /* Pass-private data */ void *pass_rec; - nvsFragmentList *list_head; - nvsFragmentList *list_tail; + nvsFragmentHeader *program_tree; } nouveauShader, *nvsPtr; typedef enum { @@ -186,7 +184,8 @@ typedef enum { NVS_TEX_TARGET_UNKNOWN = 0 } nvsTexTarget; -typedef struct { +/* Arith/TEX instructions */ +typedef struct nvs_instruction { nvsFragmentHeader header; nvsOpcode op; @@ -207,6 +206,43 @@ typedef struct { int cond_update; } nvsInstruction; +/* BRA, CAL, IF */ +typedef struct nvs_branch { + nvsFragmentHeader header; + + nvsOpcode op; + + nvsCond cond; + nvsSwzComp cond_swizzle[4]; + int cond_test; + + nvsFragmentHeader *target_head; + nvsFragmentHeader *target_tail; + nvsFragmentHeader *else_head; + nvsFragmentHeader *else_tail; +} nvsBranch; + +/* LOOP+ENDLOOP */ +typedef struct { + nvsFragmentHeader header; + + int count; + int initial; + int increment; + + nvsFragmentHeader *insn_head; + nvsFragmentHeader *insn_tail; +} nvsLoop; + +/* label+following instructions */ +typedef struct nvs_subroutine { + nvsFragmentHeader header; + + char * label; + nvsFragmentHeader *insn_head; + nvsFragmentHeader *insn_tail; +} nvsSubroutine; + #define SMASK_X (1<<0) #define SMASK_Y (1<<1) #define SMASK_Z (1<<2) @@ -353,7 +389,7 @@ nvsSwizzle(nvsRegister reg, nvsSwzComp x, nvsSwzComp y, extern GLboolean nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs); extern void nvsDisasmHWShader(nvsPtr); -extern void nvsDumpFragmentList(nvsFragmentList *f, int lvl); +extern void nvsDumpFragmentList(nvsFragmentHeader *f, int lvl); extern nouveauShader *nvsBuildTextShader(GLcontext *ctx, GLenum target, const char *text); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 34475cad03..503eae36b7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -40,6 +40,7 @@ #include "nouveau_context.h" #include "nouveau_shader.h" +#include "nouveau_msg.h" static nvsFixedReg _tx_mesa_vp_dst_reg[VERT_RESULT_MAX] = { NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD, @@ -134,21 +135,63 @@ struct pass0_rec { #define W NVS_SWZ_W static void -pass0_append_fragment(nouveauShader *nvs, nvsFragmentHeader *fragment) +pass0_append_fragment(nvsFragmentHeader *parent, + nvsFragmentHeader *fragment, + int pos) { - nvsFragmentList *list = calloc(1, sizeof(nvsFragmentList)); - if (!list) - return; - - list->fragment = fragment; - list->prev = nvs->list_tail; - if ( nvs->list_tail) - nvs->list_tail->next = list; - if (!nvs->list_head) - nvs->list_head = list; - nvs->list_tail = list; - - nvs->inst_count++; + nvsFragmentHeader **head, **tail; + assert(parent && fragment); + + switch (parent->type) { + case NVS_BRANCH: + if (pos == 0) { + head = &((nvsBranch *)parent)->target_head; + tail = &((nvsBranch *)parent)->target_tail; + } else { + head = &((nvsBranch *)parent)->else_head; + tail = &((nvsBranch *)parent)->else_tail; + } + break; + case NVS_LOOP: + head = &((nvsLoop *)parent)->insn_head; + tail = &((nvsLoop *)parent)->insn_tail; + break; + case NVS_SUBROUTINE: + head = &((nvsSubroutine *)parent)->insn_head; + tail = &((nvsSubroutine *)parent)->insn_tail; + break; + default: + assert(0); + break; + } + + fragment->parent = parent; + fragment->prev = *tail; + fragment->next = NULL; + if (!(*head)) + *head = fragment; + else + (*tail)->next = fragment; + *tail = fragment; + +} + +static nvsSubroutine * +pass0_create_subroutine(nouveauShader *nvs, const char *label) +{ + nvsSubroutine *sub; + + sub = CALLOC_STRUCT(nvs_subroutine); + if (sub) { + sub->header.type = NVS_SUBROUTINE; + sub->label = strdup(label); + if (!nvs->program_tree) + nvs->program_tree = &sub->header; + else + pass0_append_fragment(nvs->program_tree, &sub->header, 0); + } + + return sub; } static void @@ -312,41 +355,40 @@ pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src) } static nvsInstruction * -pass0_emit(nouveauShader *nvs, nvsOpcode op, nvsRegister dst, - unsigned int mask, int saturate, +pass0_emit(nouveauShader *nvs, nvsFragmentHeader *parent, int fpos, + nvsOpcode op, nvsRegister dst, + unsigned int mask, int saturate, nvsRegister src0, nvsRegister src1, nvsRegister src2) { - struct pass0_rec *rec = nvs->pass_rec; - nvsInstruction *sif = NULL; - - /* Seems mesa doesn't explicitly 0 this.. */ - if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB) - saturate = 0; - - sif = calloc(1, sizeof(nvsInstruction)); - if (sif) { - sif->header.type = NVS_INSTRUCTION; - sif->header.position = rec->nvs_ipos++; - sif->op = op; - sif->saturate = saturate; - sif->dest = dst; - sif->mask = mask; - sif->src[0] = src0; - sif->src[1] = src1; - sif->src[2] = src2; - sif->cond = COND_TR; - sif->cond_reg = 0; - sif->cond_test = 0; - sif->cond_update = 0; - pass0_make_swizzle(sif->cond_swizzle, SWIZZLE_NOOP); - pass0_append_fragment(nvs, (nvsFragmentHeader *)sif); - } - - return sif; + nvsInstruction *sif; + + sif = CALLOC_STRUCT(nvs_instruction); + if (!sif) + return NULL; + + /* Seems mesa doesn't explicitly 0 this.. */ + if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB) + saturate = 0; + + sif->op = op; + sif->saturate = saturate; + sif->dest = dst; + sif->mask = mask; + sif->src[0] = src0; + sif->src[1] = src1; + sif->src[2] = src2; + sif->cond = COND_TR; + sif->cond_reg = 0; + sif->cond_test = 0; + sif->cond_update= 0; + pass0_make_swizzle(sif->cond_swizzle, SWIZZLE_NOOP); + pass0_append_fragment(parent, &sif->header, fpos); + + return sif; } static void -pass0_fixup_swizzle(nvsPtr nvs, +pass0_fixup_swizzle(nvsPtr nvs, nvsFragmentHeader *parent, int fpos, struct prog_src_register *src, unsigned int sm1, unsigned int sm2) @@ -376,7 +418,7 @@ pass0_fixup_swizzle(nvsPtr nvs, */ pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); pass0_make_src_reg(nvs, &sr, src); - pass0_emit(nvs, NVS_OP_MOV, dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused); + pass0_emit(nvs, parent, fpos, NVS_OP_MOV, dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused); pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index); } else { if (fixup_1) @@ -391,10 +433,10 @@ pass0_fixup_swizzle(nvsPtr nvs, /* Any combination with SWIZZLE_ONE */ pass0_make_reg(nvs, &sm2const, NVS_FILE_CONST, rec->swzconst_id); pass0_make_swizzle(sm2const.swizzle, sm2); - pass0_emit(nvs, NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const); + pass0_emit(nvs, parent, fpos, NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const); } else { /* SWIZZLE_ZERO || arbitrary negate */ - pass0_emit(nvs, NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused); + pass0_emit(nvs, parent, fpos, NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused); } src->File = PROGRAM_TEMPORARY; @@ -404,7 +446,8 @@ pass0_fixup_swizzle(nvsPtr nvs, #define SET_SWZ(fs, cp, c) fs = (fs & ~(0x7<<(cp*3))) | (c<<(cp*3)) static void -pass0_check_sources(nvsPtr nvs, struct prog_instruction *inst) +pass0_check_sources(nvsPtr nvs, nvsFragmentHeader *parent, int fpos, + struct prog_instruction *inst) { unsigned int insrc = -1, constsrc = -1; int i; @@ -444,7 +487,7 @@ pass0_check_sources(nvsPtr nvs, struct prog_instruction *inst) */ if ((sm_1 != MAKE_SWIZZLE4(0,0,0,0) && sm_1 != MAKE_SWIZZLE4(2,2,2,2)) || sm_2 != MAKE_SWIZZLE4(1,1,1,1)) { - pass0_fixup_swizzle(nvs, src, sm_1, sm_2); + pass0_fixup_swizzle(nvs, parent, fpos, src, sm_1, sm_2); /* The source is definitely in a temp now, so don't bother checking * for multiple ATTRIB/CONST regs. */ @@ -473,7 +516,7 @@ pass0_check_sources(nvsPtr nvs, struct prog_instruction *inst) if (do_mov) { pass0_make_src_reg(nvs, &sr, src); pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); - pass0_emit(nvs, NVS_OP_MOV, dr, SMASK_ALL, 0, + pass0_emit(nvs, parent, fpos, NVS_OP_MOV, dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused); src->File = PROGRAM_TEMPORARY; @@ -484,7 +527,9 @@ pass0_check_sources(nvsPtr nvs, struct prog_instruction *inst) } static GLboolean -pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) +pass0_emulate_instruction(nouveauShader *nvs, + nvsFragmentHeader *parent, int fpos, + struct prog_instruction *inst) { nvsFunc *shader = nvs->func; nvsRegister src[3], dest, temp; @@ -504,10 +549,10 @@ pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) switch (inst->Opcode) { case OPCODE_ABS: if (shader->caps & SCAP_SRC_ABS) - pass0_emit(nvs, NVS_OP_MOV, dest, mask, sat, + pass0_emit(nvs, parent, fpos, NVS_OP_MOV, dest, mask, sat, nvsAbs(src[0]), nvr_unused, nvr_unused); else - pass0_emit(nvs, NVS_OP_MAX, dest, mask, sat, + pass0_emit(nvs, parent, fpos, NVS_OP_MAX, dest, mask, sat, src[0], nvsNegate(src[0]), nvr_unused); break; case OPCODE_KIL: @@ -516,12 +561,12 @@ pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) */ /* MOVC0 temp, src */ pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - nvsinst = pass0_emit(nvs, NVS_OP_MOV, temp, SMASK_ALL, 0, + nvsinst = pass0_emit(nvs, parent, fpos, NVS_OP_MOV, temp, SMASK_ALL, 0, src[0], nvr_unused, nvr_unused); nvsinst->cond_update = 1; nvsinst->cond_reg = 0; /* KIL_NV (LT0.xyzw) temp */ - nvsinst = pass0_emit(nvs, NVS_OP_KIL, nvr_unused, 0, 0, + nvsinst = pass0_emit(nvs, parent, fpos, NVS_OP_KIL, nvr_unused, 0, 0, nvr_unused, nvr_unused, nvr_unused); nvsinst->cond = COND_LT; nvsinst->cond_reg = 0; @@ -532,9 +577,9 @@ pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) break; case OPCODE_LRP: pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, NVS_OP_MAD, temp, mask, 0, + pass0_emit(nvs, parent, fpos, NVS_OP_MAD, temp, mask, 0, nvsNegate(src[0]), src[2], src[2]); - pass0_emit(nvs, NVS_OP_MAD, dest, mask, sat, + pass0_emit(nvs, parent, fpos, NVS_OP_MAD, dest, mask, sat, src[0], src[1], temp); break; case OPCODE_POW: @@ -542,17 +587,17 @@ pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) shader->SupportsOpcode(shader, NVS_OP_EX2)) { pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); /* LG2 temp.x, src0.c */ - pass0_emit(nvs, NVS_OP_LG2, temp, SMASK_X, 0, + pass0_emit(nvs, parent, fpos, NVS_OP_LG2, temp, SMASK_X, 0, nvsSwizzle(src[0], X, X, X, X), nvr_unused, nvr_unused); /* MUL temp.x, temp.x, src1.c */ - pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_X, 0, + pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_X, 0, nvsSwizzle(temp, X, X, X, X), nvsSwizzle(src[1], X, X, X, X), nvr_unused); /* EX2 dest, temp.x */ - pass0_emit(nvs, NVS_OP_EX2, dest, mask, sat, + pass0_emit(nvs, parent, fpos, NVS_OP_EX2, dest, mask, sat, nvsSwizzle(temp, X, X, X, X), nvr_unused, nvr_unused); @@ -571,42 +616,42 @@ pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) COPY_4V(nvs->params[rec->const_half.index].val, const_half); } pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, NVS_OP_LG2, temp, SMASK_X, 0, + pass0_emit(nvs, parent, fpos, NVS_OP_LG2, temp, SMASK_X, 0, nvsAbs(nvsSwizzle(src[0], X, X, X, X)), nvr_unused, nvr_unused); - pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_X, 0, + pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_X, 0, nvsSwizzle(temp, X, X, X, X), nvsNegate(rec->const_half), nvr_unused); - pass0_emit(nvs, NVS_OP_EX2, dest, mask, sat, + pass0_emit(nvs, parent, fpos, NVS_OP_EX2, dest, mask, sat, nvsSwizzle(temp, X, X, X, X), nvr_unused, nvr_unused); break; case OPCODE_SCS: if (mask & SMASK_X) - pass0_emit(nvs, NVS_OP_COS, dest, SMASK_X, sat, + pass0_emit(nvs, parent, fpos, NVS_OP_COS, dest, SMASK_X, sat, nvsSwizzle(src[0], X, X, X, X), nvr_unused, nvr_unused); if (mask & SMASK_Y) - pass0_emit(nvs, NVS_OP_SIN, dest, SMASK_Y, sat, + pass0_emit(nvs, parent, fpos, NVS_OP_SIN, dest, SMASK_Y, sat, nvsSwizzle(src[0], X, X, X, X), nvr_unused, nvr_unused); break; case OPCODE_SUB: - pass0_emit(nvs, NVS_OP_ADD, dest, mask, sat, + pass0_emit(nvs, parent, fpos, NVS_OP_ADD, dest, mask, sat, src[0], nvsNegate(src[1]), nvr_unused); break; case OPCODE_XPD: pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_ALL, 0, + pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_ALL, 0, nvsSwizzle(src[0], Z, X, Y, Y), nvsSwizzle(src[1], Y, Z, X, X), nvr_unused); - pass0_emit(nvs, NVS_OP_MAD, dest, (mask & ~SMASK_W), sat, + pass0_emit(nvs, parent, fpos, NVS_OP_MAD, dest, (mask & ~SMASK_W), sat, nvsSwizzle(src[0], Y, Z, X, X), nvsSwizzle(src[1], Z, X, Y, Y), nvsNegate(temp)); @@ -621,90 +666,132 @@ pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) } static GLboolean -pass0_translate_instructions(nouveauShader *nvs) +pass0_translate_arith(nouveauShader *nvs, struct gl_program *prog, + int ipos, int fpos, + nvsFragmentHeader *parent) { - struct gl_program *prog = (struct gl_program *)&nvs->mesa.vp; - nvsFunc *shader = nvs->func; - int ipos; - - for (ipos=0; iposNumInstructions; ipos++) { - struct prog_instruction *inst = &prog->Instructions[ipos]; - - if (inst->Opcode == OPCODE_END) - break; - - /* Deal with multiple ATTRIB/PARAM in a single instruction */ - pass0_check_sources(nvs, inst); - - /* Now it's safe to do the prog_instruction->nvsInstruction conversion */ - if (shader->SupportsOpcode(shader, pass0_make_opcode(inst->Opcode))) { - nvsInstruction *nvsinst; - nvsRegister src[3], dest; - int i; - - for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) - pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); - pass0_make_dst_reg(nvs, &dest, &inst->DstReg); - - nvsinst = pass0_emit(nvs, - pass0_make_opcode(inst->Opcode), - dest, - pass0_make_mask(inst->DstReg.WriteMask), - (inst->SaturateMode != SATURATE_OFF), - src[0], src[1], src[2]); - nvsinst->tex_unit = inst->TexSrcUnit; - nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget); - /* TODO when NV_fp/vp is implemented */ - nvsinst->cond = COND_TR; - } else { - if (!pass0_emulate_instruction(nvs, inst)) - return GL_FALSE; - } - } + struct prog_instruction *inst = &prog->Instructions[ipos]; + nvsFunc *shader = nvs->func; + nvsInstruction *nvsinst; + GLboolean ret; + + /* Deal with multiple ATTRIB/PARAM in a single instruction */ + pass0_check_sources(nvs, parent, fpos, inst); + + /* Now it's safe to do the prog_instruction->nvsInstruction + * conversion + */ + if (shader->SupportsOpcode(shader, + pass0_make_opcode(inst->Opcode))) { + nvsRegister src[3], dest; + int i; + + for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) + pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); + pass0_make_dst_reg(nvs, &dest, &inst->DstReg); + + nvsinst = pass0_emit(nvs, parent, fpos, + pass0_make_opcode(inst->Opcode), + dest, + pass0_make_mask(inst->DstReg.WriteMask), + (inst->SaturateMode != SATURATE_OFF), + src[0], src[1], src[2]); + nvsinst->tex_unit = inst->TexSrcUnit; + nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget); + /* TODO when NV_fp/vp is implemented */ + nvsinst->cond = COND_TR; + + ret = GL_TRUE; + } else + ret = pass0_emulate_instruction(nvs, parent, fpos, inst); + + return ret; +} - return GL_TRUE; +static GLboolean +pass0_translate_instructions(nouveauShader *nvs, int ipos, int fpos, + nvsFragmentHeader *parent) +{ + struct gl_program *prog = (struct gl_program *)&nvs->mesa.vp; + + while (1) { + struct prog_instruction *inst = &prog->Instructions[ipos]; + + switch (inst->Opcode) { + case OPCODE_END: + return GL_TRUE; + case OPCODE_BRA: + case OPCODE_CAL: + //case OPCDOE_RET: + //case OPCODE_LOOP: + //case OPCODE_ENDLOOP: + //case OPCODE_IF: + //case OPCODE_ELSE: + //case OPCODE_ENDIF: + WARN_ONCE("branch ops unimplemented\n"); + return GL_FALSE; + break; + default: + if (!pass0_translate_arith(nvs, prog, + ipos, fpos, parent)) + return GL_FALSE; + break; + } + + ipos++; + } + + return GL_TRUE; } GLboolean nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) { - nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); - struct gl_program *prog = (struct gl_program*)nvs; - struct gl_vertex_program *vp = (struct gl_vertex_program *)prog; - struct gl_fragment_program *fp = (struct gl_fragment_program *)prog; - struct pass0_rec *rec; - int ret; - - switch (prog->Target) { - case GL_VERTEX_PROGRAM_ARB: - nvs->func = &nmesa->VPfunc; - if (vp->IsPositionInvariant) - _mesa_insert_mvp_code(ctx, vp); + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + struct gl_program *prog = (struct gl_program*)nvs; + struct gl_vertex_program *vp = (struct gl_vertex_program *)prog; + struct gl_fragment_program *fp = (struct gl_fragment_program *)prog; + struct pass0_rec *rec; + int ret = GL_FALSE; + + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + nvs->func = &nmesa->VPfunc; + + if (vp->IsPositionInvariant) + _mesa_insert_mvp_code(ctx, vp); #if 0 - if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED) - pass0_insert_ff_clip_planes(); + if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED) + pass0_insert_ff_clip_planes(); #endif - break; - case GL_FRAGMENT_PROGRAM_ARB: - nvs->func = &nmesa->FPfunc; - if (fp->FogOption != GL_NONE) - _mesa_append_fog_code(ctx, fp); - break; - default: - fprintf(stderr, "Unknown program type %d", prog->Target); - return GL_FALSE; - } - - rec = calloc(1, sizeof(struct pass0_rec)); - rec->next_temp = prog->NumTemporaries; - nvs->pass_rec = rec; - - ret = pass0_translate_instructions(nvs); - if (!ret) { - /* DESTROY list */ - } - - free(nvs->pass_rec); - return ret; + break; + case GL_FRAGMENT_PROGRAM_ARB: + nvs->func = &nmesa->FPfunc; + + if (fp->FogOption != GL_NONE) + _mesa_append_fog_code(ctx, fp); + break; + default: + fprintf(stderr, "Unknown program type %d", prog->Target); + return GL_FALSE; + } + + rec = CALLOC_STRUCT(pass0_rec); + if (rec) { + rec->next_temp = prog->NumTemporaries; + nvs->pass_rec = rec; + + nvs->program_tree = (nvsFragmentHeader*) + pass0_create_subroutine(nvs, "program body"); + if (nvs->program_tree) { + ret = pass0_translate_instructions(nvs, + 0, 0, + nvs->program_tree); + /*XXX: if (!ret) DESTROY TREE!!! */ + } + FREE(rec); + } + + return ret; } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c index 0476b05f58..b9b87ccf91 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -38,6 +38,7 @@ #include "nouveau_context.h" #include "nouveau_shader.h" +#include "nouveau_msg.h" struct pass2_rec { /* Map nvsRegister temp ID onto hw temp ID */ @@ -166,11 +167,37 @@ pass2_assemble_instruction(nvsPtr nvs, nvsInstruction *inst, int last) return 1; } +static GLboolean +pass2_translate(nvsPtr nvs, nvsFragmentHeader *f) +{ + nvsFunc *shader = nvs->func; + GLboolean last; + + while (f) { + last = (f == ((nvsSubroutine*)nvs->program_tree)->insn_tail); + + switch (f->type) { + case NVS_INSTRUCTION: + if (!pass2_assemble_instruction(nvs, + (nvsInstruction *)f, + last)) + return GL_FALSE; + break; + default: + WARN_ONCE("Unimplemented fragment type\n"); + return GL_FALSE; + } + + f = f->next; + } + + return GL_TRUE; +} + /* Translate program into hardware format */ GLboolean nouveau_shader_pass2(nvsPtr nvs) { - nvsFragmentList *list = nvs->list_head; struct pass2_rec *rec; int i; @@ -182,21 +209,15 @@ nouveau_shader_pass2(nvsPtr nvs) /* Start off with allocating 4 uint32_t's for each inst, will be grown * if necessary.. */ - nvs->program_alloc_size = nvs->inst_count * 4; + nvs->program_alloc_size = nvs->mesa.vp.Base.NumInstructions * 4; nvs->program = calloc(nvs->program_alloc_size, sizeof(uint32_t)); nvs->program_size = 0; nvs->program_current = 0; - while (list) { - assert(list->fragment->type == NVS_INSTRUCTION); - - if (!pass2_assemble_instruction(nvs, (nvsInstruction *)list->fragment, list->next ? 0 : 1)) { - free(nvs->program); - nvs->program = NULL; - return GL_FALSE; - } - - list = list->next; + if (!pass2_translate(nvs, ((nvsSubroutine*)nvs->program_tree)->insn_head)) { + free(nvs->program); + nvs->program = NULL; + return GL_FALSE; } /* Shrink allocated memory to only what we need */ -- cgit v1.2.3 From 50227f6fd23f3a4737dada1a98d26f6d0141af47 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 21 Jan 2007 17:16:39 +1100 Subject: nouveau: reindent shader pass0/pass2 if this gets rejected by the commit list, just ignore it.. nothing interesting to see here :) --- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 779 ++++++++++++------------ src/mesa/drivers/dri/nouveau/nouveau_shader_2.c | 311 +++++----- 2 files changed, 566 insertions(+), 524 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 503eae36b7..d6ea42573a 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -188,7 +188,8 @@ pass0_create_subroutine(nouveauShader *nvs, const char *label) if (!nvs->program_tree) nvs->program_tree = &sub->header; else - pass0_append_fragment(nvs->program_tree, &sub->header, 0); + pass0_append_fragment(nvs->program_tree, + &sub->header, 0); } return sub; @@ -196,162 +197,168 @@ pass0_create_subroutine(nouveauShader *nvs, const char *label) static void pass0_make_reg(nouveauShader *nvs, nvsRegister *reg, - nvsRegFile file, unsigned int index) + nvsRegFile file, unsigned int index) { - struct pass0_rec *rec = nvs->pass_rec; - - /* defaults */ - *reg = nvr_unused; - /* -1 == quick-and-dirty temp alloc */ - if (file == NVS_FILE_TEMP && index == -1) { - index = rec->next_temp++; - assert(index < NVS_MAX_TEMPS); - } - reg->file = file; - reg->index = index; + struct pass0_rec *rec = nvs->pass_rec; + + /* defaults */ + *reg = nvr_unused; + /* -1 == quick-and-dirty temp alloc */ + if (file == NVS_FILE_TEMP && index == -1) { + index = rec->next_temp++; + assert(index < NVS_MAX_TEMPS); + } + reg->file = file; + reg->index = index; } static void pass0_make_swizzle(nvsSwzComp *swz, unsigned int mesa) { - int i; + int i; - for (i=0;i<4;i++) - swz[i] = _tx_mesa_swizzle[GET_SWZ(mesa, i)]; + for (i=0;i<4;i++) + swz[i] = _tx_mesa_swizzle[GET_SWZ(mesa, i)]; } static nvsOpcode pass0_make_opcode(enum prog_opcode op) { - if (op > MAX_OPCODE) - return NVS_OP_UNKNOWN; - return _tx_mesa_opcode[op]; + if (op > MAX_OPCODE) + return NVS_OP_UNKNOWN; + return _tx_mesa_opcode[op]; } static nvsCond pass0_make_condmask(GLuint mesa) { - if (mesa > COND_FL) - return NVS_COND_UNKNOWN; - return _tx_mesa_condmask[mesa]; + if (mesa > COND_FL) + return NVS_COND_UNKNOWN; + return _tx_mesa_condmask[mesa]; } static unsigned int pass0_make_mask(GLuint mesa_mask) { - unsigned int mask = 0; + unsigned int mask = 0; - if (mesa_mask & WRITEMASK_X) mask |= SMASK_X; - if (mesa_mask & WRITEMASK_Y) mask |= SMASK_Y; - if (mesa_mask & WRITEMASK_Z) mask |= SMASK_Z; - if (mesa_mask & WRITEMASK_W) mask |= SMASK_W; + if (mesa_mask & WRITEMASK_X) mask |= SMASK_X; + if (mesa_mask & WRITEMASK_Y) mask |= SMASK_Y; + if (mesa_mask & WRITEMASK_Z) mask |= SMASK_Z; + if (mesa_mask & WRITEMASK_W) mask |= SMASK_W; - return mask; + return mask; } static nvsTexTarget pass0_make_tex_target(GLuint mesa) { - switch (mesa) { - case TEXTURE_1D_INDEX: return NVS_TEX_TARGET_1D; - case TEXTURE_2D_INDEX: return NVS_TEX_TARGET_2D; - case TEXTURE_3D_INDEX: return NVS_TEX_TARGET_3D; - case TEXTURE_CUBE_INDEX: return NVS_TEX_TARGET_CUBE; - case TEXTURE_RECT_INDEX: return NVS_TEX_TARGET_RECT; - default: - return NVS_TEX_TARGET_UNKNOWN; - } + switch (mesa) { + case TEXTURE_1D_INDEX: return NVS_TEX_TARGET_1D; + case TEXTURE_2D_INDEX: return NVS_TEX_TARGET_2D; + case TEXTURE_3D_INDEX: return NVS_TEX_TARGET_3D; + case TEXTURE_CUBE_INDEX: return NVS_TEX_TARGET_CUBE; + case TEXTURE_RECT_INDEX: return NVS_TEX_TARGET_RECT; + default: + return NVS_TEX_TARGET_UNKNOWN; + } } static void pass0_make_dst_reg(nvsPtr nvs, nvsRegister *reg, struct prog_dst_register *dst) { - struct gl_program *mesa = (struct gl_program*)&nvs->mesa.vp; - nvsFixedReg sfr; - - switch (dst->File) { - case PROGRAM_OUTPUT: - if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { - sfr = (dst->Index < VERT_RESULT_MAX) ? - _tx_mesa_vp_dst_reg[dst->Index] : NVS_FR_UNKNOWN; - } else { - sfr = (dst->Index < FRAG_RESULT_MAX) ? - _tx_mesa_fp_dst_reg[dst->Index] : NVS_FR_UNKNOWN; - } - pass0_make_reg(nvs, reg, NVS_FILE_RESULT, sfr); - break; - case PROGRAM_TEMPORARY: - pass0_make_reg(nvs, reg, NVS_FILE_TEMP, dst->Index); - break; - case PROGRAM_ADDRESS: - pass0_make_reg(nvs, reg, NVS_FILE_ADDRESS, dst->Index); - break; - default: - fprintf(stderr, "Unknown dest file %d\n", dst->File); - assert(0); - } + struct gl_program *mesa = (struct gl_program*)&nvs->mesa.vp; + nvsFixedReg sfr; + + switch (dst->File) { + case PROGRAM_OUTPUT: + if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { + sfr = (dst->Index < VERT_RESULT_MAX) ? + _tx_mesa_vp_dst_reg[dst->Index] : + NVS_FR_UNKNOWN; + } else { + sfr = (dst->Index < FRAG_RESULT_MAX) ? + _tx_mesa_fp_dst_reg[dst->Index] : + NVS_FR_UNKNOWN; + } + pass0_make_reg(nvs, reg, NVS_FILE_RESULT, sfr); + break; + case PROGRAM_TEMPORARY: + pass0_make_reg(nvs, reg, NVS_FILE_TEMP, dst->Index); + break; + case PROGRAM_ADDRESS: + pass0_make_reg(nvs, reg, NVS_FILE_ADDRESS, dst->Index); + break; + default: + fprintf(stderr, "Unknown dest file %d\n", dst->File); + assert(0); + } } static void pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src) { - struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base; - struct gl_program_parameter_list *p = mesa->Parameters; - - *reg = nvr_unused; - - switch (src->File) { - case PROGRAM_INPUT: - reg->file = NVS_FILE_ATTRIB; - if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { - reg->index = (src->Index < VERT_ATTRIB_MAX) ? - _tx_mesa_vp_src_reg[src->Index] : NVS_FR_UNKNOWN; - } else { - reg->index = (src->Index < FRAG_ATTRIB_MAX) ? - _tx_mesa_fp_src_reg[src->Index] : NVS_FR_UNKNOWN; - } - break; - /* All const types seem to get shoved into here, not really sure why */ - case PROGRAM_STATE_VAR: - switch (p->Parameters[src->Index].Type) { - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - nvs->params[src->Index].source_val = NULL; - COPY_4V(nvs->params[src->Index].val, p->ParameterValues[src->Index]); - break; - case PROGRAM_STATE_VAR: - nvs->params[src->Index].source_val = p->ParameterValues[src->Index]; - break; - default: - fprintf(stderr, "Unknown parameter type %d\n", - p->Parameters[src->Index].Type); - assert(0); - break; - } - - if (src->RelAddr) { - reg->indexed = 1; - reg->addr_reg = 0; - reg->addr_comp = NVS_SWZ_X; - } else - reg->indexed = 0; - reg->file = NVS_FILE_CONST; - reg->index = src->Index; - break; - case PROGRAM_TEMPORARY: - reg->file = NVS_FILE_TEMP; - reg->index = src->Index; - break; - default: - fprintf(stderr, "Unknown source type %d\n", src->File); - assert(0); - } - - /* per-component negate handled elsewhere */ - reg->negate = src->NegateBase != 0; - reg->abs = src->Abs; - pass0_make_swizzle(reg->swizzle, src->Swizzle); + struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base; + struct gl_program_parameter_list *p = mesa->Parameters; + + *reg = nvr_unused; + + switch (src->File) { + case PROGRAM_INPUT: + reg->file = NVS_FILE_ATTRIB; + if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { + reg->index = (src->Index < VERT_ATTRIB_MAX) ? + _tx_mesa_vp_src_reg[src->Index] : + NVS_FR_UNKNOWN; + } else { + reg->index = (src->Index < FRAG_ATTRIB_MAX) ? + _tx_mesa_fp_src_reg[src->Index] : + NVS_FR_UNKNOWN; + } + break; + /* All const types seem to get shoved into here, not really sure why */ + case PROGRAM_STATE_VAR: + switch (p->Parameters[src->Index].Type) { + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + nvs->params[src->Index].source_val = NULL; + COPY_4V(nvs->params[src->Index].val, + p->ParameterValues[src->Index]); + break; + case PROGRAM_STATE_VAR: + nvs->params[src->Index].source_val = + p->ParameterValues[src->Index]; + break; + default: + fprintf(stderr, "Unknown parameter type %d\n", + p->Parameters[src->Index].Type); + assert(0); + break; + } + + if (src->RelAddr) { + reg->indexed = 1; + reg->addr_reg = 0; + reg->addr_comp = NVS_SWZ_X; + } else + reg->indexed = 0; + reg->file = NVS_FILE_CONST; + reg->index = src->Index; + break; + case PROGRAM_TEMPORARY: + reg->file = NVS_FILE_TEMP; + reg->index = src->Index; + break; + default: + fprintf(stderr, "Unknown source type %d\n", src->File); + assert(0); + } + + /* per-component negate handled elsewhere */ + reg->negate = src->NegateBase != 0; + reg->abs = src->Abs; + pass0_make_swizzle(reg->swizzle, src->Swizzle); } static nvsInstruction * @@ -389,59 +396,66 @@ pass0_emit(nouveauShader *nvs, nvsFragmentHeader *parent, int fpos, static void pass0_fixup_swizzle(nvsPtr nvs, nvsFragmentHeader *parent, int fpos, - struct prog_src_register *src, + struct prog_src_register *src, unsigned int sm1, unsigned int sm2) { - static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 }; - struct pass0_rec *rec = nvs->pass_rec; - int fixup_1, fixup_2; - nvsRegister sr, dr = nvr_unused; - nvsRegister sm1const, sm2const; - - if (!rec->swzconst_done) { - struct gl_program *prog = &nvs->mesa.vp.Base; - rec->swzconst_id = _mesa_add_unnamed_constant(prog->Parameters, sc, 4); - rec->swzconst_done = 1; - COPY_4V(nvs->params[rec->swzconst_id].val, sc); - } - - fixup_1 = (sm1 != MAKE_SWIZZLE4(0,0,0,0) && sm2 != MAKE_SWIZZLE4(2,2,2,2)); - fixup_2 = (sm2 != MAKE_SWIZZLE4(2,2,2,2)); - - if (src->File != PROGRAM_TEMPORARY && src->File != PROGRAM_INPUT) { - /* We can't use more than one const in an instruction, so move the const - * into a temp, and swizzle from there. - *TODO: should just emit the swizzled const, instead of swizzling it - * in the shader.. would need to reswizzle any state params when they - * change however.. - */ - pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); - pass0_make_src_reg(nvs, &sr, src); - pass0_emit(nvs, parent, fpos, NVS_OP_MOV, dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused); - pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index); - } else { - if (fixup_1) - src->NegateBase = 0; - pass0_make_src_reg(nvs, &sr, src); - pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); - } - - pass0_make_reg(nvs, &sm1const, NVS_FILE_CONST, rec->swzconst_id); - pass0_make_swizzle(sm1const.swizzle, sm1); - if (fixup_1 && fixup_2) { - /* Any combination with SWIZZLE_ONE */ - pass0_make_reg(nvs, &sm2const, NVS_FILE_CONST, rec->swzconst_id); - pass0_make_swizzle(sm2const.swizzle, sm2); - pass0_emit(nvs, parent, fpos, NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const); - } else { - /* SWIZZLE_ZERO || arbitrary negate */ - pass0_emit(nvs, parent, fpos, NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused); - } - - src->File = PROGRAM_TEMPORARY; - src->Index = dr.index; - src->Swizzle = SWIZZLE_NOOP; + static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 }; + struct pass0_rec *rec = nvs->pass_rec; + int fixup_1, fixup_2; + nvsRegister sr, dr = nvr_unused; + nvsRegister sm1const, sm2const; + + if (!rec->swzconst_done) { + struct gl_program *prog = &nvs->mesa.vp.Base; + rec->swzconst_id = _mesa_add_unnamed_constant(prog->Parameters, + sc, 4); + rec->swzconst_done = 1; + COPY_4V(nvs->params[rec->swzconst_id].val, sc); + } + + fixup_1 = (sm1 != MAKE_SWIZZLE4(0,0,0,0) && + sm2 != MAKE_SWIZZLE4(2,2,2,2)); + fixup_2 = (sm2 != MAKE_SWIZZLE4(2,2,2,2)); + + if (src->File != PROGRAM_TEMPORARY && src->File != PROGRAM_INPUT) { + /* We can't use more than one const in an instruction, + * so move the const into a temp, and swizzle from there. + * + * TODO: should just emit the swizzled const, instead of + * swizzling it in the shader.. would need to reswizzle + * any state params when they change however.. + */ + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + pass0_make_src_reg(nvs, &sr, src); + pass0_emit(nvs, parent, fpos, NVS_OP_MOV, + dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused); + pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index); + } else { + if (fixup_1) + src->NegateBase = 0; + pass0_make_src_reg(nvs, &sr, src); + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + } + + pass0_make_reg(nvs, &sm1const, NVS_FILE_CONST, rec->swzconst_id); + pass0_make_swizzle(sm1const.swizzle, sm1); + if (fixup_1 && fixup_2) { + /* Any combination with SWIZZLE_ONE */ + pass0_make_reg(nvs, &sm2const, + NVS_FILE_CONST, rec->swzconst_id); + pass0_make_swizzle(sm2const.swizzle, sm2); + pass0_emit(nvs, parent, fpos, NVS_OP_MAD, + dr, SMASK_ALL, 0, sr, sm1const, sm2const); + } else { + /* SWIZZLE_ZERO || arbitrary negate */ + pass0_emit(nvs, parent, fpos, NVS_OP_MUL, + dr, SMASK_ALL, 0, sr, sm1const, nvr_unused); + } + + src->File = PROGRAM_TEMPORARY; + src->Index = dr.index; + src->Swizzle = SWIZZLE_NOOP; } #define SET_SWZ(fs, cp, c) fs = (fs & ~(0x7<<(cp*3))) | (c<<(cp*3)) @@ -449,81 +463,86 @@ static void pass0_check_sources(nvsPtr nvs, nvsFragmentHeader *parent, int fpos, struct prog_instruction *inst) { - unsigned int insrc = -1, constsrc = -1; - int i; - - for (i=0;i<_mesa_num_inst_src_regs(inst->Opcode);i++) { - struct prog_src_register *src = &inst->SrcReg[i]; - unsigned int sm_1 = 0, sm_2 = 0; - nvsRegister sr, dr; - int do_mov = 0, c; - - /* Build up swizzle masks as if we were going to use - * "MAD new, src, const1, const2" to support arbitrary negation - * and SWIZZLE_ZERO/SWIZZLE_ONE. - */ - for (c=0;c<4;c++) { - if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ZERO) { - SET_SWZ(sm_1, c, SWIZZLE_Y); /* 0.0 */ - SET_SWZ(sm_2, c, SWIZZLE_Y); - SET_SWZ(src->Swizzle, c, SWIZZLE_X); - } else if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ONE) { - SET_SWZ(sm_1, c, SWIZZLE_Y); - if (src->NegateBase & (1<Swizzle, c, SWIZZLE_X); - } else { - if (src->NegateBase & (1<File) { - case PROGRAM_INPUT: - if (insrc != -1 && insrc != src->Index) - do_mov = 1; - else insrc = src->Index; - break; - case PROGRAM_STATE_VAR: - if (constsrc != -1 && constsrc != src->Index) - do_mov = 1; - else constsrc = src->Index; - break; - default: - break; - } - - /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa instruction - * to point at the temp. - */ - if (do_mov) { - pass0_make_src_reg(nvs, &sr, src); - pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); - pass0_emit(nvs, parent, fpos, NVS_OP_MOV, dr, SMASK_ALL, 0, - sr, nvr_unused, nvr_unused); - - src->File = PROGRAM_TEMPORARY; - src->Index = dr.index; - src->Swizzle= SWIZZLE_NOOP; - } - } + unsigned int insrc = -1, constsrc = -1; + int i; + + for (i=0;i<_mesa_num_inst_src_regs(inst->Opcode);i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + unsigned int sm_1 = 0, sm_2 = 0; + nvsRegister sr, dr; + int do_mov = 0, c; + + /* Build up swizzle masks as if we were going to use + * "MAD new, src, const1, const2" to support arbitrary negation + * and SWIZZLE_ZERO/SWIZZLE_ONE. + */ + for (c=0;c<4;c++) { + if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ZERO) { + SET_SWZ(sm_1, c, SWIZZLE_Y); /* 0.0 */ + SET_SWZ(sm_2, c, SWIZZLE_Y); + SET_SWZ(src->Swizzle, c, SWIZZLE_X); + } else if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ONE) { + SET_SWZ(sm_1, c, SWIZZLE_Y); + if (src->NegateBase & (1<Swizzle, c, SWIZZLE_X); + } else { + if (src->NegateBase & (1<File) { + case PROGRAM_INPUT: + if (insrc != -1 && insrc != src->Index) + do_mov = 1; + else insrc = src->Index; + break; + case PROGRAM_STATE_VAR: + if (constsrc != -1 && constsrc != src->Index) + do_mov = 1; + else constsrc = src->Index; + break; + default: + break; + } + + /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa + * instruction to point at the temp. + */ + if (do_mov) { + pass0_make_src_reg(nvs, &sr, src); + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + pass0_emit(nvs, parent, fpos, NVS_OP_MOV, + dr, SMASK_ALL, 0, + sr, nvr_unused, nvr_unused); + + src->File = PROGRAM_TEMPORARY; + src->Index = dr.index; + src->Swizzle= SWIZZLE_NOOP; + } + } } static GLboolean @@ -531,138 +550,150 @@ pass0_emulate_instruction(nouveauShader *nvs, nvsFragmentHeader *parent, int fpos, struct prog_instruction *inst) { - nvsFunc *shader = nvs->func; - nvsRegister src[3], dest, temp; - nvsInstruction *nvsinst; - struct pass0_rec *rec = nvs->pass_rec; - unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask); - int i, sat; - - sat = (inst->SaturateMode == SATURATE_ZERO_ONE); - - /* Build all the "real" regs for the instruction */ - for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) - pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); - if (inst->Opcode != OPCODE_KIL) - pass0_make_dst_reg(nvs, &dest, &inst->DstReg); - - switch (inst->Opcode) { - case OPCODE_ABS: - if (shader->caps & SCAP_SRC_ABS) - pass0_emit(nvs, parent, fpos, NVS_OP_MOV, dest, mask, sat, - nvsAbs(src[0]), nvr_unused, nvr_unused); - else - pass0_emit(nvs, parent, fpos, NVS_OP_MAX, dest, mask, sat, - src[0], nvsNegate(src[0]), nvr_unused); - break; - case OPCODE_KIL: - /* This is only in ARB shaders, so we don't have to worry - * about clobbering a CC reg as they aren't supported anyway. - */ - /* MOVC0 temp, src */ - pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - nvsinst = pass0_emit(nvs, parent, fpos, NVS_OP_MOV, temp, SMASK_ALL, 0, - src[0], nvr_unused, nvr_unused); - nvsinst->cond_update = 1; - nvsinst->cond_reg = 0; - /* KIL_NV (LT0.xyzw) temp */ - nvsinst = pass0_emit(nvs, parent, fpos, NVS_OP_KIL, nvr_unused, 0, 0, - nvr_unused, nvr_unused, nvr_unused); - nvsinst->cond = COND_LT; - nvsinst->cond_reg = 0; - nvsinst->cond_test = 1; - pass0_make_swizzle(nvsinst->cond_swizzle, MAKE_SWIZZLE4(0,1,2,3)); - break; - case OPCODE_LIT: - break; - case OPCODE_LRP: - pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, parent, fpos, NVS_OP_MAD, temp, mask, 0, - nvsNegate(src[0]), src[2], src[2]); - pass0_emit(nvs, parent, fpos, NVS_OP_MAD, dest, mask, sat, - src[0], src[1], temp); - break; - case OPCODE_POW: - if (shader->SupportsOpcode(shader, NVS_OP_LG2) && - shader->SupportsOpcode(shader, NVS_OP_EX2)) { - pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - /* LG2 temp.x, src0.c */ - pass0_emit(nvs, parent, fpos, NVS_OP_LG2, temp, SMASK_X, 0, - nvsSwizzle(src[0], X, X, X, X), - nvr_unused, - nvr_unused); - /* MUL temp.x, temp.x, src1.c */ - pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_X, 0, - nvsSwizzle(temp, X, X, X, X), - nvsSwizzle(src[1], X, X, X, X), - nvr_unused); - /* EX2 dest, temp.x */ - pass0_emit(nvs, parent, fpos, NVS_OP_EX2, dest, mask, sat, - nvsSwizzle(temp, X, X, X, X), - nvr_unused, - nvr_unused); - } else { - /* can we use EXP/LOG instead of EX2/LG2?? */ - fprintf(stderr, "Implement POW for NV20 vtxprog!\n"); - return GL_FALSE; - } - break; - case OPCODE_RSQ: - if (rec->const_half.file != NVS_FILE_CONST) { - GLfloat const_half[4] = { 0.5, 0.0, 0.0, 0.0 }; - pass0_make_reg(nvs, &rec->const_half, NVS_FILE_CONST, - _mesa_add_unnamed_constant(nvs->mesa.vp.Base.Parameters, - const_half, 4)); - COPY_4V(nvs->params[rec->const_half.index].val, const_half); - } - pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, parent, fpos, NVS_OP_LG2, temp, SMASK_X, 0, - nvsAbs(nvsSwizzle(src[0], X, X, X, X)), - nvr_unused, - nvr_unused); - pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_X, 0, - nvsSwizzle(temp, X, X, X, X), - nvsNegate(rec->const_half), - nvr_unused); - pass0_emit(nvs, parent, fpos, NVS_OP_EX2, dest, mask, sat, - nvsSwizzle(temp, X, X, X, X), - nvr_unused, - nvr_unused); - break; - case OPCODE_SCS: - if (mask & SMASK_X) - pass0_emit(nvs, parent, fpos, NVS_OP_COS, dest, SMASK_X, sat, - nvsSwizzle(src[0], X, X, X, X), - nvr_unused, - nvr_unused); - if (mask & SMASK_Y) - pass0_emit(nvs, parent, fpos, NVS_OP_SIN, dest, SMASK_Y, sat, - nvsSwizzle(src[0], X, X, X, X), - nvr_unused, - nvr_unused); - break; - case OPCODE_SUB: - pass0_emit(nvs, parent, fpos, NVS_OP_ADD, dest, mask, sat, - src[0], nvsNegate(src[1]), nvr_unused); - break; - case OPCODE_XPD: - pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_ALL, 0, - nvsSwizzle(src[0], Z, X, Y, Y), - nvsSwizzle(src[1], Y, Z, X, X), - nvr_unused); - pass0_emit(nvs, parent, fpos, NVS_OP_MAD, dest, (mask & ~SMASK_W), sat, - nvsSwizzle(src[0], Y, Z, X, X), - nvsSwizzle(src[1], Z, X, Y, Y), - nvsNegate(temp)); - break; - default: - fprintf(stderr, "hw doesn't support opcode \"%s\", and no emulation found\n", - _mesa_opcode_string(inst->Opcode)); - return GL_FALSE; - } - - return GL_TRUE; + nvsFunc *shader = nvs->func; + nvsRegister src[3], dest, temp; + nvsInstruction *nvsinst; + struct pass0_rec *rec = nvs->pass_rec; + unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask); + int i, sat; + + sat = (inst->SaturateMode == SATURATE_ZERO_ONE); + + /* Build all the "real" regs for the instruction */ + for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) + pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); + if (inst->Opcode != OPCODE_KIL) + pass0_make_dst_reg(nvs, &dest, &inst->DstReg); + + switch (inst->Opcode) { + case OPCODE_ABS: + if (shader->caps & SCAP_SRC_ABS) + pass0_emit(nvs, parent, fpos, NVS_OP_MOV, + dest, mask, sat, + nvsAbs(src[0]), nvr_unused, nvr_unused); + else + pass0_emit(nvs, parent, fpos, NVS_OP_MAX, + dest, mask, sat, + src[0], nvsNegate(src[0]), nvr_unused); + break; + case OPCODE_KIL: + /* This is only in ARB shaders, so we don't have to worry + * about clobbering a CC reg as they aren't supported anyway. + */ + /* MOVC0 temp, src */ + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + nvsinst = pass0_emit(nvs, parent, fpos, NVS_OP_MOV, + temp, SMASK_ALL, 0, + src[0], nvr_unused, nvr_unused); + nvsinst->cond_update = 1; + nvsinst->cond_reg = 0; + /* KIL_NV (LT0.xyzw) temp */ + nvsinst = pass0_emit(nvs, parent, fpos, NVS_OP_KIL, + nvr_unused, 0, 0, + nvr_unused, nvr_unused, nvr_unused); + nvsinst->cond = COND_LT; + nvsinst->cond_reg = 0; + nvsinst->cond_test = 1; + pass0_make_swizzle(nvsinst->cond_swizzle, + MAKE_SWIZZLE4(0,1,2,3)); + break; + case OPCODE_LRP: + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, parent, fpos, NVS_OP_MAD, temp, mask, 0, + nvsNegate(src[0]), src[2], src[2]); + pass0_emit(nvs, parent, fpos, NVS_OP_MAD, dest, mask, sat, + src[0], src[1], temp); + break; + case OPCODE_POW: + if (shader->SupportsOpcode(shader, NVS_OP_LG2) && + shader->SupportsOpcode(shader, NVS_OP_EX2)) { + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + /* LG2 temp.x, src0.c */ + pass0_emit(nvs, parent, fpos, NVS_OP_LG2, + temp, SMASK_X, 0, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + /* MUL temp.x, temp.x, src1.c */ + pass0_emit(nvs, parent, fpos, NVS_OP_MUL, + temp, SMASK_X, 0, + nvsSwizzle(temp, X, X, X, X), + nvsSwizzle(src[1], X, X, X, X), + nvr_unused); + /* EX2 dest, temp.x */ + pass0_emit(nvs, parent, fpos, NVS_OP_EX2, + dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), + nvr_unused, + nvr_unused); + } else { + /* can we use EXP/LOG instead of EX2/LG2?? */ + fprintf(stderr, "Implement POW for NV20 vtxprog!\n"); + return GL_FALSE; + } + break; + case OPCODE_RSQ: + if (rec->const_half.file != NVS_FILE_CONST) { + GLfloat const_half[4] = { 0.5, 0.0, 0.0, 0.0 }; + pass0_make_reg(nvs, &rec->const_half, NVS_FILE_CONST, + _mesa_add_unnamed_constant( + nvs->mesa.vp.Base.Parameters, + const_half, 4)); + COPY_4V(nvs->params[rec->const_half.index].val, + const_half); + } + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, parent, fpos, NVS_OP_LG2, temp, SMASK_X, 0, + nvsAbs(nvsSwizzle(src[0], X, X, X, X)), + nvr_unused, + nvr_unused); + pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_X, 0, + nvsSwizzle(temp, X, X, X, X), + nvsNegate(rec->const_half), + nvr_unused); + pass0_emit(nvs, parent, fpos, NVS_OP_EX2, dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), + nvr_unused, + nvr_unused); + break; + case OPCODE_SCS: + if (mask & SMASK_X) + pass0_emit(nvs, parent, fpos, NVS_OP_COS, + dest, SMASK_X, sat, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + if (mask & SMASK_Y) + pass0_emit(nvs, parent, fpos, NVS_OP_SIN, + dest, SMASK_Y, sat, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + break; + case OPCODE_SUB: + pass0_emit(nvs, parent, fpos, NVS_OP_ADD, dest, mask, sat, + src[0], nvsNegate(src[1]), nvr_unused); + break; + case OPCODE_XPD: + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_ALL, 0, + nvsSwizzle(src[0], Z, X, Y, Y), + nvsSwizzle(src[1], Y, Z, X, X), + nvr_unused); + pass0_emit(nvs, parent, fpos, NVS_OP_MAD, + dest, (mask & ~SMASK_W), sat, + nvsSwizzle(src[0], Y, Z, X, X), + nvsSwizzle(src[1], Z, X, Y, Y), + nvsNegate(temp)); + break; + default: + WARN_ONCE("hw doesn't support opcode \"%s\"," + "and no emulation found\n", + _mesa_opcode_string(inst->Opcode)); + return GL_FALSE; + } + + return GL_TRUE; } static GLboolean diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c index b9b87ccf91..6fb36c1daf 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -41,130 +41,138 @@ #include "nouveau_msg.h" struct pass2_rec { - /* Map nvsRegister temp ID onto hw temp ID */ - unsigned int temps[NVS_MAX_TEMPS]; - /* Track free hw registers */ - unsigned int hw_temps[NVS_MAX_TEMPS]; + /* Map nvsRegister temp ID onto hw temp ID */ + unsigned int temps[NVS_MAX_TEMPS]; + /* Track free hw registers */ + unsigned int hw_temps[NVS_MAX_TEMPS]; }; static int pass2_alloc_hw_temp(nvsPtr nvs) { - struct pass2_rec *rec = nvs->pass_rec; - int i; - - for (i=0; ifunc->MaxTemp; i++) { - /* This is a *horrible* hack.. R0 is both temp0 and result.color - * in NV30/40 fragprogs, we can use R0 as a temp before result is - * written however.. - */ - if (nvs->mesa.vp.Base.Target == GL_FRAGMENT_PROGRAM_ARB && i==0) - continue; - - if (rec->hw_temps[i] == 0) { - rec->hw_temps[i] = 1; - return i; - } - } - return -1; + struct pass2_rec *rec = nvs->pass_rec; + int i; + + for (i=0; ifunc->MaxTemp; i++) { + /* This is a *horrible* hack.. R0 is both temp0 and result.color + * in NV30/40 fragprogs, we can use R0 as a temp before result + * is written however.. + */ + if (nvs->mesa.vp.Base.Target == GL_FRAGMENT_PROGRAM_ARB && i==0) + continue; + if (rec->hw_temps[i] == 0) { + rec->hw_temps[i] = 1; + return i; + } + } + + return -1; } static nvsRegister pass2_mangle_reg(nvsPtr nvs, nvsInstruction *inst, nvsRegister reg) { - struct pass2_rec *rec = nvs->pass_rec; + struct pass2_rec *rec = nvs->pass_rec; - if (reg.file == NVS_FILE_TEMP) { - if (rec->temps[reg.index] == -1) - rec->temps[reg.index] = pass2_alloc_hw_temp(nvs); - reg.index = rec->temps[reg.index]; - } + if (reg.file == NVS_FILE_TEMP) { + if (rec->temps[reg.index] == -1) + rec->temps[reg.index] = pass2_alloc_hw_temp(nvs); + reg.index = rec->temps[reg.index]; + } - return reg; + return reg; } static void pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, - struct _op_xlat *op, int slot) + struct _op_xlat *op, int slot) { - nvsSwzComp default_swz[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W }; - nvsFunc *shader = nvs->func; - nvsRegister reg; - int i; - - shader->SetOpcode(shader, op->NV, slot); - if (inst->saturate ) shader->SetSaturate(shader); - if (inst->cond_update) shader->SetCCUpdate(shader); - if (inst->cond_test ) shader->SetCondition(shader, 1, inst->cond, - inst->cond_reg, - inst->cond_swizzle); - else shader->SetCondition(shader, 0, NVS_COND_TR, - 0, - default_swz); - switch (inst->op) { - case NVS_OP_TEX: - case NVS_OP_TXB: - case NVS_OP_TXL: - case NVS_OP_TXP: - case NVS_OP_TXD: - shader->SetTexImageUnit(shader, inst->tex_unit); - break; - default: - break; - } - - for (i = 0; i < 3; i++) { - if (op->srcpos[i] != -1) { - reg = pass2_mangle_reg(nvs, inst, inst->src[i]); - if (reg.file == NVS_FILE_ATTRIB) - nvs->inputs_read |= (1 << reg.index); - shader->SetSource(shader, ®, op->srcpos[i]); - if (reg.file == NVS_FILE_CONST && shader->GetSourceConstVal) { - int idx_slot = nvs->params[reg.index].hw_index_cnt++; - nvs->params[reg.index].hw_index = realloc( - nvs->params[reg.index].hw_index, sizeof(int) * idx_slot+1); - nvs->params[reg.index].hw_index[idx_slot] = nvs->program_current + 4; - } - } - } - - reg = pass2_mangle_reg(nvs, inst, inst->dest); - if (reg.file == NVS_FILE_RESULT) - nvs->outputs_written |= (1 << reg.index); - shader->SetResult(shader, ®, inst->mask, slot); + nvsSwzComp default_swz[4] = { NVS_SWZ_X, NVS_SWZ_Y, + NVS_SWZ_Z, NVS_SWZ_W }; + nvsFunc *shader = nvs->func; + nvsRegister reg; + int i; + + shader->SetOpcode(shader, op->NV, slot); + if (inst->saturate ) shader->SetSaturate(shader); + if (inst->cond_update ) shader->SetCCUpdate(shader); + if (inst->cond_test ) shader->SetCondition(shader, 1, inst->cond, + inst->cond_reg, + inst->cond_swizzle); + else shader->SetCondition(shader, 0, NVS_COND_TR, + 0, + default_swz); + switch (inst->op) { + case NVS_OP_TEX: + case NVS_OP_TXB: + case NVS_OP_TXL: + case NVS_OP_TXP: + case NVS_OP_TXD: + shader->SetTexImageUnit(shader, inst->tex_unit); + break; + default: + break; + } + + for (i = 0; i < 3; i++) { + if (op->srcpos[i] != -1) { + reg = pass2_mangle_reg(nvs, inst, inst->src[i]); + + if (reg.file == NVS_FILE_ATTRIB) + nvs->inputs_read |= (1 << reg.index); + shader->SetSource(shader, ®, op->srcpos[i]); + + if (reg.file == NVS_FILE_CONST && + shader->GetSourceConstVal) { + int idx_slot = + nvs->params[reg.index].hw_index_cnt++; + nvs->params[reg.index].hw_index = realloc( + nvs->params[reg.index].hw_index, + sizeof(int) * idx_slot+1); + nvs->params[reg.index].hw_index[idx_slot] = + nvs->program_current + 4; + } + } + } + + reg = pass2_mangle_reg(nvs, inst, inst->dest); + if (reg.file == NVS_FILE_RESULT) + nvs->outputs_written |= (1 << reg.index); + shader->SetResult(shader, ®, inst->mask, slot); } static int pass2_assemble_instruction(nvsPtr nvs, nvsInstruction *inst, int last) { - nvsFunc *shader = nvs->func; - struct _op_xlat *op; - unsigned int hw_inst[8]; - int slot; - int instsz; - int i; - - shader->inst = hw_inst; - - /* Assemble this instruction */ - if (!(op = shader->GetOPTXFromSOP(inst->op, &slot))) - return 0; - shader->InitInstruction(shader); - pass2_add_instruction(nvs, inst, op, slot); - if (last) - shader->SetLastInst(shader); - - instsz = shader->GetOffsetNext(nvs->func); - if (nvs->program_size + instsz >= nvs->program_alloc_size) { - nvs->program_alloc_size *= 2; - nvs->program = realloc(nvs->program, - nvs->program_alloc_size * sizeof(uint32_t)); - } - - for (i=0; iprogram[nvs->program_current++] = hw_inst[i]; - nvs->program_size = nvs->program_current; - return 1; + nvsFunc *shader = nvs->func; + struct _op_xlat *op; + unsigned int hw_inst[8]; + int slot; + int instsz; + int i; + + shader->inst = hw_inst; + + /* Assemble this instruction */ + if (!(op = shader->GetOPTXFromSOP(inst->op, &slot))) + return 0; + shader->InitInstruction(shader); + pass2_add_instruction(nvs, inst, op, slot); + if (last) + shader->SetLastInst(shader); + + instsz = shader->GetOffsetNext(nvs->func); + if (nvs->program_size + instsz >= nvs->program_alloc_size) { + nvs->program_alloc_size *= 2; + nvs->program = realloc(nvs->program, + nvs->program_alloc_size * + sizeof(uint32_t)); + } + + for (i=0; iprogram[nvs->program_current++] = hw_inst[i]; + nvs->program_size = nvs->program_current; + return 1; } static GLboolean @@ -198,53 +206,56 @@ pass2_translate(nvsPtr nvs, nvsFragmentHeader *f) GLboolean nouveau_shader_pass2(nvsPtr nvs) { - struct pass2_rec *rec; - int i; - - rec = calloc(1, sizeof(struct pass2_rec)); - for (i=0; itemps[i] = -1; - nvs->pass_rec = rec; - - /* Start off with allocating 4 uint32_t's for each inst, will be grown - * if necessary.. - */ - nvs->program_alloc_size = nvs->mesa.vp.Base.NumInstructions * 4; - nvs->program = calloc(nvs->program_alloc_size, sizeof(uint32_t)); - nvs->program_size = 0; - nvs->program_current = 0; - - if (!pass2_translate(nvs, ((nvsSubroutine*)nvs->program_tree)->insn_head)) { - free(nvs->program); - nvs->program = NULL; - return GL_FALSE; - } - - /* Shrink allocated memory to only what we need */ - nvs->program = realloc(nvs->program, nvs->program_size * sizeof(uint32_t)); - nvs->program_alloc_size = nvs->program_size; - - nvs->translated = 1; - nvs->on_hardware = 0; - - if (NOUVEAU_DEBUG & DEBUG_SHADERS) { - fflush(stdout); fflush(stderr); - fprintf(stderr, "----------------MESA PROGRAM target=%s, id=0x%x\n", - _mesa_lookup_enum_by_nr(nvs->mesa.vp.Base.Target), - nvs->mesa.vp.Base.Id); - fflush(stdout); fflush(stderr); - _mesa_print_program(&nvs->mesa.vp.Base); - fflush(stdout); fflush(stderr); - fprintf(stderr, "^^^^^^^^^^^^^^^^MESA PROGRAM\n"); - fflush(stdout); fflush(stderr); - fprintf(stderr, "----------------NV PROGRAM\n"); - fflush(stdout); fflush(stderr); - nvsDisasmHWShader(nvs); - fflush(stdout); fflush(stderr); - fprintf(stderr, "^^^^^^^^^^^^^^^^NV PROGRAM\n"); - fflush(stdout); fflush(stderr); - } - - return GL_TRUE; + struct pass2_rec *rec; + int i; + + rec = calloc(1, sizeof(struct pass2_rec)); + for (i=0; itemps[i] = -1; + nvs->pass_rec = rec; + + /* Start off with allocating 4 uint32_t's for each inst, will be grown + * if necessary.. + */ + nvs->program_alloc_size = nvs->mesa.vp.Base.NumInstructions * 4; + nvs->program = calloc(nvs->program_alloc_size, sizeof(uint32_t)); + nvs->program_size = 0; + nvs->program_current = 0; + + if (!pass2_translate(nvs, + ((nvsSubroutine*)nvs->program_tree)->insn_head)) { + free(nvs->program); + nvs->program = NULL; + return GL_FALSE; + } + + /* Shrink allocated memory to only what we need */ + nvs->program = realloc(nvs->program, + nvs->program_size * sizeof(uint32_t)); + nvs->program_alloc_size = nvs->program_size; + + nvs->translated = 1; + nvs->on_hardware = 0; + + if (NOUVEAU_DEBUG & DEBUG_SHADERS) { + fflush(stdout); fflush(stderr); + fprintf(stderr, "-----------MESA PROGRAM target=%s, id=0x%x\n", + _mesa_lookup_enum_by_nr( + nvs->mesa.vp.Base.Target), + nvs->mesa.vp.Base.Id); + fflush(stdout); fflush(stderr); + _mesa_print_program(&nvs->mesa.vp.Base); + fflush(stdout); fflush(stderr); + fprintf(stderr, "^^^^^^^^^^^^^^^^MESA PROGRAM\n"); + fflush(stdout); fflush(stderr); + fprintf(stderr, "----------------NV PROGRAM\n"); + fflush(stdout); fflush(stderr); + nvsDisasmHWShader(nvs); + fflush(stdout); fflush(stderr); + fprintf(stderr, "^^^^^^^^^^^^^^^^NV PROGRAM\n"); + fflush(stdout); fflush(stderr); + } + + return GL_TRUE; } -- cgit v1.2.3 From a8b9d13f745405e370353cfb4aca680314a42d46 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 23 Jan 2007 13:36:45 +1100 Subject: nouveau: fill in condition info for instructions --- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 153 ++++++++++++------------ 1 file changed, 76 insertions(+), 77 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index d6ea42573a..5845d4f63a 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -117,7 +117,8 @@ static nvsOpcode _tx_mesa_opcode[] = { }; static nvsCond _tx_mesa_condmask[] = { - NVS_COND_UNKNOWN, NVS_COND_GT, NVS_COND_LT, NVS_COND_UN, NVS_COND_GE, + NVS_COND_TR, /* workaround mesa not filling a valid value */ + NVS_COND_GT, NVS_COND_LT, NVS_COND_UN, NVS_COND_GE, NVS_COND_LE, NVS_COND_NE, NVS_COND_NE, NVS_COND_TR, NVS_COND_FL }; @@ -134,6 +135,26 @@ struct pass0_rec { #define Z NVS_SWZ_Z #define W NVS_SWZ_W +#define FILL_CONDITION_FLAGS(fragment) do { \ + (fragment)->cond = \ + pass0_make_condmask(inst->DstReg.CondMask); \ + if ((fragment)->cond != NVS_COND_TR) \ + (fragment)->cond_test = 1; \ + (fragment)->cond_reg = inst->CondDst; \ + pass0_make_swizzle((fragment)->cond_swizzle, inst->DstReg.CondSwizzle);\ +} while(0) + +#define ARITH(op,dest,mask,sat,s0,s1,s2) do { \ + nvsinst = pass0_emit(nvs, parent, fpos, (op), \ + (dest), (mask), (sat), (s0), (s1), (s2));\ + FILL_CONDITION_FLAGS(nvsinst); \ +} while(0) + +#define ARITHu(op,dest,mask,sat,s0,s1,s2) do { \ + nvsinst = pass0_emit(nvs, parent, fpos, (op), \ + (dest), (mask), (sat), (s0), (s1), (s2));\ +} while(0) + static void pass0_append_fragment(nvsFragmentHeader *parent, nvsFragmentHeader *fragment, @@ -403,6 +424,7 @@ pass0_fixup_swizzle(nvsPtr nvs, nvsFragmentHeader *parent, int fpos, static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 }; struct pass0_rec *rec = nvs->pass_rec; int fixup_1, fixup_2; + nvsInstruction *nvsinst; nvsRegister sr, dr = nvr_unused; nvsRegister sm1const, sm2const; @@ -428,8 +450,8 @@ pass0_fixup_swizzle(nvsPtr nvs, nvsFragmentHeader *parent, int fpos, */ pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); pass0_make_src_reg(nvs, &sr, src); - pass0_emit(nvs, parent, fpos, NVS_OP_MOV, - dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused); + ARITHu(NVS_OP_MOV, dr, SMASK_ALL, 0, + sr, nvr_unused, nvr_unused); pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index); } else { if (fixup_1) @@ -445,12 +467,10 @@ pass0_fixup_swizzle(nvsPtr nvs, nvsFragmentHeader *parent, int fpos, pass0_make_reg(nvs, &sm2const, NVS_FILE_CONST, rec->swzconst_id); pass0_make_swizzle(sm2const.swizzle, sm2); - pass0_emit(nvs, parent, fpos, NVS_OP_MAD, - dr, SMASK_ALL, 0, sr, sm1const, sm2const); + ARITHu(NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const); } else { /* SWIZZLE_ZERO || arbitrary negate */ - pass0_emit(nvs, parent, fpos, NVS_OP_MUL, - dr, SMASK_ALL, 0, sr, sm1const, nvr_unused); + ARITHu(NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused); } src->File = PROGRAM_TEMPORARY; @@ -568,64 +588,54 @@ pass0_emulate_instruction(nouveauShader *nvs, switch (inst->Opcode) { case OPCODE_ABS: if (shader->caps & SCAP_SRC_ABS) - pass0_emit(nvs, parent, fpos, NVS_OP_MOV, - dest, mask, sat, + ARITH(NVS_OP_MOV, dest, mask, sat, nvsAbs(src[0]), nvr_unused, nvr_unused); else - pass0_emit(nvs, parent, fpos, NVS_OP_MAX, - dest, mask, sat, + ARITH(NVS_OP_MAX, dest, mask, sat, src[0], nvsNegate(src[0]), nvr_unused); break; case OPCODE_KIL: /* This is only in ARB shaders, so we don't have to worry * about clobbering a CC reg as they aren't supported anyway. + *XXX: might have to worry with GLSL however... */ /* MOVC0 temp, src */ pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - nvsinst = pass0_emit(nvs, parent, fpos, NVS_OP_MOV, - temp, SMASK_ALL, 0, - src[0], nvr_unused, nvr_unused); + ARITHu(NVS_OP_MOV, temp, SMASK_ALL, 0, + src[0], nvr_unused, nvr_unused); nvsinst->cond_update = 1; nvsinst->cond_reg = 0; /* KIL_NV (LT0.xyzw) temp */ - nvsinst = pass0_emit(nvs, parent, fpos, NVS_OP_KIL, - nvr_unused, 0, 0, - nvr_unused, nvr_unused, nvr_unused); + ARITHu(NVS_OP_KIL, nvr_unused, 0, 0, + nvr_unused, nvr_unused, nvr_unused); nvsinst->cond = COND_LT; nvsinst->cond_reg = 0; nvsinst->cond_test = 1; - pass0_make_swizzle(nvsinst->cond_swizzle, - MAKE_SWIZZLE4(0,1,2,3)); + pass0_make_swizzle(nvsinst->cond_swizzle, SWIZZLE_NOOP); break; case OPCODE_LRP: pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, parent, fpos, NVS_OP_MAD, temp, mask, 0, - nvsNegate(src[0]), src[2], src[2]); - pass0_emit(nvs, parent, fpos, NVS_OP_MAD, dest, mask, sat, - src[0], src[1], temp); + ARITHu(NVS_OP_MAD, temp, mask, 0, + nvsNegate(src[0]), src[2], src[2]); + ARITH (NVS_OP_MAD, dest, mask, sat, src[0], src[1], temp); break; case OPCODE_POW: if (shader->SupportsOpcode(shader, NVS_OP_LG2) && shader->SupportsOpcode(shader, NVS_OP_EX2)) { pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); /* LG2 temp.x, src0.c */ - pass0_emit(nvs, parent, fpos, NVS_OP_LG2, - temp, SMASK_X, 0, - nvsSwizzle(src[0], X, X, X, X), - nvr_unused, - nvr_unused); + ARITHu(NVS_OP_LG2, temp, SMASK_X, 0, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, nvr_unused); /* MUL temp.x, temp.x, src1.c */ - pass0_emit(nvs, parent, fpos, NVS_OP_MUL, - temp, SMASK_X, 0, - nvsSwizzle(temp, X, X, X, X), - nvsSwizzle(src[1], X, X, X, X), - nvr_unused); + ARITHu(NVS_OP_MUL, temp, SMASK_X, 0, + nvsSwizzle(temp, X, X, X, X), + nvsSwizzle(src[1], X, X, X, X), + nvr_unused); /* EX2 dest, temp.x */ - pass0_emit(nvs, parent, fpos, NVS_OP_EX2, - dest, mask, sat, - nvsSwizzle(temp, X, X, X, X), - nvr_unused, - nvr_unused); + ARITH (NVS_OP_EX2, dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), + nvr_unused, nvr_unused); } else { /* can we use EXP/LOG instead of EX2/LG2?? */ fprintf(stderr, "Implement POW for NV20 vtxprog!\n"); @@ -643,48 +653,41 @@ pass0_emulate_instruction(nouveauShader *nvs, const_half); } pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, parent, fpos, NVS_OP_LG2, temp, SMASK_X, 0, - nvsAbs(nvsSwizzle(src[0], X, X, X, X)), - nvr_unused, - nvr_unused); - pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_X, 0, - nvsSwizzle(temp, X, X, X, X), - nvsNegate(rec->const_half), - nvr_unused); - pass0_emit(nvs, parent, fpos, NVS_OP_EX2, dest, mask, sat, - nvsSwizzle(temp, X, X, X, X), - nvr_unused, - nvr_unused); + ARITHu(NVS_OP_LG2, temp, SMASK_X, 0, + nvsAbs(nvsSwizzle(src[0], X, X, X, X)), + nvr_unused, nvr_unused); + ARITHu(NVS_OP_MUL, temp, SMASK_X, 0, + nvsSwizzle(temp, X, X, X, X), + nvsNegate(rec->const_half), + nvr_unused); + ARITH (NVS_OP_EX2, dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), + nvr_unused, nvr_unused); break; case OPCODE_SCS: if (mask & SMASK_X) - pass0_emit(nvs, parent, fpos, NVS_OP_COS, - dest, SMASK_X, sat, + ARITH(NVS_OP_COS, dest, SMASK_X, sat, nvsSwizzle(src[0], X, X, X, X), - nvr_unused, - nvr_unused); + nvr_unused, nvr_unused); if (mask & SMASK_Y) - pass0_emit(nvs, parent, fpos, NVS_OP_SIN, - dest, SMASK_Y, sat, + ARITH(NVS_OP_SIN, dest, SMASK_Y, sat, nvsSwizzle(src[0], X, X, X, X), - nvr_unused, - nvr_unused); + nvr_unused, nvr_unused); break; case OPCODE_SUB: - pass0_emit(nvs, parent, fpos, NVS_OP_ADD, dest, mask, sat, + ARITH(NVS_OP_ADD, dest, mask, sat, src[0], nvsNegate(src[1]), nvr_unused); break; case OPCODE_XPD: pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); - pass0_emit(nvs, parent, fpos, NVS_OP_MUL, temp, SMASK_ALL, 0, - nvsSwizzle(src[0], Z, X, Y, Y), - nvsSwizzle(src[1], Y, Z, X, X), - nvr_unused); - pass0_emit(nvs, parent, fpos, NVS_OP_MAD, - dest, (mask & ~SMASK_W), sat, - nvsSwizzle(src[0], Y, Z, X, X), - nvsSwizzle(src[1], Z, X, Y, Y), - nvsNegate(temp)); + ARITHu(NVS_OP_MUL, temp, SMASK_ALL, 0, + nvsSwizzle(src[0], Z, X, Y, Y), + nvsSwizzle(src[1], Y, Z, X, X), + nvr_unused); + ARITH (NVS_OP_MAD, dest, (mask & ~SMASK_W), sat, + nvsSwizzle(src[0], Y, Z, X, X), + nvsSwizzle(src[1], Z, X, Y, Y), + nvsNegate(temp)); break; default: WARN_ONCE("hw doesn't support opcode \"%s\"," @@ -721,16 +724,12 @@ pass0_translate_arith(nouveauShader *nvs, struct gl_program *prog, pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); pass0_make_dst_reg(nvs, &dest, &inst->DstReg); - nvsinst = pass0_emit(nvs, parent, fpos, - pass0_make_opcode(inst->Opcode), - dest, - pass0_make_mask(inst->DstReg.WriteMask), - (inst->SaturateMode != SATURATE_OFF), - src[0], src[1], src[2]); + ARITH(pass0_make_opcode(inst->Opcode), dest, + pass0_make_mask(inst->DstReg.WriteMask), + (inst->SaturateMode != SATURATE_OFF), + src[0], src[1], src[2]); nvsinst->tex_unit = inst->TexSrcUnit; nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget); - /* TODO when NV_fp/vp is implemented */ - nvsinst->cond = COND_TR; ret = GL_TRUE; } else @@ -753,7 +752,7 @@ pass0_translate_instructions(nouveauShader *nvs, int ipos, int fpos, return GL_TRUE; case OPCODE_BRA: case OPCODE_CAL: - //case OPCDOE_RET: + case OPCODE_RET: //case OPCODE_LOOP: //case OPCODE_ENDLOOP: //case OPCODE_IF: -- cgit v1.2.3 From cf33bcf0b246f75094673beaa989034fa27c4b9f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 23 Jan 2007 16:07:12 +1100 Subject: nouveau: allow for card-specific shader infos to be kept NV30/40 fragprog: build FP_CONTROL per-shader, still some hardcoded bits for this reg.. It looks like it has to do with the number of temps used, but needs more looking at. NV40 vtxprog : build VP_IN_REG/VP_OUT_REG during shader compile --- src/mesa/drivers/dri/nouveau/nouveau_shader.h | 74 ++++++++------- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 1 + src/mesa/drivers/dri/nouveau/nouveau_shader_2.c | 4 - src/mesa/drivers/dri/nouveau/nv10_swtcl.c | 3 - src/mesa/drivers/dri/nouveau/nv30_fragprog.c | 4 + src/mesa/drivers/dri/nouveau/nv30_state.c | 3 - src/mesa/drivers/dri/nouveau/nv30_vertprog.c | 4 + src/mesa/drivers/dri/nouveau/nv40_vertprog.c | 118 +++++++++++++++++++----- 8 files changed, 149 insertions(+), 62 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index a3ab027142..8b4be9dfe7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -24,6 +24,16 @@ typedef struct _nvs_fragment_header { } type; } nvsFragmentHeader; +typedef union { + struct { + uint32_t fp_control; + } NV30FP; + struct { + uint32_t vp_in_reg; + uint32_t vp_out_reg; + } NV30VP; +} nvsCardPriv; + typedef struct _nouveauShader { union { struct gl_vertex_program vp; @@ -41,10 +51,10 @@ typedef struct _nouveauShader { unsigned int program_start_id; unsigned int program_current; struct gl_buffer_object *program_buffer; - unsigned int inputs_read; - unsigned int outputs_written; int inst_count; + nvsCardPriv card_priv; + struct { GLfloat *source_val; /* NULL if invariant */ float val[4]; @@ -113,35 +123,35 @@ typedef enum { } nvsSwzComp; typedef enum { - NVS_FR_POSITION, - NVS_FR_WEIGHT, - NVS_FR_NORMAL, - NVS_FR_COL0, - NVS_FR_COL1, - NVS_FR_BFC0, - NVS_FR_BFC1, - NVS_FR_FOGCOORD, - NVS_FR_POINTSZ, - NVS_FR_TEXCOORD0, - NVS_FR_TEXCOORD1, - NVS_FR_TEXCOORD2, - NVS_FR_TEXCOORD3, - NVS_FR_TEXCOORD4, - NVS_FR_TEXCOORD5, - NVS_FR_TEXCOORD6, - NVS_FR_TEXCOORD7, - NVS_FR_FRAGDATA0, - NVS_FR_FRAGDATA1, - NVS_FR_FRAGDATA2, - NVS_FR_FRAGDATA3, - NVS_FR_CLIP0, - NVS_FR_CLIP1, - NVS_FR_CLIP2, - NVS_FR_CLIP3, - NVS_FR_CLIP4, - NVS_FR_CLIP5, - NVS_FR_CLIP6, - NVS_FR_FACING, + NVS_FR_POSITION = 0, + NVS_FR_WEIGHT = 1, + NVS_FR_NORMAL = 2, + NVS_FR_COL0 = 3, + NVS_FR_COL1 = 4, + NVS_FR_FOGCOORD = 5, + NVS_FR_TEXCOORD0 = 8, + NVS_FR_TEXCOORD1 = 9, + NVS_FR_TEXCOORD2 = 10, + NVS_FR_TEXCOORD3 = 11, + NVS_FR_TEXCOORD4 = 12, + NVS_FR_TEXCOORD5 = 13, + NVS_FR_TEXCOORD6 = 14, + NVS_FR_TEXCOORD7 = 15, + NVS_FR_BFC0 = 16, + NVS_FR_BFC1 = 17, + NVS_FR_POINTSZ = 18, + NVS_FR_FRAGDATA0 = 19, + NVS_FR_FRAGDATA1 = 20, + NVS_FR_FRAGDATA2 = 21, + NVS_FR_FRAGDATA3 = 22, + NVS_FR_CLIP0 = 23, + NVS_FR_CLIP1 = 24, + NVS_FR_CLIP2 = 25, + NVS_FR_CLIP3 = 26, + NVS_FR_CLIP4 = 27, + NVS_FR_CLIP5 = 28, + NVS_FR_CLIP6 = 29, + NVS_FR_FACING = 30, NVS_FR_UNKNOWN } nvsFixedReg; @@ -279,6 +289,8 @@ extern nvsSwzComp NV20VP_TX_SWIZZLE[4]; #define SCAP_SRC_ABS (1<<0) struct _nvsFunc { + nvsCardPriv *card_priv; + unsigned int MaxInst; unsigned int MaxAttrib; unsigned int MaxTemp; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 5845d4f63a..3e542ea9c0 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -805,6 +805,7 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) fprintf(stderr, "Unknown program type %d", prog->Target); return GL_FALSE; } + nvs->func->card_priv = &nvs->card_priv; rec = CALLOC_STRUCT(pass0_rec); if (rec) { diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c index 6fb36c1daf..c106fd2d94 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -118,8 +118,6 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, if (op->srcpos[i] != -1) { reg = pass2_mangle_reg(nvs, inst, inst->src[i]); - if (reg.file == NVS_FILE_ATTRIB) - nvs->inputs_read |= (1 << reg.index); shader->SetSource(shader, ®, op->srcpos[i]); if (reg.file == NVS_FILE_CONST && @@ -136,8 +134,6 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, } reg = pass2_mangle_reg(nvs, inst, inst->dest); - if (reg.file == NVS_FILE_RESULT) - nvs->outputs_written |= (1 << reg.index); shader->SetResult(shader, ®, inst->mask, slot); } diff --git a/src/mesa/drivers/dri/nouveau/nv10_swtcl.c b/src/mesa/drivers/dri/nouveau/nv10_swtcl.c index f916912c7e..c9bfac8c4a 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_swtcl.c +++ b/src/mesa/drivers/dri/nouveau/nv10_swtcl.c @@ -476,9 +476,6 @@ static void nv10ChooseVertexState( GLcontext *ctx ) * is up to date */ nvsUpdateShader(ctx, nmesa->passthrough_vp); - BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_IN_REG, 2); - OUT_RING_CACHE (0xff09); /*IN : POS, COL, TC0-7 */ - OUT_RING_CACHE (0x3fc001); /*OUT: COL, TC0-7, POS implied */ /* Update texenv shader / user fragprog */ nvsUpdateShader(ctx, (nouveauShader*)ctx->FragmentProgram._Current); diff --git a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c index cd7c955c9e..3c7501dd62 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c +++ b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c @@ -46,6 +46,8 @@ NV30FPUploadToHW(GLcontext *ctx, nouveauShader *nvs) */ BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1); OUT_RING (offset | 1); + BEGIN_RING_SIZE(NvSub3D, 0x1d60, 1); + OUT_RING (nvs->card_priv.NV30FP.fp_control | 0x03000000); } static void @@ -92,6 +94,8 @@ NV30FPSupportsOpcode(nvsFunc *shader, nvsOpcode op) static void NV30FPSetOpcode(nvsFunc *shader, unsigned int opcode, int slot) { + if (opcode == NV30_FP_OP_OPCODE_KIL) + shader->card_priv->NV30FP.fp_control |= (1<<7); shader->inst[0] &= ~NV30_FP_OP_OPCODE_MASK; shader->inst[0] |= (opcode << NV30_FP_OP_OPCODE_SHIFT); } diff --git a/src/mesa/drivers/dri/nouveau/nv30_state.c b/src/mesa/drivers/dri/nouveau/nv30_state.c index 9bb4f14909..9b0d7425c8 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_state.c +++ b/src/mesa/drivers/dri/nouveau/nv30_state.c @@ -862,9 +862,6 @@ static GLboolean nv40InitCard(nouveauContextPtr nmesa) BEGIN_RING_SIZE(NvSub3D, 0x1e94, 1); OUT_RING(0x00000001); - BEGIN_RING_SIZE(NvSub3D, 0x1d60, 1); - OUT_RING(0x03008000); - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/nouveau/nv30_vertprog.c b/src/mesa/drivers/dri/nouveau/nv30_vertprog.c index 0b7678f55d..afcacf36c2 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_vertprog.c +++ b/src/mesa/drivers/dri/nouveau/nv30_vertprog.c @@ -29,6 +29,10 @@ NV30VPUploadToHW(GLcontext *ctx, nouveauShader *nvs) } BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_PROGRAM_START_ID, 1); OUT_RING(0); + + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_IN_REG, 2); + OUT_RING(nvs->card_priv.NV30VP.vp_in_reg); + OUT_RING(nvs->card_priv.NV30VP.vp_out_reg); } static void diff --git a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c index 1ba1cfd155..6cb7e1cfd6 100644 --- a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c +++ b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c @@ -66,6 +66,96 @@ NV40VPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg, shader->inst[0] |= (swizzle[NVS_SWZ_W] << NV40_VP_INST_COND_SWZ_W_SHIFT); } +/* these just exist here until nouveau_reg.h has them. */ +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_COL0 (1<<0) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_COL1 (1<<1) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_BFC0 (1<<2) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_BFC1 (1<<3) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_FOGC (1<<4) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_PSZ (1<<5) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP0 (1<<6) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP1 (1<<7) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP2 (1<<8) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP3 (1<<9) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP4 (1<<10) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP5 (1<<11) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_TEX0 (1<<14) + +static unsigned int +NV40VPTranslateResultReg(nvsFunc *shader, nvsFixedReg result, + unsigned int *mask_ret) +{ + unsigned int *out_reg = &shader->card_priv->NV30VP.vp_out_reg; + + *mask_ret = 0xf; + + switch (result) { + case NVS_FR_POSITION: + /* out_reg POS implied */ + return NV40_VP_INST_DEST_POS; + case NVS_FR_COL0: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_COL0; + return NV40_VP_INST_DEST_COL0; + case NVS_FR_COL1: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_COL1; + return NV40_VP_INST_DEST_COL1; + case NVS_FR_BFC0: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_BFC0; + return NV40_VP_INST_DEST_BFC0; + case NVS_FR_BFC1: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_BFC1; + return NV40_VP_INST_DEST_BFC1; + case NVS_FR_FOGCOORD: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_FOGC; + *mask_ret = 0x8; + return NV40_VP_INST_DEST_FOGC; + case NVS_FR_CLIP0: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP0; + *mask_ret = 0x4; + return NV40_VP_INST_DEST_FOGC; + case NVS_FR_CLIP1: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP1; + *mask_ret = 0x2; + return NV40_VP_INST_DEST_FOGC; + case NVS_FR_CLIP2: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP2; + *mask_ret = 0x1; + return NV40_VP_INST_DEST_FOGC; + case NVS_FR_POINTSZ: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_PSZ; + *mask_ret = 0x8; + return NV40_VP_INST_DEST_PSZ; + case NVS_FR_CLIP3: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP3; + *mask_ret = 0x4; + return NV40_VP_INST_DEST_PSZ; + case NVS_FR_CLIP4: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP4; + *mask_ret = 0x2; + return NV40_VP_INST_DEST_PSZ; + case NVS_FR_CLIP5: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP5; + *mask_ret = 0x1; + return NV40_VP_INST_DEST_PSZ; + case NVS_FR_TEXCOORD0: + case NVS_FR_TEXCOORD1: + case NVS_FR_TEXCOORD2: + case NVS_FR_TEXCOORD3: + case NVS_FR_TEXCOORD4: + case NVS_FR_TEXCOORD5: + case NVS_FR_TEXCOORD6: + case NVS_FR_TEXCOORD7: + { + int unit = result - NVS_FR_TEXCOORD0; + (*out_reg) |= (NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_TEX0 << unit); + return NV40_VP_INST_DEST_TC(unit); + } + default: + WARN_ONCE("unknown vp output %d\n", result); + return NV40_VP_INST_DEST_POS; + } +} + static void NV40VPSetResult(nvsFunc *shader, nvsRegister * dest, unsigned int mask, int slot) @@ -78,29 +168,14 @@ NV40VPSetResult(nvsFunc *shader, nvsRegister * dest, unsigned int mask, if (mask & SMASK_W) hwmask |= (1 << 0); if (dest->file == NVS_FILE_RESULT) { + unsigned int valid_mask; int hwidx; - switch (dest->index) { - case NVS_FR_POSITION : hwidx = NV40_VP_INST_DEST_POS; break; - case NVS_FR_COL0 : hwidx = NV40_VP_INST_DEST_COL0; break; - case NVS_FR_COL1 : hwidx = NV40_VP_INST_DEST_COL1; break; - case NVS_FR_BFC0 : hwidx = NV40_VP_INST_DEST_BFC0; break; - case NVS_FR_BFC1 : hwidx = NV40_VP_INST_DEST_BFC1; break; - case NVS_FR_FOGCOORD : hwidx = NV40_VP_INST_DEST_FOGC; break; - case NVS_FR_POINTSZ : hwidx = NV40_VP_INST_DEST_PSZ; break; - case NVS_FR_TEXCOORD0: hwidx = NV40_VP_INST_DEST_TC(0); break; - case NVS_FR_TEXCOORD1: hwidx = NV40_VP_INST_DEST_TC(1); break; - case NVS_FR_TEXCOORD2: hwidx = NV40_VP_INST_DEST_TC(2); break; - case NVS_FR_TEXCOORD3: hwidx = NV40_VP_INST_DEST_TC(3); break; - case NVS_FR_TEXCOORD4: hwidx = NV40_VP_INST_DEST_TC(4); break; - case NVS_FR_TEXCOORD5: hwidx = NV40_VP_INST_DEST_TC(5); break; - case NVS_FR_TEXCOORD6: hwidx = NV40_VP_INST_DEST_TC(6); break; - case NVS_FR_TEXCOORD7: hwidx = NV40_VP_INST_DEST_TC(7); break; - default: - WARN_ONCE("unknown vtxprog output %d\n", dest->index); - hwidx = 0; - break; - } + hwidx = NV40VPTranslateResultReg(shader, dest->index, &valid_mask); + if (hwmask & ~valid_mask) + WARN_ONCE("writing invalid components of result reg\n"); + hwmask &= valid_mask; + shader->inst[3] &= ~NV40_VP_INST_DEST_MASK; shader->inst[3] |= (hwidx << NV40_VP_INST_DEST_SHIFT); @@ -174,6 +249,7 @@ NV40VPSetSource(nvsFunc *shader, nvsRegister * src, int pos) shader->inst[1] &= ~NV40_VP_INST_INPUT_SRC_MASK; shader->inst[1] |= (src->index << NV40_VP_INST_INPUT_SRC_SHIFT); + shader->card_priv->NV30VP.vp_in_reg |= (1 << src->index); if (src->indexed) { shader->inst[0] |= NV40_VP_INST_INDEX_INPUT; if (src->addr_reg) -- cgit v1.2.3 From 60c28739aa4afe543e0293c15a7aaf84ec2183ea Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 23 Jan 2007 16:25:25 +1100 Subject: nouveau: DPH and CMP for NV40 which doesn't do it natively. --- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 3e542ea9c0..28c6ad803b 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -594,6 +594,30 @@ pass0_emulate_instruction(nouveauShader *nvs, ARITH(NVS_OP_MAX, dest, mask, sat, src[0], nvsNegate(src[0]), nvr_unused); break; + case OPCODE_CMP: + /*XXX: this will clobber CC0... */ + ARITH (NVS_OP_MOV, dest, mask, sat, + src[2], nvr_unused, nvr_unused); + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + ARITHu(NVS_OP_MOV, temp, SMASK_ALL, 0, + src[0], nvr_unused, nvr_unused); + nvsinst->cond_update = 1; + nvsinst->cond_reg = 0; + ARITH (NVS_OP_MOV, dest, mask, sat, + src[1], nvr_unused, nvr_unused); + nvsinst->cond = COND_LT; + nvsinst->cond_reg = 0; + nvsinst->cond_test = 1; + break; + case OPCODE_DPH: + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + ARITHu(NVS_OP_DP3, temp, SMASK_X, 0, + src[0], src[1], nvr_unused); + ARITH (NVS_OP_ADD, dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), + nvsSwizzle(src[1], W, W, W, W), + nvr_unused); + break; case OPCODE_KIL: /* This is only in ARB shaders, so we don't have to worry * about clobbering a CC reg as they aren't supported anyway. -- cgit v1.2.3 From 86996dfe32fccd5777dd0e410b5dbe964fb206d1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 25 Jan 2007 13:40:51 +1100 Subject: nouveau: add result scaling to shader backend, use it in RSQ emul for NV40. --- src/mesa/drivers/dri/nouveau/nouveau_shader.c | 2 +- src/mesa/drivers/dri/nouveau/nouveau_shader.h | 13 ++++++++++++ src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 17 +++------------- src/mesa/drivers/dri/nouveau/nouveau_shader_2.c | 4 ++++ src/mesa/drivers/dri/nouveau/nv40_fragprog.c | 27 +++++++++++++++++++++++++ src/mesa/drivers/dri/nouveau/nv40_shader.h | 4 ++-- 6 files changed, 50 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c index f911347d62..cdb79fca1e 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c @@ -179,7 +179,7 @@ nvsBuildTextShader(GLcontext *ctx, GLenum target, const char *text) strlen(text), &nvs->mesa.vp); } else if (target == GL_FRAGMENT_PROGRAM_ARB) { - _mesa_init_fragment_program(ctx, &nvs->mesa.fp, GL_VERTEX_PROGRAM_ARB, 0); + _mesa_init_fragment_program(ctx, &nvs->mesa.fp, GL_FRAGMENT_PROGRAM_ARB, 0); _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB, text, diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index 8b4be9dfe7..7329ccd9ea 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -194,6 +194,16 @@ typedef enum { NVS_TEX_TARGET_UNKNOWN = 0 } nvsTexTarget; +typedef enum { + NVS_SCALE_1X = 0, + NVS_SCALE_2X = 1, + NVS_SCALE_4X = 2, + NVS_SCALE_8X = 3, + NVS_SCALE_INV_2X = 5, + NVS_SCALE_INV_4X = 6, + NVS_SCALE_INV_8X = 7, +} nvsScale; + /* Arith/TEX instructions */ typedef struct nvs_instruction { nvsFragmentHeader header; @@ -203,6 +213,7 @@ typedef struct nvs_instruction { nvsRegister dest; unsigned int mask; + nvsScale dest_scale; nvsRegister src[3]; @@ -307,6 +318,7 @@ struct _nvsFunc { void (*InitInstruction) (nvsFunc *); int (*SupportsOpcode) (nvsFunc *, nvsOpcode); + int (*SupportsResultScale) (nvsFunc *, nvsScale); void (*SetOpcode) (nvsFunc *, unsigned int opcode, int slot); void (*SetCCUpdate) (nvsFunc *); @@ -314,6 +326,7 @@ struct _nvsFunc { nvsSwzComp *swizzle); void (*SetResult) (nvsFunc *, nvsRegister *, unsigned int mask, int slot); + void (*SetResultScale) (nvsFunc *, nvsScale); void (*SetSource) (nvsFunc *, nvsRegister *, int pos); void (*SetTexImageUnit) (nvsFunc *, int unit); void (*SetSaturate) (nvsFunc *); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 28c6ad803b..3bcc2ba755 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -402,6 +402,7 @@ pass0_emit(nouveauShader *nvs, nvsFragmentHeader *parent, int fpos, sif->saturate = saturate; sif->dest = dst; sif->mask = mask; + sif->dest_scale = NVS_SCALE_1X; sif->src[0] = src0; sif->src[1] = src1; sif->src[2] = src2; @@ -667,25 +668,13 @@ pass0_emulate_instruction(nouveauShader *nvs, } break; case OPCODE_RSQ: - if (rec->const_half.file != NVS_FILE_CONST) { - GLfloat const_half[4] = { 0.5, 0.0, 0.0, 0.0 }; - pass0_make_reg(nvs, &rec->const_half, NVS_FILE_CONST, - _mesa_add_unnamed_constant( - nvs->mesa.vp.Base.Parameters, - const_half, 4)); - COPY_4V(nvs->params[rec->const_half.index].val, - const_half); - } pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); ARITHu(NVS_OP_LG2, temp, SMASK_X, 0, nvsAbs(nvsSwizzle(src[0], X, X, X, X)), nvr_unused, nvr_unused); - ARITHu(NVS_OP_MUL, temp, SMASK_X, 0, - nvsSwizzle(temp, X, X, X, X), - nvsNegate(rec->const_half), - nvr_unused); + nvsinst->dest_scale = NVS_SCALE_INV_2X; ARITH (NVS_OP_EX2, dest, mask, sat, - nvsSwizzle(temp, X, X, X, X), + nvsNegate(nvsSwizzle(temp, X, X, X, X)), nvr_unused, nvr_unused); break; case OPCODE_SCS: diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c index c106fd2d94..b043f877e4 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -135,6 +135,10 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, reg = pass2_mangle_reg(nvs, inst, inst->dest); shader->SetResult(shader, ®, inst->mask, slot); + + if (inst->dest_scale != NVS_SCALE_1X) { + shader->SetResultScale(shader, inst->dest_scale); + } } static int diff --git a/src/mesa/drivers/dri/nouveau/nv40_fragprog.c b/src/mesa/drivers/dri/nouveau/nv40_fragprog.c index 8bca6ae938..3e4ae0496e 100644 --- a/src/mesa/drivers/dri/nouveau/nv40_fragprog.c +++ b/src/mesa/drivers/dri/nouveau/nv40_fragprog.c @@ -11,6 +11,30 @@ struct _op_xlat NVFP_TX_BOP[64]; * - These extend the NV30 routines, which are almost identical. NV40 * just has branching hacked into the instruction set. */ +static int +NV40FPSupportsResultScale(nvsFunc *shader, nvsScale scale) +{ + switch (scale) { + case NVS_SCALE_1X: + case NVS_SCALE_2X: + case NVS_SCALE_4X: + case NVS_SCALE_8X: + case NVS_SCALE_INV_2X: + case NVS_SCALE_INV_4X: + case NVS_SCALE_INV_8X: + return 1; + default: + return 0; + } +} + +static void +NV40FPSetResultScale(nvsFunc *shader, nvsScale scale) +{ + shader->inst[2] &= ~NV40_FP_OP_DST_SCALE_MASK; + shader->inst[2] |= ((unsigned int)scale << NV40_FP_OP_DST_SCALE_SHIFT); +} + static void NV40FPSetBranchTarget(nvsFunc *shader, int addr) { @@ -179,6 +203,9 @@ NV40FPInitShaderFuncs(nvsFunc * shader) MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_REP , NVS_OP_REP , -1, -1, -1); MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_RET , NVS_OP_RET , -1, -1, -1); + shader->SupportsResultScale = NV40FPSupportsResultScale; + shader->SetResultScale = NV40FPSetResultScale; + /* fragment.facing */ shader->GetSourceID = NV40FPGetSourceID; diff --git a/src/mesa/drivers/dri/nouveau/nv40_shader.h b/src/mesa/drivers/dri/nouveau/nv40_shader.h index 2a2b5639b6..584f4c23e0 100644 --- a/src/mesa/drivers/dri/nouveau/nv40_shader.h +++ b/src/mesa/drivers/dri/nouveau/nv40_shader.h @@ -399,8 +399,8 @@ /* high order bits of SRC1 */ #define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) -#define NV40_FP_OP_SRC_SCALE_SHIFT 28 -#define NV40_FP_OP_SRC_SCALE_MASK (3 << 28) +#define NV40_FP_OP_DST_SCALE_SHIFT 28 +#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) /* SRC1 LOOP */ #define NV40_FP_OP_LOOP_INCR_SHIFT 19 -- cgit v1.2.3 From cafbc459f51ce6645e1fc4b6b2b7ec34efedd874 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 27 Jan 2007 18:36:01 +1100 Subject: nouveau: maintain a map of which vtxprog input corresponds to which array --- src/mesa/drivers/dri/nouveau/nouveau_shader.h | 1 + src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 46 +++++++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index 82eb27b053..e2515c1c79 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -55,6 +55,7 @@ typedef struct _nouveauShader { int inst_count; nvsCardPriv card_priv; + int vp_attrib_map[NVS_MAX_ATTRIBS]; struct { GLfloat *source_val; /* NULL if invariant */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 3bcc2ba755..81ed012c78 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -787,6 +787,50 @@ pass0_translate_instructions(nouveauShader *nvs, int ipos, int fpos, return GL_TRUE; } +static void +pass0_build_attrib_map(nouveauShader *nvs, struct gl_vertex_program *vp) +{ + GLuint inputs_read = vp->Base.InputsRead; + GLuint input_alloc = ~0xFFFF; + int i; + + for (i=0; ivp_attrib_map[i] = -1; + + while (inputs_read) { + int in = ffs(inputs_read) - 1; + int hw; + inputs_read &= ~(1<IsNVProgram) { + /* NVvp: must alias */ + if (in >= VERT_ATTRIB_GENERIC0) + hw = in - VERT_ATTRIB_GENERIC0; + else + hw = in; + } else { + /* ARBvp: may alias + * GL2.0: must not alias + */ + if (in >= VERT_ATTRIB_GENERIC0) + hw = ffs(~input_alloc) - 1; + else + hw = in; + input_alloc |= (1<vp_attrib_map[hw] = in; + } + + if (NOUVEAU_DEBUG & DEBUG_SHADERS) { + printf("vtxprog attrib map:\n"); + for (i=0; ivp_attrib_map[i]); + } + } +} + GLboolean nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) { @@ -801,6 +845,8 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) case GL_VERTEX_PROGRAM_ARB: nvs->func = &nmesa->VPfunc; + pass0_build_attrib_map(nvs, vp); + if (vp->IsPositionInvariant) _mesa_insert_mvp_code(ctx, vp); #if 0 -- cgit v1.2.3 From 7fbf8d3324868e6920243e3b1abdeb6e398ea715 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 27 Jan 2007 18:51:19 +1100 Subject: nouveau: oops --- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 81ed012c78..fc55056854 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -322,6 +322,7 @@ pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src) { struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base; struct gl_program_parameter_list *p = mesa->Parameters; + int i; *reg = nvr_unused; @@ -329,9 +330,14 @@ pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src) case PROGRAM_INPUT: reg->file = NVS_FILE_ATTRIB; if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { - reg->index = (src->Index < VERT_ATTRIB_MAX) ? - _tx_mesa_vp_src_reg[src->Index] : - NVS_FR_UNKNOWN; + for (i=0; ivp_attrib_map[i] == src->Index) { + reg->index = i; + break; + } + } + if (i==NVS_MAX_ATTRIBS) + reg->index = NVS_FR_UNKNOWN; } else { reg->index = (src->Index < FRAG_ATTRIB_MAX) ? _tx_mesa_fp_src_reg[src->Index] : -- cgit v1.2.3 From 2d8b31610917e5e14a242725b047a21f6d2c14e0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 27 Jan 2007 18:54:42 +1100 Subject: nouveau: remove an unused table --- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index fc55056854..b96cf95939 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -54,18 +54,6 @@ static nvsFixedReg _tx_mesa_fp_dst_reg[FRAG_RESULT_MAX] = { NVS_FR_UNKNOWN /* DEPR */ }; -static nvsFixedReg _tx_mesa_vp_src_reg[VERT_ATTRIB_MAX] = { - NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1, - NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN, - NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, - NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7, -/* Generic attribs 0-15, aliased to the above */ - NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1, - NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN, - NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, - NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7 -}; - static nvsFixedReg _tx_mesa_fp_src_reg[FRAG_ATTRIB_MAX] = { NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD, NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, @@ -815,7 +803,7 @@ pass0_build_attrib_map(nouveauShader *nvs, struct gl_vertex_program *vp) else hw = in; } else { - /* ARBvp: may alias + /* ARBvp: may alias (but we won't) * GL2.0: must not alias */ if (in >= VERT_ATTRIB_GENERIC0) -- cgit v1.2.3 From de0cf18b096822cf8e113a46f12740ebeb10f8df Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 29 Jan 2007 17:08:45 +1100 Subject: nouveau: oops, build attrib map after we know how the final shader will look.. --- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index b96cf95939..9f32cd8c11 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -839,14 +839,14 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) case GL_VERTEX_PROGRAM_ARB: nvs->func = &nmesa->VPfunc; - pass0_build_attrib_map(nvs, vp); - if (vp->IsPositionInvariant) _mesa_insert_mvp_code(ctx, vp); #if 0 if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED) pass0_insert_ff_clip_planes(); #endif + + pass0_build_attrib_map(nvs, vp); break; case GL_FRAGMENT_PROGRAM_ARB: nvs->func = &nmesa->FPfunc; -- cgit v1.2.3 From ede8017d2c6c2f6da4c75c9ce0cc4d748e870973 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 30 Jan 2007 12:33:00 +1100 Subject: nouveau: rework shader param handling Conflicts: src/mesa/drivers/dri/nouveau/nouveau_shader_0.c --- src/mesa/drivers/dri/nouveau/nouveau_shader.c | 9 +- src/mesa/drivers/dri/nouveau/nouveau_shader.h | 3 + src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 145 +++++++++++++++++------- 3 files changed, 110 insertions(+), 47 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c index cdb79fca1e..c78b72bd11 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c @@ -126,15 +126,16 @@ nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs) /* Update state parameters */ plist = nvs->mesa.vp.Base.Parameters; _mesa_load_state_parameters(ctx, plist); - for (i=0; iNumParameters; i++) { + for (i=0; iparam_high; i++) { + if (!nvs->params[i].in_use) + continue; + if (!nvs->on_hardware) { /* if we've been kicked off the hardware there's no guarantee our * consts are still there.. reupload them all */ nvs->func->UpdateConst(ctx, nvs, i); - } else if (plist->Parameters[i].Type == PROGRAM_STATE_VAR) { - if (!nvs->params[i].source_val) /* this is a workaround when consts aren't alloc'd from id=0.. */ - continue; + } else if (nvs->params[i].source_val) { /* update any changed state parameters */ if (!TEST_EQ_4V(nvs->params[i].val, nvs->params[i].source_val)) nvs->func->UpdateConst(ctx, nvs, i); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index e2515c1c79..68007dd1ea 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -58,6 +58,8 @@ typedef struct _nouveauShader { int vp_attrib_map[NVS_MAX_ATTRIBS]; struct { + GLboolean in_use; + GLfloat *source_val; /* NULL if invariant */ float val[4]; /* Hardware-specific tracking, currently only nv30_fragprog @@ -66,6 +68,7 @@ typedef struct _nouveauShader { int *hw_index; int hw_index_cnt; } params[NVS_MAX_CONSTS]; + int param_high; /* Pass-private data */ void *pass_rec; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 9f32cd8c11..fb6e0b05c3 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -113,6 +113,10 @@ static nvsCond _tx_mesa_condmask[] = { struct pass0_rec { int nvs_ipos; int next_temp; + + int mesa_const_base; + int mesa_const_last; + int swzconst_done; int swzconst_id; nvsRegister const_half; @@ -308,8 +312,8 @@ pass0_make_dst_reg(nvsPtr nvs, nvsRegister *reg, static void pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src) { + struct pass0_rec *rec = nvs->pass_rec; struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base; - struct gl_program_parameter_list *p = mesa->Parameters; int i; *reg = nvr_unused; @@ -332,34 +336,16 @@ pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src) NVS_FR_UNKNOWN; } break; - /* All const types seem to get shoved into here, not really sure why */ case PROGRAM_STATE_VAR: - switch (p->Parameters[src->Index].Type) { - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - nvs->params[src->Index].source_val = NULL; - COPY_4V(nvs->params[src->Index].val, - p->ParameterValues[src->Index]); - break; - case PROGRAM_STATE_VAR: - nvs->params[src->Index].source_val = - p->ParameterValues[src->Index]; - break; - default: - fprintf(stderr, "Unknown parameter type %d\n", - p->Parameters[src->Index].Type); - assert(0); - break; + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + reg->file = NVS_FILE_CONST; + reg->index = src->Index + rec->mesa_const_base; + reg->indexed = src->RelAddr; + if (reg->indexed) { + reg->addr_reg = 0; + reg->addr_comp = NVS_SWZ_X; } - - if (src->RelAddr) { - reg->indexed = 1; - reg->addr_reg = 0; - reg->addr_comp = NVS_SWZ_X; - } else - reg->indexed = 0; - reg->file = NVS_FILE_CONST; - reg->index = src->Index; break; case PROGRAM_TEMPORARY: reg->file = NVS_FILE_TEMP; @@ -568,7 +554,6 @@ pass0_emulate_instruction(nouveauShader *nvs, nvsFunc *shader = nvs->func; nvsRegister src[3], dest, temp; nvsInstruction *nvsinst; - struct pass0_rec *rec = nvs->pass_rec; unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask); int i, sat; @@ -825,6 +810,73 @@ pass0_build_attrib_map(nouveauShader *nvs, struct gl_vertex_program *vp) } } +static void +pass0_prealloc_mesa_consts(nouveauShader *nvs) +{ + struct pass0_rec *rec = nvs->pass_rec; + struct gl_program *prog = &nvs->mesa.vp.Base; + struct prog_instruction *inst = prog->Instructions; + struct gl_program_parameter_list *plist = prog->Parameters; + int i; + + /*XXX: not a good idea, params->hw_index is malloc'd */ + memset(nvs->params, 0x00, sizeof(nvs->params)); + + /* When doing relative addressing on constants, the hardware needs us + * to fill the "const id" field with a positive value. Determine the + * most negative index that is used so that all accesses to a + * mesa-provided constant can be rebased to a positive index. + */ + while (inst->Opcode != OPCODE_END) { + for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + + switch (src->File) { + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_NAMED_PARAM: + if (src->RelAddr && src->Index < 0) { + int base = src->Index * -1; + if (rec->mesa_const_base < base) + rec->mesa_const_base = base; + } + break; + default: + break; + } + } + + inst++; + } + + /* Init all const tracking/alloc info from the parameter list, rather + * than doing it as we translate the program. Otherwise we can't get + * at the correct constant info when relative addressing is being used. + */ + rec->mesa_const_last = plist->NumParameters + rec->mesa_const_base; + nvs->param_high = rec->mesa_const_last; + for (i=0; iNumParameters; i++) { + int hw = rec->mesa_const_base + i; + + switch (plist->Parameters[i].Type) { + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + nvs->params[hw].in_use = GL_TRUE; + nvs->params[hw].source_val = plist->ParameterValues[i]; + COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]); + break; + case PROGRAM_CONSTANT: + nvs->params[hw].in_use = GL_TRUE; + nvs->params[hw].source_val = NULL; + COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]); + break; + default: + assert(0); + break; + } + } +} + GLboolean nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) { @@ -835,12 +887,28 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) struct pass0_rec *rec; int ret = GL_FALSE; + rec = CALLOC_STRUCT(pass0_rec); + if (!rec) + return GL_FALSE; + + rec->next_temp = prog->NumTemporaries; + nvs->pass_rec = rec; + + nvs->program_tree = (nvsFragmentHeader*) + pass0_create_subroutine(nvs, "program body"); + if (!nvs->program_tree) { + FREE(rec); + return GL_FALSE; + } + switch (prog->Target) { case GL_VERTEX_PROGRAM_ARB: nvs->func = &nmesa->VPfunc; if (vp->IsPositionInvariant) _mesa_insert_mvp_code(ctx, vp); + pass0_prealloc_mesa_consts(nvs); + #if 0 if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED) pass0_insert_ff_clip_planes(); @@ -853,29 +921,20 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) if (fp->FogOption != GL_NONE) _mesa_append_fog_code(ctx, fp); + pass0_prealloc_mesa_consts(nvs); break; default: fprintf(stderr, "Unknown program type %d", prog->Target); + FREE(rec); + /* DESTROY TREE!! */ return GL_FALSE; } nvs->func->card_priv = &nvs->card_priv; - rec = CALLOC_STRUCT(pass0_rec); - if (rec) { - rec->next_temp = prog->NumTemporaries; - nvs->pass_rec = rec; - - nvs->program_tree = (nvsFragmentHeader*) - pass0_create_subroutine(nvs, "program body"); - if (nvs->program_tree) { - ret = pass0_translate_instructions(nvs, - 0, 0, - nvs->program_tree); - /*XXX: if (!ret) DESTROY TREE!!! */ - } - FREE(rec); - } + ret = pass0_translate_instructions(nvs, 0, 0, nvs->program_tree); + /*XXX: if (!ret) DESTROY TREE!!! */ + FREE(rec); return ret; } -- cgit v1.2.3 From d2c4d9ff9beb36895bb8ee7aabb65e70c3068816 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 30 Jan 2007 16:00:20 +1100 Subject: nouveau: NV40 glClipPlane support. --- src/mesa/drivers/dri/nouveau/nouveau_shader.h | 1 + src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 88 +++++++++++++++++++++---- src/mesa/drivers/dri/nouveau/nv30_state.c | 15 ++++- src/mesa/drivers/dri/nouveau/nv30_vertprog.c | 3 + src/mesa/drivers/dri/nouveau/nv40_vertprog.c | 7 ++ 5 files changed, 101 insertions(+), 13 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_shader_0.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index 68007dd1ea..b2df3546f6 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -32,6 +32,7 @@ typedef union { struct { uint32_t vp_in_reg; uint32_t vp_out_reg; + uint32_t clip_enables; } NV30VP; } nvsCardPriv; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index fb6e0b05c3..73c1f7c2a5 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -811,12 +811,65 @@ pass0_build_attrib_map(nouveauShader *nvs, struct gl_vertex_program *vp) } static void -pass0_prealloc_mesa_consts(nouveauShader *nvs) +pass0_vp_insert_ff_clip_planes(GLcontext *ctx, nouveauShader *nvs) +{ + struct gl_program *prog = &nvs->mesa.vp.Base; + nvsFragmentHeader *parent = nvs->program_tree; + nvsInstruction *nvsinst; + GLuint fpos = 0; + nvsRegister opos, epos, eqn, mv[4]; + GLint tokens[6] = { STATE_MATRIX, STATE_MODELVIEW, 0, 0, 0, 0 }; + GLint id; + int i; + + /* modelview transform */ + pass0_make_reg(nvs, &opos, NVS_FILE_ATTRIB, NVS_FR_POSITION); + pass0_make_reg(nvs, &epos, NVS_FILE_TEMP , -1); + for (i=0; i<4; i++) { + tokens[3] = tokens[4] = i; + id = _mesa_add_state_reference(prog->Parameters, tokens); + pass0_make_reg(nvs, &mv[i], NVS_FILE_CONST, id); + } + ARITHu(NVS_OP_DP4, epos, SMASK_X, 0, opos, mv[0], nvr_unused); + ARITHu(NVS_OP_DP4, epos, SMASK_Y, 0, opos, mv[1], nvr_unused); + ARITHu(NVS_OP_DP4, epos, SMASK_Z, 0, opos, mv[2], nvr_unused); + ARITHu(NVS_OP_DP4, epos, SMASK_W, 0, opos, mv[3], nvr_unused); + + /* Emit code to emulate fixed-function glClipPlane */ + for (i=0; i<6; i++) { + GLuint clipmask = SMASK_X; + nvsRegister clip; + + if (!(ctx->Transform.ClipPlanesEnabled & (1<Parameters, tokens); + pass0_make_reg(nvs, &eqn , NVS_FILE_CONST , id); + pass0_make_reg(nvs, &clip, NVS_FILE_RESULT, NVS_FR_CLIP0 + i); + + /*XXX: something else needs to take care of modifying the + * instructions to write to the correct hw clip register. + */ + switch (i) { + case 0: case 3: clipmask = SMASK_Y; break; + case 1: case 4: clipmask = SMASK_Z; break; + case 2: case 5: clipmask = SMASK_W; break; + } + + /* Emit transform */ + ARITHu(NVS_OP_DP4, clip, clipmask, 0, epos, eqn, nvr_unused); + } +} + +static void +pass0_rebase_mesa_consts(nouveauShader *nvs) { struct pass0_rec *rec = nvs->pass_rec; struct gl_program *prog = &nvs->mesa.vp.Base; struct prog_instruction *inst = prog->Instructions; - struct gl_program_parameter_list *plist = prog->Parameters; int i; /*XXX: not a good idea, params->hw_index is malloc'd */ @@ -848,10 +901,23 @@ pass0_prealloc_mesa_consts(nouveauShader *nvs) inst++; } - +} + +static void +pass0_resolve_mesa_consts(nouveauShader *nvs) +{ + struct pass0_rec *rec = nvs->pass_rec; + struct gl_program *prog = &nvs->mesa.vp.Base; + struct gl_program_parameter_list *plist = prog->Parameters; + int i; + /* Init all const tracking/alloc info from the parameter list, rather - * than doing it as we translate the program. Otherwise we can't get - * at the correct constant info when relative addressing is being used. + * than doing it as we translate the program. Otherwise: + * 1) we can't get at the correct constant info when relative + * addressing is being used due to src->Index not pointing + * at the exact const; + * 2) as we add extra consts to the program, mesa will call realloc() + * and we get invalid pointers to the const data. */ rec->mesa_const_last = plist->NumParameters + rec->mesa_const_base; nvs->param_high = rec->mesa_const_last; @@ -907,12 +973,10 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) if (vp->IsPositionInvariant) _mesa_insert_mvp_code(ctx, vp); - pass0_prealloc_mesa_consts(nvs); + pass0_rebase_mesa_consts(nvs); -#if 0 - if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED) - pass0_insert_ff_clip_planes(); -#endif + if (!prog->String && ctx->Transform.ClipPlanesEnabled) + pass0_vp_insert_ff_clip_planes(ctx, nvs); pass0_build_attrib_map(nvs, vp); break; @@ -921,7 +985,7 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) if (fp->FogOption != GL_NONE) _mesa_append_fog_code(ctx, fp); - pass0_prealloc_mesa_consts(nvs); + pass0_rebase_mesa_consts(nvs); break; default: fprintf(stderr, "Unknown program type %d", prog->Target); @@ -932,6 +996,8 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) nvs->func->card_priv = &nvs->card_priv; ret = pass0_translate_instructions(nvs, 0, 0, nvs->program_tree); + if (ret) + pass0_resolve_mesa_consts(nvs); /*XXX: if (!ret) DESTROY TREE!!! */ FREE(rec); diff --git a/src/mesa/drivers/dri/nouveau/nv30_state.c b/src/mesa/drivers/dri/nouveau/nv30_state.c index 55b6463781..db13ec70b4 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_state.c +++ b/src/mesa/drivers/dri/nouveau/nv30_state.c @@ -127,6 +127,11 @@ static void nv30ClearStencil(GLcontext *ctx, GLint s) static void nv30ClipPlane(GLcontext *ctx, GLenum plane, const GLfloat *equation) { nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + + if (NOUVEAU_CARD_USING_SHADERS) + return; + + plane -= GL_CLIP_PLANE0; BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_CLIP_PLANE_A(plane), 4); OUT_RING_CACHEf(equation[0]); OUT_RING_CACHEf(equation[1]); @@ -208,8 +213,14 @@ static void nv30Enable(GLcontext *ctx, GLenum cap, GLboolean state) case GL_CLIP_PLANE3: case GL_CLIP_PLANE4: case GL_CLIP_PLANE5: - BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_CLIP_PLANE_ENABLE(cap-GL_CLIP_PLANE0), 1); - OUT_RING_CACHE(state); + if (NOUVEAU_CARD_USING_SHADERS) { + nouveauShader *nvs = (nouveauShader *)ctx->VertexProgram._Current; + if (nvs) + nvs->translated = GL_FALSE; + } else { + BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_CLIP_PLANE_ENABLE(cap-GL_CLIP_PLANE0), 1); + OUT_RING_CACHE(state); + } break; case GL_COLOR_LOGIC_OP: BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_COLOR_LOGIC_OP_ENABLE, 1); diff --git a/src/mesa/drivers/dri/nouveau/nv30_vertprog.c b/src/mesa/drivers/dri/nouveau/nv30_vertprog.c index afcacf36c2..d023e8439e 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_vertprog.c +++ b/src/mesa/drivers/dri/nouveau/nv30_vertprog.c @@ -33,6 +33,9 @@ NV30VPUploadToHW(GLcontext *ctx, nouveauShader *nvs) BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_IN_REG, 2); OUT_RING(nvs->card_priv.NV30VP.vp_in_reg); OUT_RING(nvs->card_priv.NV30VP.vp_out_reg); + + BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_CLIPPING_PLANES, 1); + OUT_RING_CACHE (nvs->card_priv.NV30VP.clip_enables); } static void diff --git a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c index 6cb7e1cfd6..d054140bcd 100644 --- a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c +++ b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c @@ -86,6 +86,7 @@ NV40VPTranslateResultReg(nvsFunc *shader, nvsFixedReg result, unsigned int *mask_ret) { unsigned int *out_reg = &shader->card_priv->NV30VP.vp_out_reg; + unsigned int *clip_en = &shader->card_priv->NV30VP.clip_enables; *mask_ret = 0xf; @@ -111,14 +112,17 @@ NV40VPTranslateResultReg(nvsFunc *shader, nvsFixedReg result, return NV40_VP_INST_DEST_FOGC; case NVS_FR_CLIP0: (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP0; + (*clip_en) |= 0x00000002; *mask_ret = 0x4; return NV40_VP_INST_DEST_FOGC; case NVS_FR_CLIP1: (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP1; + (*clip_en) |= 0x00000020; *mask_ret = 0x2; return NV40_VP_INST_DEST_FOGC; case NVS_FR_CLIP2: (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP2; + (*clip_en) |= 0x00000200; *mask_ret = 0x1; return NV40_VP_INST_DEST_FOGC; case NVS_FR_POINTSZ: @@ -127,13 +131,16 @@ NV40VPTranslateResultReg(nvsFunc *shader, nvsFixedReg result, return NV40_VP_INST_DEST_PSZ; case NVS_FR_CLIP3: (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP3; + (*clip_en) |= 0x00002000; *mask_ret = 0x4; return NV40_VP_INST_DEST_PSZ; case NVS_FR_CLIP4: + (*clip_en) |= 0x00020000; (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP4; *mask_ret = 0x2; return NV40_VP_INST_DEST_PSZ; case NVS_FR_CLIP5: + (*clip_en) |= 0x00200000; (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP5; *mask_ret = 0x1; return NV40_VP_INST_DEST_PSZ; -- cgit v1.2.3