From 4c2247538394a313e1e90bfcd07c1ab9c7d41281 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 12 Nov 2010 15:17:40 +0100 Subject: nvc0: import nvc0 gallium driver --- src/gallium/drivers/nvc0/nvc0_program.c | 595 ++++++++++++++++++++++++++++++++ 1 file changed, 595 insertions(+) create mode 100644 src/gallium/drivers/nvc0/nvc0_program.c (limited to 'src/gallium/drivers/nvc0/nvc0_program.c') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c new file mode 100644 index 0000000000..8cc161ae8e --- /dev/null +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -0,0 +1,595 @@ +/* + * Copyright 2010 Christoph Bumiller + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_shader_tokens.h" +#include "pipe/p_defines.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" + +#include "nvc0_context.h" +#include "nvc0_pc.h" + +#define NOUVEAU_DEBUG_BITS 1 + +static unsigned +nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) +{ + unsigned mask = inst->Dst[0].Register.WriteMask; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: + return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); + case TGSI_OPCODE_DP3: + return 0x7; + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_KIL: /* WriteMask ignored */ + return 0xf; + case TGSI_OPCODE_DST: + return mask & (c ? 0xa : 0x6); + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SCS: + return 0x1; + case TGSI_OPCODE_IF: + return 0x1; + case TGSI_OPCODE_LIT: + return 0xb; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXP: + { + const struct tgsi_instruction_texture *tex; + + assert(inst->Instruction.Texture); + tex = &inst->Texture; + + mask = 0x7; + if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && + inst->Instruction.Opcode != TGSI_OPCODE_TXD) + mask |= 0x8; /* bias, lod or proj */ + + switch (tex->Texture) { + case TGSI_TEXTURE_1D: + mask &= 0x9; + break; + case TGSI_TEXTURE_SHADOW1D: + mask &= 0x5; + break; + case TGSI_TEXTURE_2D: + mask &= 0xb; + break; + default: + break; + } + } + return mask; + case TGSI_OPCODE_XPD: + { + unsigned x = 0; + if (mask & 1) x |= 0x6; + if (mask & 2) x |= 0x5; + if (mask & 4) x |= 0x3; + return x; + } + default: + break; + } + + return mask; +} + +static void +nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id) +{ + int i, c; + + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) + for (c = 0; c < 4; ++c) + ti->input_access[i][c] = id; + + ti->indirect_inputs = TRUE; +} + +static void +nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id) +{ + int i, c; + + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) + for (c = 0; c < 4; ++c) + ti->output_access[i][c] = id; + + ti->indirect_outputs = TRUE; +} + +static INLINE unsigned +nvc0_system_value_location(unsigned sn, unsigned si) +{ + switch (sn) { + /* + case TGSI_SEMANTIC_VERTEXID: + return 0x2fc; + */ + case TGSI_SEMANTIC_PRIMID: + return 0x60; + /* + case TGSI_SEMANTIC_LAYER_INDEX: + return 0x64; + case TGSI_SEMANTIC_VIEWPORT_INDEX: + return 0x68; + */ + case TGSI_SEMANTIC_INSTANCEID: + return 0x2f8; + default: + assert(0); + return 0x000; + } +} + +static INLINE unsigned +nvc0_varying_location(unsigned sn, unsigned si) +{ + switch (sn) { + case TGSI_SEMANTIC_POSITION: + return 0x70; + case TGSI_SEMANTIC_COLOR: + return 0x280 + (si * 16); /* are these hard-wired ? */ + case TGSI_SEMANTIC_BCOLOR: + return 0x2a0 + (si * 16); + case TGSI_SEMANTIC_FOG: + return 0x270; + case TGSI_SEMANTIC_PSIZE: + return 0x6c; + /* + case TGSI_SEMANTIC_PNTC: + return 0x2e0; + */ + case TGSI_SEMANTIC_GENERIC: + assert(si < 31); + return 0x80 + (si * 16); + case TGSI_SEMANTIC_NORMAL: + return 0x360; + case TGSI_SEMANTIC_PRIMID: + return 0x40; + /* + case TGSI_SEMANTIC_CLIP_DISTANCE: + return 0x2c0 + (si * 4); + */ + default: + assert(0); + return 0x000; + } +} + +static INLINE unsigned +nvc0_interp_mode(const struct tgsi_full_declaration *decl) +{ + unsigned mode; + + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) + mode = NVC0_INTERP_FLAT; + else + if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + mode = NVC0_INTERP_PERSPECTIVE; + else + mode = NVC0_INTERP_LINEAR; + + if (decl->Declaration.Centroid) + mode |= NVC0_INTERP_CENTROID; + + return mode; +} + +static void +prog_immediate(struct nvc0_translation_info *ti, + const struct tgsi_full_immediate *imm) +{ + int c; + unsigned n = ti->immd32_nr++; + + assert(ti->immd32_nr <= ti->scan.immediate_count); + + for (c = 0; c < 4; ++c) + ti->immd32[n * 4 + c] = imm->u[c].Uint; + + ti->immd32_ty[n] = imm->Immediate.DataType; +} + +static boolean +prog_decl(struct nvc0_translation_info *ti, + const struct tgsi_full_declaration *decl) +{ + unsigned i, c; + unsigned sn = TGSI_SEMANTIC_GENERIC; + unsigned si = 0; + const unsigned first = decl->Range.First; + const unsigned last = decl->Range.Last; + + if (decl->Declaration.Semantic) { + sn = decl->Semantic.Name; + si = decl->Semantic.Index; + } + + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + for (i = first; i <= last; ++i) { + if (ti->prog->type == PIPE_SHADER_VERTEX) { + sn = TGSI_SEMANTIC_GENERIC; + si = i; + } + for (c = 0; c < 4; ++c) + ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + + if (ti->prog->type == PIPE_SHADER_FRAGMENT) + ti->interp_mode[i] = nvc0_interp_mode(decl); + } + break; + case TGSI_FILE_OUTPUT: + for (i = first; i <= last; ++i, ++si) { + if (ti->prog->type == PIPE_SHADER_FRAGMENT) { + si = i; + if (i == ti->fp_depth_output) { + ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4; + } else { + if (i > ti->fp_depth_output) + si -= 1; + for (c = 0; c < 4; ++c) + ti->output_loc[i][c] = si * 4 + c; + } + } else { + for (c = 0; c < 4; ++c) + ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; + } + } + break; + case TGSI_FILE_SYSTEM_VALUE: + ti->sysval_loc[i] = nvc0_system_value_location(sn, si); + assert(first == last); + break; + case TGSI_FILE_NULL: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_SAMPLER: + case TGSI_FILE_ADDRESS: + case TGSI_FILE_IMMEDIATE: + case TGSI_FILE_PREDICATE: + break; + default: + NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File); + return FALSE; + } + return TRUE; +} + +static void +prog_inst(struct nvc0_translation_info *ti, + const struct tgsi_full_instruction *inst, int id) +{ + const struct tgsi_dst_register *dst; + const struct tgsi_src_register *src; + int s, c, k; + unsigned mask; + + if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { + ti->subr[ti->num_subrs].first_insn = id - 1; + ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */ + ++ti->num_subrs; + } + + if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { + dst = &inst->Dst[0].Register; + + for (c = 0; c < 4; ++c) { + if (dst->Indirect) + nvc0_indirect_outputs(ti, id); + if (!(dst->WriteMask & (1 << c))) + continue; + ti->output_access[dst->Index][c] = id; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && + inst->Src[0].Register.File == TGSI_FILE_INPUT && + dst->Index == ti->edgeflag_out) + ti->prog->vp.edgeflag = inst->Src[0].Register.Index; + } else + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { + if (inst->Dst[0].Register.Indirect) + ti->require_stores = TRUE; + } + + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { + src = &inst->Src[s].Register; + if (src->File == TGSI_FILE_TEMPORARY) + if (inst->Src[s].Register.Indirect) + ti->require_stores = TRUE; + if (src->File != TGSI_FILE_INPUT) + continue; + mask = nvc0_tgsi_src_mask(inst, s); + + if (inst->Src[s].Register.Indirect) + nvc0_indirect_inputs(ti, id); + + for (c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); + if (k <= TGSI_SWIZZLE_W) + ti->input_access[src->Index][k] = id; + } + } +} + +/* Probably should introduce something like struct tgsi_function_declaration + * instead of trying to guess inputs/outputs. + */ +static void +prog_subroutine_inst(struct nvc0_subroutine *subr, + const struct tgsi_full_instruction *inst) +{ + const struct tgsi_dst_register *dst; + const struct tgsi_src_register *src; + int s, c, k; + unsigned mask; + + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { + src = &inst->Src[s].Register; + if (src->File != TGSI_FILE_TEMPORARY) + continue; + mask = nvc0_tgsi_src_mask(inst, s); + + for (c = 0; c < 4; ++c) { + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); + + if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W) + if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) + subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32); + } + } + + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { + dst = &inst->Dst[0].Register; + + for (c = 0; c < 4; ++c) + if (dst->WriteMask & (1 << c)) + subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32); + } +} + +static int +nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +{ + int i, c; + unsigned a; + + vp->hdr[0] = 0x20461; + vp->hdr[4] = 0xff000; + + for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { + for (c = 0; c < 4; ++c, ++a) + if (ti->input_access[i][c]) + vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */ + } + + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { + a = (ti->output_loc[i][0] - 0x40) / 4; + for (c = 0; c < 4; ++c, ++a) { + if (!ti->output_access[i][c]) + continue; + vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */ + } + } + + return 0; +} + +static int +nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) +{ + int i, c; + unsigned a, m; + + fp->hdr[0] = 0x21462; + fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ + + if (ti->scan.uses_kill) + fp->hdr[0] |= 0x8000; + if (ti->scan.writes_z) { + fp->hdr[19] |= 0x2; + if (ti->scan.num_outputs > 2) + fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ + } else { + if (ti->scan.num_outputs > 1) + fp->hdr[0] |= 0x8000; /* FP_MULTIPLE_COLOR_OUTPUTS */ + } + + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { + m = ti->interp_mode[i]; + for (c = 0; c < 4; ++c) { + if (!ti->input_access[i][c]) + continue; + a = ti->input_loc[i][c] / 2; + if ((a & ~7) == 0x70/2) + fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */ + else + fp->hdr[4 + a / 32] |= m << (a % 32); + } + } + + for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { + if (i != ti->fp_depth_output) + fp->hdr[18] |= 0xf << ti->output_loc[i][0]; + } + + return 0; +} + +static boolean +nvc0_prog_scan(struct nvc0_translation_info *ti) +{ + struct nvc0_program *prog = ti->prog; + struct tgsi_parse_context parse; + int ret; + unsigned i; + +#ifdef NOUVEAU_DEBUG_BITS + tgsi_dump(prog->pipe.tokens, 0); +#endif + + tgsi_scan_shader(prog->pipe.tokens, &ti->scan); + + if (ti->prog->type == PIPE_SHADER_FRAGMENT) { + ti->fp_depth_output = 255; + for (i = 0; i < ti->scan.num_outputs; ++i) + if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION) + ti->fp_depth_output = i; + } + + ti->subr = + CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); + + ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); + ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); + + ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0])); + + tgsi_parse_init(&parse, prog->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + prog_immediate(ti, &parse.FullToken.FullImmediate); + break; + case TGSI_TOKEN_TYPE_DECLARATION: + prog_decl(ti, &parse.FullToken.FullDeclaration); + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + ti->insns[ti->num_insns] = parse.FullToken.FullInstruction; + prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns); + break; + default: + break; + } + } + + for (i = 0; i < ti->num_subrs; ++i) { + unsigned pc = ti->subr[i].id; + while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) + prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); + } + + switch (prog->type) { + case PIPE_SHADER_VERTEX: + ti->input_file = NV_FILE_MEM_A; + ti->output_file = NV_FILE_MEM_V; + ret = nvc0_vp_gen_header(prog, ti); + break; + /* + case PIPE_SHADER_TESSELLATION_CONTROL: + ret = nvc0_tcp_gen_header(ti); + break; + case PIPE_SHADER_TESSELLATION_EVALUATION: + ret = nvc0_tep_gen_header(ti); + break; + case PIPE_SHADER_GEOMETRY: + ret = nvc0_gp_gen_header(ti); + break; + */ + case PIPE_SHADER_FRAGMENT: + ti->input_file = NV_FILE_MEM_V; + ti->output_file = NV_FILE_GPR; + ret = nvc0_fp_gen_header(prog, ti); + break; + default: + assert(!"unsupported program type"); + ret = -1; + break; + } + + assert(!ret); + return ret; +} + +boolean +nvc0_program_translate(struct nvc0_program *prog) +{ + struct nvc0_translation_info *ti; + int ret; + + ti = CALLOC_STRUCT(nvc0_translation_info); + ti->prog = prog; + + ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; + + ret = nvc0_prog_scan(ti); + if (ret) { + NOUVEAU_ERR("unsupported shader program\n"); + goto out; + } + + ret = nvc0_generate_code(ti); + if (ret) + NOUVEAU_ERR("shader translation failed\n"); + + { + unsigned i; + for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i) + debug_printf("HDR[%02lx] = 0x%08x\n", + i * sizeof(prog->hdr[0]), prog->hdr[i]); + } + +out: + if (ti->immd32) + FREE(ti->immd32); + if (ti->immd32_ty) + FREE(ti->immd32_ty); + if (ti->insns) + FREE(ti->insns); + if (ti->subr) + FREE(ti->subr); + FREE(ti); + return ret ? FALSE : TRUE; +} + +void +nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) +{ + if (prog->res) + nouveau_resource_free(&prog->res); + + if (prog->code) + FREE(prog->code); + if (prog->relocs) + FREE(prog->relocs); + + prog->translated = FALSE; +} -- cgit v1.2.3 From 0d1a2bd0fb356fdb74a9aed1c34276dc9e97b4c6 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 9 Dec 2010 14:44:21 +0100 Subject: nvc0: generate shader header for geometry programs --- src/gallium/drivers/nvc0/nvc0_program.c | 63 +++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_program.c') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 8cc161ae8e..aeb9213037 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -30,8 +30,6 @@ #include "nvc0_context.h" #include "nvc0_pc.h" -#define NOUVEAU_DEBUG_BITS 1 - static unsigned nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) { @@ -385,13 +383,10 @@ prog_subroutine_inst(struct nvc0_subroutine *subr, } static int -nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) { int i, c; unsigned a; - - vp->hdr[0] = 0x20461; - vp->hdr[4] = 0xff000; for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { for (c = 0; c < 4; ++c, ++a) @@ -411,6 +406,60 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) return 0; } +static int +nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +{ + vp->hdr[0] = 0x20461; + vp->hdr[4] = 0xff000; + + return nvc0_vp_gp_gen_header(vp, ti); +} + +static int +nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti) +{ + unsigned max_output_verts, output_prim; + unsigned i; + + gp->hdr[0] = 0x00021061; + gp->hdr[2] = 0x01000000; + + for (i = 0; i < ti->scan.num_properties; ++i) { + switch (ti->scan.properties[i].name) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + output_prim = ti->scan.properties[i].data[0]; + break; + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + max_output_verts = ti->scan.properties[i].data[0]; + break; + default: + break; + } + } + + switch (output_prim) { + case PIPE_PRIM_POINTS: + gp->hdr[3] = 0x01000000; + gp->hdr[0] |= 0xf0000000; + break; + case PIPE_PRIM_LINE_STRIP: + gp->hdr[3] = 0x06000000; + gp->hdr[0] |= 0x10000000; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + gp->hdr[3] = 0x07000000; + gp->hdr[0] |= 0x10000000; + break; + default: + assert(0); + break; + } + + gp->hdr[4] = max_output_verts & 0x1ff; + + return nvc0_vp_gp_gen_header(gp, ti); +} + static int nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) { @@ -460,7 +509,7 @@ nvc0_prog_scan(struct nvc0_translation_info *ti) int ret; unsigned i; -#ifdef NOUVEAU_DEBUG_BITS +#ifdef NOUVEAU_DEBUG tgsi_dump(prog->pipe.tokens, 0); #endif -- cgit v1.2.3 From dea9d604005e9aaed2bd6807f1913ac668479443 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 10 Dec 2010 20:16:10 +0100 Subject: nvc0: fix FACE state and and handle FACE sysval/varying offset --- src/gallium/drivers/nvc0/nvc0_program.c | 6 ++++++ src/gallium/drivers/nvc0/nvc0_state.c | 34 +++++++++++++----------------- src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 2 ++ 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_program.c') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index aeb9213037..7aa6ef02bc 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -23,6 +23,8 @@ #include "pipe/p_shader_tokens.h" #include "pipe/p_defines.h" +#define NOUVEAU_DEBUG + #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_dump.h" @@ -147,6 +149,8 @@ nvc0_system_value_location(unsigned sn, unsigned si) */ case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; + case TGSI_SEMANTIC_FACE: + return 0x3fc; default: assert(0); return 0x000; @@ -178,6 +182,8 @@ nvc0_varying_location(unsigned sn, unsigned si) return 0x360; case TGSI_SEMANTIC_PRIMID: return 0x40; + case TGSI_SEMANTIC_FACE: + return 0x3fc; /* case TGSI_SEMANTIC_CLIP_DISTANCE: return 0x2c0 + (si * 4); diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index afba7ce6a0..c7a8c4b747 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -210,25 +210,21 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe, SB_DATA (so, nvgl_polygon_mode(cso->fill_back)); SB_OUT_3D (so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth); - if (cso->cull_face != PIPE_FACE_NONE) { - SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); - SB_DATA (so, 1); - SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW : - NVC0_3D_FRONT_FACE_CW); - switch (cso->cull_face) { - case PIPE_FACE_FRONT_AND_BACK: - SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK); - break; - case PIPE_FACE_FRONT: - SB_DATA(so, NVC0_3D_CULL_FACE_FRONT); - break; - case PIPE_FACE_BACK: - default: - SB_DATA(so, NVC0_3D_CULL_FACE_BACK); - break; - } - } else { - SB_OUT_3D(so, CULL_FACE_ENABLE, 0); + SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3); + SB_DATA (so, cso->cull_face != PIPE_FACE_NONE); + SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW : + NVC0_3D_FRONT_FACE_CW); + switch (cso->cull_face) { + case PIPE_FACE_FRONT_AND_BACK: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK); + break; + case PIPE_FACE_FRONT: + SB_DATA(so, NVC0_3D_CULL_FACE_FRONT); + break; + case PIPE_FACE_BACK: + default: + SB_DATA(so, NVC0_3D_CULL_FACE_BACK); + break; } SB_OUT_3D (so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 26f9e735fb..8a20983a7e 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -963,8 +963,10 @@ bld_interp(struct bld_context *bld, unsigned mode, struct nv_value *val) if (val->reg.address == 0x3fc) { /* gl_FrontFacing: 0/~0 to -1.0/+1.0 */ val = bld_insn_1(bld, NV_OP_LINTERP, val); + val->insn->flat = 1; val = bld_insn_2(bld, NV_OP_SHL, val, bld_imm_u32(bld, 31)); val = bld_insn_2(bld, NV_OP_XOR, val, bld_imm_f32(bld, -1.0f)); + return val; } else if (mode == NVC0_INTERP_PERSPECTIVE) { val = bld_insn_2(bld, NV_OP_PINTERP, val, bld->frag_coord[3]); -- cgit v1.2.3 From 5138ac033ad3708e2b82f2beebc887f65a77309e Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sat, 11 Dec 2010 16:23:43 +0100 Subject: nvc0: support user clip planes --- src/gallium/drivers/nvc0/nvc0_program.c | 7 ++++ src/gallium/drivers/nvc0/nvc0_program.h | 2 ++ src/gallium/drivers/nvc0/nvc0_screen.c | 12 ++++++- src/gallium/drivers/nvc0/nvc0_shader_state.c | 8 +++++ src/gallium/drivers/nvc0/nvc0_state.c | 5 +++ src/gallium/drivers/nvc0/nvc0_state_validate.c | 17 +++++++++ src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c | 48 ++++++++++++++++++++++++-- 7 files changed, 96 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_program.c') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 7aa6ef02bc..e159b7161d 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -418,6 +418,8 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) vp->hdr[0] = 0x20461; vp->hdr[4] = 0xff000; + vp->hdr[18] = (1 << vp->vp.num_ucps) - 1; + return nvc0_vp_gp_gen_header(vp, ti); } @@ -605,6 +607,9 @@ nvc0_program_translate(struct nvc0_program *prog) ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; + if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps) + ti->append_ucp = TRUE; + ret = nvc0_prog_scan(ti); if (ret) { NOUVEAU_ERR("unsupported shader program\n"); @@ -646,5 +651,7 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) if (prog->relocs) FREE(prog->relocs); + memset(prog->hdr, 0, sizeof(prog->hdr)); + prog->translated = FALSE; } diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index 42d9be3830..1271303144 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -27,6 +27,7 @@ struct nvc0_program { struct { uint8_t edgeflag; + uint8_t num_ucps; } vp; void *relocs; @@ -71,6 +72,7 @@ struct nvc0_translation_info { ubyte edgeflag_out; struct nvc0_subroutine *subr; unsigned num_subrs; + boolean append_ucp; struct tgsi_shader_info scan; }; diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index fb100e5233..f0d06be9ce 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -457,11 +457,21 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nouveau_resource_init(&screen->text_heap, 0, 1 << 20); - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 5 << 16, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, &screen->uniforms); if (ret) goto fail; + /* auxiliary constants (6 user clip planes, base instance id) */ + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, 256); + OUT_RELOCh(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, screen->uniforms, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + for (i = 0; i < 5; ++i) { + BEGIN_RING(chan, RING_3D(CB_BIND(i)), 1); + OUT_RING (chan, (15 << 4) | 1); + } + screen->tls_size = 4 * 4 * 32 * 128 * 4; ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, screen->tls_size, &screen->tls); diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index a16fa6cf5b..a6595c5610 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -67,6 +67,14 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; struct nvc0_program *vp = nvc0->vertprog; + if (nvc0->clip.nr > vp->vp.num_ucps) { + assert(nvc0->clip.nr <= 6); + vp->vp.num_ucps = 6; + + if (vp->translated) + nvc0_program_destroy(nvc0, vp); + } + if (!nvc0_program_validate(nvc0, vp)) return; diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index c7a8c4b747..8d293233b1 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -699,8 +699,13 @@ nvc0_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *clip) { struct nvc0_context *nvc0 = nvc0_context(pipe); + const unsigned size = clip->nr * sizeof(clip->ucp[0]); + + memcpy(&nvc0->clip.ucp[0][0], &clip->ucp[0][0], size); + nvc0->clip.nr = clip->nr; nvc0->clip.depth_clamp = clip->depth_clamp; + nvc0->dirty |= NVC0_NEW_CLIP; } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 2b38ebc8ba..ded461b9c5 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -193,6 +193,23 @@ nvc0_validate_clip(struct nvc0_context *nvc0) BEGIN_RING(chan, RING_3D(VIEW_VOLUME_CLIP_CTRL), 1); OUT_RING (chan, clip); + + if (nvc0->clip.nr) { + struct nouveau_bo *bo = nvc0->screen->uniforms; + + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + OUT_RING (chan, 256); + OUT_RELOCh(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, bo, 5 << 16, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING_1I(chan, RING_3D(CB_POS), nvc0->clip.nr * 4 + 1); + OUT_RING (chan, 0); + OUT_RINGp (chan, &nvc0->clip.ucp[0][0], nvc0->clip.nr * 4); + + BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1); + OUT_RING (chan, (1 << nvc0->clip.nr) - 1); + } else { + INLIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 0); + } } static void diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 8a20983a7e..d0c8275489 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -111,6 +111,7 @@ struct bld_context { struct bld_register ovs[BLD_MAX_OUTPS][4]; /* TGSI_FILE_OUTPUT, FP only */ uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8]; + int hpos_index; struct nv_value *zero; struct nv_value *frag_coord[4]; @@ -903,6 +904,38 @@ bld_is_output_written(struct bld_context *bld, int i, int c) return bld->outputs_written[i / 8] & (1 << ((i * 4 + c) % 32)); } +static void +bld_append_vp_ucp(struct bld_context *bld) +{ + struct nv_value *res[6]; + struct nv_value *ucp, *vtx, *out; + struct nv_instruction *insn; + int i, c; + + assert(bld->ti->prog->vp.num_ucps <= 6); + + for (c = 0; c < 4; ++c) { + vtx = bld_fetch_global(bld, &bld->ovs[bld->hpos_index][c]); + + for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { + ucp = new_value(bld->pc, NV_FILE_MEM_C(15), 4); + ucp->reg.address = i * 16 + c * 4; + + if (c == 0) + res[i] = bld_insn_2(bld, NV_OP_MUL_F32, vtx, ucp); + else + res[i] = bld_insn_3(bld, NV_OP_MAD_F32, vtx, ucp, res[i]); + } + } + + for (i = 0; i < bld->ti->prog->vp.num_ucps; ++i) { + (out = new_value(bld->pc, NV_FILE_MEM_V, 4))->reg.address = 0x2c0 + i * 4; + (insn = new_instruction(bld->pc, NV_OP_EXPORT))->fixed = 1; + nv_reference(bld->pc, insn, 0, out); + nv_reference(bld->pc, insn, 1, res[i]); + } +} + static void bld_export_fp_outputs(struct bld_context *bld) { @@ -1755,11 +1788,13 @@ bld_instruction(struct bld_context *bld, /* VP outputs are exported in-place as scalars, optimization later */ if (bld->pc->is_fragprog) bld_export_fp_outputs(bld); - break; + if (bld->ti->append_ucp) + bld_append_vp_ucp(bld); + return; default: NOUVEAU_ERR("unhandled opcode %u\n", insn->Instruction.Opcode); abort(); - break; + return; } if (insn->Dst[0].Register.File == TGSI_FILE_OUTPUT && @@ -1767,6 +1802,15 @@ bld_instruction(struct bld_context *bld, struct nv_instruction *mi = NULL; uint size; + if (bld->ti->append_ucp) { + if (bld->ti->output_loc[insn->Dst[0].Register.Index][0] == 0x70) { + bld->hpos_index = insn->Dst[0].Register.Index; + for (c = 0; c < 4; ++c) + if (mask & (1 << c)) + STORE_OUTP(insn->Dst[0].Register.Index, c, dst0[c]); + } + } + for (c = 0; c < 4; ++c) if ((mask & (1 << c)) && ((dst0[c]->reg.file == NV_FILE_IMM) || -- cgit v1.2.3 From 92caa65c24d1af99e02715dae1f1081278bf9e64 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Sun, 2 Jan 2011 13:25:06 +0100 Subject: nvc0: demagic GP invocation count bitfield --- src/gallium/drivers/nvc0/nvc0_program.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/nvc0/nvc0_program.c') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index e159b7161d..b5e02f1ba1 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -134,6 +134,7 @@ nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id) static INLINE unsigned nvc0_system_value_location(unsigned sn, unsigned si) { + /* NOTE: locations 0xfxx indicate special regs */ switch (sn) { /* case TGSI_SEMANTIC_VERTEXID: @@ -151,6 +152,10 @@ nvc0_system_value_location(unsigned sn, unsigned si) return 0x2f8; case TGSI_SEMANTIC_FACE: return 0x3fc; + /* + case TGSI_SEMANTIC_INVOCATIONID: + return 0xf11; + */ default: assert(0); return 0x000; @@ -426,11 +431,11 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) static int nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti) { + unsigned invocations = 1; unsigned max_output_verts, output_prim; unsigned i; - gp->hdr[0] = 0x00021061; - gp->hdr[2] = 0x01000000; + gp->hdr[0] = 0x21061; for (i = 0; i < ti->scan.num_properties; ++i) { switch (ti->scan.properties[i].name) { @@ -439,12 +444,21 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti) break; case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: max_output_verts = ti->scan.properties[i].data[0]; + assert(max_output_verts < 512); break; + /* + case TGSI_PROPERTY_GS_INVOCATIONS: + invocations = ti->scan.properties[i].data[0]; + assert(invocations <= 32); + break; + */ default: break; } } + gp->hdr[2] = MIN2(invocations, 32) << 24; + switch (output_prim) { case PIPE_PRIM_POINTS: gp->hdr[3] = 0x01000000; -- cgit v1.2.3 From 2f08d872b212fc294d354ec34eb9184ae2c2bcc3 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 4 Jan 2011 01:52:12 +0100 Subject: nvc0: set the correct FP header bit for multiple colour outputs --- src/gallium/drivers/nvc0/nvc0_program.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvc0/nvc0_program.c') diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index b5e02f1ba1..3e7fc4d350 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -499,7 +499,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ } else { if (ti->scan.num_outputs > 1) - fp->hdr[0] |= 0x8000; /* FP_MULTIPLE_COLOR_OUTPUTS */ + fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ } for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { -- cgit v1.2.3