From d29f55546dec74ca77dce3a3bf581c251be1d397 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Dec 2009 12:39:42 +0100 Subject: nv50: make edgeflags work It doesn't seem to be possible to set the egdeflag in the vertex shader, so we need to fallback to pushing vertices through the FIFO and use method 0x15e4 if they are used. This only works if VP does MOV OUT[X] IN[Y] where X is the edgeflag output, and Y is saved so we can tell the correct input later. The VP still writes the useless values to wasted outputs as punishment. --- src/gallium/drivers/nv50/nv50_program.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 679c28ce4b..ce3fa5fc88 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -159,6 +159,8 @@ struct nv50_pc { unsigned insn_nr; boolean allow32; + + uint8_t edgeflag_out; }; static INLINE struct nv50_reg * @@ -2554,10 +2556,16 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) - reg = pc->temp; + reg = pc->temp; else - if (dst->File == TGSI_FILE_OUTPUT) - reg = pc->result; + if (dst->File == TGSI_FILE_OUTPUT) { + reg = pc->result; + + if (insn->Instruction.Opcode == TGSI_OPCODE_MOV && + dst->Index == pc->edgeflag_out && + insn->Src[0].Register.File == TGSI_FILE_INPUT) + pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index; + } if (reg) { for (c = 0; c < 4; c++) { @@ -2856,6 +2864,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (p->cfg.io_nr > first) p->cfg.io_nr = first; break; + case TGSI_SEMANTIC_EDGEFLAG: + pc->edgeflag_out = first; + break; /* case TGSI_SEMANTIC_CLIP_DISTANCE: p->cfg.clpd = MIN2(p->cfg.clpd, first); @@ -3104,6 +3115,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; -- cgit v1.2.3 From b7b2226a75f1955da9bd4a28754b7eaebb01fed5 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 24 Dec 2009 13:35:55 +0100 Subject: nv50: support TGSI_OPCODE_CONT --- src/gallium/drivers/nv50/nv50_program.c | 5 +++++ src/gallium/drivers/nv50/nv50_screen.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index ce3fa5fc88..a101ac095c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2241,6 +2241,11 @@ nv50_program_tx_insn(struct nv50_pc *pc, set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ } break; + case TGSI_OPCODE_CONT: + assert(pc->loop_lvl > 0); + emit_branch(pc, -1, 0)->param.index = + pc->loop_pos[pc->loop_lvl - 1]; + break; case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 2435f65ed2..5a1efd3998 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -128,7 +128,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; + return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case NOUVEAU_CAP_HW_VTXBUF: -- cgit v1.2.3 From c84cc09d41a83caa96eca84c73284024d8d63024 Mon Sep 17 00:00:00 2001 From: Marcin Koƛcielnicki Date: Mon, 28 Dec 2009 16:23:40 +0000 Subject: nv50: Dehexify and bring up to date with new method defines. Signed-off-by: Francisco Jerez --- src/gallium/drivers/nv50/nv50_program.c | 4 +-- src/gallium/drivers/nv50/nv50_query.c | 4 +-- src/gallium/drivers/nv50/nv50_screen.c | 44 ++++++++++++-------------- src/gallium/drivers/nv50/nv50_state.c | 13 ++++---- src/gallium/drivers/nv50/nv50_state_validate.c | 20 ++++++------ src/gallium/drivers/nv50/nv50_surface.c | 6 ++-- src/gallium/drivers/nv50/nv50_transfer.c | 12 +++---- src/gallium/drivers/nv50/nv50_vbo.c | 38 +++++++++++----------- 8 files changed, 69 insertions(+), 72 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index a101ac095c..b9910b430a 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -3498,7 +3498,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); + so_method(so, tesla, NV50TCL_FP_CONTROL, 1); so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); so_data (so, p->cfg.regs[3]); @@ -3670,7 +3670,7 @@ nv50_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); so_data (so, reg[4]); - so_method(so, tesla, 0x1540, 4); + so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4); so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 268c9823f7..5d9e18218a 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - BEGIN_RING(chan, tesla, 0x1530, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1); OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, 0x1514, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); OUT_RING (chan, 1); q->ready = FALSE; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 5a1efd3998..15e4b6e5ca 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -231,8 +231,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) break; case 0x80: case 0x90: - /* this stupid name should be corrected. */ - tesla_class = NV54TCL; + tesla_class = NV84TCL; break; case 0xa0: switch (chipset) { @@ -242,7 +241,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) tesla_class = NVA0TCL; break; default: - tesla_class = 0x8597; + tesla_class = NVA8TCL; break; } break; @@ -287,7 +286,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, chan->vram->handle); so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); so_data (so, NV50_2D_OPERATION_SRCCOPY); - so_method(so, screen->eng2d, 0x0290, 1); + so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1); so_data (so, 0); so_method(so, screen->eng2d, 0x0888, 1); so_data (so, 1); @@ -297,34 +296,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Static tesla init */ so = so_new(256, 20); - so_method(so, screen->tesla, 0x1558, 1); - so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); + so_data (so, NV50TCL_COND_MODE_ALWAYS); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), - NV50TCL_DMA_UNK0__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11); + for (i = 0; i < 11; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), - NV50TCL_DMA_UNK1__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0), + NV50TCL_DMA_COLOR__SIZE); + for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, 0x121c, 1); + so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1); so_data (so, 1); /* activate all 32 lanes (threads) in a warp */ - so_method(so, screen->tesla, 0x19a0, 1); + so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1); so_data (so, 0x2); so_method(so, screen->tesla, 0x1400, 1); so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, 0x13b4, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); so_data (so, 0x54); - so_method(so, screen->tesla, 0x13bc, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ - so_method(so, screen->tesla, 0x13ac, 1); + so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, 8); @@ -360,7 +358,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) // B = buffer ID (maybe more than 1 byte) // N = CB index used in shader instruction // P = program type (0 = VP, 2 = GP, 3 = FP) - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x000BBNP1); */ @@ -424,24 +422,24 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { - so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2); + so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_data (so, 0x000000ff); so_data (so, 0xffffffff); } - so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2); so_data (so, fui(0.0)); so_data (so, fui(1.0)); /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ - so_method(so, screen->tesla, 0x1234, 1); + so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1); so_data (so, 1); /* activate first scissor rectangle */ - so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1); + so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1); so_data (so, 1); - so_method(so, screen->tesla, 0x15e4, 1); + so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); so_data (so, 1); /* default edgeflag to TRUE */ so_emit(chan, so); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 88aef52d08..18a2b819d8 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -295,7 +295,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : NV50TCL_SHADE_MODEL_SMOOTH); - so_method(so, tesla, 0x1684, 1); + so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1); so_data (so, cso->flatshade_first ? 0 : 1); so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); @@ -439,9 +439,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - /* XXX: keep hex values until header is updated (names reversed) */ if (cso->stencil[0].enabled) { - so_method(so, tesla, 0x1380, 8); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); @@ -451,23 +450,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, cso->stencil[0].writemask); so_data (so, cso->stencil[0].valuemask); } else { - so_method(so, tesla, 0x1380, 1); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, tesla, 0x1594, 5); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, tesla, 0x0f54, 3); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); so_data (so, cso->stencil[1].ref_value); so_data (so, cso->stencil[1].writemask); so_data (so, cso->stencil[1].valuemask); } else { - so_method(so, tesla, 0x1594, 1); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 871e8097b6..c8bdf9dc27 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. * Ambiguous assignment results in no rendering (no DATA_ERROR). */ - so_method(so, tesla, 0x121c, 1); + so_method(so, tesla, NV50TCL_RT_CONTROL, 1); so_data (so, fb->nr_cbufs | (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); @@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1224, 1); + so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1); so_data (so, 1); } @@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3); so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); } else { - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 0); } - so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); so_data (so, w << 16); so_data (so, h << 16); /* set window lower left corner */ - so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2); + so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2); so_data (so, 0); so_data (so, 0); /* set screen scissor rectangle */ @@ -325,7 +325,7 @@ nv50_state_validate(struct nv50_context *nv50) nv50->state.scissor_enabled = rast->scissor; so = so_new(3, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); if (nv50->state.scissor_enabled) { so_data(so, (s->maxx << 16) | s->minx); so_data(so, (s->maxy << 16) | s->miny); @@ -355,11 +355,11 @@ scissor_uptodate: so = so_new(14, 0); if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); so_data (so, fui(nv50->viewport.translate[1])); so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); @@ -440,7 +440,7 @@ void nv50_so_init_sifc(struct nv50_context *nv50, so_data (so, 1); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2); + so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); so_data (so, 0); so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 79655fc08d..6378132979 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -176,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, if (ret) return; - BEGIN_RING(chan, eng2d, 0x0580, 3); - OUT_RING (chan, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3); + OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); OUT_RING (chan, format); OUT_RING (chan, value); - BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4); OUT_RING (chan, destx); OUT_RING (chan, desty); OUT_RING (chan, width); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 4d9afa6fed..a2f1db2914 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -47,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); OUT_RING (chan, src_pitch); src_offset += (sy * src_pitch) + (sx * cpp); } else { @@ -66,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); OUT_RING (chan, dst_pitch); dst_offset += (dy * dst_pitch) + (dx * cpp); } else { @@ -89,7 +89,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RELOCh(chan, src_bo, src_offset, src_reloc); OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); + NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); if (src_bo->tile_flags) { @@ -107,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, dst_offset += (line_count * dst_pitch); } BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); + NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); OUT_RING (chan, width * cpp); OUT_RING (chan, line_count); OUT_RING (chan, 0x00000101); @@ -291,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50, /* NV50_2D_OPERATION_SRCCOPY assumed already set */ - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); OUT_RING (chan, 0); OUT_RING (chan, src_format); BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); @@ -334,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50, src += src_pitch; } - BEGIN_RING(chan, tesla, 0x1440, 1); + BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 39324e30f6..602adfc50d 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -99,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c) { static const uint32_t hw_values[] = { 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16, 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 }; + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 }; /* we'd also have R11G11B10 and R10G10B10A2 */ @@ -198,7 +198,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, return nv50_push_elements_u08(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -208,7 +208,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -231,7 +231,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, return nv50_push_elements_u16(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -241,7 +241,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -266,7 +266,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, 0x400015e8, nr); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -373,7 +373,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, break; case 1: if (attrib == nv50->vertprog->cfg.edgeflag_in) { - so_method(so, tesla, 0x15e4, 1); + so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); so_data (so, v[0] ? 1 : 0); } so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); @@ -452,7 +452,7 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); /* vertex array limits */ - so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2); + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_reloc (vtxbuf, bo, vb->buffer->size - 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); -- cgit v1.2.3 From 3019afdbd862b86eb26d222d3a1d743faf3693be Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Mon, 28 Dec 2009 23:17:26 +0100 Subject: nv50: don't negate immediates in set_immd This negation would only be triggered in situations where it's incorrect. The caller of set_immd should negate the immediate value in the instruction itself if desired, and will also know if it's a float or an int. ADD TEMP[0], CONST[0], -IMMD[0] would load the immediate into extra TEMP, negated, and set the negate flag in add as well - double negation. --- src/gallium/drivers/nv50/nv50_program.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b9910b430a..8895a920fe 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -499,15 +499,6 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { - union { - float f; - uint32_t ui; - } u; - u.ui = pc->immd_buf[imm->hw]; - - u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f; - u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f; - set_long(pc, e); /* XXX: can't be predicated - bits overlap; cases where both * are required should be avoided by using pc->allow32 */ @@ -515,8 +506,8 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) set_pred_wr(pc, 0, 0, e); e->inst[1] |= 0x00000002 | 0x00000001; - e->inst[0] |= (u.ui & 0x3f) << 16; - e->inst[1] |= (u.ui >> 6) << 2; + e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16; + e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2; } static INLINE void @@ -888,7 +879,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_dst(pc, dst, e); set_src_0(pc, src0, e); if (src1->type == P_IMMD && !is_long(e)) { - if (src0->mod & NV50_MOD_NEG) + if (src0->mod ^ src1->mod) e->inst[0] |= 0x00008000; set_immd(pc, src1, e); } else { @@ -999,6 +990,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, op != TGSI_OPCODE_XOR) assert(!"invalid bit op"); + assert(!(src0->mod | src1->mod)); + if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) { set_immd(pc, src1, e); if (op == TGSI_OPCODE_OR) -- cgit v1.2.3 From 4d2551beb7b3f5ae9f47ee97e24556c5bcb905c8 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Tue, 29 Dec 2009 00:02:45 +0100 Subject: nv50: neg and abs modifiers for flops Also fixes RSQ of negative sources. --- src/gallium/drivers/nv50/nv50_program.c | 83 ++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 28 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 8895a920fe..739621a5b3 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -656,6 +656,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } +/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */ static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -1043,6 +1044,14 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, src2->mod ^= NV50_MOD_NEG; } +#define NV50_FLOP_RCP 0 +#define NV50_FLOP_RSQ 2 +#define NV50_FLOP_LG2 3 +#define NV50_FLOP_SIN 4 +#define NV50_FLOP_COS 5 +#define NV50_FLOP_EX2 6 + +/* rcp, rsqrt, lg2 support neg and abs */ static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) @@ -1050,17 +1059,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x90000000; - if (sub) { + if (sub || src->mod) { set_long(pc, e); e->inst[1] |= (sub << 29); } set_dst(pc, dst, e); + set_src_0_restricted(pc, src, e); - if (sub == 0 || sub == 2) - set_src_0_restricted(pc, src, e); - else - set_src_0(pc, src, e); + assert(!src->mod || sub < 4); + + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; emit(pc, e); } @@ -1077,6 +1089,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29) | 0x00004000; + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1092,6 +1109,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29); + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1226,10 +1248,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_reg *temp = alloc_temp(pc, NULL); - emit_flop(pc, 3, temp, v); + emit_flop(pc, NV50_FLOP_LG2, temp, v); emit_mul(pc, temp, temp, e); emit_preex2(pc, temp, temp); - emit_flop(pc, 6, dst, temp); + emit_flop(pc, NV50_FLOP_EX2, dst, temp); free_temp(pc, temp); } @@ -1453,7 +1475,7 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], if (arg == 4) /* there is no textureProj(samplerCubeShadow) */ emit_mov(pc, t[3], src[3]); - emit_flop(pc, 0, t[2], t[2]); + emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]); emit_mul(pc, t[0], src[0], t[2]); emit_mul(pc, t[1], src[1], t[2]); @@ -1471,7 +1493,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], t[3]->rhw = src[3]->rhw; emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); - emit_flop(pc, 0, t[3], t[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]); for (c = 0; c < dim; ++c) { t[c]->rhw = src[c]->rhw; @@ -1485,7 +1507,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], /* XXX: for some reason the blob sometimes uses MAD * (mad f32 $rX $rY $rZ neg $r63) */ - emit_flop(pc, 0, t[3], src[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]); for (c = 0; c < dim; ++c) emit_mul(pc, t[c], src[c], t[3]); if (arg != dim) /* depth reference value */ @@ -1777,20 +1799,24 @@ static boolean negate_supported(const struct tgsi_full_instruction *insn, int i) { switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_COS: case TGSI_OPCODE_DDX: case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: - case TGSI_OPCODE_MUL: + case TGSI_OPCODE_EX2: case TGSI_OPCODE_KIL: - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_MAD: - return TRUE; + case TGSI_OPCODE_MUL: case TGSI_OPCODE_POW: - if (i == 1) - return TRUE; - return FALSE; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */ + case TGSI_OPCODE_SCS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SUB: + return TRUE; default: return FALSE; } @@ -2242,14 +2268,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 5, dst[3], temp); + emit_flop(pc, NV50_FLOP_COS, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) temp = brdc = temp_temp(pc); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, brdc, temp); + emit_flop(pc, NV50_FLOP_COS, brdc, temp); break; case TGSI_OPCODE_DDX: for (c = 0; c < 4; c++) { @@ -2323,7 +2349,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, brdc, temp); + emit_flop(pc, NV50_FLOP_EX2, brdc, temp); break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -2363,7 +2389,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - emit_flop(pc, 3, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]); break; case TGSI_OPCODE_LRP: temp = temp_temp(pc); @@ -2413,7 +2439,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - emit_flop(pc, 0, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: if (pc->p->type == PIPE_SHADER_FRAGMENT) @@ -2421,16 +2447,17 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: - emit_flop(pc, 2, brdc, src[0][0]); + src[0][0]->mod |= NV50_MOD_ABS; + emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); break; case TGSI_OPCODE_SCS: temp = temp_temp(pc); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) - emit_flop(pc, 5, dst[0], temp); + emit_flop(pc, NV50_FLOP_COS, dst[0], temp); if (mask & (1 << 1)) - emit_flop(pc, 4, dst[1], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[1], temp); if (mask & (1 << 2)) emit_mov_immdval(pc, dst[2], 0.0); if (mask & (1 << 3)) @@ -2439,14 +2466,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_SIN: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 4, dst[3], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) temp = brdc = temp_temp(pc); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, brdc, temp); + emit_flop(pc, NV50_FLOP_SIN, brdc, temp); break; case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: @@ -2781,7 +2808,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); - emit_flop(pc, 0, iv, iv); + emit_flop(pc, NV50_FLOP_RCP, iv, iv); /* XXX: when loading interpolants dynamically, move these * to the program head, or make sure it can't be skipped. -- cgit v1.2.3 From 170cdb4507683fb9042620f7ab2ad96e57787d6c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 31 Dec 2009 00:37:47 +0100 Subject: nv50: alloc_reg on reg_instance If we create multiple instances of an nv50_reg referencing them same resource, register allocation from alloc_reg has to be done with the original nv50_reg. --- src/gallium/drivers/nv50/nv50_program.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 739621a5b3..295725a6c0 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -163,20 +163,6 @@ struct nv50_pc { uint8_t edgeflag_out; }; -static INLINE struct nv50_reg * -reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) -{ - struct nv50_reg *ri; - - assert(pc->reg_instance_nr < 16); - ri = &pc->reg_instances[pc->reg_instance_nr++]; - if (reg) { - *ri = *reg; - reg->mod = 0; - } - return ri; -} - static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) { @@ -255,6 +241,21 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) assert(0); } +static INLINE struct nv50_reg * +reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *ri; + + assert(pc->reg_instance_nr < 16); + ri = &pc->reg_instances[pc->reg_instance_nr++]; + if (reg) { + alloc_reg(pc, reg); + *ri = *reg; + reg->mod = 0; + } + return ri; +} + /* XXX: For shaders that aren't executed linearly (e.g. shaders that * contain loops), we need to assign all hw regs to TGSI TEMPs early, * lest we risk temp_temps overwriting regs alloc'd "later". -- cgit v1.2.3 From f2cca04bd87547f8a76d87f4cd9b585de0cf40de Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 31 Dec 2009 12:26:14 +0100 Subject: nv50: add support for subroutines --- src/gallium/drivers/nv50/nv50_program.c | 131 +++++++++++++++++++------------- 1 file changed, 78 insertions(+), 53 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 295725a6c0..1847e8ab4c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -154,6 +154,9 @@ struct nv50_pc { int if_lvl, loop_lvl; unsigned loop_pos[NV50_MAX_LOOP_NESTING]; + unsigned *insn_pos; /* actual program offset of each TGSI insn */ + boolean in_subroutine; + /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; @@ -454,6 +457,14 @@ is_immd(struct nv50_program_exec *e) return FALSE; } +static boolean +is_join(struct nv50_program_exec *e) +{ + if (is_long(e) && (e->inst[1] & 3) == 2) + return TRUE; + return FALSE; +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) @@ -1354,66 +1365,53 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) } static struct nv50_program_exec * -emit_breakaddr(struct nv50_pc *pc) +emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc) { struct nv50_program_exec *e = exec(pc); - e->inst[0] = 0x40000002; + e->inst[0] = (op << 28) | 2; set_long(pc, e); + if (pred >= 0) + set_pred(pc, cc, pred, e); emit(pc, e); return e; } -static void -emit_break(struct nv50_pc *pc, int pred, unsigned cc) +static INLINE struct nv50_program_exec * +emit_breakaddr(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x50000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); + return emit_control_flow(pc, 0x4, -1, 0); +} - emit(pc, e); +static INLINE void +emit_break(struct nv50_pc *pc, int pred, unsigned cc) +{ + emit_control_flow(pc, 0x5, pred, cc); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_joinat(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000002; - set_long(pc, e); - - emit(pc, e); - return e; + return emit_control_flow(pc, 0xa, -1, 0); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_branch(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); + return emit_control_flow(pc, 0x1, pred, cc); +} - e->inst[0] = 0x10000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - emit(pc, e); - return pc->p->exec_tail; +static INLINE struct nv50_program_exec * +emit_call(struct nv50_pc *pc, int pred, unsigned cc) +{ + return emit_control_flow(pc, 0x2, pred, cc); } -static void +static INLINE void emit_ret(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x30000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - - emit(pc, e); + emit_control_flow(pc, 0x3, pred, cc); } #define QOP_ADD 0 @@ -2237,10 +2235,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_BGNSUB: + assert(!pc->in_subroutine); + pc->in_subroutine = TRUE; + /* probably not necessary, but align to 8 byte boundary */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + break; case TGSI_OPCODE_BRK: assert(pc->loop_lvl > 0); emit_break(pc, -1, 0); break; + case TGSI_OPCODE_CAL: + assert(inst->Label.Label < pc->insn_nr); + emit_call(pc, -1, 0)->param.index = inst->Label.Label; + /* replaced by actual offset in nv50_program_fixup_insns */ + break; case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -2348,6 +2358,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDSUB: + assert(pc->in_subroutine); + pc->in_subroutine = FALSE; + break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_EX2, brdc, temp); @@ -2443,7 +2457,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: - if (pc->p->type == PIPE_SHADER_FRAGMENT) + if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine) nv50_fp_move_results(pc); emit_ret(pc, -1, 0); break; @@ -2538,6 +2552,17 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[3], 1.0); break; case TGSI_OPCODE_END: + if (pc->p->type == PIPE_SHADER_FRAGMENT) + nv50_fp_move_results(pc); + + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + else + if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + emit_nop(pc); + + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -3231,16 +3256,6 @@ nv50_program_fixup_insns(struct nv50_pc *pc) if (e->param.index >= 0 && !e->param.mask) bra_list[n++] = e; - /* last instruction must be long so it can have the exit bit set */ - if (!is_long(pc->p->exec_tail)) - convert_to_long(pc, pc->p->exec_tail); - /* set exit bit */ - pc->p->exec_tail->inst[1] |= 1; - - /* !immd on exit insn simultaneously means !join */ - assert(!is_immd(pc->p->exec_head)); - assert(!is_immd(pc->p->exec_tail)); - /* Make sure we don't have any single 32 bit instructions. */ for (e = pc->p->exec_head, pos = 0; e; e = e->next) { pos += is_long(e) ? 2 : 1; @@ -3249,12 +3264,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc) for (i = 0; i < n; ++i) if (bra_list[i]->param.index >= pos) bra_list[i]->param.index += 1; + for (i = 0; i < pc->insn_nr; ++i) + if (pc->insn_pos[i] >= pos) + pc->insn_pos[i] += 1; convert_to_long(pc, e); ++pos; } } FREE(bra_list); + + if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL]) + return; + + /* fill in CALL offsets */ + for (e = pc->p->exec_head; e; e = e->next) { + if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2) + e->param.index = pc->insn_pos[e->param.index]; + } } static boolean @@ -3276,19 +3303,20 @@ nv50_program_tx(struct nv50_program *p) if (ret == FALSE) goto out_cleanup; + pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned)); + tgsi_parse_init(&parse, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { const union tgsi_full_token *tok = &parse.FullToken; - /* don't allow half insn/immd on first and last instruction */ + /* previously allow32 was FALSE for first & last instruction */ pc->allow32 = TRUE; - if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) - pc->allow32 = FALSE; tgsi_parse_token(&parse); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: + pc->insn_pos[pc->insn_cur] = pc->p->exec_size; ++pc->insn_cur; ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) @@ -3299,9 +3327,6 @@ nv50_program_tx(struct nv50_program *p) } } - if (pc->p->type == PIPE_SHADER_FRAGMENT) - nv50_fp_move_results(pc); - nv50_program_fixup_insns(pc); p->param_nr = pc->param_nr * 4; -- cgit v1.2.3 From d9ae8f31d5e687fae412edf888028cce69b039f0 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 30 Dec 2009 20:54:58 +0100 Subject: nv50: handle TGSI_OPCODE_EXP,LOG Not that they make much sense on nv50, but we also do LIT ... --- src/gallium/drivers/nv50/nv50_program.c | 59 +++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 1847e8ab4c..a85587b986 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1840,7 +1840,9 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) case TGSI_OPCODE_DST: return mask & (c ? 0xa : 0x6); case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: @@ -2062,6 +2064,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c) assert(0); return 0x0; } + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_LIT: case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: @@ -2366,6 +2370,33 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_preex2(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_EX2, brdc, temp); break; + case TGSI_OPCODE_EXP: + { + struct nv50_reg *t[2]; + + assert(!temp); + t[0] = temp_temp(pc); + t[1] = temp_temp(pc); + + if (mask & 0x6) + emit_mov(pc, t[0], src[0][0]); + if (mask & 0x3) + emit_flr(pc, t[1], src[0][0]); + + if (mask & (1 << 1)) + emit_sub(pc, dst[1], t[0], t[1]); + if (mask & (1 << 0)) { + emit_preex2(pc, t[1], t[1]); + emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]); + } + if (mask & (1 << 2)) { + emit_preex2(pc, t[0], t[0]); + emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } + break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -2406,6 +2437,34 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_LG2: emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]); break; + case TGSI_OPCODE_LOG: + { + struct nv50_reg *t[2]; + + t[0] = temp_temp(pc); + if (mask & (1 << 1)) + t[1] = temp_temp(pc); + else + t[1] = t[0]; + + emit_abs(pc, t[0], src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], t[1]); + emit_flr(pc, t[1], t[1]); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], t[1]); + if (mask & (1 << 1)) { + t[1]->mod = NV50_MOD_NEG; + emit_preex2(pc, t[1], t[1]); + t[1]->mod = 0; + emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]); + emit_mul(pc, dst[1], t[0], t[1]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } + break; case TGSI_OPCODE_LRP: temp = temp_temp(pc); for (c = 0; c < 4; c++) { -- cgit v1.2.3 From b3425bc3607666e5c8ba4f403b4ead8c8870e49c Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 30 Dec 2009 21:17:31 +0100 Subject: nv50: make assimilate_temp safe Cannot change hw reg assigned to a TGSI TEMP on the fly if we are in a loop, conditional, or can jump around wildly. --- src/gallium/drivers/nv50/nv50_program.c | 44 +++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index a85587b986..7d1b5fd82c 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -285,22 +285,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) return NULL; } -/* Assign the hw of the discarded temporary register src - * to the tgsi register dst and free src. - */ -static void -assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - assert(src->index == -1 && src->hw != -1); - - if (dst->hw != -1) - pc->r_temp[dst->hw] = NULL; - pc->r_temp[src->hw] = dst; - dst->hw = src->hw; - - FREE(src); -} - /* release the hardware resource held by r */ static void release_hw(struct nv50_pc *pc, struct nv50_reg *r) @@ -721,6 +705,34 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) FREE(imm); } +/* Assign the hw of the discarded temporary register src + * to the tgsi register dst and free src. + */ +static void +assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + assert(src->index == -1 && src->hw != -1); + + if (pc->if_lvl || pc->loop_lvl || + (dst->type != P_TEMP) || + (src->hw < pc->result_nr * 4 && + pc->p->type == PIPE_SHADER_FRAGMENT) || + pc->p->info.opcode_count[TGSI_OPCODE_CAL] || + pc->p->info.opcode_count[TGSI_OPCODE_BRA]) { + + emit_mov(pc, dst, src); + free_temp(pc, src); + return; + } + + if (dst->hw != -1) + pc->r_temp[dst->hw] = NULL; + pc->r_temp[src->hw] = dst; + dst->hw = src->hw; + + FREE(src); +} + static void emit_nop(struct nv50_pc *pc) { -- cgit v1.2.3 From 616ddc8dfa5bfc886db8d4c7ef7fc66793f7940d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Wed, 30 Dec 2009 21:25:13 +0100 Subject: nv50: cannot kill branch if immediate is used The immediate's bits eat the condition bits. --- src/gallium/drivers/nv50/nv50_program.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7d1b5fd82c..cc15f45347 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -453,6 +453,7 @@ static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) { + assert(!is_immd(e)); set_long(pc, e); e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); e->inst[1] |= (pred << 7) | (idx << 12); @@ -2118,6 +2119,8 @@ nv50_kill_branch(struct nv50_pc *pc) if (pc->if_insn[lvl]->next != pc->p->exec_tail) return FALSE; + if (is_immd(pc->p->exec_tail)) + return FALSE; /* if ccode == 'true', the BRA is from an ELSE and the predicate * reg may no longer be valid, since we currently always use $p0 -- cgit v1.2.3 From 4e4244b910c83797e3dde62020618f20eb026908 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 31 Dec 2009 13:38:53 +0100 Subject: nv50: fix TEXLOD sequence and use it only in FPs --- src/gallium/drivers/nv50/nv50_program.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index cc15f45347..6733347735 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1566,7 +1566,13 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod, struct nv50_reg *src, struct nv50_program_exec *tex) { struct nv50_program_exec *join_at; - unsigned i, target = pc->p->exec_size + 7 * 2; + unsigned i, target = pc->p->exec_size + 9 * 2; + + if (pc->p->type != PIPE_SHADER_FRAGMENT) { + emit(pc, tex); + return; + } + pc->allow32 = FALSE; /* Subtract lod of each pixel from lod of top left pixel, jump * texlod insn if result is 0, then repeat for 2 other pixels. @@ -1692,6 +1698,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit(pc, e); } else if (bias_lod < 0) { + assert(pc->p->type == PIPE_SHADER_FRAGMENT); e->inst[0] |= arg << 22; e->inst[1] |= 0x20000000; /* texbias */ emit_mov(pc, t[arg], src[3]); -- cgit v1.2.3 From 45a01bc5bd95a6cc85a9db7d8e2c07812346373d Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Thu, 31 Dec 2009 22:24:13 +0100 Subject: nv50: small fix for handling "dangerous" swizzles --- src/gallium/drivers/nv50/nv50_program.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nv50/nv50_program.c') diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 6733347735..2d0b1818ef 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -2864,7 +2864,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } pc->r_brdc = NULL; - if (!deqs) + if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3])) return nv50_program_tx_insn(pc, &insn); deqs = nv50_revdep_reorder(m, rdep); -- cgit v1.2.3