diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_derived.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_fs.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_texture.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_tgsi_to_rc.c | 63 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_tgsi_to_rc.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_state_derived.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_context.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_context.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_state_fs.c | 126 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_decl_sm30.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_emit.h | 19 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_insn.c | 271 |
13 files changed, 383 insertions, 152 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index 0155b9be50..353ae176fd 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -65,7 +65,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_current_shader_outputs(llvmpipe->draw); + const uint num = draw_num_shader_outputs(llvmpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 60ea9c171d..39bcdc8fe6 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -77,17 +77,21 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, static void find_output_registers(struct r300_fragment_program_compiler * compiler, struct r300_fragment_shader * fs) { - unsigned i; + unsigned i, colorbuf_count = 0; /* Mark the outputs as not present initially */ - compiler->OutputColor = fs->info.num_outputs; + compiler->OutputColor[0] = fs->info.num_outputs; + compiler->OutputColor[1] = fs->info.num_outputs; + compiler->OutputColor[2] = fs->info.num_outputs; + compiler->OutputColor[3] = fs->info.num_outputs; compiler->OutputDepth = fs->info.num_outputs; /* Now see where they really are. */ for(i = 0; i < fs->info.num_outputs; ++i) { switch(fs->info.output_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: - compiler->OutputColor = i; + compiler->OutputColor[colorbuf_count] = i; + colorbuf_count++; break; case TGSI_SEMANTIC_POSITION: compiler->OutputDepth = i; diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 453fb1accc..b37be26133 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -37,24 +37,31 @@ unsigned r300_texture_get_stride(struct r300_screen* screen, unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, unsigned zslice, unsigned face); -/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ +/* Translate a pipe_format into a useful texture format for sampling. + * + * R300_EASY_TX_FORMAT swizzles the texture. + * Note the signature of R300_EASY_TX_FORMAT: + * R300_EASY_TX_FORMAT(B, G, R, A, FORMAT); + * + * The FORMAT specifies how the texture sampler will treat the texture, and + * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { switch (format) { /* X8 */ case PIPE_FORMAT_A8_UNORM: + return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8); case PIPE_FORMAT_I8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X8); case PIPE_FORMAT_L8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); /* X16 */ case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_Z16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); case PIPE_FORMAT_R16_SNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16) | R300_TX_FORMAT_SIGNED; - case PIPE_FORMAT_Z16_UNORM: - return R300_EASY_TX_FORMAT(X, X, X, X, X16); /* Y8X8 */ case PIPE_FORMAT_A8L8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index a792c2cf98..941ec17016 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -201,6 +201,8 @@ static void transform_srcreg( struct rc_src_register * dst, struct tgsi_full_src_register * src) { + unsigned i, j; + dst->File = translate_register_file(src->Register.File); dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index); dst->RelAddr = src->Register.Indirect; @@ -210,6 +212,21 @@ static void transform_srcreg( dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; dst->Abs = src->Register.Absolute; dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0; + + if (src->Register.File == TGSI_FILE_IMMEDIATE) { + for (i = 0; i < ttr->imms_to_swizzle_count; i++) { + if (ttr->imms_to_swizzle[i].index == src->Register.Index) { + dst->File = RC_FILE_TEMPORARY; + dst->Index = 0; + dst->Swizzle = 0; + for (j = 0; j < 4; j++) { + dst->Swizzle |= GET_SWZ(ttr->imms_to_swizzle[i].swizzle, + tgsi_util_get_full_src_register_swizzle(src, j)) << (j * 3); + } + break; + } + } + } } static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src, @@ -277,21 +294,45 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst &ttr->compiler->Program.ShadowSamplers); } -static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm) +static void handle_immediate(struct tgsi_to_rc * ttr, + struct tgsi_full_immediate * imm, + unsigned index) { struct rc_constant constant; - int i; + unsigned swizzle = 0; + boolean can_swizzle = TRUE; + unsigned i; - constant.Type = RC_CONSTANT_IMMEDIATE; - constant.Size = 4; - for(i = 0; i < 4; ++i) - constant.u.Immediate[i] = imm->u[i].Float; - rc_constants_add(&ttr->compiler->Program.Constants, &constant); + for (i = 0; i < 4; i++) { + if (imm->u[i].Float == 0.0f) { + swizzle |= RC_SWIZZLE_ZERO << (i * 3); + } else if (imm->u[i].Float == 0.5f) { + swizzle |= RC_SWIZZLE_HALF << (i * 3); + } else if (imm->u[i].Float == 1.0f) { + swizzle |= RC_SWIZZLE_ONE << (i * 3); + } else { + can_swizzle = FALSE; + break; + } + } + + if (can_swizzle) { + ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].index = index; + ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].swizzle = swizzle; + ttr->imms_to_swizzle_count++; + } else { + constant.Type = RC_CONSTANT_IMMEDIATE; + constant.Size = 4; + for(i = 0; i < 4; ++i) + constant.u.Immediate[i] = imm->u[i].Float; + rc_constants_add(&ttr->compiler->Program.Constants, &constant); + } } void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) { struct tgsi_parse_context parser; + unsigned imm_index = 0; int i; /* Allocate constants placeholders. @@ -308,6 +349,9 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) ttr->immediate_offset = ttr->compiler->Program.Constants.Count; + ttr->imms_to_swizzle = malloc(ttr->info->immediate_count * sizeof(struct swizzled_imms)); + ttr->imms_to_swizzle_count = 0; + tgsi_parse_init(&parser, tokens); while (!tgsi_parse_end_of_tokens(&parser)) { @@ -317,7 +361,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) case TGSI_TOKEN_TYPE_DECLARATION: break; case TGSI_TOKEN_TYPE_IMMEDIATE: - handle_immediate(ttr, &parser.FullToken.FullImmediate); + handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index); + imm_index++; break; case TGSI_TOKEN_TYPE_INSTRUCTION: transform_instruction(ttr, &parser.FullToken.FullInstruction); @@ -327,6 +372,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens) tgsi_parse_free(&parser); + free(ttr->imms_to_swizzle); + rc_calculate_inputs_outputs(ttr->compiler); } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h index 93e90ec6d2..39b473c7bf 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h @@ -29,11 +29,18 @@ struct tgsi_full_declaration; struct tgsi_shader_info; struct tgsi_token; +struct swizzled_imms { + unsigned index; + unsigned swizzle; +}; + struct tgsi_to_rc { struct radeon_compiler * compiler; const struct tgsi_shader_info * info; int immediate_offset; + struct swizzled_imms * imms_to_swizzle; + unsigned imms_to_swizzle_count; }; void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens); diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f6856a5f69..d2eda7324c 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -30,7 +30,6 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" -#include "draw/draw_private.h" #include "sp_context.h" #include "sp_screen.h" #include "sp_state.h" @@ -67,7 +66,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_current_shader_outputs(softpipe->draw); + const uint num = draw_num_shader_outputs(softpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index e88ef8d8fe..c4181c3f5b 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -215,7 +215,6 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->state.hw_draw.num_views = 0; svga->dirty = ~0; - svga->state.white_fs_id = SVGA3D_INVALID_ID; LIST_INITHEAD(&svga->dirty_buffers); diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 66259fd010..ba86256eb2 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -327,10 +327,6 @@ struct svga_context unsigned texture_timestamp; - /* Internally generated shaders: - */ - unsigned white_fs_id; - /* */ struct svga_sw_state sw; diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index adc7120217..2973444d0a 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -108,70 +108,6 @@ fail: return ret; } -/* The blend workaround for simulating logicop xor behaviour requires - * that the incoming fragment color be white. This change achieves - * that by hooking up a hard-wired fragment shader that just emits - * color 1,1,1,1 - * - * This is a slightly incomplete solution as it assumes that the - * actual bound shader has no other effects beyond generating a - * fragment color. In particular shaders containing TEXKIL and/or - * depth-write will not have the correct behaviour, nor will those - * expecting to use alphatest. - * - * These are avoidable issues, but they are not much worse than the - * unavoidable ones associated with this technique, so it's not clear - * how much effort should be expended trying to resolve them - the - * ultimate result will still not be correct in most cases. - * - * Shader below was generated with: - * SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt - */ -static int emit_white_fs( struct svga_context *svga ) -{ - int ret = PIPE_ERROR; - - /* ps_3_0 - * def c0, 1.000000, 0.000000, 0.000000, 1.000000 - * mov oC0, c0.x - * end - */ - static const unsigned white_tokens[] = { - 0xffff0300, - 0x05000051, - 0xa00f0000, - 0x3f800000, - 0x00000000, - 0x00000000, - 0x3f800000, - 0x02000001, - 0x800f0800, - 0xa0000000, - 0x0000ffff, - }; - - assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX); - svga->state.white_fs_id = util_bitmask_add(svga->fs_bm); - if(svga->state.white_fs_id == SVGA3D_INVALID_ID) - goto no_fs_id; - - ret = SVGA3D_DefineShader(svga->swc, - svga->state.white_fs_id, - SVGA3D_SHADERTYPE_PS, - white_tokens, - sizeof(white_tokens)); - if (ret) - goto no_definition; - - return 0; - -no_definition: - util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id); - svga->state.white_fs_id = SVGA3D_INVALID_ID; -no_fs_id: - return ret; -} - /* SVGA_NEW_TEXTURE_BINDING * SVGA_NEW_RAST @@ -199,6 +135,23 @@ static int make_fs_key( const struct svga_context *svga, PIPE_WINDING_CW); } + /* The blend workaround for simulating logicop xor behaviour + * requires that the incoming fragment color be white. This change + * achieves that by creating a varient of the current fragment + * shader that overrides all output colors with 1,1,1,1 + * + * This will work for most shaders, including those containing + * TEXKIL and/or depth-write. However, it will break on the + * combination of xor-logicop plus alphatest. + * + * Ultimately, we could implement alphatest in the shader using + * texkil prior to overriding the outgoing fragment color. + * + * SVGA_NEW_BLEND + */ + if (svga->curr.blend->need_white_fragments) { + key->white_fragments = 1; + } /* XXX: want to limit this to the textures that the shader actually * refers to. @@ -238,40 +191,29 @@ static int emit_hw_fs( struct svga_context *svga, unsigned id = SVGA3D_INVALID_ID; int ret = 0; + struct svga_fragment_shader *fs = svga->curr.fs; + struct svga_fs_compile_key key; + /* SVGA_NEW_BLEND + * SVGA_NEW_TEXTURE_BINDING + * SVGA_NEW_RAST + * SVGA_NEW_NEED_SWTNL + * SVGA_NEW_SAMPLER */ - if (svga->curr.blend->need_white_fragments) { - if (svga->state.white_fs_id == SVGA3D_INVALID_ID) { - ret = emit_white_fs( svga ); - if (ret) - return ret; - } - id = svga->state.white_fs_id; - } - else { - struct svga_fragment_shader *fs = svga->curr.fs; - struct svga_fs_compile_key key; - - /* SVGA_NEW_TEXTURE_BINDING - * SVGA_NEW_RAST - * SVGA_NEW_NEED_SWTNL - * SVGA_NEW_SAMPLER - */ - ret = make_fs_key( svga, &key ); + ret = make_fs_key( svga, &key ); + if (ret) + return ret; + + result = search_fs_key( fs, &key ); + if (!result) { + ret = compile_fs( svga, fs, &key, &result ); if (ret) return ret; - - result = search_fs_key( fs, &key ); - if (!result) { - ret = compile_fs( svga, fs, &key, &result ); - if (ret) - return ret; - } - - assert (result); - id = result->id; } + assert (result); + id = result->id; + assert(id != SVGA3D_INVALID_ID); if (result != svga->state.hw_draw.fs) { diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 737a2213af..063c9cf422 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -49,6 +49,7 @@ struct svga_fs_compile_key { unsigned light_twoside:1; unsigned front_cw:1; + unsigned white_fragments:1; unsigned num_textures:8; unsigned num_unnormalized_coords:8; struct { diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index 43fc0d3235..73102a72a8 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -194,8 +194,19 @@ static boolean ps30_output( struct svga_shader_emitter *emit, switch (semantic.Name) { case TGSI_SEMANTIC_COLOR: - emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, - semantic.Index ); + if (emit->unit == PIPE_SHADER_FRAGMENT && + emit->key.fkey.white_fragments) { + + emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, + emit->nr_hw_temp++ ); + emit->temp_col[idx] = emit->output_map[idx]; + emit->true_col[idx] = dst_register( SVGA3DREG_COLOROUT, + semantic.Index ); + } + else { + emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, + semantic.Index ); + } break; case TGSI_SEMANTIC_POSITION: emit->output_map[idx] = dst_register( SVGA3DREG_TEMP, diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index 2557824293..e8f75485d5 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -79,6 +79,8 @@ struct svga_shader_emitter int ps30_input_count; + int dynamic_branching_level; + boolean in_main_func; boolean created_zero_immediate; @@ -199,6 +201,23 @@ static INLINE boolean emit_op3( struct svga_shader_emitter *emit, } +static INLINE boolean emit_op4( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2, + struct src_register src3) +{ + return (emit_instruction( emit, inst ) && + emit_dst( emit, dest ) && + emit_src( emit, src0 ) && + emit_src( emit, src1 ) && + emit_src( emit, src2 ) && + emit_src( emit, src3 )); +} + + #define TRANSLATE_SWIZZLE(x,y,z,w) ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6)) #define SWIZZLE_XYZW \ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W) diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index dc5eb8fc60..be821e9821 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -46,8 +46,6 @@ translate_opcode( case TGSI_OPCODE_ABS: return SVGA3DOP_ABS; case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC; - case TGSI_OPCODE_DDX: return SVGA3DOP_DSX; - case TGSI_OPCODE_DDY: return SVGA3DOP_DSY; case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; @@ -415,6 +413,88 @@ static boolean submit_op3( struct svga_shader_emitter *emit, } + + +/* SVGA shaders may not refer to >1 constant register in a single + * instruction. This function checks for that usage and inserts a + * move to temporary if detected. + */ +static boolean submit_op4( struct svga_shader_emitter *emit, + SVGA3dShaderInstToken inst, + SVGA3dShaderDestToken dest, + struct src_register src0, + struct src_register src1, + struct src_register src2, + struct src_register src3) +{ + SVGA3dShaderDestToken temp0; + SVGA3dShaderDestToken temp3; + boolean need_temp0 = FALSE; + boolean need_temp3 = FALSE; + SVGA3dShaderRegType type0, type1, type2, type3; + + temp0.value = 0; + temp3.value = 0; + type0 = SVGA3dShaderGetRegType( src0.base.value ); + type1 = SVGA3dShaderGetRegType( src1.base.value ); + type2 = SVGA3dShaderGetRegType( src2.base.value ); + type3 = SVGA3dShaderGetRegType( src2.base.value ); + + /* Make life a little easier - this is only used by the TXD + * instruction which is guaranteed not to have a constant/input reg + * in one slot at least: + */ + assert(type1 == SVGA3DREG_SAMPLER); + + if (type0 == SVGA3DREG_CONST && + ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || + (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type3 == SVGA3DREG_CONST && + (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num)) + need_temp3 = TRUE; + + if (type0 == SVGA3DREG_INPUT && + ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) || + (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) + need_temp0 = TRUE; + + if (type3 == SVGA3DREG_INPUT && + (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) + need_temp3 = TRUE; + + if (need_temp0) + { + temp0 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 )) + return FALSE; + + src0 = src( temp0 ); + } + + if (need_temp3) + { + temp3 = get_temp( emit ); + + if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 )) + return FALSE; + + src3 = src( temp3 ); + } + + if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 )) + return FALSE; + + if (need_temp3) + release_temp( emit, temp3 ); + if (need_temp0) + release_temp( emit, temp0 ); + return TRUE; +} + + static boolean emit_def_const( struct svga_shader_emitter *emit, SVGA3dShaderConstType type, unsigned idx, @@ -660,6 +740,8 @@ static boolean emit_if(struct svga_shader_emitter *emit, if_token.control = SVGA3DOPCOMPC_NE; zero = scalar(zero, TGSI_SWIZZLE_X); + emit->dynamic_branching_level++; + return (emit_instruction( emit, if_token ) && emit_src( emit, src ) && emit_src( emit, zero ) ); @@ -668,6 +750,8 @@ static boolean emit_if(struct svga_shader_emitter *emit, static boolean emit_endif(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { + emit->dynamic_branching_level--; + return (emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ))); } @@ -1011,10 +1095,10 @@ static boolean emit_kilp(struct svga_shader_emitter *emit, { SVGA3dShaderInstToken inst; SVGA3dShaderDestToken temp; - struct src_register one = get_zero_immediate( emit ); + struct src_register one = scalar( get_zero_immediate( emit ), + TGSI_SWIZZLE_W ); inst = inst_token( SVGA3DOP_TEXKILL ); - one = scalar( one, TGSI_SWIZZLE_W ); /* texkill doesn't allow negation on the operand so lets move * negation of {1} to a temp register */ @@ -1169,41 +1253,79 @@ static boolean emit_tex2(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dst ) { SVGA3dShaderInstToken inst; - struct src_register src0; - struct src_register src1; - + struct src_register texcoord; + struct src_register sampler; + SVGA3dShaderDestToken tmp; + inst.value = 0; - inst.op = SVGA3DOP_TEX; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_TEX: + inst.op = SVGA3DOP_TEX; break; case TGSI_OPCODE_TXP: + inst.op = SVGA3DOP_TEX; inst.control = SVGA3DOPCONT_PROJECT; break; case TGSI_OPCODE_TXB: + inst.op = SVGA3DOP_TEX; inst.control = SVGA3DOPCONT_BIAS; break; + case TGSI_OPCODE_TXL: + inst.op = SVGA3DOP_TEXLDL; + break; default: assert(0); return FALSE; } - src0 = translate_src_register( emit, &insn->Src[0] ); - src1 = translate_src_register( emit, &insn->Src[1] ); + texcoord = translate_src_register( emit, &insn->Src[0] ); + sampler = translate_src_register( emit, &insn->Src[1] ); - if (emit->key.fkey.tex[src1.base.num].unnormalized) { - struct src_register wh = get_tex_dimensions( emit, src1.base.num ); - SVGA3dShaderDestToken tmp = get_temp( emit ); + if (emit->key.fkey.tex[sampler.base.num].unnormalized || + emit->dynamic_branching_level > 0) + tmp = get_temp( emit ); + + /* Can't do mipmapping inside dynamic branch constructs. Force LOD + * zero in that case. + */ + if (emit->dynamic_branching_level > 0 && + inst.op == SVGA3DOP_TEX && + SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) { + struct src_register zero = get_zero_immediate( emit ); + + /* MOV tmp, texcoord */ + if (!submit_op1( emit, + inst_token( SVGA3DOP_MOV ), + tmp, + texcoord )) + return FALSE; + + /* MOV tmp.w, zero */ + if (!submit_op1( emit, + inst_token( SVGA3DOP_MOV ), + writemask( tmp, TGSI_WRITEMASK_W ), + scalar( zero, TGSI_SWIZZLE_X ))) + return FALSE; + + texcoord = src( tmp ); + inst.op = SVGA3DOP_TEXLDL; + } + + /* Explicit normalization of texcoords: + */ + if (emit->key.fkey.tex[sampler.base.num].unnormalized) { + struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); /* MUL tmp, SRC0, WH */ if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), - tmp, src0, wh )) + tmp, texcoord, wh )) return FALSE; - src0 = src( tmp ); + + texcoord = src( tmp ); } - return submit_op2( emit, inst, dst, src0, src1 ); + return submit_op2( emit, inst, dst, texcoord, sampler ); } @@ -1211,31 +1333,33 @@ static boolean emit_tex2(struct svga_shader_emitter *emit, /* Translate texture instructions to SVGA3D representation. */ -static boolean emit_tex3(struct svga_shader_emitter *emit, +static boolean emit_tex4(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn, SVGA3dShaderDestToken dst ) { SVGA3dShaderInstToken inst; - struct src_register src0; - struct src_register src1; - struct src_register src2; + struct src_register texcoord; + struct src_register ddx; + struct src_register ddy; + struct src_register sampler; + + texcoord = translate_src_register( emit, &insn->Src[0] ); + ddx = translate_src_register( emit, &insn->Src[1] ); + ddy = translate_src_register( emit, &insn->Src[2] ); + sampler = translate_src_register( emit, &insn->Src[3] ); inst.value = 0; switch (insn->Instruction.Opcode) { case TGSI_OPCODE_TXD: - inst.op = SVGA3DOP_TEXLDD; - break; - case TGSI_OPCODE_TXL: - inst.op = SVGA3DOP_TEXLDL; + inst.op = SVGA3DOP_TEXLDD; /* 4 args! */ break; + default: + assert(0); + return FALSE; } - src0 = translate_src_register( emit, &insn->Src[0] ); - src1 = translate_src_register( emit, &insn->Src[1] ); - src2 = translate_src_register( emit, &insn->Src[2] ); - - return submit_op3( emit, inst, dst, src0, src1, src2 ); + return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy ); } @@ -1271,12 +1395,12 @@ static boolean emit_tex(struct svga_shader_emitter *emit, case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXL: if (!emit_tex2( emit, insn, tex_result )) return FALSE; break; - case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXD: - if (!emit_tex3( emit, insn, tex_result )) + if (!emit_tex4( emit, insn, tex_result )) return FALSE; break; default: @@ -1330,6 +1454,8 @@ static boolean emit_bgnloop2( struct svga_shader_emitter *emit, struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); struct src_register const_int = get_loop_const( emit ); + emit->dynamic_branching_level++; + return (emit_instruction( emit, inst ) && emit_src( emit, loop_reg ) && emit_src( emit, const_int ) ); @@ -1339,6 +1465,9 @@ static boolean emit_endloop2( struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn ) { SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); + + emit->dynamic_branching_level--; + return emit_instruction( emit, inst ); } @@ -1398,6 +1527,46 @@ static boolean emit_simple_instruction(struct svga_shader_emitter *emit, } } + +static boolean emit_deriv(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn ) +{ + if (emit->dynamic_branching_level > 0 && + insn->Src[0].Register.File == TGSI_FILE_TEMPORARY) + { + struct src_register zero = get_zero_immediate( emit ); + SVGA3dShaderDestToken dst = + translate_dst_register( emit, insn, 0 ); + + /* Deriv opcodes not valid inside dynamic branching, workaround + * by zeroing out the destination. + */ + if (!submit_op1(emit, + inst_token( SVGA3DOP_MOV ), + dst, + scalar(zero, TGSI_SWIZZLE_X))) + return FALSE; + + return TRUE; + } + else { + unsigned opcode; + + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_DDX: + opcode = SVGA3DOP_DSX; + break; + case TGSI_OPCODE_DDY: + opcode = SVGA3DOP_DSY; + break; + default: + return FALSE; + } + + return emit_simple_instruction( emit, opcode, insn ); + } +} + static boolean emit_arl(struct svga_shader_emitter *emit, const struct tgsi_full_instruction *insn) { @@ -2002,6 +2171,10 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_TXD: return emit_tex( emit, insn ); + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + return emit_deriv( emit, insn ); + case TGSI_OPCODE_BGNSUB: return emit_bgnsub( emit, position, insn ); @@ -2254,11 +2427,28 @@ static boolean emit_ps_postamble( struct svga_shader_emitter *emit ) for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) { - if (!submit_op1( emit, - inst_token(SVGA3DOP_MOV), - emit->true_col[i], - src(emit->temp_col[i]) )) - return FALSE; + /* Potentially override output colors with white for XOR + * logicop workaround. + */ + if (emit->unit == PIPE_SHADER_FRAGMENT && + emit->key.fkey.white_fragments) { + + struct src_register one = scalar( get_zero_immediate( emit ), + TGSI_SWIZZLE_W ); + + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_col[i], + one )) + return FALSE; + } + else { + if (!submit_op1( emit, + inst_token(SVGA3DOP_MOV), + emit->true_col[i], + src(emit->temp_col[i]) )) + return FALSE; + } } } @@ -2467,6 +2657,9 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) if (emit->key.fkey.light_twoside) return TRUE; + if (emit->key.fkey.white_fragments) + return TRUE; + if (emit->emit_frontface) return TRUE; @@ -2476,6 +2669,10 @@ needs_to_create_zero( struct svga_shader_emitter *emit ) } if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || + emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || @@ -2702,6 +2899,8 @@ boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit, goto done; } + assert(emit->dynamic_branching_level == 0); + /* Need to terminate the whole shader: */ ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) ); |