diff options
Diffstat (limited to 'src/gallium/state_trackers/g3dvl/vl_context.c')
-rw-r--r-- | src/gallium/state_trackers/g3dvl/vl_context.c | 2282 |
1 files changed, 2282 insertions, 0 deletions
diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c new file mode 100644 index 0000000000..d446d218a4 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -0,0 +1,2282 @@ +#include "vl_context.h" +#include <assert.h> +#include <stdlib.h> +#include <pipe/p_context.h> +#include <pipe/p_winsys.h> +#include <pipe/p_screen.h> +#include <pipe/p_state.h> +#include <pipe/p_inlines.h> +#include <pipe/p_shader_tokens.h> +#include <tgsi/util/tgsi_parse.h> +#include <tgsi/util/tgsi_build.h> +#include "vl_shader_build.h" +#include "vl_data.h" +#include "vl_defs.h" +#include "vl_util.h" + +static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move pos in to pos out + * mov o1, i1 ; Move texcoord in to texcoord out */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + //context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + //context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlInitIDCT(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH; + context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT; + context->states.idct.viewport.scale[2] = 1; + context->states.idct.viewport.scale[3] = 1; + context->states.idct.viewport.translate[0] = 0; + context->states.idct.viewport.translate[1] = 0; + context->states.idct.viewport.translate[2] = 0; + context->states.idct.viewport.translate[3] = 0; + + context->states.idct.render_target.width = VL_BLOCK_WIDTH; + context->states.idct.render_target.height = VL_BLOCK_HEIGHT; + context->states.idct.render_target.num_cbufs = 1; + context->states.idct.render_target.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler); + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8L8_UNORM; + template.last_level = 0; + template.width[0] = 8; + template.height[0] = 8; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template); + + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.width[0] = 16; + template.height[0] = 1; + + context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template); + + for (i = 0; i < 2; ++i) + { + context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i]; + context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i]; + /* + context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F); + context->states.idct.vertex_bufs[i].max_index = 3; + context->states.idct.vertex_bufs[i].buffer_offset = 0; + context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 4 + ); + + context->states.idct.vertex_buf_elems[i].src_offset = 0; + context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i; + context->states.idct.vertex_buf_elems[i].nr_components = 2; + context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + */ + } + + vlCreateVertexShaderFrameIDCT(context); + vlCreateFragmentShaderFrameIDCT(context); + + return 0; +} + +static int vlDestroyIDCT(struct VL_CONTEXT *context) +{ + //unsigned int i; + + assert(context); + + context->pipe->delete_sampler_state(context->pipe, context->states.idct.sampler); + + //for (i = 0; i < 2; ++i) + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer); + + pipe_texture_release(&context->states.idct.texture); + pipe_texture_release(&context->states.idct.basis); + + //context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs); + //context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs); + + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer); + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer); + + return 0; +} + +static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + */ + for (i = 0; i < 2; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Ref macroblock texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field ref macroblock texcoords + * decl o4 ; Bottom field ref macroblock texcoords + * decl o5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + mov o1, i1 ; Move input luma texcoords to output + mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock + add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o5, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Denormalized vertex pos + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move past ref macroblock texcoords into position + * decl c4 ; Unused + * decl c5 ; Translation vector to move future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Past ref macroblock texcoords + * decl o4 ; Future ref macroblock texcoords + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate rect into position on past ref macroblock + add o4, t0, c5 ; Translate rect into position on future ref macroblock */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position + * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position + * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field past ref macroblock texcoords + * decl o4 ; Bottom field past ref macroblock texcoords + * decl o5 ; Top field future ref macroblock texcoords + * decl o6 ; Bottom field future ref macroblock texcoords + * decl o7 ; Denormalized vertex pos + */ + for (i = 0; i < 8; i++) + { + decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock + * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock + * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock + * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o7, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s4 + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock + * tex2d t2, i3, s4 ; Read texel from future ref macroblock + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Texcoords for s4 + * decl i5 ; Texcoords for s4 + * decl i6 ; Denormalized vertex pos + */ + for (i = 0; i < 7; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t5 */ + decl = vl_decl_temps(0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +int vlCreateDataBufsMC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + /* Create our vertex buffer and vertex buffer element */ + context->states.mc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); + context->states.mc.vertex_bufs[0].max_index = 23; + context->states.mc.vertex_bufs[0].buffer_offset = 0; + context->states.mc.vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 24 + ); + + context->states.mc.vertex_buf_elems[0].src_offset = 0; + context->states.mc.vertex_buf_elems[0].vertex_buffer_index = 0; + context->states.mc.vertex_buf_elems[0].nr_components = 2; + context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Create our texcoord buffers and texcoord buffer elements */ + for (i = 1; i < 3; ++i) + { + context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F); + context->states.mc.vertex_bufs[i].max_index = 23; + context->states.mc.vertex_bufs[i].buffer_offset = 0; + context->states.mc.vertex_bufs[i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + + context->states.mc.vertex_buf_elems[i].src_offset = 0; + context->states.mc.vertex_buf_elems[i].vertex_buffer_index = i; + context->states.mc.vertex_buf_elems[i].nr_components = 2; + context->states.mc.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + } + + /* Fill buffers */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_chroma_420_texcoords, + sizeof(struct VL_VERTEX2F) * 24 + ); + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_luma_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + /* TODO: Accomodate 422, 444 */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_chroma_420_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + + for (i = 0; i < 3; ++i) + pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.vertex_bufs[i].buffer); + + /* Create our constant buffer */ + context->states.mc.vs_const_buf.size = sizeof(struct VL_MC_VS_CONSTS); + context->states.mc.vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.mc.vs_const_buf.size + ); + + context->states.mc.fs_const_buf.size = sizeof(struct VL_MC_FS_CONSTS); + context->states.mc.fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.mc.fs_const_buf.size + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &vl_mc_fs_consts, + sizeof(struct VL_MC_FS_CONSTS) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitMC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int filters[5]; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + /* For MC we render to textures, which are rounded up to nearest POT */ + context->states.mc.viewport.scale[0] = vlRoundUpPOT(context->video_width); + context->states.mc.viewport.scale[1] = vlRoundUpPOT(context->video_height); + context->states.mc.viewport.scale[2] = 1; + context->states.mc.viewport.scale[3] = 1; + context->states.mc.viewport.translate[0] = 0; + context->states.mc.viewport.translate[1] = 0; + context->states.mc.viewport.translate[2] = 0; + context->states.mc.viewport.translate[3] = 0; + + context->states.mc.render_target.width = vlRoundUpPOT(context->video_width); + context->states.mc.render_target.height = vlRoundUpPOT(context->video_height); + context->states.mc.render_target.num_cbufs = 1; + /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */ + context->states.mc.render_target.zsbuf = NULL; + + filters[0] = PIPE_TEX_FILTER_NEAREST; + filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; + + for (i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.mc.samplers[i] = pipe->create_sampler_state(pipe, &sampler); + } + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_R16_SNORM; + template.last_level = 0; + template.width[0] = 8; + template.height[0] = 8 * 4; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template); + + if (context->video_format == VL_FORMAT_YCBCR_420) + template.height[0] = 8; + else if (context->video_format == VL_FORMAT_YCBCR_422) + template.height[0] = 8 * 2; + else if (context->video_format == VL_FORMAT_YCBCR_444) + template.height[0] = 8 * 4; + else + assert(0); + + context->states.mc.textures[1] = pipe->screen->texture_create(pipe->screen, &template); + context->states.mc.textures[2] = pipe->screen->texture_create(pipe->screen, &template); + + /* textures[3] & textures[4] are assigned from VL_SURFACEs for P and B macroblocks at render time */ + + vlCreateVertexShaderIMC(context); + vlCreateFragmentShaderIMC(context); + vlCreateVertexShaderFramePMC(context); + vlCreateVertexShaderFieldPMC(context); + vlCreateFragmentShaderFramePMC(context); + vlCreateFragmentShaderFieldPMC(context); + vlCreateVertexShaderFrameBMC(context); + vlCreateVertexShaderFieldBMC(context); + vlCreateFragmentShaderFrameBMC(context); + vlCreateFragmentShaderFieldBMC(context); + vlCreateDataBufsMC(context); + + return 0; +} + +static int vlDestroyMC(struct VL_CONTEXT *context) +{ + unsigned int i; + + assert(context); + + for (i = 0; i < 5; ++i) + context->pipe->delete_sampler_state(context->pipe, context->states.mc.samplers[i]); + + for (i = 0; i < 3; ++i) + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer); + + /* Textures 3 & 4 are not created directly, no need to release them here */ + for (i = 0; i < 3; ++i) + pipe_texture_release(&context->states.mc.textures[i]); + + context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs); + context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs); + + for (i = 0; i < 2; ++i) + { + context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]); + context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]); + context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]); + context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]); + } + + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer); + + return 0; +} + +static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale texcoord rect to source size + * decl c1 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mov o0, i0 ; Move pos in to pos out */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o1, t0, c1 ; Translate texcoord rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); + //free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); + //free(tokens); + + return 0; +} + +static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + /* + Create our vertex buffer and vertex buffer element + VB contains 4 vertices that render a quad covering the entire window + to display a rendered surface + Quad is rendered as a tri strip + */ + context->states.csc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); + context->states.csc.vertex_bufs[0].max_index = 3; + context->states.csc.vertex_bufs[0].buffer_offset = 0; + context->states.csc.vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_surface_vertex_positions, + sizeof(struct VL_VERTEX2F) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[0].buffer); + + context->states.csc.vertex_buf_elems[0].src_offset = 0; + context->states.csc.vertex_buf_elems[0].vertex_buffer_index = 0; + context->states.csc.vertex_buf_elems[0].nr_components = 2; + context->states.csc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our texcoord buffer and texcoord buffer element + Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + context->states.csc.vertex_bufs[1].pitch = sizeof(struct VL_TEXCOORD2F); + context->states.csc.vertex_bufs[1].max_index = 3; + context->states.csc.vertex_bufs[1].buffer_offset = 0; + context->states.csc.vertex_bufs[1].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_TEXCOORD2F) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_surface_texcoords, + sizeof(struct VL_TEXCOORD2F) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[1].buffer); + + context->states.csc.vertex_buf_elems[1].src_offset = 0; + context->states.csc.vertex_buf_elems[1].vertex_buffer_index = 1; + context->states.csc.vertex_buf_elems[1].nr_components = 2; + context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our vertex shader's constant buffer + Const buffer contains scaling and translation vectors + */ + context->states.csc.vs_const_buf.size = sizeof(struct VL_CSC_VS_CONSTS); + context->states.csc.vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.csc.vs_const_buf.size + ); + + /* + Create our fragment shader's constant buffer + Const buffer contains the color conversion matrix and bias vectors + */ + context->states.csc.fs_const_buf.size = sizeof(struct VL_CSC_FS_CONSTS); + context->states.csc.fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.csc.fs_const_buf.size + ); + + /* + TODO: Refactor this into a seperate function, + allow changing the CSC matrix at runtime to switch between regular & full versions + */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &vl_csc_fs_consts_601, + sizeof(struct VL_CSC_FS_CONSTS) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitCSC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + + assert(context); + + pipe = context->pipe; + + /* Delay creating the FB until vlPutSurface() so we know window size */ + context->states.csc.framebuffer.num_cbufs = 1; + context->states.csc.framebuffer.cbufs[0] = NULL; + context->states.csc.framebuffer.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.csc.sampler = pipe->create_sampler_state(pipe, &sampler); + + vlCreateVertexShaderCSC(context); + vlCreateFragmentShaderCSC(context); + vlCreateDataBufsCSC(context); + + return 0; +} + +static int vlDestroyCSC(struct VL_CONTEXT *context) +{ + assert(context); + + /* + Since we create the final FB when we display our first surface, + it may not be created if vlPutSurface() is never called + */ + if (context->states.csc.framebuffer.cbufs[0]) + context->pipe->winsys->surface_release(context->pipe->winsys, &context->states.csc.framebuffer.cbufs[0]); + context->pipe->delete_sampler_state(context->pipe, context->states.csc.sampler); + context->pipe->delete_vs_state(context->pipe, context->states.csc.vertex_shader); + context->pipe->delete_fs_state(context->pipe, context->states.csc.fragment_shader); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vs_const_buf.buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitCommon(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + rast.flatshade = 1; + rast.flatshade_first = 0; + rast.light_twoside = 0; + rast.front_winding = PIPE_WINDING_CCW; + rast.cull_mode = PIPE_WINDING_CW; + rast.fill_cw = PIPE_POLYGON_MODE_FILL; + rast.fill_ccw = PIPE_POLYGON_MODE_FILL; + rast.offset_cw = 0; + rast.offset_ccw = 0; + rast.scissor = 0; + rast.poly_smooth = 0; + rast.poly_stipple_enable = 0; + rast.point_sprite = 0; + rast.point_size_per_vertex = 0; + rast.multisample = 0; + rast.line_smooth = 0; + rast.line_stipple_enable = 0; + rast.line_stipple_factor = 0; + rast.line_stipple_pattern = 0; + rast.line_last_pixel = 0; + /* Don't need clipping, but viewport mapping done here */ + rast.bypass_clipping = 0; + rast.bypass_vs = 0; + rast.origin_lower_left = 0; + rast.line_width = 1; + rast.point_smooth = 0; + rast.point_size = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + /*rast.sprite_coord_mode[i] = ;*/ + context->states.common.raster = pipe->create_rasterizer_state(pipe, &rast); + pipe->bind_rasterizer_state(pipe, context->states.common.raster); + + blend.blend_enable = 0; + blend.rgb_func = PIPE_BLEND_ADD; + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_func = PIPE_BLEND_ADD; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.colormask = PIPE_MASK_RGBA; + blend.dither = 0; + context->states.common.blend = pipe->create_blend_state(pipe, &blend); + pipe->bind_blend_state(pipe, context->states.common.blend); + + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + dsa.depth.occlusion_count = 0; + for (i = 0; i < 2; ++i) + { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].ref_value = 0; + dsa.stencil[i].value_mask = 0; + dsa.stencil[i].write_mask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref = 0; + context->states.common.dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + pipe->bind_depth_stencil_alpha_state(pipe, context->states.common.dsa); + + return 0; +} + +static int vlDestroyCommon(struct VL_CONTEXT *context) +{ + assert(context); + + context->pipe->delete_blend_state(context->pipe, context->states.common.blend); + context->pipe->delete_rasterizer_state(context->pipe, context->states.common.raster); + context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->states.common.dsa); + + return 0; +} + +static int vlInit(struct VL_CONTEXT *context) +{ + assert(context); + + vlInitCommon(context); + vlInitCSC(context); + vlInitMC(context); + vlInitIDCT(context); + + return 0; +} + +static int vlDestroy(struct VL_CONTEXT *context) +{ + assert(context); + + /* XXX: Must unbind shaders before we can delete them for some reason */ + context->pipe->bind_vs_state(context->pipe, NULL); + context->pipe->bind_fs_state(context->pipe, NULL); + + vlDestroyCommon(context); + vlDestroyCSC(context); + vlDestroyMC(context); + vlDestroyIDCT(context); + + return 0; +} + +int vlCreateContext +( + Display *display, + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum VL_FORMAT video_format, + struct VL_CONTEXT **context +) +{ + struct VL_CONTEXT *ctx; + + assert(display); + assert(pipe); + assert(context); + + ctx = calloc(1, sizeof(struct VL_CONTEXT)); + + ctx->display = display; + ctx->pipe = pipe; + ctx->video_width = video_width; + ctx->video_height = video_height; + ctx->video_format = video_format; + + vlInit(ctx); + + /* Since we only change states in vlPutSurface() we need to start in render mode */ + vlBeginRender(ctx); + + *context = ctx; + + return 0; +} + +int vlDestroyContext(struct VL_CONTEXT *context) +{ + assert(context); + + vlDestroy(context); + + free(context); + + return 0; +} + +int vlBeginRender(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + /* Frame buffer set in vlRender*Macroblock() */ + /* Shaders, samplers, textures set in vlRender*Macroblock() */ + pipe->set_vertex_buffers(pipe, 3, context->states.mc.vertex_bufs); + pipe->set_vertex_elements(pipe, 3, context->states.mc.vertex_buf_elems); + pipe->set_viewport_state(pipe, &context->states.mc.viewport); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.mc.vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.mc.fs_const_buf); + + return 0; +} + +int vlEndRender(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + pipe->set_framebuffer_state(pipe, &context->states.csc.framebuffer); + pipe->set_viewport_state(pipe, &context->states.csc.viewport); + pipe->bind_sampler_states(pipe, 1, (void**)&context->states.csc.sampler); + /* Source texture set in vlPutSurface() */ + pipe->bind_vs_state(pipe, context->states.csc.vertex_shader); + pipe->bind_fs_state(pipe, context->states.csc.fragment_shader); + pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs); + pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.csc.vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf); + + return 0; +} + |