From c11a7ec821d41b91a3825c5abfb4687c98b5bf98 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 8 Jun 2008 03:04:14 -0400 Subject: Initial commit for g3dvl. Initial commit for g3dvl, contains support for basic XvMC features. - Context, surface, block, macroblock creation and deletion - Surface rendering - Frame pictures - Frame based motion compensation - Intra-coded macroblocks - Predicted macroblocks - Bi-directionally predicted macroblocks - Surface display - Color conversion - Scaling --- src/gallium/state_trackers/g3dvl/Makefile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/gallium/state_trackers/g3dvl/Makefile (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile new file mode 100644 index 0000000000..a0d85fbcc8 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -0,0 +1,18 @@ +TARGET = libg3dvl.a +OBJECTS = vl_context.o vl_data.o vl_surface.o +GALLIUMDIR = ../.. + +CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary + +############################################# + +.PHONY = all clean + +all: ${TARGET} + +${TARGET}: ${OBJECTS} + ar rcs $@ $^ + +clean: + rm -rf ${OBJECTS} ${TARGET} + -- cgit v1.2.3 From 1c893fd513f5335a81dd72db70d64763634ea856 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 29 Jun 2008 20:52:58 -0400 Subject: g3dvl: Simplify shader code. --- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 2968 ++++---------------- src/gallium/state_trackers/g3dvl/vl_shader_build.c | 205 ++ src/gallium/state_trackers/g3dvl/vl_shader_build.h | 61 + 4 files changed, 890 insertions(+), 2346 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_shader_build.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_shader_build.h (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index a0d85fbcc8..bd46d004e2 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,5 +1,5 @@ TARGET = libg3dvl.a -OBJECTS = vl_context.o vl_data.o vl_surface.o +OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index d2b1ad7948..88da47c06a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -9,6 +9,7 @@ #include #include #include +#include "vl_shader_build.h" #include "vl_data.h" static int vlInitIDCT(struct VL_CONTEXT *context) @@ -32,21 +33,11 @@ static int vlDestroyIDCT(struct VL_CONTEXT *context) static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int num_attribs = 3; - const unsigned int semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ - }; - const unsigned int semantic_indexes[3] = {0, 1, 2}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -57,158 +48,71 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 1; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move texcoords to output - mov o2, i2 - */ - for (i = 1; i < num_attribs; ++i) + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -217,13 +121,11 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; - + struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -234,120 +136,61 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + */ for (i = 0; i < 2; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare output (color) */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ for (i = 0; i < 3; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); } /* - tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel - tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel - tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel + * tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel + * tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel + * tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -356,30 +199,11 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 4; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ - }; - const unsigned int output_semantic_names[4] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ - TGSI_SEMANTIC_GENERIC /* Ref surface texcoords */ - }; - const unsigned int input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[4] = {0, 1, 2, 3}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -390,179 +214,78 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] unused - C[3] translates the ref surface texcoords to the ref macroblock */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 3; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Ref macroblock texcoords + */ + for (i = 0; i < 4; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move luma & chroma texcoords to output - mov o2, i2 - */ - for (i = 1; i < num_output_attribs - 1; ++i) + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o3, t0, c3 ; Translate texcoords into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -571,32 +294,11 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 6; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ - }; - const unsigned int output_semantic_names[6] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Top field surface texcoords */ - TGSI_SEMANTIC_GENERIC, /* Bottom field surface texcoords */ - TGSI_SEMANTIC_POSITION /* Pos */ - }; - const unsigned int input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[6] = {0, 1, 2, 3, 4, 5}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -607,34 +309,26 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration ( &decl, @@ -644,183 +338,70 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) ); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] denormalizes pos components - C[3] translates the ref surface top field texcoords to the ref macroblock - C[4] translates the ref surface bottom field texcoords to the ref macroblock */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 4; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field ref macroblock texcoords + * decl o4 ; Bottom field ref macroblock texcoords + * decl o5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t1, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* mov o0, t1 ; Move vertex pos to output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move luma & chroma texcoords to output - mov o2, i2 + mov o1, i1 ; Move input luma texcoords to output + mov o2, i2 ; Move input chroma texcoords to output */ - for (i = 1; i < num_output_attribs - 1; ++i) + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o3, t0, c3 ; Translate top field texcoords into position - add o4, t0, c4 ; Translate bottom field texcoords into position */ + /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock + add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i + 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul o5, t1, c2 ; Denorm pos for fragment shader */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 5; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul o5, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -829,13 +410,11 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -846,258 +425,101 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + */ for (i = 0; i < 3; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant input */ - /* C[0] is a multiplier to use when concatenating differential into a single channel - C[0] is a bias to get differential back to -1,1 range*/ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 1; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ for (i = 0; i < 4; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - mov t1.x, t0.w ; Move high part from .w channel to .x - tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - mov t1.z, t0.w ; Move high part from .w channel to .z - */ + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 2; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -1106,13 +528,11 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 200; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -1123,399 +543,150 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) - I[0] Luma texcoords - I[1] Chroma texcoords - I[2] Ref top field surface texcoords - I[3] Ref bottom field surface texcoords - I[4] Denormalized texel pos */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Denormalized vertex pos + */ for (i = 0; i < 5; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant input */ - /* C[0] is a multiplier to use when concatenating differential into a single channel - C[1] is a bias to get differential back to -1,1 range - C[2] is constants 2 and 1/2 for Y%2 field selector */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 2; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ for (i = 0; i < 4; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - mov t1.x, t0.w ; Move high part from .w channel to .x - tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - mov t1.z, t0.w ; Move high part from .w channel to .z - */ + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field - tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ + /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field + tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i + 1; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* XXX: Pos values off by 0.5 for rounding? */ - /* sub t4, i4.y, c2.x ; Sub 0.5 from position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 4; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; + /* XXX: Pos values off by 0.5? */ + /* sub t4, i4.y, c2.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_INPUT, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t4, c2.x ; Divide pos y coord by 2 (mul by 0.5) */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* floor t3, t3 ; Get rid of fractional part */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_FLOOR; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t3, c2.y ; Multiply by 2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t3, t4, t3 ; Subtract from y to get y % 2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* lerp t1, t3, t1, t2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -1524,31 +695,11 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 5; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC - }; - const unsigned int output_semantic_names[5] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC - }; - const unsigned int input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[5] = {0, 1, 2, 3, 4}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -1559,185 +710,85 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] unused - C[3] translates the past surface texcoords to the ref macroblock - C[4] unused - C[5] translates the future surface texcoords to the ref macroblock */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 5; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move past ref macroblock texcoords into position + * decl c4 ; Unused + * decl c5 ; Translation vector to move future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Past ref macroblock texcoords + * decl o4 ; Future ref macroblock texcoords + */ + for (i = 0; i < 5; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); - - /* add o0, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move luma & chroma texcoords to output - mov o2, i2 - */ - for (i = 1; i < num_output_attribs - 2; ++i) + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o3, t0, c3 ; Translate past surface texcoords into position - add o4, t0, c5 ; Repeat for future surface texcoords */ + /* add o3, t0, c3 ; Translate rect into position on past ref macroblock + add o4, t0, c5 ; Translate rect into position on future ref macroblock */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i + 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i * 2 + 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -1746,34 +797,11 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 8; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ - }; - const unsigned int output_semantic_names[8] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Top field past surface texcoords */ - TGSI_SEMANTIC_GENERIC, /* Bottom field past surface texcoords */ - TGSI_SEMANTIC_GENERIC, /* Top field future surface texcoords */ - TGSI_SEMANTIC_GENERIC, /* Bottom field future surface texcoords */ - TGSI_SEMANTIC_POSITION /* Pos */ - }; - const unsigned int input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -1783,225 +811,102 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) assert(context); - pipe = context->pipe; - + pipe = context->pipe; tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] denormalizes pos components - C[3] translates the past surface top field texcoords to the ref macroblock - C[4] translates the past surface bottom field texcoords to the ref macroblock - C[5] translates the future surface top field texcoords to the ref macroblock - C[6] translates the future surface bottom field texcoords to the ref macroblock */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 6; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position + * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position + * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field past ref macroblock texcoords + * decl o4 ; Bottom field past ref macroblock texcoords + * decl o5 ; Top field future ref macroblock texcoords + * decl o6 ; Bottom field future ref macroblock texcoords + * decl o7 ; Denormalized vertex pos + */ + for (i = 0; i < 8; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t1, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* mov o0, t1 ; Move vertex pos to output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move luma & chroma texcoords to output - mov o2, i2 - */ - for (i = 1; i < num_output_attribs - 1; ++i) + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o3, t0, c3 ; Translate top field past texcoords into position - add o4, t0, c4 ; Translate bottom field past texcoords into position - add o5, t0, c5 ; Translate top field past texcoords into position - add o6, t0, c6 ; Translate bottom field past texcoords into position */ + /* + * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock + * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock + * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock + * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock + */ for (i = 0; i < 4; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i + 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul o7, t1, c2 ; Denorm pos for fragment shader */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 7; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul o7, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -2010,13 +915,11 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -2027,288 +930,118 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s4 + */ for (i = 0; i < 4; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant input */ - /* C[0] is a multiplier to use when concatenating differential into a single channel - C[1] is a bias to get differential back to -1,1 range - C[2] contains 0.5 in channel X for use as a weight to blend past and future samples */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 2; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ for (i = 0; i < 5; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - mov t1.x, t0.w ; Move high part from .w channel to .x - tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - mov t1.z, t0.w ; Move high part from .w channel to .z - */ + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from past macroblock - tex2d t2, i3, s4 ; Read texel from future macroblock */ + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock + * tex2d t2, i3, s4 ; Read texel from future ref macroblock + */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i + 1; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[0].SrcRegister.Index = 2; + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -2317,13 +1050,11 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 200; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -2334,471 +1065,179 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) - I[0] Luma texcoords - I[1] Chroma texcoords - I[2] Past top field surface texcoords - I[3] Past bottom field surface texcoords - I[4] Future top field surface texcoords - I[5] Future bottom field surface texcoords - I[6] Denormalized texel pos */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Texcoords for s4 + * decl i5 ; Texcoords for s4 + * decl i6 ; Denormalized vertex pos + */ for (i = 0; i < 7; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant input */ - /* C[0] is a multiplier to use when concatenating differential into a single channel - C[1] is a bias to get differential back to -1,1 range - C[2] is constants 2 and 1/2 for Y%2 field selector */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 2; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); - - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ for (i = 0; i < 5; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - mov t1.x, t0.w ; Move high part from .w channel to .x - tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - mov t1.z, t0.w ; Move high part from .w channel to .z - */ + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* XXX: Pos values off by 0.5 for rounding? */ - /* sub t4, i6.y, c2.x ; Sub 0.5 from position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 4; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 6; + /* XXX: Pos values off by 0.5? */ + /* sub t4, i6.y, c2.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t4, c2.x ; Divide pos y coord by 2 (mul by 0.5) */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* floor t3, t3 ; Get rid of fractional part */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_FLOOR; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t3, c2.y ; Multiply by 2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t3, t4, t3 ; Subtract from y to get y % 2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from past macroblock top field - tex2d t2, i3, s3 ; Read texel from past macroblock bottom field */ + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field + */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i + 1; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* lerp t1, t3, t1, t2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t4, i4, s4 ; Read texel from future macroblock top field - tex2d t5, i5, s4 ; Read texel from future macroblock bottom field */ + /* + * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field + */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i + 4; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 4; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* lerp t2, t3, t4, t5 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 2; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 4; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 5; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* lerp t1, c2.x, t1, t2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[0].SrcRegister.Index = 2; + /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, t1 ; Add future and differential to form final output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -2936,8 +1375,8 @@ static int vlInitMC(struct VL_CONTEXT *context) filters[0] = PIPE_TEX_FILTER_NEAREST; filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[3] = PIPE_TEX_FILTER_NEAREST; - filters[4] = PIPE_TEX_FILTER_NEAREST; + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; for (i = 0; i < 5; ++i) { @@ -3037,16 +1476,11 @@ static int vlDestroyMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int num_attribs = 2; - const unsigned int semantic_names[2] = {TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC}; - const unsigned int semantic_indexes[2] = {0, 1}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -3057,101 +1491,54 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* MOV instructions */ - /* mov o0, i0 - mov o1, i1 */ - for (i = 0; i < num_attribs; i++) + /* + * mov o0, i0 ; Move pos in to pos out + * mov o1, i1 ; Move input texcoords to output + */ + for (i = 0; i < 2; i++) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END instruction */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -3160,13 +1547,11 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -3177,171 +1562,64 @@ static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare TEX[0] input */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare constant input */ - /* Constants include bias vector, 4x4 csc matrix, total 5 vectors */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 4; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare sampler */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* TEX instruction */ - /* tex2d t0, i0, s0 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* SUB instruction */ - /* sub t0, t0, c0 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* DP4 instruction */ - /* dp4 o0.x, t0, c1 - dp4 o0.y, t0, c2 - dp4 o0.z, t0, c3 - dp4 o0.w, t0, c4 */ + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ for (i = 0; i < 4; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_DP4; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END instruction */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c new file mode 100644 index 0000000000..fd9de7dab8 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -0,0 +1,205 @@ +#include "vl_shader_build.h" +#include +#include +#include + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + assert + ( + interpolation == TGSI_INTERPOLATE_CONSTANT || + interpolation == TGSI_INTERPOLATE_LINEAR || + interpolation == TGSI_INTERPOLATE_PERSPECTIVE + ); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = interpolation; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = src_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = tex; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.FullSrcRegisters[2].SrcRegister.File = src3_file; + inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; + + return inst; +} + +struct tgsi_full_instruction vl_end(void) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + + return inst; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h new file mode 100644 index 0000000000..9e64bbeeae --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h @@ -0,0 +1,61 @@ +#ifndef vl_shader_build_h +#define vl_shader_build_h + +#include + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +); +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +); +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +); +struct tgsi_full_instruction vl_end(void); + +#endif + -- cgit v1.2.3 From 49937b99855984dd01a431c026f9308b6c0dac4f Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 3 Jul 2008 20:05:32 -0400 Subject: g3dvl: Round surfaces up to POT, use src rect when outputting surfaces. --- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 48 +++++++++++++++++++++------ src/gallium/state_trackers/g3dvl/vl_context.h | 2 +- src/gallium/state_trackers/g3dvl/vl_surface.c | 29 +++++++++++++--- src/gallium/state_trackers/g3dvl/vl_types.h | 6 ++++ src/gallium/state_trackers/g3dvl/vl_util.c | 17 ++++++++++ src/gallium/state_trackers/g3dvl/vl_util.h | 7 ++++ 7 files changed, 93 insertions(+), 18 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_util.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_util.h (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index bd46d004e2..50e3c843b5 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,5 +1,5 @@ TARGET = libg3dvl.a -OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o +OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o vl_util.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index bd8743dc9a..58971bd7c7 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -11,6 +11,7 @@ #include #include "vl_shader_build.h" #include "vl_data.h" +#include "vl_util.h" static int vlInitIDCT(struct VL_CONTEXT *context) { @@ -1357,8 +1358,9 @@ static int vlInitMC(struct VL_CONTEXT *context) pipe = context->pipe; - context->states.mc.viewport.scale[0] = context->video_width; - context->states.mc.viewport.scale[1] = context->video_height; + /* For MC we render to textures, which are rounded up to nearest POT */ + context->states.mc.viewport.scale[0] = vlRoundUpPOT(context->video_width); + context->states.mc.viewport.scale[1] = vlRoundUpPOT(context->video_height); context->states.mc.viewport.scale[2] = 1; context->states.mc.viewport.scale[3] = 1; context->states.mc.viewport.translate[0] = 0; @@ -1512,6 +1514,13 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + + /* + * decl c0 ; Scaling vector to scale texcoord rect to source size + * decl c1 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* * decl o0 ; Vertex pos @@ -1522,16 +1531,18 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + + /* mov o0, i0 ; Move pos in to pos out */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* - * mov o0, i0 ; Move pos in to pos out - * mov o1, i1 ; Move input texcoords to output - */ - for (i = 0; i < 2; i++) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } + /* add o1, t0, c1 ; Translate texcoord rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* end */ inst = vl_end(); @@ -1693,6 +1704,19 @@ static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) context->states.csc.vertex_buf_elems[1].nr_components = 2; context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + /* + Create our vertex shader's constant buffer + Const buffer contains scaling and translation vectors + */ + context->states.csc.vs_const_buf.size = sizeof(struct VL_CSC_VS_CONSTS); + context->states.csc.vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.csc.vs_const_buf.size + ); + /* Create our fragment shader's constant buffer Const buffer contains the color conversion matrix and bias vectors @@ -1776,6 +1800,7 @@ static int vlDestroyCSC(struct VL_CONTEXT *context) context->pipe->delete_fs_state(context->pipe, context->states.csc.fragment_shader); context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer); context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vs_const_buf.buffer); context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer); return 0; @@ -1986,6 +2011,7 @@ int vlEndRender(struct VL_CONTEXT *context) pipe->bind_fs_state(pipe, context->states.csc.fragment_shader); pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs); pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.csc.vs_const_buf); pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf); return 0; diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index 8a12318073..9ebda21a1c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -49,7 +49,7 @@ struct VL_CONTEXT struct pipe_shader_state *vertex_shader, *fragment_shader; struct pipe_vertex_buffer vertex_bufs[2]; struct pipe_vertex_element vertex_buf_elems[2]; - struct pipe_constant_buffer fs_const_buf; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; } csc; } states; }; diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 68313cc750..13f7301f07 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -7,6 +7,7 @@ #include #include "vl_context.h" #include "vl_defs.h" +#include "vl_util.h" static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) { @@ -194,9 +195,6 @@ static int vlGrabBlocks pipe_surface_unmap(tex_surface); } - /* XXX: Texture cache is not invalidated when texture contents change */ - context->pipe->flush(context->pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL); - return 0; } @@ -214,8 +212,8 @@ int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) sfc = calloc(1, sizeof(struct VL_SURFACE)); sfc->context = context; - sfc->width = context->video_width; - sfc->height = context->video_height; + sfc->width = vlRoundUpPOT(context->video_width); + sfc->height = vlRoundUpPOT(context->video_height); sfc->format = context->video_format; memset(&template, 0, sizeof(struct pipe_texture)); @@ -227,6 +225,7 @@ int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) template.depth[0] = 1; template.compressed = 0; pf_get_block(template.format, &template.block); + /* XXX: Needed? */ template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; sfc->texture = pipe->screen->texture_create(pipe->screen, &template); @@ -517,6 +516,7 @@ int vlPutSurface { unsigned int create_fb = 0; struct pipe_context *pipe; + struct VL_CSC_VS_CONSTS *vs_consts; assert(surface); @@ -568,9 +568,28 @@ int vlPutSurface vlEndRender(surface->context); + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.csc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->src_scale.x = srcw / (float)surface->width; + vs_consts->src_scale.y = srch / (float)surface->height; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = srcx / (float)surface->width; + vs_consts->src_trans.y = srcy / (float)surface->height; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.csc.vs_const_buf.buffer); + pipe->set_sampler_textures(pipe, 1, &surface->texture); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + /* XXX: Need to take destx, desty into consideration */ pipe->winsys->flush_frontbuffer ( pipe->winsys, diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h index 97753699db..4d210c9e0a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_types.h +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -75,6 +75,12 @@ struct VL_MC_FS_CONSTS struct VL_VERTEX4F y_divider; }; +struct VL_CSC_VS_CONSTS +{ + struct VL_VERTEX4F src_scale; + struct VL_VERTEX4F src_trans; +}; + struct VL_CSC_FS_CONSTS { struct VL_VERTEX4F bias; diff --git a/src/gallium/state_trackers/g3dvl/vl_util.c b/src/gallium/state_trackers/g3dvl/vl_util.c new file mode 100644 index 0000000000..2421ae2210 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_util.c @@ -0,0 +1,17 @@ +#include "vl_util.h" +#include + +unsigned int vlRoundUpPOT(unsigned int x) +{ + unsigned int i; + + assert(x > 0); + + --x; + + for (i = 1; i < sizeof(unsigned int) * 8; i <<= 1) + x |= x >> i; + + return x + 1; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_util.h b/src/gallium/state_trackers/g3dvl/vl_util.h new file mode 100644 index 0000000000..e4b72c4f87 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_util.h @@ -0,0 +1,7 @@ +#ifndef vl_util_h +#define vl_util_h + +unsigned int vlRoundUpPOT(unsigned int x); + +#endif + -- cgit v1.2.3 From 0c25ac52425e6d6eb037b99ab90f41b47e3f4491 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Tue, 22 Jul 2008 22:26:26 -0400 Subject: g3dvl: Add Nouveau winsys, libdriclient. Nouveau winsys is based on Mesa's Nouveau winsys and soft-links to most of it. The 'nouveau_context' and 'nouveau_screen' code contains most of the changes, 'nouveau_winsys_pipe', 'nouveau_swapbuffers' and 'nouveau_lock' contain some minor changes. The driclient library contains the DRI userland stuff, most of which was based on Mesa's DRI code. --- src/driclient/include/driclient.h | 98 ++++ src/driclient/include/xf86dri.h | 119 ++++ src/driclient/src/Makefile | 22 + src/driclient/src/XF86dri.c | 619 +++++++++++++++++++++ src/driclient/src/driclient.c | 292 ++++++++++ src/driclient/src/test | Bin 0 -> 68389 bytes src/driclient/src/test.c | 41 ++ src/driclient/src/xf86dri.h | 119 ++++ src/driclient/src/xf86dristr.h | 342 ++++++++++++ src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 68 ++- src/gallium/state_trackers/g3dvl/vl_shader_build.c | 12 + src/gallium/state_trackers/g3dvl/vl_shader_build.h | 1 + src/gallium/state_trackers/g3dvl/vl_surface.c | 4 +- src/gallium/winsys/g3dvl/nouveau/Makefile | 46 ++ src/gallium/winsys/g3dvl/nouveau/nouveau_bo.c | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_channel.c | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_context.c | 371 ++++++++++++ src/gallium/winsys/g3dvl/nouveau/nouveau_context.h | 105 ++++ src/gallium/winsys/g3dvl/nouveau/nouveau_device.c | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_dma.c | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_dma.h | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_dri.h | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_drmif.h | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_fence.c | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_grobj.c | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_local.h | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c | 92 +++ .../winsys/g3dvl/nouveau/nouveau_notifier.c | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_pushbuf.c | 1 + .../winsys/g3dvl/nouveau/nouveau_resource.c | 1 + src/gallium/winsys/g3dvl/nouveau/nouveau_screen.c | 88 +++ src/gallium/winsys/g3dvl/nouveau/nouveau_screen.h | 24 + .../winsys/g3dvl/nouveau/nouveau_swapbuffers.c | 64 +++ .../winsys/g3dvl/nouveau/nouveau_swapbuffers.h | 10 + src/gallium/winsys/g3dvl/nouveau/nouveau_winsys.c | 1 + .../winsys/g3dvl/nouveau/nouveau_winsys_pipe.c | 261 +++++++++ .../winsys/g3dvl/nouveau/nouveau_winsys_pipe.h | 1 + .../winsys/g3dvl/nouveau/nouveau_winsys_softpipe.c | 1 + src/gallium/winsys/g3dvl/nouveau/nv04_surface.c | 1 + src/gallium/winsys/g3dvl/nouveau/nv50_surface.c | 1 + src/gallium/winsys/g3dvl/vl_winsys.h | 14 + src/gallium/winsys/g3dvl/xsp_winsys.c | 72 ++- src/gallium/winsys/g3dvl/xsp_winsys.h | 12 - src/libXvMC/Makefile | 19 +- src/libXvMC/context.c | 4 +- 46 files changed, 2896 insertions(+), 43 deletions(-) create mode 100644 src/driclient/include/driclient.h create mode 100644 src/driclient/include/xf86dri.h create mode 100644 src/driclient/src/Makefile create mode 100644 src/driclient/src/XF86dri.c create mode 100644 src/driclient/src/driclient.c create mode 100755 src/driclient/src/test create mode 100644 src/driclient/src/test.c create mode 100644 src/driclient/src/xf86dri.h create mode 100644 src/driclient/src/xf86dristr.h create mode 100644 src/gallium/winsys/g3dvl/nouveau/Makefile create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_bo.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_channel.c create mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_context.c create mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_context.h create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_device.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_dma.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_dma.h create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_dri.h create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_drmif.h create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_fence.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_grobj.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_local.h create mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_notifier.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_pushbuf.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_resource.c create mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_screen.c create mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_screen.h create mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c create mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_winsys.c create mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.h create mode 120000 src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_softpipe.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nv04_surface.c create mode 120000 src/gallium/winsys/g3dvl/nouveau/nv50_surface.c create mode 100644 src/gallium/winsys/g3dvl/vl_winsys.h delete mode 100644 src/gallium/winsys/g3dvl/xsp_winsys.h (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/driclient/include/driclient.h b/src/driclient/include/driclient.h new file mode 100644 index 0000000000..36438a9f79 --- /dev/null +++ b/src/driclient/include/driclient.h @@ -0,0 +1,98 @@ +#ifndef driclient_h +#define driclient_h + +#include +#include +#include +#include "xf86dri.h" + +/* TODO: Bring in DRI XML options */ + +typedef struct dri_version +{ + int major; + int minor; + int patch; +} dri_version_t; + +typedef struct dri_screen +{ + Display *display; + unsigned int num; + dri_version_t ddx, dri, drm; + int draw_lock_id; + int fd; + drm_sarea_t *sarea; + void *drawable_hash; + void *private; +} dri_screen_t; + +struct dri_context; + +typedef struct dri_drawable +{ + drm_drawable_t drm_drawable; + Drawable x_drawable; + unsigned int sarea_index; + unsigned int *sarea_stamp; + unsigned int last_sarea_stamp; + int x, y, w, h; + int back_x, back_y; + int num_cliprects, num_back_cliprects; + drm_clip_rect_t *cliprects, *back_cliprects; + dri_screen_t *dri_screen; + unsigned int refcount; + void *private; +} dri_drawable_t; + +typedef struct dri_context +{ + XID id; + drm_context_t drm_context; + dri_screen_t *dri_screen; + void *private; +} dri_context_t; + +typedef struct dri_framebuffer +{ + drm_handle_t drm_handle; + int base, size, stride; + int private_size; + void *private; +} dri_framebuffer_t; + +int driCreateScreen(Display *display, int screen, dri_screen_t **dri_screen, dri_framebuffer_t *dri_framebuf); +int driDestroyScreen(dri_screen_t *dri_screen); +int driCreateDrawable(dri_screen_t *dri_screen, Drawable drawable, dri_drawable_t **dri_drawable); +int driUpdateDrawableInfo(dri_drawable_t *dri_drawable); +int driDestroyDrawable(dri_drawable_t *dri_drawable); +int driCreateContext(dri_screen_t *dri_screen, Visual *visual, dri_context_t **dri_context); +int driDestroyContext(dri_context_t *dri_context); +int driCompareVersions(const dri_version_t *v1, const dri_version_t *v2); + +#define DRI_VALIDATE_DRAWABLE_INFO_ONCE(dri_drawable) \ +do \ +{ \ + if (*(dri_drawable->sarea_stamp) != dri_drawable->last_sarea_stamp) \ + driUpdateDrawableInfo(dri_drawable); \ +} while (0) + +#define DRI_VALIDATE_DRAWABLE_INFO(dri_screen, dri_drawable) \ +do \ +{ \ + while (*(dri_drawable->sarea_stamp) != dri_drawable->last_sarea_stamp) \ + { \ + register unsigned int hwContext = dri_screen->sarea->lock.lock & \ + ~(DRM_LOCK_HELD | DRM_LOCK_CONT); \ + DRM_UNLOCK(dri_screen->fd, &dri_screen->sarea->lock, hwContext); \ + \ + DRM_SPINLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id); \ + DRI_VALIDATE_DRAWABLE_INFO_ONCE(dri_drawable); \ + DRM_SPINUNLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id); \ + \ + DRM_LIGHT_LOCK(dri_screen->fd, &dri_screen->sarea->lock, hwContext); \ + } \ +} while (0) + +#endif + diff --git a/src/driclient/include/xf86dri.h b/src/driclient/include/xf86dri.h new file mode 100644 index 0000000000..baf80a7a9d --- /dev/null +++ b/src/driclient/include/xf86dri.h @@ -0,0 +1,119 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright 2000 VA Linux Systems, Inc. +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file xf86dri.h + * Protocol numbers and function prototypes for DRI X protocol. + * + * \author Kevin E. Martin + * \author Jens Owen + * \author Rickard E. (Rik) Faith + */ + +#ifndef _XF86DRI_H_ +#define _XF86DRI_H_ + +#include +#include + +#define X_XF86DRIQueryVersion 0 +#define X_XF86DRIQueryDirectRenderingCapable 1 +#define X_XF86DRIOpenConnection 2 +#define X_XF86DRICloseConnection 3 +#define X_XF86DRIGetClientDriverName 4 +#define X_XF86DRICreateContext 5 +#define X_XF86DRIDestroyContext 6 +#define X_XF86DRICreateDrawable 7 +#define X_XF86DRIDestroyDrawable 8 +#define X_XF86DRIGetDrawableInfo 9 +#define X_XF86DRIGetDeviceInfo 10 +#define X_XF86DRIAuthConnection 11 +#define X_XF86DRIOpenFullScreen 12 /* Deprecated */ +#define X_XF86DRICloseFullScreen 13 /* Deprecated */ + +#define XF86DRINumberEvents 0 + +#define XF86DRIClientNotLocal 0 +#define XF86DRIOperationNotSupported 1 +#define XF86DRINumberErrors (XF86DRIOperationNotSupported + 1) + +#ifndef _XF86DRI_SERVER_ + +_XFUNCPROTOBEGIN + +Bool XF86DRIQueryExtension( Display *dpy, int *event_base, int *error_base ); + +Bool XF86DRIQueryVersion( Display *dpy, int *majorVersion, int *minorVersion, + int *patchVersion ); + +Bool XF86DRIQueryDirectRenderingCapable( Display *dpy, int screen, + Bool *isCapable ); + +Bool XF86DRIOpenConnection( Display *dpy, int screen, drm_handle_t *hSAREA, + char **busIDString ); + +Bool XF86DRIAuthConnection( Display *dpy, int screen, drm_magic_t magic ); + +Bool XF86DRICloseConnection( Display *dpy, int screen ); + +Bool XF86DRIGetClientDriverName( Display *dpy, int screen, + int *ddxDriverMajorVersion, int *ddxDriverMinorVersion, + int *ddxDriverPatchVersion, char **clientDriverName ); + +Bool XF86DRICreateContext( Display *dpy, int screen, Visual *visual, + XID *ptr_to_returned_context_id, drm_context_t *hHWContext ); + +Bool XF86DRICreateContextWithConfig( Display *dpy, int screen, int configID, + XID *ptr_to_returned_context_id, drm_context_t *hHWContext ); + +Bool XF86DRIDestroyContext( Display *dpy, int screen, + XID context_id ); + +Bool XF86DRICreateDrawable( Display *dpy, int screen, + Drawable drawable, drm_drawable_t *hHWDrawable ); + +Bool XF86DRIDestroyDrawable( Display *dpy, int screen, + Drawable drawable); + +Bool XF86DRIGetDrawableInfo( Display *dpy, int screen, Drawable drawable, + unsigned int *index, unsigned int *stamp, + int *X, int *Y, int *W, int *H, + int *numClipRects, drm_clip_rect_t ** pClipRects, + int *backX, int *backY, + int *numBackClipRects, drm_clip_rect_t **pBackClipRects ); + +Bool XF86DRIGetDeviceInfo( Display *dpy, int screen, + drm_handle_t *hFrameBuffer, int *fbOrigin, int *fbSize, + int *fbStride, int *devPrivateSize, void **pDevPrivate ); + +_XFUNCPROTOEND + +#endif /* _XF86DRI_SERVER_ */ + +#endif /* _XF86DRI_H_ */ + diff --git a/src/driclient/src/Makefile b/src/driclient/src/Makefile new file mode 100644 index 0000000000..0fac552de5 --- /dev/null +++ b/src/driclient/src/Makefile @@ -0,0 +1,22 @@ +TARGET = libdriclient.a +OBJECTS = driclient.o XF86dri.o +DRMDIR ?= /usr + +CFLAGS += -g -Wall -fPIC -Werror -I../include -I${DRMDIR}/include -I${DRMDIR}/include/drm + +############################################# + +.PHONY = all clean + +all: ${TARGET} test + +${TARGET}: ${OBJECTS} + ar rcs $@ $^ + cp ${TARGET} ../lib + +test: test.o + $(CC) -L../lib -L${DRMDIR}/lib ${LDFLAGS} -o $@ $^ -ldriclient -lX11 -lXext -ldrm + +clean: + rm -rf ${OBJECTS} ${TARGET} test test.o + diff --git a/src/driclient/src/XF86dri.c b/src/driclient/src/XF86dri.c new file mode 100644 index 0000000000..9e359a9238 --- /dev/null +++ b/src/driclient/src/XF86dri.c @@ -0,0 +1,619 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright 2000 VA Linux Systems, Inc. +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin + * Jens Owen + * Rickard E. (Rik) Faith + * + */ + +/* THIS IS NOT AN X CONSORTIUM STANDARD */ + +#define NEED_REPLIES +#include +#include +#include +#include "xf86dristr.h" + +static XExtensionInfo _xf86dri_info_data; +static XExtensionInfo *xf86dri_info = &_xf86dri_info_data; +static char xf86dri_extension_name[] = XF86DRINAME; + +#define XF86DRICheckExtension(dpy,i,val) \ + XextCheckExtension (dpy, i, xf86dri_extension_name, val) + +/***************************************************************************** + * * + * private utility routines * + * * + *****************************************************************************/ + +static int close_display(Display *dpy, XExtCodes *extCodes); +static /* const */ XExtensionHooks xf86dri_extension_hooks = { + NULL, /* create_gc */ + NULL, /* copy_gc */ + NULL, /* flush_gc */ + NULL, /* free_gc */ + NULL, /* create_font */ + NULL, /* free_font */ + close_display, /* close_display */ + NULL, /* wire_to_event */ + NULL, /* event_to_wire */ + NULL, /* error */ + NULL, /* error_string */ +}; + +static XEXT_GENERATE_FIND_DISPLAY (find_display, xf86dri_info, + xf86dri_extension_name, + &xf86dri_extension_hooks, + 0, NULL) + +static XEXT_GENERATE_CLOSE_DISPLAY (close_display, xf86dri_info) + + +/***************************************************************************** + * * + * public XFree86-DRI Extension routines * + * * + *****************************************************************************/ + +#if 0 +#include +#define TRACE(msg) fprintf(stderr,"XF86DRI%s\n", msg); +#else +#define TRACE(msg) +#endif + +#define PUBLIC + +PUBLIC Bool XF86DRIQueryExtension (dpy, event_basep, error_basep) + Display *dpy; + int *event_basep, *error_basep; +{ + XExtDisplayInfo *info = find_display (dpy); + + TRACE("QueryExtension..."); + if (XextHasExtension(info)) { + *event_basep = info->codes->first_event; + *error_basep = info->codes->first_error; + TRACE("QueryExtension... return True"); + return True; + } else { + TRACE("QueryExtension... return False"); + return False; + } +} + +PUBLIC Bool XF86DRIQueryVersion(dpy, majorVersion, minorVersion, patchVersion) + Display* dpy; + int* majorVersion; + int* minorVersion; + int* patchVersion; +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIQueryVersionReply rep; + xXF86DRIQueryVersionReq *req; + + TRACE("QueryVersion..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIQueryVersion, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIQueryVersion; + if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("QueryVersion... return False"); + return False; + } + *majorVersion = rep.majorVersion; + *minorVersion = rep.minorVersion; + *patchVersion = rep.patchVersion; + UnlockDisplay(dpy); + SyncHandle(); + TRACE("QueryVersion... return True"); + return True; +} + +PUBLIC Bool XF86DRIQueryDirectRenderingCapable(dpy, screen, isCapable) + Display* dpy; + int screen; + Bool* isCapable; +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIQueryDirectRenderingCapableReply rep; + xXF86DRIQueryDirectRenderingCapableReq *req; + + TRACE("QueryDirectRenderingCapable..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIQueryDirectRenderingCapable, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIQueryDirectRenderingCapable; + req->screen = screen; + if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("QueryDirectRenderingCapable... return False"); + return False; + } + *isCapable = rep.isCapable; + UnlockDisplay(dpy); + SyncHandle(); + TRACE("QueryDirectRenderingCapable... return True"); + return True; +} + +PUBLIC Bool XF86DRIOpenConnection(dpy, screen, hSAREA, busIdString) + Display* dpy; + int screen; + drm_handle_t * hSAREA; + char **busIdString; +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIOpenConnectionReply rep; + xXF86DRIOpenConnectionReq *req; + + TRACE("OpenConnection..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIOpenConnection, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIOpenConnection; + req->screen = screen; + if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("OpenConnection... return False"); + return False; + } + + *hSAREA = rep.hSAREALow; + if (sizeof(drm_handle_t) == 8) { + int shift = 32; /* var to prevent warning on next line */ + *hSAREA |= ((drm_handle_t) rep.hSAREAHigh) << shift; + } + + if (rep.length) { + if (!(*busIdString = (char *)Xcalloc(rep.busIdStringLength + 1, 1))) { + _XEatData(dpy, ((rep.busIdStringLength+3) & ~3)); + UnlockDisplay(dpy); + SyncHandle(); + TRACE("OpenConnection... return False"); + return False; + } + _XReadPad(dpy, *busIdString, rep.busIdStringLength); + } else { + *busIdString = NULL; + } + UnlockDisplay(dpy); + SyncHandle(); + TRACE("OpenConnection... return True"); + return True; +} + +PUBLIC Bool XF86DRIAuthConnection(dpy, screen, magic) + Display* dpy; + int screen; + drm_magic_t magic; +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIAuthConnectionReq *req; + xXF86DRIAuthConnectionReply rep; + + TRACE("AuthConnection..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIAuthConnection, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIAuthConnection; + req->screen = screen; + req->magic = magic; + rep.authenticated = 0; + if (!_XReply(dpy, (xReply *)&rep, 0, xFalse) || !rep.authenticated) { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("AuthConnection... return False"); + return False; + } + UnlockDisplay(dpy); + SyncHandle(); + TRACE("AuthConnection... return True"); + return True; +} + +PUBLIC Bool XF86DRICloseConnection(dpy, screen) + Display* dpy; + int screen; +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRICloseConnectionReq *req; + + TRACE("CloseConnection..."); + + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRICloseConnection, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRICloseConnection; + req->screen = screen; + UnlockDisplay(dpy); + SyncHandle(); + TRACE("CloseConnection... return True"); + return True; +} + +PUBLIC Bool XF86DRIGetClientDriverName(dpy, screen, ddxDriverMajorVersion, + ddxDriverMinorVersion, ddxDriverPatchVersion, clientDriverName) + Display* dpy; + int screen; + int* ddxDriverMajorVersion; + int* ddxDriverMinorVersion; + int* ddxDriverPatchVersion; + char** clientDriverName; +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIGetClientDriverNameReply rep; + xXF86DRIGetClientDriverNameReq *req; + + TRACE("GetClientDriverName..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIGetClientDriverName, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIGetClientDriverName; + req->screen = screen; + if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetClientDriverName... return False"); + return False; + } + + *ddxDriverMajorVersion = rep.ddxDriverMajorVersion; + *ddxDriverMinorVersion = rep.ddxDriverMinorVersion; + *ddxDriverPatchVersion = rep.ddxDriverPatchVersion; + + if (rep.length) { + if (!(*clientDriverName = (char *)Xcalloc(rep.clientDriverNameLength + 1, 1))) { + _XEatData(dpy, ((rep.clientDriverNameLength+3) & ~3)); + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetClientDriverName... return False"); + return False; + } + _XReadPad(dpy, *clientDriverName, rep.clientDriverNameLength); + } else { + *clientDriverName = NULL; + } + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetClientDriverName... return True"); + return True; +} + +PUBLIC Bool XF86DRICreateContextWithConfig(dpy, screen, configID, context, + hHWContext) + Display* dpy; + int screen; + int configID; + XID* context; + drm_context_t * hHWContext; +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRICreateContextReply rep; + xXF86DRICreateContextReq *req; + + TRACE("CreateContext..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRICreateContext, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRICreateContext; + req->visual = configID; + req->screen = screen; + *context = XAllocID(dpy); + req->context = *context; + if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("CreateContext... return False"); + return False; + } + *hHWContext = rep.hHWContext; + UnlockDisplay(dpy); + SyncHandle(); + TRACE("CreateContext... return True"); + return True; +} + +PUBLIC Bool XF86DRICreateContext(dpy, screen, visual, context, hHWContext) + Display* dpy; + int screen; + Visual* visual; + XID* context; + drm_context_t * hHWContext; +{ + return XF86DRICreateContextWithConfig( dpy, screen, visual->visualid, + context, hHWContext ); +} + +PUBLIC Bool XF86DRIDestroyContext( Display * ndpy, int screen, + XID context ) +{ + Display * const dpy = (Display *) ndpy; + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIDestroyContextReq *req; + + TRACE("DestroyContext..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIDestroyContext, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIDestroyContext; + req->screen = screen; + req->context = context; + UnlockDisplay(dpy); + SyncHandle(); + TRACE("DestroyContext... return True"); + return True; +} + +PUBLIC Bool XF86DRICreateDrawable( Display * ndpy, int screen, + Drawable drawable, drm_drawable_t * hHWDrawable ) +{ + Display * const dpy = (Display *) ndpy; + XExtDisplayInfo *info = find_display (dpy); + xXF86DRICreateDrawableReply rep; + xXF86DRICreateDrawableReq *req; + + TRACE("CreateDrawable..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRICreateDrawable, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRICreateDrawable; + req->screen = screen; + req->drawable = drawable; + if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("CreateDrawable... return False"); + return False; + } + *hHWDrawable = rep.hHWDrawable; + UnlockDisplay(dpy); + SyncHandle(); + TRACE("CreateDrawable... return True"); + return True; +} + +PUBLIC Bool XF86DRIDestroyDrawable( Display * ndpy, int screen, + Drawable drawable ) +{ + Display * const dpy = (Display *) ndpy; + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIDestroyDrawableReq *req; + + TRACE("DestroyDrawable..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIDestroyDrawable, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIDestroyDrawable; + req->screen = screen; + req->drawable = drawable; + UnlockDisplay(dpy); + SyncHandle(); + TRACE("DestroyDrawable... return True"); + return True; +} + +PUBLIC Bool XF86DRIGetDrawableInfo(Display* dpy, int screen, Drawable drawable, + unsigned int* index, unsigned int* stamp, + int* X, int* Y, int* W, int* H, + int* numClipRects, drm_clip_rect_t ** pClipRects, + int* backX, int* backY, + int* numBackClipRects, drm_clip_rect_t ** pBackClipRects ) +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIGetDrawableInfoReply rep; + xXF86DRIGetDrawableInfoReq *req; + int total_rects; + + TRACE("GetDrawableInfo..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIGetDrawableInfo, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIGetDrawableInfo; + req->screen = screen; + req->drawable = drawable; + + if (!_XReply(dpy, (xReply *)&rep, 1, xFalse)) + { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetDrawableInfo... return False"); + return False; + } + *index = rep.drawableTableIndex; + *stamp = rep.drawableTableStamp; + *X = (int)rep.drawableX; + *Y = (int)rep.drawableY; + *W = (int)rep.drawableWidth; + *H = (int)rep.drawableHeight; + *numClipRects = rep.numClipRects; + total_rects = *numClipRects; + + *backX = rep.backX; + *backY = rep.backY; + *numBackClipRects = rep.numBackClipRects; + total_rects += *numBackClipRects; + +#if 0 + /* Because of the fix in Xserver/GL/dri/xf86dri.c, this check breaks + * backwards compatibility (Because of the >> 2 shift) but the fix + * enables multi-threaded apps to work. + */ + if (rep.length != ((((SIZEOF(xXF86DRIGetDrawableInfoReply) - + SIZEOF(xGenericReply) + + total_rects * sizeof(drm_clip_rect_t)) + 3) & ~3) >> 2)) { + _XEatData(dpy, rep.length); + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetDrawableInfo... return False"); + return False; + } +#endif + + if (*numClipRects) { + int len = sizeof(drm_clip_rect_t) * (*numClipRects); + + *pClipRects = (drm_clip_rect_t *)Xcalloc(len, 1); + if (*pClipRects) + _XRead(dpy, (char*)*pClipRects, len); + } else { + *pClipRects = NULL; + } + + if (*numBackClipRects) { + int len = sizeof(drm_clip_rect_t) * (*numBackClipRects); + + *pBackClipRects = (drm_clip_rect_t *)Xcalloc(len, 1); + if (*pBackClipRects) + _XRead(dpy, (char*)*pBackClipRects, len); + } else { + *pBackClipRects = NULL; + } + + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetDrawableInfo... return True"); + return True; +} + +PUBLIC Bool XF86DRIGetDeviceInfo(dpy, screen, hFrameBuffer, + fbOrigin, fbSize, fbStride, devPrivateSize, pDevPrivate) + Display* dpy; + int screen; + drm_handle_t * hFrameBuffer; + int* fbOrigin; + int* fbSize; + int* fbStride; + int* devPrivateSize; + void** pDevPrivate; +{ + XExtDisplayInfo *info = find_display (dpy); + xXF86DRIGetDeviceInfoReply rep; + xXF86DRIGetDeviceInfoReq *req; + + TRACE("GetDeviceInfo..."); + XF86DRICheckExtension (dpy, info, False); + + LockDisplay(dpy); + GetReq(XF86DRIGetDeviceInfo, req); + req->reqType = info->codes->major_opcode; + req->driReqType = X_XF86DRIGetDeviceInfo; + req->screen = screen; + if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) { + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetDeviceInfo... return False"); + return False; + } + + *hFrameBuffer = rep.hFrameBufferLow; + if (sizeof(drm_handle_t) == 8) { + int shift = 32; /* var to prevent warning on next line */ + *hFrameBuffer |= ((drm_handle_t) rep.hFrameBufferHigh) << shift; + } + + *fbOrigin = rep.framebufferOrigin; + *fbSize = rep.framebufferSize; + *fbStride = rep.framebufferStride; + *devPrivateSize = rep.devPrivateSize; + + if (rep.length) { + if (!(*pDevPrivate = (void *)Xcalloc(rep.devPrivateSize, 1))) { + _XEatData(dpy, ((rep.devPrivateSize+3) & ~3)); + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetDeviceInfo... return False"); + return False; + } + _XRead(dpy, (char*)*pDevPrivate, rep.devPrivateSize); + } else { + *pDevPrivate = NULL; + } + + UnlockDisplay(dpy); + SyncHandle(); + TRACE("GetDeviceInfo... return True"); + return True; +} + +PUBLIC Bool XF86DRIOpenFullScreen(dpy, screen, drawable) + Display* dpy; + int screen; + Drawable drawable; +{ + /* This function and the underlying X protocol are deprecated. + */ + (void) dpy; + (void) screen; + (void) drawable; + return False; +} + +PUBLIC Bool XF86DRICloseFullScreen(dpy, screen, drawable) + Display* dpy; + int screen; + Drawable drawable; +{ + /* This function and the underlying X protocol are deprecated. + */ + (void) dpy; + (void) screen; + (void) drawable; + return True; +} + diff --git a/src/driclient/src/driclient.c b/src/driclient/src/driclient.c new file mode 100644 index 0000000000..7a7ca95702 --- /dev/null +++ b/src/driclient/src/driclient.c @@ -0,0 +1,292 @@ +#include "driclient.h" +#include +#include + +int driCreateScreen(Display *display, int screen, dri_screen_t **dri_screen, dri_framebuffer_t *dri_framebuf) +{ + int evbase, errbase; + char *driver_name; + int newly_opened; + drm_magic_t magic; + drmVersionPtr drm_version; + drm_handle_t sarea_handle; + char *bus_id; + dri_screen_t *dri_scrn; + + assert(display); + assert(dri_screen); + + if (!XF86DRIQueryExtension(display, &evbase, &errbase)) + return 1; + + dri_scrn = calloc(1, sizeof(dri_screen_t)); + + if (!dri_scrn) + return 1; + + if (!XF86DRIQueryVersion(display, &dri_scrn->dri.major, &dri_scrn->dri.minor, &dri_scrn->dri.patch)) + goto free_screen; + + dri_scrn->display = display; + dri_scrn->num = screen; + dri_scrn->draw_lock_id = 1; + + if (!XF86DRIOpenConnection(display, screen, &sarea_handle, &bus_id)) + goto free_screen; + + dri_scrn->fd = -1; + dri_scrn->fd = drmOpenOnce(NULL, bus_id, &newly_opened); + XFree(bus_id); + + if (dri_scrn->fd < 0) + goto close_connection; + + if (drmGetMagic(dri_scrn->fd, &magic)) + goto close_drm; + + drm_version = drmGetVersion(dri_scrn->fd); + + if (!drm_version) + goto close_drm; + + dri_scrn->drm.major = drm_version->version_major; + dri_scrn->drm.minor = drm_version->version_minor; + dri_scrn->drm.patch = drm_version->version_patchlevel; + drmFreeVersion(drm_version); + + if (!XF86DRIAuthConnection(display, screen, magic)) + goto close_drm; + + if (!XF86DRIGetClientDriverName + ( + display, + screen, + &dri_scrn->ddx.major, + &dri_scrn->ddx.minor, + &dri_scrn->ddx.patch, + &driver_name + )) + goto close_drm; + + if (drmMap(dri_scrn->fd, sarea_handle, SAREA_MAX, (drmAddress)&dri_scrn->sarea)) + goto close_drm; + + dri_scrn->drawable_hash = drmHashCreate(); + + if (!dri_scrn->drawable_hash) + goto unmap_sarea; + + if (dri_framebuf) + { + if (!XF86DRIGetDeviceInfo + ( + display, + screen, &dri_framebuf->drm_handle, + &dri_framebuf->base, + &dri_framebuf->size, + &dri_framebuf->stride, + &dri_framebuf->private_size, + &dri_framebuf->private + )) + goto destroy_hash; + } + + *dri_screen = dri_scrn; + + return 0; + +destroy_hash: + drmHashDestroy(dri_scrn->drawable_hash); +unmap_sarea: + drmUnmap(dri_scrn->sarea, SAREA_MAX); +close_drm: + drmCloseOnce(dri_scrn->fd); +close_connection: + XF86DRICloseConnection(display, screen); +free_screen: + free(dri_scrn); + + return 1; +} + +int driDestroyScreen(dri_screen_t *dri_screen) +{ + assert(dri_screen); + + drmHashDestroy(dri_screen->drawable_hash); + drmUnmap(dri_screen->sarea, SAREA_MAX); + drmCloseOnce(dri_screen->fd); + XF86DRICloseConnection(dri_screen->display, dri_screen->num); + free(dri_screen); + + return 0; +} + +int driCreateDrawable(dri_screen_t *dri_screen, Drawable drawable, dri_drawable_t **dri_drawable) +{ + int evbase, errbase; + dri_drawable_t *dri_draw; + + assert(dri_screen); + assert(dri_drawable); + + if (!XF86DRIQueryExtension(dri_screen->display, &evbase, &errbase)) + return 1; + + if (!drmHashLookup(dri_screen->drawable_hash, drawable, (void**)dri_drawable)) + { + /* Found */ + (*dri_drawable)->refcount++; + return 0; + } + + dri_draw = calloc(1, sizeof(dri_drawable_t)); + + if (!dri_draw) + return 1; + + if (!XF86DRICreateDrawable(dri_screen->display, 0, drawable, &dri_draw->drm_drawable)) + { + free(dri_draw); + return 1; + } + + dri_draw->x_drawable = drawable; + dri_draw->sarea_index = 0; + dri_draw->sarea_stamp = NULL; + dri_draw->last_sarea_stamp = 0; + dri_draw->dri_screen = dri_screen; + dri_draw->refcount = 1; + + if (drmHashInsert(dri_screen->drawable_hash, drawable, dri_draw)) + { + XF86DRIDestroyDrawable(dri_screen->display, dri_screen->num, drawable); + free(dri_draw); + return 1; + } + + /* + * XXX: Need this to initialize sarea pointer and other stuff in dri_drawable_t + * to be able to use the DRI_VALIDATE_DRAWABLE_INFO macro, but is it safe to + * call without any sync? + */ + if (driUpdateDrawableInfo(dri_draw)) + { + XF86DRIDestroyDrawable(dri_screen->display, dri_screen->num, drawable); + free(dri_draw); + return 1; + } + + *dri_drawable = dri_draw; + + return 0; +} + +int driUpdateDrawableInfo(dri_drawable_t *dri_drawable) +{ + assert(dri_drawable); + + if (dri_drawable->cliprects) + XFree(dri_drawable->cliprects); + if (dri_drawable->back_cliprects) + XFree(dri_drawable->back_cliprects); + + DRM_SPINUNLOCK(&dri_drawable->dri_screen->sarea->drawable_lock, dri_drawable->dri_screen->draw_lock_id); + + if (!XF86DRIGetDrawableInfo + ( + dri_drawable->dri_screen->display, + dri_drawable->dri_screen->num, + dri_drawable->x_drawable, + &dri_drawable->sarea_index, + &dri_drawable->last_sarea_stamp, + &dri_drawable->x, + &dri_drawable->y, + &dri_drawable->w, + &dri_drawable->h, + &dri_drawable->num_cliprects, + &dri_drawable->cliprects, + &dri_drawable->back_x, + &dri_drawable->back_y, + &dri_drawable->num_back_cliprects, + &dri_drawable->back_cliprects + )) + { + dri_drawable->sarea_stamp = &dri_drawable->last_sarea_stamp; + dri_drawable->num_cliprects = 0; + dri_drawable->cliprects = NULL; + dri_drawable->num_back_cliprects = 0; + dri_drawable->back_cliprects = 0; + + return 1; + } + else + dri_drawable->sarea_stamp = &dri_drawable->dri_screen->sarea->drawableTable[dri_drawable->sarea_index].stamp; + + DRM_SPINLOCK(&dri_drawable->dri_screen->sarea->drawable_lock, dri_drawable->dri_screen->draw_lock_id); + + return 0; +} + +int driDestroyDrawable(dri_drawable_t *dri_drawable) +{ + assert(dri_drawable); + + if (--dri_drawable->refcount == 0) + { + if (dri_drawable->cliprects) + XFree(dri_drawable->cliprects); + if (dri_drawable->back_cliprects) + XFree(dri_drawable->back_cliprects); + drmHashDelete(dri_drawable->dri_screen->drawable_hash, dri_drawable->x_drawable); + XF86DRIDestroyDrawable(dri_drawable->dri_screen->display, dri_drawable->dri_screen->num, dri_drawable->x_drawable); + free(dri_drawable); + } + + return 0; +} + +int driCreateContext(dri_screen_t *dri_screen, Visual *visual, dri_context_t **dri_context) +{ + int evbase, errbase; + dri_context_t *dri_ctx; + + assert(dri_screen); + assert(visual); + assert(dri_context); + + if (!XF86DRIQueryExtension(dri_screen->display, &evbase, &errbase)) + return 1; + + dri_ctx = calloc(1, sizeof(dri_context_t)); + + if (!dri_ctx) + return 1; + + if (!XF86DRICreateContext(dri_screen->display, dri_screen->num, visual, &dri_ctx->id, &dri_ctx->drm_context)) + { + free(dri_ctx); + return 1; + } + + dri_ctx->dri_screen = dri_screen; + *dri_context = dri_ctx; + + return 0; +} + +int driDestroyContext(dri_context_t *dri_context) +{ + assert(dri_context); + + XF86DRIDestroyContext(dri_context->dri_screen->display, dri_context->dri_screen->num, dri_context->id); + free(dri_context); + + return 0; +} + +int driCompareVersions(const dri_version_t *v1, const dri_version_t *v2) +{ + return (v1->major == v2->major) && (v1->minor == v2->minor) && (v1->patch == v2->patch); +} + diff --git a/src/driclient/src/test b/src/driclient/src/test new file mode 100755 index 0000000000..57cddf8e00 Binary files /dev/null and b/src/driclient/src/test differ diff --git a/src/driclient/src/test.c b/src/driclient/src/test.c new file mode 100644 index 0000000000..15f75d928b --- /dev/null +++ b/src/driclient/src/test.c @@ -0,0 +1,41 @@ +#include +#include +#include "driclient.h" + +int main(int argc, char **argv) +{ + Display *dpy; + Window root, window; + + dri_screen_t *screen; + dri_drawable_t *dri_drawable; + dri_context_t *context; + + dpy = XOpenDisplay(NULL); + root = XDefaultRootWindow(dpy); + window = XCreateSimpleWindow(dpy, root, 0, 0, 100, 100, 0, 0, 0); + + XSelectInput(dpy, window, 0); + XMapWindow(dpy, window); + XSync(dpy, 0); + + assert(driCreateScreen(dpy, 0, &screen, NULL) == 0); + assert(driCreateDrawable(screen, window, &dri_drawable) == 0); + assert(driCreateContext(screen, XDefaultVisual(dpy, 0), &context) == 0); + assert(driUpdateDrawableInfo(dri_drawable) == 0); + + DRI_VALIDATE_DRAWABLE_INFO(screen, dri_drawable); + + assert(drmGetLock(screen->fd, context->drm_context, 0) == 0); + assert(drmUnlock(screen->fd, context->drm_context) == 0); + + assert(driDestroyContext(context) == 0); + assert(driDestroyDrawable(dri_drawable) == 0); + assert(driDestroyScreen(screen) == 0); + + XDestroyWindow(dpy, window); + XCloseDisplay(dpy); + + return 0; +} + diff --git a/src/driclient/src/xf86dri.h b/src/driclient/src/xf86dri.h new file mode 100644 index 0000000000..baf80a7a9d --- /dev/null +++ b/src/driclient/src/xf86dri.h @@ -0,0 +1,119 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright 2000 VA Linux Systems, Inc. +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file xf86dri.h + * Protocol numbers and function prototypes for DRI X protocol. + * + * \author Kevin E. Martin + * \author Jens Owen + * \author Rickard E. (Rik) Faith + */ + +#ifndef _XF86DRI_H_ +#define _XF86DRI_H_ + +#include +#include + +#define X_XF86DRIQueryVersion 0 +#define X_XF86DRIQueryDirectRenderingCapable 1 +#define X_XF86DRIOpenConnection 2 +#define X_XF86DRICloseConnection 3 +#define X_XF86DRIGetClientDriverName 4 +#define X_XF86DRICreateContext 5 +#define X_XF86DRIDestroyContext 6 +#define X_XF86DRICreateDrawable 7 +#define X_XF86DRIDestroyDrawable 8 +#define X_XF86DRIGetDrawableInfo 9 +#define X_XF86DRIGetDeviceInfo 10 +#define X_XF86DRIAuthConnection 11 +#define X_XF86DRIOpenFullScreen 12 /* Deprecated */ +#define X_XF86DRICloseFullScreen 13 /* Deprecated */ + +#define XF86DRINumberEvents 0 + +#define XF86DRIClientNotLocal 0 +#define XF86DRIOperationNotSupported 1 +#define XF86DRINumberErrors (XF86DRIOperationNotSupported + 1) + +#ifndef _XF86DRI_SERVER_ + +_XFUNCPROTOBEGIN + +Bool XF86DRIQueryExtension( Display *dpy, int *event_base, int *error_base ); + +Bool XF86DRIQueryVersion( Display *dpy, int *majorVersion, int *minorVersion, + int *patchVersion ); + +Bool XF86DRIQueryDirectRenderingCapable( Display *dpy, int screen, + Bool *isCapable ); + +Bool XF86DRIOpenConnection( Display *dpy, int screen, drm_handle_t *hSAREA, + char **busIDString ); + +Bool XF86DRIAuthConnection( Display *dpy, int screen, drm_magic_t magic ); + +Bool XF86DRICloseConnection( Display *dpy, int screen ); + +Bool XF86DRIGetClientDriverName( Display *dpy, int screen, + int *ddxDriverMajorVersion, int *ddxDriverMinorVersion, + int *ddxDriverPatchVersion, char **clientDriverName ); + +Bool XF86DRICreateContext( Display *dpy, int screen, Visual *visual, + XID *ptr_to_returned_context_id, drm_context_t *hHWContext ); + +Bool XF86DRICreateContextWithConfig( Display *dpy, int screen, int configID, + XID *ptr_to_returned_context_id, drm_context_t *hHWContext ); + +Bool XF86DRIDestroyContext( Display *dpy, int screen, + XID context_id ); + +Bool XF86DRICreateDrawable( Display *dpy, int screen, + Drawable drawable, drm_drawable_t *hHWDrawable ); + +Bool XF86DRIDestroyDrawable( Display *dpy, int screen, + Drawable drawable); + +Bool XF86DRIGetDrawableInfo( Display *dpy, int screen, Drawable drawable, + unsigned int *index, unsigned int *stamp, + int *X, int *Y, int *W, int *H, + int *numClipRects, drm_clip_rect_t ** pClipRects, + int *backX, int *backY, + int *numBackClipRects, drm_clip_rect_t **pBackClipRects ); + +Bool XF86DRIGetDeviceInfo( Display *dpy, int screen, + drm_handle_t *hFrameBuffer, int *fbOrigin, int *fbSize, + int *fbStride, int *devPrivateSize, void **pDevPrivate ); + +_XFUNCPROTOEND + +#endif /* _XF86DRI_SERVER_ */ + +#endif /* _XF86DRI_H_ */ + diff --git a/src/driclient/src/xf86dristr.h b/src/driclient/src/xf86dristr.h new file mode 100644 index 0000000000..b834bd1a1a --- /dev/null +++ b/src/driclient/src/xf86dristr.h @@ -0,0 +1,342 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright 2000 VA Linux Systems, Inc. +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin + * Jens Owen + * Rickard E. (Rik) Fiath + * + */ + +#ifndef _XF86DRISTR_H_ +#define _XF86DRISTR_H_ + +#include "xf86dri.h" + +#define XF86DRINAME "XFree86-DRI" + +/* The DRI version number. This was originally set to be the same of the + * XFree86 version number. However, this version is really indepedent of + * the XFree86 version. + * + * Version History: + * 4.0.0: Original + * 4.0.1: Patch to bump clipstamp when windows are destroyed, 28 May 02 + * 4.1.0: Add transition from single to multi in DRMInfo rec, 24 Jun 02 + */ +#define XF86DRI_MAJOR_VERSION 4 +#define XF86DRI_MINOR_VERSION 1 +#define XF86DRI_PATCH_VERSION 0 + +typedef struct _XF86DRIQueryVersion { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRIQueryVersion */ + CARD16 length B16; +} xXF86DRIQueryVersionReq; +#define sz_xXF86DRIQueryVersionReq 4 + +typedef struct { + BYTE type; /* X_Reply */ + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD16 majorVersion B16; /* major version of DRI protocol */ + CARD16 minorVersion B16; /* minor version of DRI protocol */ + CARD32 patchVersion B32; /* patch version of DRI protocol */ + CARD32 pad3 B32; + CARD32 pad4 B32; + CARD32 pad5 B32; + CARD32 pad6 B32; +} xXF86DRIQueryVersionReply; +#define sz_xXF86DRIQueryVersionReply 32 + +typedef struct _XF86DRIQueryDirectRenderingCapable { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* X_DRIQueryDirectRenderingCapable */ + CARD16 length B16; + CARD32 screen B32; +} xXF86DRIQueryDirectRenderingCapableReq; +#define sz_xXF86DRIQueryDirectRenderingCapableReq 8 + +typedef struct { + BYTE type; /* X_Reply */ + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + BOOL isCapable; + BOOL pad2; + BOOL pad3; + BOOL pad4; + CARD32 pad5 B32; + CARD32 pad6 B32; + CARD32 pad7 B32; + CARD32 pad8 B32; + CARD32 pad9 B32; +} xXF86DRIQueryDirectRenderingCapableReply; +#define sz_xXF86DRIQueryDirectRenderingCapableReply 32 + +typedef struct _XF86DRIOpenConnection { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRIOpenConnection */ + CARD16 length B16; + CARD32 screen B32; +} xXF86DRIOpenConnectionReq; +#define sz_xXF86DRIOpenConnectionReq 8 + +typedef struct { + BYTE type; /* X_Reply */ + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 hSAREALow B32; + CARD32 hSAREAHigh B32; + CARD32 busIdStringLength B32; + CARD32 pad6 B32; + CARD32 pad7 B32; + CARD32 pad8 B32; +} xXF86DRIOpenConnectionReply; +#define sz_xXF86DRIOpenConnectionReply 32 + +typedef struct _XF86DRIAuthConnection { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRICloseConnection */ + CARD16 length B16; + CARD32 screen B32; + CARD32 magic B32; +} xXF86DRIAuthConnectionReq; +#define sz_xXF86DRIAuthConnectionReq 12 + +typedef struct { + BYTE type; + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 authenticated B32; + CARD32 pad2 B32; + CARD32 pad3 B32; + CARD32 pad4 B32; + CARD32 pad5 B32; + CARD32 pad6 B32; +} xXF86DRIAuthConnectionReply; +#define zx_xXF86DRIAuthConnectionReply 32 + +typedef struct _XF86DRICloseConnection { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRICloseConnection */ + CARD16 length B16; + CARD32 screen B32; +} xXF86DRICloseConnectionReq; +#define sz_xXF86DRICloseConnectionReq 8 + +typedef struct _XF86DRIGetClientDriverName { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRIGetClientDriverName */ + CARD16 length B16; + CARD32 screen B32; +} xXF86DRIGetClientDriverNameReq; +#define sz_xXF86DRIGetClientDriverNameReq 8 + +typedef struct { + BYTE type; /* X_Reply */ + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 ddxDriverMajorVersion B32; + CARD32 ddxDriverMinorVersion B32; + CARD32 ddxDriverPatchVersion B32; + CARD32 clientDriverNameLength B32; + CARD32 pad5 B32; + CARD32 pad6 B32; +} xXF86DRIGetClientDriverNameReply; +#define sz_xXF86DRIGetClientDriverNameReply 32 + +typedef struct _XF86DRICreateContext { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRICreateContext */ + CARD16 length B16; + CARD32 screen B32; + CARD32 visual B32; + CARD32 context B32; +} xXF86DRICreateContextReq; +#define sz_xXF86DRICreateContextReq 16 + +typedef struct { + BYTE type; /* X_Reply */ + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 hHWContext B32; + CARD32 pad2 B32; + CARD32 pad3 B32; + CARD32 pad4 B32; + CARD32 pad5 B32; + CARD32 pad6 B32; +} xXF86DRICreateContextReply; +#define sz_xXF86DRICreateContextReply 32 + +typedef struct _XF86DRIDestroyContext { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRIDestroyContext */ + CARD16 length B16; + CARD32 screen B32; + CARD32 context B32; +} xXF86DRIDestroyContextReq; +#define sz_xXF86DRIDestroyContextReq 12 + +typedef struct _XF86DRICreateDrawable { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRICreateDrawable */ + CARD16 length B16; + CARD32 screen B32; + CARD32 drawable B32; +} xXF86DRICreateDrawableReq; +#define sz_xXF86DRICreateDrawableReq 12 + +typedef struct { + BYTE type; /* X_Reply */ + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 hHWDrawable B32; + CARD32 pad2 B32; + CARD32 pad3 B32; + CARD32 pad4 B32; + CARD32 pad5 B32; + CARD32 pad6 B32; +} xXF86DRICreateDrawableReply; +#define sz_xXF86DRICreateDrawableReply 32 + +typedef struct _XF86DRIDestroyDrawable { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRIDestroyDrawable */ + CARD16 length B16; + CARD32 screen B32; + CARD32 drawable B32; +} xXF86DRIDestroyDrawableReq; +#define sz_xXF86DRIDestroyDrawableReq 12 + +typedef struct _XF86DRIGetDrawableInfo { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRIGetDrawableInfo */ + CARD16 length B16; + CARD32 screen B32; + CARD32 drawable B32; +} xXF86DRIGetDrawableInfoReq; +#define sz_xXF86DRIGetDrawableInfoReq 12 + +typedef struct { + BYTE type; /* X_Reply */ + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 drawableTableIndex B32; + CARD32 drawableTableStamp B32; + INT16 drawableX B16; + INT16 drawableY B16; + INT16 drawableWidth B16; + INT16 drawableHeight B16; + CARD32 numClipRects B32; + INT16 backX B16; + INT16 backY B16; + CARD32 numBackClipRects B32; +} xXF86DRIGetDrawableInfoReply; + +#define sz_xXF86DRIGetDrawableInfoReply 36 + + +typedef struct _XF86DRIGetDeviceInfo { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRIGetDeviceInfo */ + CARD16 length B16; + CARD32 screen B32; +} xXF86DRIGetDeviceInfoReq; +#define sz_xXF86DRIGetDeviceInfoReq 8 + +typedef struct { + BYTE type; /* X_Reply */ + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 hFrameBufferLow B32; + CARD32 hFrameBufferHigh B32; + CARD32 framebufferOrigin B32; + CARD32 framebufferSize B32; + CARD32 framebufferStride B32; + CARD32 devPrivateSize B32; +} xXF86DRIGetDeviceInfoReply; +#define sz_xXF86DRIGetDeviceInfoReply 32 + +typedef struct _XF86DRIOpenFullScreen { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRIOpenFullScreen */ + CARD16 length B16; + CARD32 screen B32; + CARD32 drawable B32; +} xXF86DRIOpenFullScreenReq; +#define sz_xXF86DRIOpenFullScreenReq 12 + +typedef struct { + BYTE type; + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 isFullScreen B32; + CARD32 pad2 B32; + CARD32 pad3 B32; + CARD32 pad4 B32; + CARD32 pad5 B32; + CARD32 pad6 B32; +} xXF86DRIOpenFullScreenReply; +#define sz_xXF86DRIOpenFullScreenReply 32 + +typedef struct _XF86DRICloseFullScreen { + CARD8 reqType; /* always DRIReqCode */ + CARD8 driReqType; /* always X_DRICloseFullScreen */ + CARD16 length B16; + CARD32 screen B32; + CARD32 drawable B32; +} xXF86DRICloseFullScreenReq; +#define sz_xXF86DRICloseFullScreenReq 12 + +typedef struct { + BYTE type; + BOOL pad1; + CARD16 sequenceNumber B16; + CARD32 length B32; + CARD32 pad2 B32; + CARD32 pad3 B32; + CARD32 pad4 B32; + CARD32 pad5 B32; + CARD32 pad6 B32; + CARD32 pad7 B32; +} xXF86DRICloseFullScreenReply; +#define sz_xXF86DRICloseFullScreenReply 32 + + +#endif /* _XF86DRISTR_H_ */ diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 50e3c843b5..c6a22cad4e 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -2,7 +2,7 @@ TARGET = libg3dvl.a OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o vl_util.o GALLIUMDIR = ../.. -CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary +CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl ############################################# diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 3d4ca7cf4e..850a769376 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -330,6 +330,10 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -354,7 +358,7 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -432,7 +436,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -499,6 +503,10 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -527,7 +535,7 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -603,6 +611,10 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -643,7 +655,7 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -700,6 +712,10 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -761,7 +777,7 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -821,6 +837,10 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -928,7 +948,7 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -998,6 +1018,10 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -1030,7 +1054,7 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -1104,6 +1128,10 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -1148,7 +1176,7 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -1207,6 +1235,10 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -1283,7 +1315,7 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -1346,6 +1378,10 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0-t5 */ + decl = vl_decl_temps(0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -1479,7 +1515,7 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -1771,6 +1807,10 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mov o0, i0 ; Move pos in to pos out */ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -1789,7 +1829,7 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -1839,6 +1879,10 @@ static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl s0 ; Sampler for tex containing picture to display */ decl = vl_decl_samplers(0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1870,7 +1914,7 @@ static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c index 365ad69725..1dc5be6fdb 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.c +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -74,6 +74,18 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde return decl; } +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) { struct tgsi_full_declaration decl = tgsi_default_full_declaration(); diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h index 9e64bbeeae..878d7e2c45 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.h +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h @@ -14,6 +14,7 @@ struct tgsi_full_declaration vl_decl_interpolated_input ); struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last); struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); struct tgsi_full_instruction vl_inst2 ( diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 145ea32892..9b91ab4e22 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "vl_context.h" #include "vl_defs.h" #include "vl_util.h" @@ -711,12 +712,13 @@ int vlPutSurface pipe->set_sampler_textures(pipe, 1, &surface->texture); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + bind_pipe_drawable(pipe, drawable); /* TODO: Need to take destx, desty into consideration */ pipe->winsys->flush_frontbuffer ( pipe->winsys, surface->context->states.csc.framebuffer.cbufs[0], - &drawable + pipe->priv ); vlBeginRender(surface->context); diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile new file mode 100644 index 0000000000..644ac083e6 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/Makefile @@ -0,0 +1,46 @@ +TARGET = libnouveau_dri.so +GALLIUMDIR = ../../.. +DRMDIR ?= /usr +DRIDIR = ../../../../driclient + +OBJECTS = nouveau_bo.o nouveau_fence.o nouveau_swapbuffers.o nouveau_channel.o \ + nouveau_grobj.o nouveau_context.o nouveau_winsys.o nouveau_lock.o \ + nouveau_winsys_pipe.o nouveau_device.o nouveau_notifier.o nouveau_dma.o \ + nouveau_pushbuf.o nouveau_resource.o nouveau_screen.o nv04_surface.o \ + nv50_surface.o #nouveau_winsys_softpipe.o + +CFLAGS += -g -Wall -Werror -fPIC \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/winsys/g3dvl \ + -I${DRMDIR}/include \ + -I${DRMDIR}/include/drm \ + -I${GALLIUMDIR}/drivers \ + -I${GALLIUMDIR}/auxiliary \ + -I${DRIDIR}/include + +LDFLAGS += -L${DRMDIR}/lib \ + -L${DRIDIR}/lib \ + -L${GALLIUMDIR}/auxiliary/draw \ + -L${GALLIUMDIR}/auxiliary/tgsi \ + -L${GALLIUMDIR}/auxiliary/translate \ + -L${GALLIUMDIR}/auxiliary/rtasm \ + -L${GALLIUMDIR}/auxiliary/cso_cache \ + -L${GALLIUMDIR}/drivers/nv10 \ + -L${GALLIUMDIR}/drivers/nv30 \ + -L${GALLIUMDIR}/drivers/nv40 \ + -L${GALLIUMDIR}/drivers/nv50 + +LIBS += -ldriclient -ldrm -lnv10 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm + +############################################# + +.PHONY = all clean + +all: ${TARGET} + +${TARGET}: ${OBJECTS} + $(CC) ${LDFLAGS} -shared -o $@ $^ ${LIBS} + +clean: + rm -rf ${OBJECTS} ${TARGET} + diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_bo.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_bo.c new file mode 120000 index 0000000000..73ac4a4171 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_bo.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_bo.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_channel.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_channel.c new file mode 120000 index 0000000000..6c9b2c48d8 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_channel.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_channel.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_context.c new file mode 100644 index 0000000000..5e173c7672 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_context.c @@ -0,0 +1,371 @@ +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_util.h" + +#include "nouveau_context.h" +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_winsys_pipe.h" + +/* +#ifdef DEBUG +static const struct dri_debug_control debug_control[] = { + { "bo", DEBUG_BO }, + { NULL, 0 } +}; +int __nouveau_debug = 0; +#endif +*/ + +/* + * TODO: Re-examine dri_screen, dri_context, nouveau_screen, nouveau_context + * relationships, seems like there is a lot of room for simplification there. + */ + +static void +nouveau_channel_context_destroy(struct nouveau_channel_context *nvc) +{ + nouveau_grobj_free(&nvc->NvCtxSurf2D); + nouveau_grobj_free(&nvc->NvImageBlit); + nouveau_grobj_free(&nvc->NvGdiRect); + nouveau_grobj_free(&nvc->NvM2MF); + nouveau_grobj_free(&nvc->Nv2D); + nouveau_grobj_free(&nvc->NvSwzSurf); + nouveau_grobj_free(&nvc->NvSIFM); + + nouveau_notifier_free(&nvc->sync_notifier); + + nouveau_channel_free(&nvc->channel); + + FREE(nvc); +} + +static struct nouveau_channel_context * +nouveau_channel_context_create(struct nouveau_device *dev) +{ + struct nouveau_channel_context *nvc; + int ret; + + nvc = CALLOC_STRUCT(nouveau_channel_context); + if (!nvc) + return NULL; + + if ((ret = nouveau_channel_alloc(dev, 0x8003d001, 0x8003d002, + &nvc->channel))) { + NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + nvc->next_handle = 0x80000000; + + if ((ret = nouveau_notifier_alloc(nvc->channel, nvc->next_handle++, 1, + &nvc->sync_notifier))) { + NOUVEAU_ERR("Error creating channel sync notifier: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + ret = nouveau_surface_channel_create_nv50(nvc); + break; + default: + ret = nouveau_surface_channel_create_nv04(nvc); + break; + } + + if (ret) { + NOUVEAU_ERR("Error initialising surface objects: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + return nvc; +} + +int +nouveau_context_create(dri_context_t *dri_context) +{ + dri_screen_t *dri_screen = dri_context->dri_screen; + struct nouveau_screen *nv_screen = dri_screen->private; + struct nouveau_context *nv = CALLOC_STRUCT(nouveau_context); + struct pipe_context *pipe = NULL; + struct nouveau_channel_context *nvc = NULL; + struct nouveau_device *dev = nv_screen->device; + int i; + + switch (dev->chipset & 0xf0) { + case 0x10: + case 0x20: + /* NV10 */ + case 0x30: + /* NV30 */ + case 0x40: + case 0x60: + /* NV40 */ + case 0x50: + case 0x80: + case 0x90: + /* G80 */ + break; + default: + NOUVEAU_ERR("Unsupported chipset: NV%02x\n", dev->chipset); + return 1; + } + + dri_context->private = (void*)nv; + nv->dri_context = dri_context; + nv->nv_screen = nv_screen; + + { + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + nvdev->ctx = dri_context->drm_context; + nvdev->lock = (drmLock*)&dri_screen->sarea->lock; + } + + /* + driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, + nv->dri_screen->myNum, "nouveau"); +#ifdef DEBUG + __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), + debug_control); +#endif + */ + + /*XXX: Hack up a fake region and buffer object for front buffer. + * This will go away with TTM, replaced with a simple reference + * of the front buffer handle passed to us by the DDX. + */ + { + struct pipe_surface *fb_surf; + struct nouveau_pipe_buffer *fb_buf; + struct nouveau_bo_priv *fb_bo; + + fb_bo = calloc(1, sizeof(struct nouveau_bo_priv)); + fb_bo->drm.offset = nv_screen->front_offset; + fb_bo->drm.flags = NOUVEAU_MEM_FB; + fb_bo->drm.size = nv_screen->front_pitch * + nv_screen->front_height; + fb_bo->refcount = 1; + fb_bo->base.flags = NOUVEAU_BO_PIN | NOUVEAU_BO_VRAM; + fb_bo->base.offset = fb_bo->drm.offset; + fb_bo->base.handle = (unsigned long)fb_bo; + fb_bo->base.size = fb_bo->drm.size; + fb_bo->base.device = nv_screen->device; + + fb_buf = calloc(1, sizeof(struct nouveau_pipe_buffer)); + fb_buf->bo = &fb_bo->base; + + fb_surf = calloc(1, sizeof(struct pipe_surface)); + if (nv_screen->front_cpp == 2) + fb_surf->format = PIPE_FORMAT_R5G6B5_UNORM; + else + fb_surf->format = PIPE_FORMAT_A8R8G8B8_UNORM; + pf_get_block(fb_surf->format, &fb_surf->block); + fb_surf->width = nv_screen->front_pitch / nv_screen->front_cpp; + fb_surf->height = nv_screen->front_height; + fb_surf->stride = fb_surf->width * fb_surf->block.size; + fb_surf->refcount = 1; + fb_surf->buffer = &fb_buf->base; + + nv->frontbuffer = fb_surf; + } + + nvc = nv_screen->nvc; + + if (!nvc) { + nvc = nouveau_channel_context_create(dev); + if (!nvc) { + NOUVEAU_ERR("Failed initialising GPU context\n"); + return 1; + } + nv_screen->nvc = nvc; + } + + nvc->refcount++; + nv->nvc = nvc; + + /* Find a free slot for a pipe context, allocate a new one if needed */ + nv->pctx_id = -1; + for (i = 0; i < nvc->nr_pctx; i++) { + if (nvc->pctx[i] == NULL) { + nv->pctx_id = i; + break; + } + } + + if (nv->pctx_id < 0) { + nv->pctx_id = nvc->nr_pctx++; + nvc->pctx = + realloc(nvc->pctx, + sizeof(struct pipe_context *) * nvc->nr_pctx); + } + + /* Create pipe */ + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + if (nouveau_surface_init_nv50(nv)) + return 1; + break; + default: + if (nouveau_surface_init_nv04(nv)) + return 1; + break; + } + + if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { + struct pipe_screen *pscreen; + + pipe = nouveau_pipe_create(nv); + if (!pipe) + NOUVEAU_ERR("Couldn't create hw pipe\n"); + pscreen = nvc->pscreen; + + nv->cap.hw_vertex_buffer = + pscreen->get_param(pscreen, NOUVEAU_CAP_HW_VTXBUF); + nv->cap.hw_index_buffer = + pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF); + } + + /* XXX: nouveau_winsys_softpipe needs a mesa header removed before we can compile it. */ + /* + if (!pipe) { + NOUVEAU_MSG("Using softpipe\n"); + pipe = nouveau_create_softpipe(nv); + if (!pipe) { + NOUVEAU_ERR("Error creating pipe, bailing\n"); + return 1; + } + } + */ + if (!pipe) { + NOUVEAU_ERR("Error creating pipe, bailing\n"); + return 1; + } + + pipe->priv = nv; + + return 0; +} + +void +nouveau_context_destroy(dri_context_t *dri_context) +{ + struct nouveau_context *nv = dri_context->private; + struct nouveau_channel_context *nvc = nv->nvc; + + assert(nv); + + if (nv->pctx_id >= 0) { + nvc->pctx[nv->pctx_id] = NULL; + if (--nvc->refcount <= 0) { + nouveau_channel_context_destroy(nvc); + nv->nv_screen->nvc = NULL; + } + } + + free(nv); +} + +int +nouveau_context_bind(struct nouveau_context *nv, dri_drawable_t *dri_drawable) +{ + assert(nv); + assert(dri_drawable); + + if (nv->dri_drawable != dri_drawable) + { + nv->dri_drawable = dri_drawable; + dri_drawable->private = nv; + } + + return 0; +} + +int +nouveau_context_unbind(struct nouveau_context *nv) +{ + assert(nv); + + nv->dri_drawable = NULL; + + return 0; +} + +/* Show starts here */ + +int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable) +{ + struct nouveau_context *nv; + dri_drawable_t *dri_drawable; + + nv = pipe->priv; + + driCreateDrawable(nv->nv_screen->dri_screen, drawable, &dri_drawable); + + nouveau_context_bind(nv, dri_drawable); + + return 0; +} + +int unbind_pipe_drawable(struct pipe_context *pipe) +{ + nouveau_context_unbind(pipe->priv); + + return 0; +} + +struct pipe_context* create_pipe_context(Display *display, int screen) +{ + dri_screen_t *dri_screen; + dri_framebuffer_t dri_framebuf; + dri_context_t *dri_context; + struct nouveau_context *nv; + + driCreateScreen(display, screen, &dri_screen, &dri_framebuf); + driCreateContext(dri_screen, XDefaultVisual(display, screen), &dri_context); + + nouveau_screen_create(dri_screen, &dri_framebuf); + nouveau_context_create(dri_context); + + nv = dri_context->private; + + return nv->nvc->pctx[nv->pctx_id]; +} + +int destroy_pipe_context(struct pipe_context *pipe) +{ + struct pipe_screen *screen; + struct pipe_winsys *winsys; + struct nouveau_context *nv; + dri_screen_t *dri_screen; + dri_context_t *dri_context; + + assert(pipe); + + screen = pipe->screen; + winsys = pipe->winsys; + nv = pipe->priv; + dri_context = nv->dri_context; + dri_screen = dri_context->dri_screen; + + pipe->destroy(pipe); + screen->destroy(screen); + free(winsys); + + nouveau_context_destroy(dri_context); + nouveau_screen_destroy(dri_screen); + driDestroyContext(dri_context); + driDestroyScreen(dri_screen); + + return 0; +} + diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_context.h new file mode 100644 index 0000000000..395a3ab790 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_context.h @@ -0,0 +1,105 @@ +#ifndef __NOUVEAU_CONTEXT_H__ +#define __NOUVEAU_CONTEXT_H__ + +/*#include "xmlconfig.h"*/ + +#include +#include "nouveau/nouveau_winsys.h" +#include "nouveau_drmif.h" +#include "nouveau_dma.h" + +struct nouveau_channel_context { + struct pipe_screen *pscreen; + int refcount; + + unsigned cur_pctx; + unsigned nr_pctx; + struct pipe_context **pctx; + + struct nouveau_channel *channel; + + struct nouveau_notifier *sync_notifier; + + /* Common */ + struct nouveau_grobj *NvM2MF; + /* NV04-NV40 */ + struct nouveau_grobj *NvCtxSurf2D; + struct nouveau_grobj *NvSwzSurf; + struct nouveau_grobj *NvImageBlit; + struct nouveau_grobj *NvGdiRect; + struct nouveau_grobj *NvSIFM; + /* G80 */ + struct nouveau_grobj *Nv2D; + + uint32_t next_handle; + uint32_t next_subchannel; + uint32_t next_sequence; +}; + +struct nouveau_context { + /* DRI stuff */ + dri_context_t *dri_context; + dri_drawable_t *dri_drawable; + unsigned int last_stamp; + /*driOptionCache dri_option_cache;*/ + drm_context_t drm_context; + drmLock drm_lock; + int locked; + struct nouveau_screen *nv_screen; + struct pipe_surface *frontbuffer; + + struct { + int hw_vertex_buffer; + int hw_index_buffer; + } cap; + + /* Hardware context */ + struct nouveau_channel_context *nvc; + int pctx_id; + + /* pipe_surface accel */ + struct pipe_surface *surf_src, *surf_dst; + unsigned surf_src_offset, surf_dst_offset; + + int (*surface_copy_prep)(struct nouveau_context *, + struct pipe_surface *dst, + struct pipe_surface *src); + void (*surface_copy)(struct nouveau_context *, unsigned dx, unsigned dy, + unsigned sx, unsigned sy, unsigned w, unsigned h); + void (*surface_copy_done)(struct nouveau_context *); + int (*surface_fill)(struct nouveau_context *, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned, unsigned); +}; + +extern int nouveau_context_create(dri_context_t *); +extern void nouveau_context_destroy(dri_context_t *); +extern int nouveau_context_bind(struct nouveau_context *, dri_drawable_t *); +extern int nouveau_context_unbind(struct nouveau_context *); + +#ifdef DEBUG +extern int __nouveau_debug; + +#define DEBUG_BO (1 << 0) + +#define DBG(flag, ...) do { \ + if (__nouveau_debug & (DEBUG_##flag)) \ + NOUVEAU_ERR(__VA_ARGS__); \ +} while(0) +#else +#define DBG(flag, ...) +#endif + +extern void LOCK_HARDWARE(struct nouveau_context *); +extern void UNLOCK_HARDWARE(struct nouveau_context *); + +extern int +nouveau_surface_channel_create_nv04(struct nouveau_channel_context *); +extern int +nouveau_surface_channel_create_nv50(struct nouveau_channel_context *); +extern int nouveau_surface_init_nv04(struct nouveau_context *); +extern int nouveau_surface_init_nv50(struct nouveau_context *); + +extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int); +extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *); + +#endif diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_device.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_device.c new file mode 120000 index 0000000000..47d52dafa8 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_device.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_device.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_dma.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_dma.c new file mode 120000 index 0000000000..45078c964f --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_dma.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_dma.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_dma.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_dma.h new file mode 120000 index 0000000000..6b9ec77741 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_dma.h @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_dma.h \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_dri.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_dri.h new file mode 120000 index 0000000000..0e6c9fce35 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_dri.h @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_dri.h \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_drmif.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_drmif.h new file mode 120000 index 0000000000..473b7d4812 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_drmif.h @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_drmif.h \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_fence.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_fence.c new file mode 120000 index 0000000000..ef1f0c6afe --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_fence.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_fence.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_grobj.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_grobj.c new file mode 120000 index 0000000000..428186544c --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_grobj.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_grobj.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_local.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_local.h new file mode 120000 index 0000000000..6b878aad22 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_local.h @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_local.h \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c new file mode 100644 index 0000000000..375634bd05 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_lock.c @@ -0,0 +1,92 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include +#include +#include "nouveau_context.h" +#include "nouveau_screen.h" + +static pthread_mutex_t lockMutex = PTHREAD_MUTEX_INITIALIZER; + +static void +nouveau_contended_lock(struct nouveau_context *nv, unsigned int flags) +{ + dri_drawable_t *dri_drawable = nv->dri_drawable; + dri_screen_t *dri_screen = nv->dri_context->dri_screen; + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + drmGetLock(nvdev->fd, nvdev->ctx, flags); + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dri_drawable) + DRI_VALIDATE_DRAWABLE_INFO(dri_screen, dri_drawable); +} + +/* Lock the hardware and validate our state. + */ +void +LOCK_HARDWARE(struct nouveau_context *nv) +{ + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + char __ret=0; + + pthread_mutex_lock(&lockMutex); + assert(!nv->locked); + + DRM_CAS(nvdev->lock, nvdev->ctx, + (DRM_LOCK_HELD | nvdev->ctx), __ret); + + if (__ret) + nouveau_contended_lock(nv, 0); + nv->locked = 1; +} + + +/* Unlock the hardware using the global current context + */ +void +UNLOCK_HARDWARE(struct nouveau_context *nv) +{ + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + assert(nv->locked); + nv->locked = 0; + + DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); + + pthread_mutex_unlock(&lockMutex); +} diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_notifier.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_notifier.c new file mode 120000 index 0000000000..3024a612a7 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_notifier.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_notifier.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_pushbuf.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_pushbuf.c new file mode 120000 index 0000000000..dae31d9799 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_pushbuf.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_pushbuf.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_resource.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_resource.c new file mode 120000 index 0000000000..e0d71e9d2b --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_resource.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_resource.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.c new file mode 100644 index 0000000000..0087ce0056 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.c @@ -0,0 +1,88 @@ +#include "pipe/p_context.h" +#include "pipe/p_util.h" +#include "nouveau_context.h" +#include "nouveau_drm.h" +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" + +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 11 +#error nouveau_drm.h version does not match expected version +#endif + +/* +PUBLIC const char __driConfigOptions[] = +DRI_CONF_BEGIN +DRI_CONF_END; +static const GLuint __driNConfigOptions = 0; +*/ + +int +nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf) +{ + /* XXX: Someone forgot to bump this? */ + static const dri_version_t ddx_expected = {0, 0, 10 /*NOUVEAU_DRM_HEADER_PATCHLEVEL*/}; + static const dri_version_t dri_expected = {4, 1, 0}; + static const dri_version_t drm_expected = {0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL}; + + struct nouveau_dri *nv_dri = dri_framebuf->private; + struct nouveau_screen *nv_screen; + int ret; + + if (!driCompareVersions(&ddx_expected, &dri_screen->ddx)) + { + NOUVEAU_ERR("Unexpected DDX version.\n"); + return 1; + } + + if (!driCompareVersions(&drm_expected, &dri_screen->drm)) + { + NOUVEAU_ERR("Unexpected DRM version.\n"); + return 1; + } + + if (!driCompareVersions(&dri_expected, &dri_screen->dri)) + { + NOUVEAU_ERR("Unexpected DRI version.\n"); + return 1; + } + + if (dri_framebuf->private_size != sizeof(struct nouveau_dri)) { + NOUVEAU_ERR("DRI struct mismatch between DDX/DRI.\n"); + return 1; + } + + nv_screen = CALLOC_STRUCT(nouveau_screen); + if (!nv_screen) + return 1; + nv_screen->dri_screen = dri_screen; + dri_screen->private = (void*)nv_screen; + + /* + driParseOptionInfo(&nv_screen->option_cache, + __driConfigOptions, __driNConfigOptions); + */ + + if ((ret = nouveau_device_open_existing(&nv_screen->device, 0, + dri_screen->fd, 0))) { + NOUVEAU_ERR("Failed opening nouveau device: %d.\n", ret); + return 1; + } + + nv_screen->front_offset = nv_dri->front_offset; + nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); + nv_screen->front_cpp = nv_dri->bpp / 8; + nv_screen->front_height = nv_dri->height; + + return 0; +} + +void +nouveau_screen_destroy(dri_screen_t *dri_screen) +{ + struct nouveau_screen *nv_screen = dri_screen->private; + + FREE(nv_screen); +} + diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.h new file mode 100644 index 0000000000..8a58bb7556 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen.h @@ -0,0 +1,24 @@ +#ifndef __NOUVEAU_SCREEN_H__ +#define __NOUVEAU_SCREEN_H__ + +/* TODO: Investigate using DRI options for interesting things */ +/*#include "xmlconfig.h"*/ + +struct nouveau_screen { + dri_screen_t *dri_screen; + struct nouveau_device *device; + struct nouveau_channel_context *nvc; + + uint32_t front_offset; + uint32_t front_pitch; + uint32_t front_cpp; + uint32_t front_height; + + /*driOptionCache option_cache;*/ +}; + +int nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf); +void nouveau_screen_destroy(dri_screen_t *dri_screen); + +#endif + diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c new file mode 100644 index 0000000000..7916c80615 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c @@ -0,0 +1,64 @@ +#include "pipe/p_context.h" +#include "nouveau_context.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" + +void +nouveau_copy_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf, + const drm_clip_rect_t *rect) +{ + struct nouveau_context *nv = dri_drawable->private; + drm_clip_rect_t *pbox; + int nbox, i; + + LOCK_HARDWARE(nv); + if (!dri_drawable->num_cliprects) { + UNLOCK_HARDWARE(nv); + return; + } + pbox = dri_drawable->cliprects; + nbox = dri_drawable->num_cliprects; + + nv->surface_copy_prep(nv, nv->frontbuffer, surf); + for (i = 0; i < nbox; i++, pbox++) { + int sx, sy, dx, dy, w, h; + + sx = pbox->x1 - dri_drawable->x; + sy = pbox->y1 - dri_drawable->y; + dx = pbox->x1; + dy = pbox->y1; + w = pbox->x2 - pbox->x1; + h = pbox->y2 - pbox->y1; + + nv->surface_copy(nv, dx, dy, sx, sy, w, h); + } + + FIRE_RING(nv->nvc->channel); + UNLOCK_HARDWARE(nv); + + //if (nv->last_stamp != dri_drawable->last_sarea_stamp) + //nv->last_stamp = dri_drawable->last_sarea_stamp; +} + +void +nouveau_copy_sub_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf, int x, int y, int w, int h) +{ + if (surf) { + drm_clip_rect_t rect; + rect.x1 = x; + rect.y1 = y; + rect.x2 = x + w; + rect.y2 = y + h; + + nouveau_copy_buffer(dri_drawable, surf, &rect); + } +} + +void +nouveau_swap_buffers(dri_drawable_t *dri_drawable, struct pipe_surface *surf) +{ + if (surf) + nouveau_copy_buffer(dri_drawable, surf, NULL); +} + diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h new file mode 100644 index 0000000000..35e934adba --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h @@ -0,0 +1,10 @@ +#ifndef __NOUVEAU_SWAPBUFFERS_H__ +#define __NOUVEAU_SWAPBUFFERS_H__ + +extern void nouveau_copy_buffer(dri_drawable_t *, struct pipe_surface *, + const drm_clip_rect_t *); +extern void nouveau_copy_sub_buffer(dri_drawable_t *, struct pipe_surface *, + int x, int y, int w, int h); +extern void nouveau_swap_buffers(dri_drawable_t *, struct pipe_surface *); + +#endif diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys.c new file mode 120000 index 0000000000..43de49b98b --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_winsys.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c new file mode 100644 index 0000000000..b835bd5760 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c @@ -0,0 +1,261 @@ +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" + +#include "nouveau_context.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_swapbuffers.h" +#include "nouveau_winsys_pipe.h" + +static void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private) +{ + struct nouveau_context *nv = context_private; + dri_drawable_t *dri_drawable = nv->dri_drawable; + + nouveau_copy_buffer(dri_drawable, surf, NULL); +} + +static const char * +nouveau_get_name(struct pipe_winsys *pws) +{ + return "Nouveau/DRI"; +} + +static struct pipe_surface * +nouveau_surface_alloc(struct pipe_winsys *ws) +{ + struct pipe_surface *surf; + + surf = CALLOC_STRUCT(pipe_surface); + if (!surf) + return NULL; + + surf->refcount = 1; + surf->winsys = ws; + return surf; +} + +/* Borrowed from Mesa's xm_winsys */ +static unsigned int +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + +static int +nouveau_surface_alloc_storage +( + struct pipe_winsys *pws, + struct pipe_surface *surface, + unsigned width, + unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage +) +{ + const unsigned int ALIGNMENT = 256; + + assert(pws); + assert(surface); + + surface->width = width; + surface->height = height; + surface->format = format; + pf_get_block(format, &surface->block); + surface->nblocksx = pf_get_nblocksx(&surface->block, width); + surface->nblocksy = pf_get_nblocksy(&surface->block, height); + surface->stride = round_up(surface->nblocksx * surface->block.size, ALIGNMENT); + surface->usage = flags; + surface->buffer = pws->buffer_create(pws, ALIGNMENT, PIPE_BUFFER_USAGE_PIXEL, surface->stride * surface->nblocksy); + + return 0; +} + +static void +nouveau_surface_release(struct pipe_winsys *ws, struct pipe_surface **s) +{ + struct pipe_surface *surf = *s; + + *s = NULL; + if (--surf->refcount <= 0) { + if (surf->buffer) + pipe_buffer_reference(ws, &surf->buffer, NULL); + free(surf); + } +} + +static struct pipe_buffer * +nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, + unsigned usage, unsigned size) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_context *nv = nvpws->nv; + struct nouveau_device *dev = nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + uint32_t flags; + + nvbuf = calloc(1, sizeof(*nvbuf)); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.alignment = alignment; + nvbuf->base.usage = usage; + nvbuf->base.size = size; + + flags = NOUVEAU_BO_LOCAL; + + if (usage & PIPE_BUFFER_USAGE_PIXEL) { + if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) + flags |= NOUVEAU_BO_GART; + flags |= NOUVEAU_BO_VRAM; + } + + if (usage & PIPE_BUFFER_USAGE_VERTEX) { + if (nv->cap.hw_vertex_buffer) + flags |= NOUVEAU_BO_GART; + } + + if (usage & PIPE_BUFFER_USAGE_INDEX) { + if (nv->cap.hw_index_buffer) + flags |= NOUVEAU_BO_GART; + } + + if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { + free(nvbuf); + return NULL; + } + + return &nvbuf->base; +} + +static struct pipe_buffer * +nouveau_pipe_bo_user_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_device *dev = nvpws->nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + + nvbuf = calloc(1, sizeof(*nvbuf)); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.size = bytes; + + if (nouveau_bo_user(dev, ptr, bytes, &nvbuf->bo)) { + free(nvbuf); + return NULL; + } + + return &nvbuf->base; +} + +static void +nouveau_pipe_bo_del(struct pipe_winsys *ws, struct pipe_buffer *buf) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + + nouveau_bo_del(&nvbuf->bo); + free(nvbuf); +} + +static void * +nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf, + unsigned flags) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + uint32_t map_flags = 0; + + if (flags & PIPE_BUFFER_USAGE_CPU_READ) + map_flags |= NOUVEAU_BO_RD; + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + map_flags |= NOUVEAU_BO_WR; + + if (nouveau_bo_map(nvbuf->bo, map_flags)) + return NULL; + return nvbuf->bo->map; +} + +static void +nouveau_pipe_bo_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_buffer(buf); + + nouveau_bo_unmap(nvbuf->bo); +} + +static INLINE struct nouveau_fence * +nouveau_pipe_fence(struct pipe_fence_handle *pfence) +{ + return (struct nouveau_fence *)pfence; +} + +static void +nouveau_pipe_fence_reference(struct pipe_winsys *ws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *pfence) +{ + nouveau_fence_ref((void *)pfence, (void *)ptr); +} + +static int +nouveau_pipe_fence_signalled(struct pipe_winsys *ws, + struct pipe_fence_handle *pfence, unsigned flag) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)ws; + struct nouveau_fence *fence = nouveau_pipe_fence(pfence); + + if (nouveau_fence(fence)->signalled == 0) + nouveau_fence_flush(nvpws->nv->nvc->channel); + + return !nouveau_fence(fence)->signalled; +} + +static int +nouveau_pipe_fence_finish(struct pipe_winsys *ws, + struct pipe_fence_handle *pfence, unsigned flag) +{ + struct nouveau_fence *fence = nouveau_pipe_fence(pfence); + struct nouveau_fence *ref = NULL; + + nouveau_fence_ref(fence, &ref); + return nouveau_fence_wait(&ref); +} + +struct pipe_winsys * +nouveau_create_pipe_winsys(struct nouveau_context *nv) +{ + struct nouveau_pipe_winsys *nvpws; + struct pipe_winsys *pws; + + nvpws = CALLOC_STRUCT(nouveau_pipe_winsys); + if (!nvpws) + return NULL; + nvpws->nv = nv; + pws = &nvpws->pws; + + pws->flush_frontbuffer = nouveau_flush_frontbuffer; + + pws->surface_alloc = nouveau_surface_alloc; + pws->surface_alloc_storage = nouveau_surface_alloc_storage; + pws->surface_release = nouveau_surface_release; + + pws->buffer_create = nouveau_pipe_bo_create; + pws->buffer_destroy = nouveau_pipe_bo_del; + pws->user_buffer_create = nouveau_pipe_bo_user_create; + pws->buffer_map = nouveau_pipe_bo_map; + pws->buffer_unmap = nouveau_pipe_bo_unmap; + + pws->fence_reference = nouveau_pipe_fence_reference; + pws->fence_signalled = nouveau_pipe_fence_signalled; + pws->fence_finish = nouveau_pipe_fence_finish; + + pws->get_name = nouveau_get_name; + + return &nvpws->pws; +} + diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.h new file mode 120000 index 0000000000..264716fef0 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.h @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_winsys_pipe.h \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_softpipe.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_softpipe.c new file mode 120000 index 0000000000..83faccde96 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_softpipe.c @@ -0,0 +1 @@ +../../dri/nouveau/nouveau_winsys_softpipe.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nv04_surface.c b/src/gallium/winsys/g3dvl/nouveau/nv04_surface.c new file mode 120000 index 0000000000..e05f0671d6 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nv04_surface.c @@ -0,0 +1 @@ +../../dri/nouveau/nv04_surface.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/nouveau/nv50_surface.c b/src/gallium/winsys/g3dvl/nouveau/nv50_surface.c new file mode 120000 index 0000000000..3850748229 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nv50_surface.c @@ -0,0 +1 @@ +../../dri/nouveau/nv50_surface.c \ No newline at end of file diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h new file mode 100644 index 0000000000..c83db28dd9 --- /dev/null +++ b/src/gallium/winsys/g3dvl/vl_winsys.h @@ -0,0 +1,14 @@ +#ifndef vl_winsys_h +#define vl_winsys_h + +#include + +struct pipe_context; + +struct pipe_context* create_pipe_context(Display *display, int screen); +int destroy_pipe_context(struct pipe_context *pipe); +int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable); +int unbind_pipe_drawable(struct pipe_context *pipe); + +#endif + diff --git a/src/gallium/winsys/g3dvl/xsp_winsys.c b/src/gallium/winsys/g3dvl/xsp_winsys.c index c660c6e018..0100fe37bd 100644 --- a/src/gallium/winsys/g3dvl/xsp_winsys.c +++ b/src/gallium/winsys/g3dvl/xsp_winsys.c @@ -1,4 +1,4 @@ -#include "xsp_winsys.h" +#include "vl_winsys.h" #include #include #include @@ -11,10 +11,17 @@ struct xsp_pipe_winsys { struct pipe_winsys base; - Display *display; XImage fbimage; }; +struct xsp_context +{ + Display *display; + int screen; + Drawable drawable; + int drawable_bound; +}; + struct xsp_buffer { struct pipe_buffer base; @@ -183,13 +190,18 @@ static int xsp_fence_finish(struct pipe_winsys *pws, struct pipe_fence_handle *f static void xsp_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surface, void *context_private) { - struct xsp_pipe_winsys *xsp_winsys; + struct xsp_pipe_winsys *xsp_winsys; + struct xsp_context *xsp_context; assert(pws); assert(surface); assert(context_private); xsp_winsys = (struct xsp_pipe_winsys*)pws; + xsp_context = (struct xsp_context*)context_private; + + if (!xsp_context->drawable_bound) + return; xsp_winsys->fbimage.width = surface->width; xsp_winsys->fbimage.height = surface->height; @@ -198,9 +210,9 @@ static void xsp_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface * XPutImage ( - xsp_winsys->display, - *(Drawable*)context_private, - XDefaultGC(xsp_winsys->display, XDefaultScreen(xsp_winsys->display)), + xsp_context->display, + xsp_context->drawable, + XDefaultGC(xsp_context->display, xsp_context->screen), &xsp_winsys->fbimage, 0, 0, @@ -209,7 +221,7 @@ static void xsp_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface * surface->width, surface->height ); - XFlush(xsp_winsys->display); + XFlush(xsp_context->display); pipe_surface_unmap(surface); } @@ -221,11 +233,37 @@ static const char* xsp_get_name(struct pipe_winsys *pws) /* Show starts here */ -struct pipe_context* create_pipe_context(Display *display) +int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable) +{ + struct xsp_context *xsp_context; + + assert(pipe); + + xsp_context = pipe->priv; + xsp_context->drawable = drawable; + xsp_context->drawable_bound = 1; + + return 0; +} + +int unbind_pipe_drawable(struct pipe_context *pipe) +{ + struct xsp_context *xsp_context; + + assert(pipe); + + xsp_context = pipe->priv; + xsp_context->drawable_bound = 0; + + return 0; +} + +struct pipe_context* create_pipe_context(Display *display, int screen) { struct xsp_pipe_winsys *xsp_winsys; - struct pipe_screen *screen; - struct pipe_context *pipe; + struct xsp_context *xsp_context; + struct pipe_screen *sp_screen; + struct pipe_context *sp_pipe; assert(display); @@ -243,7 +281,6 @@ struct pipe_context* create_pipe_context(Display *display) xsp_winsys->base.fence_finish = xsp_fence_finish; xsp_winsys->base.flush_frontbuffer = xsp_flush_frontbuffer; xsp_winsys->base.get_name = xsp_get_name; - xsp_winsys->display = display; { /* XXX: Can't use the returned XImage* directly, @@ -269,10 +306,16 @@ struct pipe_context* create_pipe_context(Display *display) XDestroyImage(template); } - screen = softpipe_create_screen((struct pipe_winsys*)xsp_winsys); - pipe = softpipe_create(screen, (struct pipe_winsys*)xsp_winsys, NULL); + sp_screen = softpipe_create_screen((struct pipe_winsys*)xsp_winsys); + sp_pipe = softpipe_create(sp_screen, (struct pipe_winsys*)xsp_winsys, NULL); + + xsp_context = calloc(1, sizeof(struct xsp_context)); + xsp_context->display = display; + xsp_context->screen = screen; + + sp_pipe->priv = xsp_context; - return pipe; + return sp_pipe; } int destroy_pipe_context(struct pipe_context *pipe) @@ -284,6 +327,7 @@ int destroy_pipe_context(struct pipe_context *pipe) screen = pipe->screen; winsys = pipe->winsys; + free(pipe->priv); pipe->destroy(pipe); screen->destroy(screen); free(winsys); diff --git a/src/gallium/winsys/g3dvl/xsp_winsys.h b/src/gallium/winsys/g3dvl/xsp_winsys.h deleted file mode 100644 index cb163dc24d..0000000000 --- a/src/gallium/winsys/g3dvl/xsp_winsys.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef xsp_winsys_h -#define xsp_winsys_h - -#include - -struct pipe_context; - -struct pipe_context* create_pipe_context(Display *display); -int destroy_pipe_context(struct pipe_context *pipe); - -#endif - diff --git a/src/libXvMC/Makefile b/src/libXvMC/Makefile index 4985ecd3e9..83fcfc523c 100644 --- a/src/libXvMC/Makefile +++ b/src/libXvMC/Makefile @@ -1,7 +1,12 @@ TARGET = libXvMCg3dvl.so SONAME = libXvMCg3dvl.so.1 GALLIUMDIR = ../gallium -OBJECTS = block.o surface.o context.o subpicture.o attributes.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + +OBJECTS = block.o surface.o context.o subpicture.o attributes.o + +ifeq (${DRIVER}, softpipe) +OBJECTS += ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o +endif CFLAGS += -g -fPIC -Wall -Werror \ -I${GALLIUMDIR}/state_trackers/g3dvl \ @@ -9,6 +14,8 @@ CFLAGS += -g -fPIC -Wall -Werror \ -I${GALLIUMDIR}/include \ -I${GALLIUMDIR}/auxiliary \ -I${GALLIUMDIR}/drivers + +ifeq (${DRIVER}, softpipe) LDFLAGS += -L${GALLIUMDIR}/state_trackers/g3dvl \ -L${GALLIUMDIR}/drivers/softpipe \ -L${GALLIUMDIR}/auxiliary/tgsi \ @@ -17,7 +24,17 @@ LDFLAGS += -L${GALLIUMDIR}/state_trackers/g3dvl \ -L${GALLIUMDIR}/auxiliary/cso_cache \ -L${GALLIUMDIR}/auxiliary/util \ -L${GALLIUMDIR}/auxiliary/rtasm +else +LDFLAGS += -L${GALLIUMDIR}/state_trackers/g3dvl \ + -L${GALLIUMDIR}/winsys/g3dvl/nouveau \ + -L${GALLIUMDIR}/auxiliary/util +endif + +ifeq (${DRIVER}, softpipe) LIBS += -lg3dvl -lsoftpipe -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lutil -lm +else +LIBS += -lg3dvl -lnouveau_dri -lutil +endif ############################################# diff --git a/src/libXvMC/context.c b/src/libXvMC/context.c index 22eadc1889..59e1cb2b25 100644 --- a/src/libXvMC/context.c +++ b/src/libXvMC/context.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include static Status Validate(Display *display, XvPortID port, int surface_type_id, unsigned int width, unsigned int height, int flags, int *chroma_format) { @@ -117,7 +117,7 @@ Status XvMCCreateContext(Display *display, XvPortID port, int surface_type_id, i if (ret != Success) return ret; - pipe = create_pipe_context(display); + pipe = create_pipe_context(display, XDefaultScreen(display)); assert(pipe); -- cgit v1.2.3 From 6858dd50c9b696c1c6044f5a403000f9d20b286b Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 16 Aug 2008 13:04:23 -0400 Subject: g3dvl: Modularized rendering, refactored to accommodate VAAPI, other APIs. --- src/gallium/state_trackers/g3dvl/Makefile | 4 +- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 694 ++++++ src/gallium/state_trackers/g3dvl/vl_basic_csc.h | 13 + src/gallium/state_trackers/g3dvl/vl_context.c | 2272 +------------------ src/gallium/state_trackers/g3dvl/vl_context.h | 118 +- src/gallium/state_trackers/g3dvl/vl_csc.h | 53 + src/gallium/state_trackers/g3dvl/vl_data.c | 130 +- src/gallium/state_trackers/g3dvl/vl_data.h | 19 +- src/gallium/state_trackers/g3dvl/vl_defs.h | 1 - src/gallium/state_trackers/g3dvl/vl_display.c | 48 + src/gallium/state_trackers/g3dvl/vl_display.h | 29 + src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c | 2315 ++++++++++++++++++++ src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h | 18 + src/gallium/state_trackers/g3dvl/vl_render.h | 33 + src/gallium/state_trackers/g3dvl/vl_screen.c | 115 + src/gallium/state_trackers/g3dvl/vl_screen.h | 63 + src/gallium/state_trackers/g3dvl/vl_shader_build.c | 37 +- src/gallium/state_trackers/g3dvl/vl_shader_build.h | 1 - src/gallium/state_trackers/g3dvl/vl_surface.c | 671 +----- src/gallium/state_trackers/g3dvl/vl_surface.h | 91 +- src/gallium/state_trackers/g3dvl/vl_types.h | 124 +- src/gallium/state_trackers/g3dvl/vl_util.c | 9 +- src/gallium/state_trackers/g3dvl/vl_util.h | 1 - src/libXvMC/block.c | 47 +- src/libXvMC/context.c | 194 +- src/libXvMC/surface.c | 403 ++-- 26 files changed, 4061 insertions(+), 3442 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_basic_csc.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_basic_csc.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_csc.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_display.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_display.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_render.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_screen.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_screen.h (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index c6a22cad4e..9995c554ab 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,5 +1,6 @@ TARGET = libg3dvl.a -OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o vl_util.o +OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_data.o vl_shader_build.o vl_util.o vl_basic_csc.o \ + vl_r16snorm_mc.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl @@ -15,4 +16,3 @@ ${TARGET}: ${OBJECTS} clean: rm -rf ${OBJECTS} ${TARGET} - diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c new file mode 100644 index 0000000000..ea003a31d1 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -0,0 +1,694 @@ +#define VL_INTERNAL +#include "vl_basic_csc.h" +#include +#include +#include +#include +#include +#include +#include +#include "vl_csc.h" +#include "vl_surface.h" +#include "vl_shader_build.h" +#include "vl_types.h" + +struct vlVertexShaderConsts +{ + struct vlVertex4f src_scale; + struct vlVertex4f src_trans; +}; + +struct vlFragmentShaderConsts +{ + struct vlVertex4f bias; + float matrix[16]; +}; + +struct vlBasicCSC +{ + struct vlCSC base; + + struct pipe_context *pipe; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + void *sampler; + void *vertex_shader, *fragment_shader; + struct pipe_vertex_buffer vertex_bufs[2]; + struct pipe_vertex_element vertex_elems[2]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; +}; + +static int vlResizeFrameBuffer +( + struct vlCSC *csc, + unsigned int width, + unsigned int height +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + if (basic_csc->framebuffer.width == width && basic_csc->framebuffer.height == height) + return 0; + + if (basic_csc->framebuffer.cbufs[0]) + pipe->winsys->surface_release + ( + pipe->winsys, + &basic_csc->framebuffer.cbufs[0] + ); + + basic_csc->viewport.scale[0] = width; + basic_csc->viewport.scale[1] = height; + basic_csc->viewport.scale[2] = 1; + basic_csc->viewport.scale[3] = 1; + basic_csc->viewport.translate[0] = 0; + basic_csc->viewport.translate[1] = 0; + basic_csc->viewport.translate[2] = 0; + basic_csc->viewport.translate[3] = 0; + + basic_csc->framebuffer.width = width; + basic_csc->framebuffer.height = height; + basic_csc->framebuffer.cbufs[0] = pipe->winsys->surface_alloc(pipe->winsys); + pipe->winsys->surface_alloc_storage + ( + pipe->winsys, + basic_csc->framebuffer.cbufs[0], + width, + height, + PIPE_FORMAT_A8R8G8B8_UNORM, + /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ + PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, + 0 + ); + + return 0; +} + +static int vlBegin +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + pipe->set_framebuffer_state(pipe, &basic_csc->framebuffer); + pipe->set_viewport_state(pipe, &basic_csc->viewport); + pipe->bind_sampler_states(pipe, 1, (void**)&basic_csc->sampler); + /* Source texture set in vlPutSurface() */ + pipe->bind_vs_state(pipe, basic_csc->vertex_shader); + pipe->bind_fs_state(pipe, basic_csc->fragment_shader); + pipe->set_vertex_buffers(pipe, 2, basic_csc->vertex_bufs); + pipe->set_vertex_elements(pipe, 2, basic_csc->vertex_elems); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &basic_csc->vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &basic_csc->fs_const_buf); + + return 0; +} + +static int vlPutPictureCSC +( + struct vlCSC *csc, + struct vlSurface *surface, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(csc); + assert(surface); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + basic_csc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->src_scale.x = srcw / (float)surface->texture->width[0]; + vs_consts->src_scale.y = srch / (float)surface->texture->height[0]; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = srcx / (float)surface->texture->width[0]; + vs_consts->src_trans.y = srcy / (float)surface->texture->height[0]; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe->winsys->buffer_unmap(pipe->winsys, basic_csc->vs_const_buf.buffer); + + pipe->set_sampler_textures(pipe, 1, &surface->texture); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + + return 0; +} + +static int vlEnd +( + struct vlCSC *csc +) +{ + assert(csc); + + return 0; +} + +static struct pipe_surface* vlGetFrameBuffer +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + + return basic_csc->framebuffer.cbufs[0]; +} + +static int vlDestroy +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + unsigned int i; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + if (basic_csc->framebuffer.cbufs[0]) + pipe->winsys->surface_release + ( + pipe->winsys, + &basic_csc->framebuffer.cbufs[0] + ); + + pipe->delete_sampler_state(pipe, basic_csc->sampler); + pipe->delete_vs_state(pipe, basic_csc->vertex_shader); + pipe->delete_fs_state(pipe, basic_csc->fragment_shader); + + for (i = 0; i < 2; ++i) + pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vertex_bufs[i].buffer); + + pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vs_const_buf.buffer); + pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->fs_const_buf.buffer); + + free(basic_csc); + + return 0; +} + +/* + * Represents 2 triangles in a strip in normalized coords. + * Used to render the surface onto the frame buffer. + */ +static const struct vlVertex2f surface_verts[4] = +{ + {0.0f, 0.0f}, + {0.0f, 1.0f}, + {1.0f, 0.0f}, + {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above. We can use the position values directly. + * TODO: Duplicate these in the shader, no need to create a buffer. + */ +static const struct vlVertex2f *surface_texcoords = surface_verts; + +/* + * Identity color conversion constants, for debugging + */ +static const struct vlFragmentShaderConsts identity = +{ + { + 0.0f, 0.0f, 0.0f, 0.0f + }, + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct vlFragmentShaderConsts bt_601 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const struct vlFragmentShaderConsts bt_601_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct vlFragmentShaderConsts bt_709 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct vlFragmentShaderConsts bt_709_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +static int vlCreateVertexShader +( + struct vlBasicCSC *csc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = csc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale texcoord rect to source size + * decl c1 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mov o0, i0 ; Move pos in to pos out */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o1, t0, c1 ; Translate texcoord rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + csc->vertex_shader = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShader +( + struct vlBasicCSC *csc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = csc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + csc->fragment_shader = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateDataBufs +( + struct vlBasicCSC *csc +) +{ + struct pipe_context *pipe; + + assert(csc); + + pipe = csc->pipe; + + /* + Create our vertex buffer and vertex buffer element + VB contains 4 vertices that render a quad covering the entire window + to display a rendered surface + Quad is rendered as a tri strip + */ + csc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); + csc->vertex_bufs[0].max_index = 3; + csc->vertex_bufs[0].buffer_offset = 0; + csc->vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, csc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_verts, + sizeof(struct vlVertex2f) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, csc->vertex_bufs[0].buffer); + + csc->vertex_elems[0].src_offset = 0; + csc->vertex_elems[0].vertex_buffer_index = 0; + csc->vertex_elems[0].nr_components = 2; + csc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our texcoord buffer and texcoord buffer element + Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + csc->vertex_bufs[1].pitch = sizeof(struct vlVertex2f); + csc->vertex_bufs[1].max_index = 3; + csc->vertex_bufs[1].buffer_offset = 0; + csc->vertex_bufs[1].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, csc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_texcoords, + sizeof(struct vlVertex2f) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, csc->vertex_bufs[1].buffer); + + csc->vertex_elems[1].src_offset = 0; + csc->vertex_elems[1].vertex_buffer_index = 1; + csc->vertex_elems[1].nr_components = 2; + csc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our vertex shader's constant buffer + Const buffer contains scaling and translation vectors + */ + csc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); + csc->vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + csc->vs_const_buf.size + ); + + /* + Create our fragment shader's constant buffer + Const buffer contains the color conversion matrix and bias vectors + */ + csc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); + csc->fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + csc->fs_const_buf.size + ); + + /* + TODO: Refactor this into a seperate function, + allow changing the CSC matrix at runtime to switch between regular & full versions + */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &bt_601, + sizeof(struct vlFragmentShaderConsts) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, csc->fs_const_buf.buffer); + + return 0; +} + +static int vlInit +( + struct vlBasicCSC *csc +) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + + assert(csc); + + pipe = csc->pipe; + + /* Delay creating the FB until vlPutSurface() so we know window size */ + csc->framebuffer.num_cbufs = 1; + csc->framebuffer.cbufs[0] = NULL; + csc->framebuffer.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + csc->sampler = pipe->create_sampler_state(pipe, &sampler); + + vlCreateVertexShader(csc); + vlCreateFragmentShader(csc); + vlCreateDataBufs(csc); + + return 0; +} + +int vlCreateBasicCSC +( + struct pipe_context *pipe, + struct vlCSC **csc +) +{ + struct vlBasicCSC *basic_csc; + + assert(pipe); + assert(csc); + + basic_csc = calloc(1, sizeof(struct vlBasicCSC)); + + if (!basic_csc) + return 1; + + basic_csc->base.vlResizeFrameBuffer = &vlResizeFrameBuffer; + basic_csc->base.vlBegin = &vlBegin; + basic_csc->base.vlPutPicture = &vlPutPictureCSC; + basic_csc->base.vlEnd = &vlEnd; + basic_csc->base.vlGetFrameBuffer = &vlGetFrameBuffer; + basic_csc->base.vlDestroy = &vlDestroy; + basic_csc->pipe = pipe; + + vlInit(basic_csc); + + *csc = &basic_csc->base; + + return 0; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.h b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h new file mode 100644 index 0000000000..2e17f1d814 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h @@ -0,0 +1,13 @@ +#ifndef vl_basic_csc_h +#define vl_basic_csc_h + +struct pipe_context; +struct vlCSC; + +int vlCreateBasicCSC +( + struct pipe_context *pipe, + struct vlCSC **csc +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 5616de0ba4..56d360c05b 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -1,2088 +1,26 @@ +#define VL_INTERNAL #include "vl_context.h" #include #include #include -#include -#include #include -#include -#include -#include -#include -#include "vl_shader_build.h" -#include "vl_data.h" -#include "vl_defs.h" -#include "vl_util.h" +#include "vl_render.h" +#include "vl_r16snorm_mc.h" +#include "vl_csc.h" +#include "vl_basic_csc.h" -static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl o0 ; Vertex pos - * decl o1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move pos in to pos out - * mov o1, i1 ; Move texcoord in to texcoord out */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - //context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* decl i0 ; Texcoords for s0 */ - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl s0 ; Sampler for tex containing picture to display */ - decl = vl_decl_samplers(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* tex2d t0, i0, s0 ; Read src pixel */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c0 ; Subtract bias vector from pixel */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix - * dp4 o0.y, t0, c2 - * dp4 o0.z, t0, c3 - * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - //context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlInitIDCT(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - struct pipe_texture template; - unsigned int i; - - assert(context); - - pipe = context->pipe; - - context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH; - context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT; - context->states.idct.viewport.scale[2] = 1; - context->states.idct.viewport.scale[3] = 1; - context->states.idct.viewport.translate[0] = 0; - context->states.idct.viewport.translate[1] = 0; - context->states.idct.viewport.translate[2] = 0; - context->states.idct.viewport.translate[3] = 0; - - context->states.idct.render_target.width = VL_BLOCK_WIDTH; - context->states.idct.render_target.height = VL_BLOCK_HEIGHT; - context->states.idct.render_target.num_cbufs = 1; - context->states.idct.render_target.zsbuf = NULL; - - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - sampler.min_lod = 0; - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler); - - memset(&template, 0, sizeof(struct pipe_texture)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_A8L8_UNORM; - template.last_level = 0; - template.width[0] = 8; - template.height[0] = 8; - template.depth[0] = 1; - template.compressed = 0; - pf_get_block(template.format, &template.block); - - context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template); - - template.format = PIPE_FORMAT_A8R8G8B8_UNORM; - template.width[0] = 16; - template.height[0] = 1; - - context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template); - - for (i = 0; i < 2; ++i) - { - context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i]; - context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i]; - /* - context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F); - context->states.idct.vertex_bufs[i].max_index = 3; - context->states.idct.vertex_bufs[i].buffer_offset = 0; - context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_VERTEX2F) * 4 - ); - - context->states.idct.vertex_buf_elems[i].src_offset = 0; - context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i; - context->states.idct.vertex_buf_elems[i].nr_components = 2; - context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; - */ - } - - vlCreateVertexShaderFrameIDCT(context); - vlCreateFragmentShaderFrameIDCT(context); - - return 0; -} - -static int vlDestroyIDCT(struct VL_CONTEXT *context) -{ - //unsigned int i; - - assert(context); - - context->pipe->delete_sampler_state(context->pipe, context->states.idct.sampler); - - //for (i = 0; i < 2; ++i) - //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer); - - pipe_texture_release(&context->states.idct.texture); - pipe_texture_release(&context->states.idct.basis); - - //context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs); - //context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs); - - //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer); - //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer); - - return 0; -} - -static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - */ - for (i = 0; i < 2; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul o0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Ref macroblock texcoords - */ - for (i = 0; i < 4; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field ref macroblock texcoords - * decl o4 ; Bottom field ref macroblock texcoords - * decl o5 ; Denormalized vertex pos - */ - for (i = 0; i < 6; i++) - { - decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - mov o1, i1 ; Move input luma texcoords to output - mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock - add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o5, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Denormalized vertex pos - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t4 */ - decl = vl_decl_temps(0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* XXX: Pos values off by 0.5? */ - /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move past ref macroblock texcoords into position - * decl c4 ; Unused - * decl c5 ; Translation vector to move future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Past ref macroblock texcoords - * decl o4 ; Future ref macroblock texcoords - */ - for (i = 0; i < 5; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on past ref macroblock - add o4, t0, c5 ; Translate rect into position on future ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position - * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position - * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field past ref macroblock texcoords - * decl o4 ; Bottom field past ref macroblock texcoords - * decl o5 ; Top field future ref macroblock texcoords - * decl o6 ; Bottom field future ref macroblock texcoords - * decl o7 ; Denormalized vertex pos - */ - for (i = 0; i < 8; i++) - { - decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock - * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock - * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock - * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o7, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s4 - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t2 */ - decl = vl_decl_temps(0, 2); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock - * tex2d t2, i3, s4 ; Read texel from future ref macroblock - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Texcoords for s4 - * decl i5 ; Texcoords for s4 - * decl i6 ; Denormalized vertex pos - */ - for (i = 0; i < 7; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels - * ; and for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t5 */ - decl = vl_decl_temps(0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* XXX: Pos values off by 0.5? */ - /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field - * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -int vlCreateDataBufsMC(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - unsigned int i; - - assert(context); - - pipe = context->pipe; - - /* Create our vertex buffer and vertex buffer element */ - context->states.mc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); - context->states.mc.vertex_bufs[0].max_index = 23; - context->states.mc.vertex_bufs[0].buffer_offset = 0; - context->states.mc.vertex_bufs[0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_VERTEX2F) * 24 - ); - - context->states.mc.vertex_buf_elems[0].src_offset = 0; - context->states.mc.vertex_buf_elems[0].vertex_buffer_index = 0; - context->states.mc.vertex_buf_elems[0].nr_components = 2; - context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Create our texcoord buffers and texcoord buffer elements */ - for (i = 1; i < 3; ++i) - { - context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F); - context->states.mc.vertex_bufs[i].max_index = 23; - context->states.mc.vertex_bufs[i].buffer_offset = 0; - context->states.mc.vertex_bufs[i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - - context->states.mc.vertex_buf_elems[i].src_offset = 0; - context->states.mc.vertex_buf_elems[i].vertex_buffer_index = i; - context->states.mc.vertex_buf_elems[i].nr_components = 2; - context->states.mc.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; - } - - /* Fill buffers */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_chroma_420_texcoords, - sizeof(struct VL_VERTEX2F) * 24 - ); - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_luma_texcoords, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - /* TODO: Accomodate 422, 444 */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_chroma_420_texcoords, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.vertex_bufs[i].buffer); - - /* Create our constant buffer */ - context->states.mc.vs_const_buf.size = sizeof(struct VL_MC_VS_CONSTS); - context->states.mc.vs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - context->states.mc.vs_const_buf.size - ); - - context->states.mc.fs_const_buf.size = sizeof(struct VL_MC_FS_CONSTS); - context->states.mc.fs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - context->states.mc.fs_const_buf.size - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &vl_mc_fs_consts, - sizeof(struct VL_MC_FS_CONSTS) - ); - - pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.fs_const_buf.buffer); - - return 0; -} - -static int vlInitMC(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - struct pipe_texture template; - unsigned int filters[5]; - unsigned int i; - - assert(context); - - pipe = context->pipe; - - /* For MC we render to textures, which are rounded up to nearest POT */ - context->states.mc.viewport.scale[0] = vlRoundUpPOT(context->video_width); - context->states.mc.viewport.scale[1] = vlRoundUpPOT(context->video_height); - context->states.mc.viewport.scale[2] = 1; - context->states.mc.viewport.scale[3] = 1; - context->states.mc.viewport.translate[0] = 0; - context->states.mc.viewport.translate[1] = 0; - context->states.mc.viewport.translate[2] = 0; - context->states.mc.viewport.translate[3] = 0; - - context->states.mc.render_target.width = vlRoundUpPOT(context->video_width); - context->states.mc.render_target.height = vlRoundUpPOT(context->video_height); - context->states.mc.render_target.num_cbufs = 1; - /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */ - context->states.mc.render_target.zsbuf = NULL; - - filters[0] = PIPE_TEX_FILTER_NEAREST; - filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[3] = PIPE_TEX_FILTER_LINEAR; - filters[4] = PIPE_TEX_FILTER_LINEAR; - - for (i = 0; i < 5; ++i) - { - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = filters[i]; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = filters[i]; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - sampler.min_lod = 0; - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - context->states.mc.samplers[i] = pipe->create_sampler_state(pipe, &sampler); - } - - memset(&template, 0, sizeof(struct pipe_texture)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_R16_SNORM; - template.last_level = 0; - template.width[0] = 8; - template.height[0] = 8 * 4; - template.depth[0] = 1; - template.compressed = 0; - pf_get_block(template.format, &template.block); - - context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template); - - if (context->video_format == VL_FORMAT_YCBCR_420) - template.height[0] = 8; - else if (context->video_format == VL_FORMAT_YCBCR_422) - template.height[0] = 8 * 2; - else if (context->video_format == VL_FORMAT_YCBCR_444) - template.height[0] = 8 * 4; - else - assert(0); - - context->states.mc.textures[1] = pipe->screen->texture_create(pipe->screen, &template); - context->states.mc.textures[2] = pipe->screen->texture_create(pipe->screen, &template); - - /* textures[3] & textures[4] are assigned from VL_SURFACEs for P and B macroblocks at render time */ - - vlCreateVertexShaderIMC(context); - vlCreateFragmentShaderIMC(context); - vlCreateVertexShaderFramePMC(context); - vlCreateVertexShaderFieldPMC(context); - vlCreateFragmentShaderFramePMC(context); - vlCreateFragmentShaderFieldPMC(context); - vlCreateVertexShaderFrameBMC(context); - vlCreateVertexShaderFieldBMC(context); - vlCreateFragmentShaderFrameBMC(context); - vlCreateFragmentShaderFieldBMC(context); - vlCreateDataBufsMC(context); - - return 0; -} - -static int vlDestroyMC(struct VL_CONTEXT *context) -{ - unsigned int i; - - assert(context); - - for (i = 0; i < 5; ++i) - context->pipe->delete_sampler_state(context->pipe, context->states.mc.samplers[i]); - - for (i = 0; i < 3; ++i) - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer); - - /* Textures 3 & 4 are not created directly, no need to release them here */ - for (i = 0; i < 3; ++i) - pipe_texture_release(&context->states.mc.textures[i]); - - context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs); - context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs); - - for (i = 0; i < 2; ++i) - { - context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]); - context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]); - context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]); - context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]); - } - - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer); - - return 0; -} - -static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale texcoord rect to source size - * decl c1 ; Translation vector to move texcoord rect into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mov o0, i0 ; Move pos in to pos out */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o1, t0, c1 ; Translate texcoord rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); - //free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* decl i0 ; Texcoords for s0 */ - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl c0 ; Bias vector for CSC - * decl c1-c4 ; CSC matrix c1-c4 - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl s0 ; Sampler for tex containing picture to display */ - decl = vl_decl_samplers(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* tex2d t0, i0, s0 ; Read src pixel */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c0 ; Subtract bias vector from pixel */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix - * dp4 o0.y, t0, c2 - * dp4 o0.z, t0, c3 - * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); - //free(tokens); - - return 0; -} - -static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - - assert(context); - - pipe = context->pipe; - - /* - Create our vertex buffer and vertex buffer element - VB contains 4 vertices that render a quad covering the entire window - to display a rendered surface - Quad is rendered as a tri strip - */ - context->states.csc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); - context->states.csc.vertex_bufs[0].max_index = 3; - context->states.csc.vertex_bufs[0].buffer_offset = 0; - context->states.csc.vertex_bufs[0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_VERTEX2F) * 4 - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_surface_vertex_positions, - sizeof(struct VL_VERTEX2F) * 4 - ); - - pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[0].buffer); - - context->states.csc.vertex_buf_elems[0].src_offset = 0; - context->states.csc.vertex_buf_elems[0].vertex_buffer_index = 0; - context->states.csc.vertex_buf_elems[0].nr_components = 2; - context->states.csc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* - Create our texcoord buffer and texcoord buffer element - Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices - */ - context->states.csc.vertex_bufs[1].pitch = sizeof(struct VL_TEXCOORD2F); - context->states.csc.vertex_bufs[1].max_index = 3; - context->states.csc.vertex_bufs[1].buffer_offset = 0; - context->states.csc.vertex_bufs[1].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_TEXCOORD2F) * 4 - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_surface_texcoords, - sizeof(struct VL_TEXCOORD2F) * 4 - ); - - pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[1].buffer); - - context->states.csc.vertex_buf_elems[1].src_offset = 0; - context->states.csc.vertex_buf_elems[1].vertex_buffer_index = 1; - context->states.csc.vertex_buf_elems[1].nr_components = 2; - context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* - Create our vertex shader's constant buffer - Const buffer contains scaling and translation vectors - */ - context->states.csc.vs_const_buf.size = sizeof(struct VL_CSC_VS_CONSTS); - context->states.csc.vs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - context->states.csc.vs_const_buf.size - ); - - /* - Create our fragment shader's constant buffer - Const buffer contains the color conversion matrix and bias vectors - */ - context->states.csc.fs_const_buf.size = sizeof(struct VL_CSC_FS_CONSTS); - context->states.csc.fs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - context->states.csc.fs_const_buf.size - ); - - /* - TODO: Refactor this into a seperate function, - allow changing the CSC matrix at runtime to switch between regular & full versions - */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.csc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &vl_csc_fs_consts_601, - sizeof(struct VL_CSC_FS_CONSTS) - ); - - pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.fs_const_buf.buffer); - - return 0; -} - -static int vlInitCSC(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - - assert(context); - - pipe = context->pipe; - - /* Delay creating the FB until vlPutSurface() so we know window size */ - context->states.csc.framebuffer.num_cbufs = 1; - context->states.csc.framebuffer.cbufs[0] = NULL; - context->states.csc.framebuffer.zsbuf = NULL; - - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - /*sampler.min_lod = ;*/ - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - context->states.csc.sampler = pipe->create_sampler_state(pipe, &sampler); - - vlCreateVertexShaderCSC(context); - vlCreateFragmentShaderCSC(context); - vlCreateDataBufsCSC(context); - - return 0; -} - -static int vlDestroyCSC(struct VL_CONTEXT *context) -{ - assert(context); - - /* - Since we create the final FB when we display our first surface, - it may not be created if vlPutSurface() is never called - */ - if (context->states.csc.framebuffer.cbufs[0]) - context->pipe->winsys->surface_release(context->pipe->winsys, &context->states.csc.framebuffer.cbufs[0]); - context->pipe->delete_sampler_state(context->pipe, context->states.csc.sampler); - context->pipe->delete_vs_state(context->pipe, context->states.csc.vertex_shader); - context->pipe->delete_fs_state(context->pipe, context->states.csc.fragment_shader); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vs_const_buf.buffer); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer); - - return 0; -} - -static int vlInitCommon(struct VL_CONTEXT *context) +static int vlInitCommon(struct vlContext *context) { struct pipe_context *pipe; struct pipe_rasterizer_state rast; struct pipe_blend_state blend; struct pipe_depth_stencil_alpha_state dsa; unsigned int i; - + assert(context); - + pipe = context->pipe; - + rast.flatshade = 1; rast.flatshade_first = 0; rast.light_twoside = 0; @@ -2113,9 +51,9 @@ static int vlInitCommon(struct VL_CONTEXT *context) rast.offset_units = 1; rast.offset_scale = 1; /*rast.sprite_coord_mode[i] = ;*/ - context->states.common.raster = pipe->create_rasterizer_state(pipe, &rast); - pipe->bind_rasterizer_state(pipe, context->states.common.raster); - + context->raster = pipe->create_rasterizer_state(pipe, &rast); + pipe->bind_rasterizer_state(pipe, context->raster); + blend.blend_enable = 0; blend.rgb_func = PIPE_BLEND_ADD; blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; @@ -2128,9 +66,9 @@ static int vlInitCommon(struct VL_CONTEXT *context) /* Needed to allow color writes to FB, even if blending disabled */ blend.colormask = PIPE_MASK_RGBA; blend.dither = 0; - context->states.common.blend = pipe->create_blend_state(pipe, &blend); - pipe->bind_blend_state(pipe, context->states.common.blend); - + context->blend = pipe->create_blend_state(pipe, &blend); + pipe->bind_blend_state(pipe, context->blend); + dsa.depth.enabled = 0; dsa.depth.writemask = 0; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -2149,134 +87,122 @@ static int vlInitCommon(struct VL_CONTEXT *context) dsa.alpha.enabled = 0; dsa.alpha.func = PIPE_FUNC_ALWAYS; dsa.alpha.ref = 0; - context->states.common.dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); - pipe->bind_depth_stencil_alpha_state(pipe, context->states.common.dsa); - - return 0; -} + context->dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + pipe->bind_depth_stencil_alpha_state(pipe, context->dsa); -static int vlDestroyCommon(struct VL_CONTEXT *context) -{ - assert(context); - - context->pipe->delete_blend_state(context->pipe, context->states.common.blend); - context->pipe->delete_rasterizer_state(context->pipe, context->states.common.raster); - context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->states.common.dsa); - return 0; } -static int vlInit(struct VL_CONTEXT *context) +int vlCreateContext +( + struct vlScreen *screen, + struct pipe_context *pipe, + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, + enum vlProfile profile, + enum vlEntryPoint entry_point, + struct vlContext **context +) { + struct vlContext *ctx; + + assert(screen); assert(context); - - vlInitCommon(context); - vlInitCSC(context); - vlInitMC(context); - vlInitIDCT(context); - + assert(pipe); + + ctx = calloc(1, sizeof(struct vlContext)); + + if (!ctx) + return 1; + + ctx->screen = screen; + ctx->pipe = pipe; + ctx->picture_width = picture_width; + ctx->picture_height = picture_height; + ctx->picture_format = picture_format; + ctx->profile = profile; + ctx->entry_point = entry_point; + + vlInitCommon(ctx); + + vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render); + vlCreateBasicCSC(pipe, &ctx->csc); + + *context = ctx; + return 0; } -static int vlDestroy(struct VL_CONTEXT *context) +int vlDestroyContext +( + struct vlContext *context +) { assert(context); - + /* XXX: Must unbind shaders before we can delete them for some reason */ context->pipe->bind_vs_state(context->pipe, NULL); context->pipe->bind_fs_state(context->pipe, NULL); - - vlDestroyCommon(context); - vlDestroyCSC(context); - vlDestroyMC(context); - vlDestroyIDCT(context); - + + context->render->vlDestroy(context->render); + context->csc->vlDestroy(context->csc); + + context->pipe->delete_blend_state(context->pipe, context->blend); + context->pipe->delete_rasterizer_state(context->pipe, context->raster); + context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->dsa); + + free(context); + return 0; } -int vlCreateContext +struct vlScreen* vlContextGetScreen ( - Display *display, - struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum VL_FORMAT video_format, - struct VL_CONTEXT **context + struct vlContext *context ) { - struct VL_CONTEXT *ctx; - - assert(display); - assert(pipe); assert(context); - - ctx = calloc(1, sizeof(struct VL_CONTEXT)); - - ctx->display = display; - ctx->pipe = pipe; - ctx->video_width = video_width; - ctx->video_height = video_height; - ctx->video_format = video_format; - - vlInit(ctx); - - /* Since we only change states in vlPutSurface() we need to start in render mode */ - vlBeginRender(ctx); - - *context = ctx; - - return 0; + + return context->screen; } -int vlDestroyContext(struct VL_CONTEXT *context) +struct pipe_context* vlGetPipeContext +( + struct vlContext *context +) { assert(context); - - vlDestroy(context); - - free(context); - - return 0; + + return context->pipe; } -int vlBeginRender(struct VL_CONTEXT *context) +unsigned int vlGetPictureWidth +( + struct vlContext *context +) { - struct pipe_context *pipe; - assert(context); - - pipe = context->pipe; - - /* Frame buffer set in vlRender*Macroblock() */ - /* Shaders, samplers, textures set in vlRender*Macroblock() */ - pipe->set_vertex_buffers(pipe, 3, context->states.mc.vertex_bufs); - pipe->set_vertex_elements(pipe, 3, context->states.mc.vertex_buf_elems); - pipe->set_viewport_state(pipe, &context->states.mc.viewport); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.mc.vs_const_buf); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.mc.fs_const_buf); - - return 0; + + return context->picture_width; } -int vlEndRender(struct VL_CONTEXT *context) +unsigned int vlGetPictureHeight +( + struct vlContext *context +) { - struct pipe_context *pipe; - assert(context); - - pipe = context->pipe; - - pipe->set_framebuffer_state(pipe, &context->states.csc.framebuffer); - pipe->set_viewport_state(pipe, &context->states.csc.viewport); - pipe->bind_sampler_states(pipe, 1, (void**)&context->states.csc.sampler); - /* Source texture set in vlPutSurface() */ - pipe->bind_vs_state(pipe, context->states.csc.vertex_shader); - pipe->bind_fs_state(pipe, context->states.csc.fragment_shader); - pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs); - pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.csc.vs_const_buf); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf); - - return 0; + + return context->picture_height; } +enum vlFormat vlGetPictureFormat +( + struct vlContext *context +) +{ + assert(context); + + return context->picture_format; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index bff318854a..3d14634c44 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -1,83 +1,73 @@ #ifndef vl_context_h #define vl_context_h -#include -#include #include "vl_types.h" struct pipe_context; -struct VL_CONTEXT +#ifdef VL_INTERNAL +struct vlRender; +struct vlCSC; + +struct vlContext { - Display *display; + struct vlScreen *screen; struct pipe_context *pipe; - unsigned int video_width; - unsigned int video_height; - enum VL_FORMAT video_format; - - struct - { - struct - { - struct pipe_rasterizer_state *raster; - struct pipe_depth_stencil_alpha_state *dsa; - struct pipe_blend_state *blend; - } common; - - struct - { - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state render_target; - struct pipe_sampler_state *sampler; - struct pipe_texture *texture; - struct pipe_texture *basis; - struct pipe_shader_state *frame_vs; - struct pipe_shader_state *frame_fs; - struct pipe_vertex_buffer *vertex_bufs[2]; - struct pipe_vertex_element *vertex_buf_elems[2]; - //struct pipe_constant_buffer vs_const_buf, fs_const_buf; - } idct; - - struct - { - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state render_target; - struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[5]; - struct pipe_shader_state *i_vs, *p_vs[2], *b_vs[2]; - struct pipe_shader_state *i_fs, *p_fs[2], *b_fs[2]; - struct pipe_vertex_buffer vertex_bufs[3]; - struct pipe_vertex_element vertex_buf_elems[3]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; - } mc; - - struct - { - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state framebuffer; - struct pipe_sampler_state *sampler; - struct pipe_shader_state *vertex_shader, *fragment_shader; - struct pipe_vertex_buffer vertex_bufs[2]; - struct pipe_vertex_element vertex_buf_elems[2]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; - } csc; - } states; + unsigned int picture_width; + unsigned int picture_height; + enum vlFormat picture_format; + enum vlProfile profile; + enum vlEntryPoint entry_point; + + void *raster; + void *dsa; + void *blend; + + struct vlRender *render; + struct vlCSC *csc; }; +#endif int vlCreateContext ( - Display *display, + struct vlScreen *screen, struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum VL_FORMAT video_format, - struct VL_CONTEXT **context + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, + enum vlProfile profile, + enum vlEntryPoint entry_point, + struct vlContext **context ); -int vlDestroyContext(struct VL_CONTEXT *context); +int vlDestroyContext +( + struct vlContext *context +); -int vlBeginRender(struct VL_CONTEXT *context); -int vlEndRender(struct VL_CONTEXT *context); +struct vlScreen* vlContextGetScreen +( + struct vlContext *context +); -#endif +struct pipe_context* vlGetPipeContext +( + struct vlContext *context +); +unsigned int vlGetPictureWidth +( + struct vlContext *context +); + +unsigned int vlGetPictureHeight +( + struct vlContext *context +); + +enum vlFormat vlGetPictureFormat +( + struct vlContext *context +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_csc.h b/src/gallium/state_trackers/g3dvl/vl_csc.h new file mode 100644 index 0000000000..36417a2792 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_csc.h @@ -0,0 +1,53 @@ +#ifndef vl_csc_h +#define vl_csc_h + +#include "vl_types.h" + +struct pipe_surface; + +struct vlCSC +{ + int (*vlResizeFrameBuffer) + ( + struct vlCSC *csc, + unsigned int width, + unsigned int height + ); + + int (*vlBegin) + ( + struct vlCSC *csc + ); + + int (*vlPutPicture) + ( + struct vlCSC *csc, + struct vlSurface *surface, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type + ); + + int (*vlEnd) + ( + struct vlCSC *csc + ); + + struct pipe_surface* (*vlGetFrameBuffer) + ( + struct vlCSC *csc + ); + + int (*vlDestroy) + ( + struct vlCSC *csc + ); +}; + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c index 0e5c8c77f9..f2476dbf1e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -6,17 +6,17 @@ * Need to be scaled to cover mbW*mbH macroblock pixels and translated into * position on target surface. */ -const struct VL_VERTEX2F vl_mb_vertex_positions[24] = +const struct vlVertex2f macroblock_verts[24] = { {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, - + {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, - + {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, - + {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} }; @@ -26,17 +26,17 @@ const struct VL_VERTEX2F vl_mb_vertex_positions[24] = * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. */ -const struct VL_TEXCOORD2F vl_luma_texcoords[24] = +const struct vlVertex2f macroblock_luma_texcoords[24] = { {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, - + {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, - + {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, - + {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} }; @@ -45,7 +45,7 @@ const struct VL_TEXCOORD2F vl_luma_texcoords[24] = * Represents texcoords for the above for rendering 1 chroma block. * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. */ -const struct VL_TEXCOORD2F *vl_chroma_420_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; +const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; /* * Represents texcoords for the above for rendering 2 chroma blocks arranged @@ -53,30 +53,13 @@ const struct VL_TEXCOORD2F *vl_chroma_420_texcoords = (const struct VL_TEXCOORD2 * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. */ -const struct VL_TEXCOORD2F *vl_chroma_422_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; +const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; /* * Represents texcoords for the above for rendering 4 chroma blocks. * Same case as 4 luma blocks. */ -const struct VL_TEXCOORD2F *vl_chroma_444_texcoords = vl_luma_texcoords; - -/* - * Represents 2 triangles in a strip in normalized coords. - * Used to render the surface onto the frame buffer. - */ -const struct VL_VERTEX2F vl_surface_vertex_positions[4] = -{ - {0.0f, 0.0f}, - {0.0f, 1.0f}, - {1.0f, 0.0f}, - {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above. We can use the position values directly. - */ -const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)vl_surface_vertex_positions; +const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; /* * Used when rendering P and B macroblocks, multiplier is applied to the A channel, @@ -84,97 +67,10 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*) * get back the differential. The differential is then added to the samples from the * reference surface(s). */ +#if 0 const struct VL_MC_FS_CONSTS vl_mc_fs_consts = { {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, {0.5f, 2.0f, 0.0f, 0.0f} }; - -/* - * Identity color conversion constants, for debugging - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity = -{ - { - 0.0f, 0.0f, 0.0f, 0.0f - }, - { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [16,235] - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601 = -{ - { - 0.0f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.0f, 0.0f, 1.371f, 0.0f, - 1.0f, -0.336f, -0.698f, 0.0f, - 1.0f, 1.732f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [0,255] - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full = -{ - { - 0.062745098f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.164f, 0.0f, 1.596f, 0.0f, - 1.164f, -0.391f, -0.813f, 0.0f, - 1.164f, 2.018f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [16,235] - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709 = -{ - { - 0.0f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.0f, 0.0f, 1.540f, 0.0f, - 1.0f, -0.183f, -0.459f, 0.0f, - 1.0f, 1.816f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [0,255] - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full = -{ - { - 0.062745098f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.164f, 0.0f, 1.793f, 0.0f, - 1.164f, -0.213f, -0.534f, 0.0f, - 1.164f, 2.115f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_data.h b/src/gallium/state_trackers/g3dvl/vl_data.h index 8f347273ad..f0de2e976c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.h +++ b/src/gallium/state_trackers/g3dvl/vl_data.h @@ -3,15 +3,18 @@ #include "vl_types.h" -extern const struct VL_VERTEX2F vl_mb_vertex_positions[24]; -extern const struct VL_TEXCOORD2F vl_luma_texcoords[24]; -extern const struct VL_TEXCOORD2F *vl_chroma_420_texcoords; -extern const struct VL_TEXCOORD2F *vl_chroma_422_texcoords; -extern const struct VL_TEXCOORD2F *vl_chroma_444_texcoords; +/* TODO: Needs to be rolled into the appropriate stage */ -extern const struct VL_VERTEX2F vl_surface_vertex_positions[4]; -extern const struct VL_TEXCOORD2F *vl_surface_texcoords; +extern const struct vlVertex2f macroblock_verts[24]; +extern const struct vlVertex2f macroblock_luma_texcoords[24]; +extern const struct vlVertex2f *macroblock_chroma_420_texcoords; +extern const struct vlVertex2f *macroblock_chroma_422_texcoords; +extern const struct vlVertex2f *macroblock_chroma_444_texcoords; +extern const struct vlVertex2f surface_verts[4]; +extern const struct vlVertex2f *surface_texcoords; + +/* extern const struct VL_MC_FS_CONSTS vl_mc_fs_consts; extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity; @@ -19,6 +22,6 @@ extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601; extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full; extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709; extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full; +*/ #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_defs.h b/src/gallium/state_trackers/g3dvl/vl_defs.h index e668a7a10e..d612d02502 100644 --- a/src/gallium/state_trackers/g3dvl/vl_defs.h +++ b/src/gallium/state_trackers/g3dvl/vl_defs.h @@ -9,4 +9,3 @@ #define VL_MACROBLOCK_SIZE (VL_MACROBLOCK_WIDTH * VL_MACROBLOCK_HEIGHT) #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_display.c b/src/gallium/state_trackers/g3dvl/vl_display.c new file mode 100644 index 0000000000..af80faa7f5 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_display.c @@ -0,0 +1,48 @@ +#define VL_INTERNAL +#include "vl_display.h" +#include +#include + +int vlCreateDisplay +( + vlNativeDisplay native_display, + struct vlDisplay **display +) +{ + struct vlDisplay *dpy; + + assert(native_display); + assert(display); + + dpy = calloc(1, sizeof(struct vlDisplay)); + + if (!dpy) + return 1; + + dpy->native = native_display; + *display = dpy; + + return 0; +} + +int vlDestroyDisplay +( + struct vlDisplay *display +) +{ + assert(display); + + free(display); + + return 0; +} + +vlNativeDisplay vlGetNativeDisplay +( + struct vlDisplay *display +) +{ + assert(display); + + return display->native; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_display.h b/src/gallium/state_trackers/g3dvl/vl_display.h new file mode 100644 index 0000000000..e11fd40799 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_display.h @@ -0,0 +1,29 @@ +#ifndef vl_display_h +#define vl_display_h + +#include "vl_types.h" + +#ifdef VL_INTERNAL +struct vlDisplay +{ + vlNativeDisplay native; +}; +#endif + +int vlCreateDisplay +( + vlNativeDisplay native_display, + struct vlDisplay **display +); + +int vlDestroyDisplay +( + struct vlDisplay *display +); + +vlNativeDisplay vlGetNativeDisplay +( + struct vlDisplay *display +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c new file mode 100644 index 0000000000..4fae224431 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c @@ -0,0 +1,2315 @@ +#define VL_INTERNAL +#include "vl_r16snorm_mc.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "vl_render.h" +#include "vl_shader_build.h" +#include "vl_surface.h" +#include "vl_util.h" +#include "vl_types.h" +#include "vl_defs.h" + +struct vlVertexShaderConsts +{ + /*struct vlVertex4f scale; + struct vlVertex4f denorm;*/ + struct vlVertex4f scale; + struct vlVertex4f mb_pos_trans; + struct vlVertex4f denorm; + struct + { + struct vlVertex4f top_field; + struct vlVertex4f bottom_field; + } mb_tc_trans[2]; +}; + +struct vlFragmentShaderConsts +{ + struct vlVertex4f multiplier; + struct vlVertex4f div; +}; + +struct vlR16SnormMC +{ + struct vlRender base; + + unsigned int video_width, video_height; + enum vlFormat video_format; + + struct pipe_context *pipe; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + struct pipe_sampler_state *samplers[5]; + struct pipe_texture *textures[5]; + void *i_vs, *p_vs[2], *b_vs[2]; + void *i_fs, *p_fs[2], *b_fs[2]; + struct pipe_vertex_buffer vertex_bufs[3]; + struct pipe_vertex_element vertex_elems[3]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; +}; + +int vlBegin +( + struct vlRender *render +) +{ + struct vlR16SnormMC *mc; + struct pipe_context *pipe; + + assert(render); + + mc = (struct vlR16SnormMC*)render; + pipe = mc->pipe; + + /* Frame buffer set in vlRender*Macroblock() */ + /* Shaders, samplers, textures set in vlRender*Macroblock() */ + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs); + pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + pipe->set_viewport_state(pipe, &mc->viewport); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf); + + return 0; +} + +/*static int vlGrabMacroBlock +( + struct vlR16SnormMC *mc, + struct vlMpeg2MacroBlock *macroblock +) +{ + assert(mc); + assert(macroblock); + + + + return 0; +}*/ + +/*#define DO_IDCT*/ + +#ifdef DO_IDCT +static int vlTransformBlock(short *src, short *dst, short bias) +{ + static const float basis[8][8] = + { + {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, + {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, + {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, + {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, + {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, + {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, + {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, + {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} + }; + + unsigned int x, y; + short tmp[64]; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + tmp[y * VL_BLOCK_WIDTH + x] = (short) + ( + src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + + src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + + src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + + src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + + src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + + src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + + src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + + src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] + ); + + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + { + dst[y * VL_BLOCK_WIDTH + x] = bias + (short) + ( + tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + + tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + + tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + + tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + + tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + + tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + + tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + + tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] + ); + if (dst[y * VL_BLOCK_WIDTH + x] > 255) + dst[y * VL_BLOCK_WIDTH + x] = 255; + else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) + dst[y * VL_BLOCK_WIDTH + x] = 0; + } + return 0; +} +#endif + +static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memset + ( + dst + y * dst_pitch, + 0, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +enum vlSampleType +{ + vlSampleTypeFull, + vlSampleTypeDiff +}; + +static int vlGrabBlocks +( + struct vlR16SnormMC *mc, + unsigned int coded_block_pattern, + enum vlDCTType dct_type, + enum vlSampleType sample_type, + short *blocks +) +{ + struct pipe_surface *tex_surface; + short *texels; + unsigned int tex_pitch; + unsigned int tb, sb = 0; + + assert(mc); + assert(blocks); + + tex_surface = mc->pipe->screen->get_tex_surface + ( + mc->pipe->screen, + mc->textures[0], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; + + for (tb = 0; tb < 4; ++tb) + { + if ((coded_block_pattern >> (5 - tb)) & 1) + { + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + +#ifdef DO_IDCT + vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0); +#endif + + if (dct_type == vlDCTTypeFrameCoded) + vlGrabFrameCodedBlock + ( + cur_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + else + vlGrabFieldCodedBlock + ( + cur_block, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + + ++sb; + } + else + vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); + } + + pipe_surface_unmap(tex_surface); + + /* TODO: Implement 422, 444 */ + for (tb = 0; tb < 2; ++tb) + { + tex_surface = mc->pipe->screen->get_tex_surface + ( + mc->pipe->screen, + mc->textures[tb + 1], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; + + if ((coded_block_pattern >> (1 - tb)) & 1) + { + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + +#ifdef DO_IDCT + vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0); +#endif + + vlGrabFrameCodedBlock + ( + cur_block, + texels, + tex_pitch + ); + + ++sb; + } + else + vlGrabNoBlock(texels, tex_pitch); + + pipe_surface_unmap(tex_surface); + } + + return 0; +} + +int vlRenderIMacroBlock +( + struct vlR16SnormMC *mc, + enum vlPictureType picture_type, + enum vlFieldOrder field_order, + unsigned int mbx, + unsigned int mby, + unsigned int coded_block_pattern, + enum vlDCTType dct_type, + short *blocks, + struct vlSurface *surface +) +{ + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(blocks); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != vlPictureTypeFrame) + return 0; + + vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeFull, blocks); + + pipe = mc->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + mc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + + pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &mc->render_target); + pipe->set_sampler_textures(pipe, 3, mc->textures); + pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->i_vs); + pipe->bind_fs_state(pipe, mc->i_fs); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderPMacroBlock +( + struct vlR16SnormMC *mc, + enum vlPictureType picture_type, + enum vlFieldOrder field_order, + unsigned int mbx, + unsigned int mby, + enum vlMotionType mc_type, + short top_x, + short top_y, + short bottom_x, + short bottom_y, + unsigned int coded_block_pattern, + enum vlDCTType dct_type, + short *blocks, + struct vlSurface *ref_surface, + struct vlSurface *surface +) +{ + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(motion_vectors); + assert(blocks); + assert(ref_surface); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != vlPictureTypeFrame) + return 0; + /* TODO: Implement other MC types */ + if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) + return 0; + + vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); + + pipe = mc->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + mc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + + if (mc_type == vlMotionTypeField) + { + vs_consts->denorm.x = (float)surface->texture->width[0]; + vs_consts->denorm.y = (float)surface->texture->height[0]; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, mc->p_vs[1]); + pipe->bind_fs_state(pipe, mc->p_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, mc->p_vs[0]); + pipe->bind_fs_state(pipe, mc->p_fs[0]); + } + + pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &mc->render_target); + + mc->textures[3] = ref_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures); + pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderBMacroBlock +( + struct vlR16SnormMC *mc, + enum vlPictureType picture_type, + enum vlFieldOrder field_order, + unsigned int mbx, + unsigned int mby, + enum vlMotionType mc_type, + short top_past_x, + short top_past_y, + short bottom_past_x, + short bottom_past_y, + short top_future_x, + short top_future_y, + short bottom_future_x, + short bottom_future_y, + unsigned int coded_block_pattern, + enum vlDCTType dct_type, + short *blocks, + struct vlSurface *past_surface, + struct vlSurface *future_surface, + struct vlSurface *surface +) +{ + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(motion_vectors); + assert(blocks); + assert(ref_surface); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != vlPictureTypeFrame) + return 0; + /* TODO: Implement other MC types */ + if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) + return 0; + + vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); + + pipe = mc->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + mc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_past_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_past_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_future_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_future_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[1].top_field.z = 0.0f; + vs_consts->mb_tc_trans[1].top_field.w = 0.0f; + + if (mc_type == vlMotionTypeField) + { + vs_consts->denorm.x = (float)surface->texture->width[0]; + vs_consts->denorm.y = (float)surface->texture->height[0]; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_past_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_past_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_future_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_future_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, mc->b_vs[1]); + pipe->bind_fs_state(pipe, mc->b_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, mc->b_vs[0]); + pipe->bind_fs_state(pipe, mc->b_fs[0]); + } + + pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &mc->render_target); + + mc->textures[3] = past_surface->texture; + mc->textures[4] = future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures); + pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderMacroBlocksMpeg2R16Snorm +( + struct vlRender *render, + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface +) +{ + struct vlR16SnormMC *mc; + unsigned int i; + + assert(render); + + mc = (struct vlR16SnormMC*)render; + + /*for (i = 0; i < batch->num_macroblocks; ++i) + vlGrabMacroBlock(batch->macroblocks[i]);*/ + + for (i = 0; i < batch->num_macroblocks; ++i) + { + switch (batch->macroblocks[i].mb_type) + { + case vlMacroBlockTypeIntra: + { + vlRenderIMacroBlock + ( + mc, + batch->picture_type, + batch->field_order, + batch->macroblocks[i].mbx, + batch->macroblocks[i].mby, + batch->macroblocks[i].cbp, + batch->macroblocks[i].dct_type, + batch->macroblocks[i].blocks, + surface + ); + break; + } + case vlMacroBlockTypeFwdPredicted: + { + vlRenderPMacroBlock + ( + mc, + batch->picture_type, + batch->field_order, + batch->macroblocks[i].mbx, + batch->macroblocks[i].mby, + batch->macroblocks[i].mo_type, + batch->macroblocks[i].PMV[0][0][0], + batch->macroblocks[i].PMV[0][0][1], + batch->macroblocks[i].PMV[1][0][0], + batch->macroblocks[i].PMV[1][0][1], + batch->macroblocks[i].cbp, + batch->macroblocks[i].dct_type, + batch->macroblocks[i].blocks, + batch->past_surface, + surface + ); + break; + } + case vlMacroBlockTypeBkwdPredicted: + { + vlRenderPMacroBlock + ( + mc, + batch->picture_type, + batch->field_order, + batch->macroblocks[i].mbx, + batch->macroblocks[i].mby, + batch->macroblocks[i].mo_type, + batch->macroblocks[i].PMV[0][1][0], + batch->macroblocks[i].PMV[0][1][1], + batch->macroblocks[i].PMV[1][1][0], + batch->macroblocks[i].PMV[1][1][1], + batch->macroblocks[i].cbp, + batch->macroblocks[i].dct_type, + batch->macroblocks[i].blocks, + batch->future_surface, + surface + ); + break; + } + case vlMacroBlockTypeBiPredicted: + { + vlRenderBMacroBlock + ( + mc, + batch->picture_type, + batch->field_order, + batch->macroblocks[i].mbx, + batch->macroblocks[i].mby, + batch->macroblocks[i].mo_type, + batch->macroblocks[i].PMV[0][0][0], + batch->macroblocks[i].PMV[0][0][1], + batch->macroblocks[i].PMV[1][0][0], + batch->macroblocks[i].PMV[1][0][1], + batch->macroblocks[i].PMV[0][1][0], + batch->macroblocks[i].PMV[0][1][1], + batch->macroblocks[i].PMV[1][1][0], + batch->macroblocks[i].PMV[1][1][1], + batch->macroblocks[i].cbp, + batch->macroblocks[i].dct_type, + batch->macroblocks[i].blocks, + batch->past_surface, + batch->future_surface, + surface + ); + break; + } + default: + assert(0); + } + } + + return 0; +} + +int vlEnd +( + struct vlRender *render +) +{ + assert(render); + + return 0; +} + +int vlDestroy +( + struct vlRender *render +) +{ + struct vlR16SnormMC *mc; + struct pipe_context *pipe; + unsigned int i; + + assert(render); + + mc = (struct vlR16SnormMC*)render; + pipe = mc->pipe; + + for (i = 0; i < 5; ++i) + pipe->delete_sampler_state(pipe, mc->samplers[i]); + + for (i = 0; i < 3; ++i) + pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[i].buffer); + + /* Textures 3 & 4 are not created directly, no need to release them here */ + for (i = 0; i < 3; ++i) + pipe_texture_release(&mc->textures[i]); + + pipe->delete_vs_state(pipe, mc->i_vs); + pipe->delete_fs_state(pipe, mc->i_fs); + + for (i = 0; i < 2; ++i) + { + pipe->delete_vs_state(pipe, mc->p_vs[i]); + pipe->delete_fs_state(pipe, mc->p_fs[i]); + pipe->delete_vs_state(pipe, mc->b_vs[i]); + pipe->delete_fs_state(pipe, mc->b_fs[i]); + } + + pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); + pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); + + free(mc); + + return 0; +} + +/* + * Represents 8 triangles (4 quads, 1 per block) in noormalized coords + * that render a macroblock. + * Need to be scaled to cover mbW*mbH macroblock pixels and translated into + * position on target surface. + */ +const struct vlVertex2f macroblock_verts[24] = +{ + {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, + {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, + + {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, + {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, + + {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, + {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, + + {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, + {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above for rendering 4 luma blocks arranged + * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at + * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. + */ +const struct vlVertex2f macroblock_luma_texcoords[24] = +{ + {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, + {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, + + {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, + {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, + + {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, + {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, + + {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, + {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above for rendering 1 chroma block. + * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. + */ +const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; + +/* + * Represents texcoords for the above for rendering 2 chroma blocks arranged + * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at + * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. + * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. + */ +const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; + +/* + * Represents texcoords for the above for rendering 4 chroma blocks. + * Same case as 4 luma blocks. + */ +const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; + +/* + * Used when rendering P and B macroblocks, multiplier is applied to the A channel, + * which is then added to the L channel, then the bias is subtracted from that to + * get back the differential. The differential is then added to the samples from the + * reference surface(s). + */ +const struct vlFragmentShaderConsts fs_consts = +{ + {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 0.0f} +}; + +static int vlCreateVertexShaderIMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->i_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderIMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + */ + for (i = 0; i < 2; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->i_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFramePMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Ref macroblock texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field ref macroblock texcoords + * decl o4 ; Bottom field ref macroblock texcoords + * decl o5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + mov o1, i1 ; Move input luma texcoords to output + mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock + add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o5, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFramePMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Denormalized vertex pos + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFrameBMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move past ref macroblock texcoords into position + * decl c4 ; Unused + * decl c5 ; Translation vector to move future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Past ref macroblock texcoords + * decl o4 ; Future ref macroblock texcoords + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate rect into position on past ref macroblock + add o4, t0, c5 ; Translate rect into position on future ref macroblock */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position + * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position + * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field past ref macroblock texcoords + * decl o4 ; Bottom field past ref macroblock texcoords + * decl o5 ; Top field future ref macroblock texcoords + * decl o6 ; Bottom field future ref macroblock texcoords + * decl o7 ; Denormalized vertex pos + */ + for (i = 0; i < 8; i++) + { + decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock + * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock + * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock + * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o7, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameBMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s4 + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock + * tex2d t2, i3, s4 ; Read texel from future ref macroblock + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->b_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldBMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Texcoords for s4 + * decl i5 ; Texcoords for s4 + * decl i6 ; Denormalized vertex pos + */ + for (i = 0; i < 7; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t5 */ + decl = vl_decl_temps(0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->b_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +int vlCreateDataBufs +( + struct vlR16SnormMC *mc +) +{ + struct pipe_context *pipe; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + + /* Create our vertex buffer and vertex buffer element */ + mc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); + mc->vertex_bufs[0].max_index = 23; + mc->vertex_bufs[0].buffer_offset = 0; + mc->vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 24 + ); + + mc->vertex_elems[0].src_offset = 0; + mc->vertex_elems[0].vertex_buffer_index = 0; + mc->vertex_elems[0].nr_components = 2; + mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Create our texcoord buffers and texcoord buffer elements */ + for (i = 1; i < 3; ++i) + { + mc->vertex_bufs[i].pitch = sizeof(struct vlVertex2f); + mc->vertex_bufs[i].max_index = 23; + mc->vertex_bufs[i].buffer_offset = 0; + mc->vertex_bufs[i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 24 + ); + + mc->vertex_elems[i].src_offset = 0; + mc->vertex_elems[i].vertex_buffer_index = i; + mc->vertex_elems[i].nr_components = 2; + mc->vertex_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + } + + /* Fill buffers */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + macroblock_verts, + sizeof(struct vlVertex2f) * 24 + ); + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + macroblock_luma_texcoords, + sizeof(struct vlVertex2f) * 24 + ); + /* TODO: Accomodate 422, 444 */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + macroblock_chroma_420_texcoords, + sizeof(struct vlVertex2f) * 24 + ); + + for (i = 0; i < 3; ++i) + pipe->winsys->buffer_unmap(pipe->winsys, mc->vertex_bufs[i].buffer); + + /* Create our constant buffer */ + mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); + mc->vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + mc->vs_const_buf.size + ); + + mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); + mc->fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + mc->fs_const_buf.size + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &fs_consts, + sizeof(struct vlFragmentShaderConsts) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); + + return 0; +} + +static int vlInit +( + struct vlR16SnormMC *mc +) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int filters[5]; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + + /* For MC we render to textures, which are rounded up to nearest POT */ + mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width); + mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height); + mc->viewport.scale[2] = 1; + mc->viewport.scale[3] = 1; + mc->viewport.translate[0] = 0; + mc->viewport.translate[1] = 0; + mc->viewport.translate[2] = 0; + mc->viewport.translate[3] = 0; + + mc->render_target.width = vlRoundUpPOT(mc->video_width); + mc->render_target.height = vlRoundUpPOT(mc->video_height); + mc->render_target.num_cbufs = 1; + /* FB for MC stage is a vlSurface, set in vlSetRenderSurface() */ + mc->render_target.zsbuf = NULL; + + filters[0] = PIPE_TEX_FILTER_NEAREST; + filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[2] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; + + for (i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler); + } + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_R16_SNORM; + template.last_level = 0; + template.width[0] = 8; + template.height[0] = 8 * 4; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + mc->textures[0] = pipe->screen->texture_create(pipe->screen, &template); + + if (mc->video_format == vlFormatYCbCr420) + template.height[0] = 8; + else if (mc->video_format == vlFormatYCbCr422) + template.height[0] = 8 * 2; + else if (mc->video_format == vlFormatYCbCr444) + template.height[0] = 8 * 4; + else + assert(0); + + mc->textures[1] = pipe->screen->texture_create(pipe->screen, &template); + mc->textures[2] = pipe->screen->texture_create(pipe->screen, &template); + + /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ + + vlCreateVertexShaderIMB(mc); + vlCreateFragmentShaderIMB(mc); + vlCreateVertexShaderFramePMB(mc); + vlCreateVertexShaderFieldPMB(mc); + vlCreateFragmentShaderFramePMB(mc); + vlCreateFragmentShaderFieldPMB(mc); + vlCreateVertexShaderFrameBMB(mc); + vlCreateVertexShaderFieldBMB(mc); + vlCreateFragmentShaderFrameBMB(mc); + vlCreateFragmentShaderFieldBMB(mc); + vlCreateDataBufs(mc); + + return 0; +} + +int vlCreateR16SNormMC +( + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum vlFormat video_format, + struct vlRender **render +) +{ + struct vlR16SnormMC *mc; + + assert(pipe); + assert(render); + + mc = calloc(1, sizeof(struct vlR16SnormMC)); + + mc->base.vlBegin = &vlBegin; + mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16Snorm; + mc->base.vlEnd = &vlEnd; + mc->base.vlDestroy = &vlDestroy; + mc->pipe = pipe; + mc->video_width = video_width; + mc->video_height = video_height; + + vlInit(mc); + + *render = &mc->base; + + return 0; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h new file mode 100644 index 0000000000..a6eecf05b6 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h @@ -0,0 +1,18 @@ +#ifndef vl_mc_h +#define vl_mc_h + +#include "vl_types.h" + +struct pipe_context; +struct vlRender; + +int vlCreateR16SNormMC +( + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum vlFormat video_format, + struct vlRender **render +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_render.h b/src/gallium/state_trackers/g3dvl/vl_render.h new file mode 100644 index 0000000000..63016b5cbe --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_render.h @@ -0,0 +1,33 @@ +#ifndef vl_render_h +#define vl_render_h + +#include "vl_types.h" + +struct pipe_surface; + +struct vlRender +{ + int (*vlBegin) + ( + struct vlRender *render + ); + + int (*vlRenderMacroBlocksMpeg2) + ( + struct vlRender *render, + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface + ); + + int (*vlEnd) + ( + struct vlRender *render + ); + + int (*vlDestroy) + ( + struct vlRender *render + ); +}; + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.c b/src/gallium/state_trackers/g3dvl/vl_screen.c new file mode 100644 index 0000000000..484f63b0d4 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_screen.c @@ -0,0 +1,115 @@ +#define VL_INTERNAL +#include "vl_screen.h" +#include +#include + +int vlCreateScreen +( + struct vlDisplay *display, + int screen, + struct pipe_screen *pscreen, + struct vlScreen **vl_screen +) +{ + struct vlScreen *scrn; + + assert(display); + assert(pscreen); + assert(vl_screen); + + scrn = calloc(1, sizeof(struct vlScreen)); + + if (!scrn) + return 1; + + scrn->display = display; + scrn->ordinal = screen; + scrn->pscreen = pscreen; + *vl_screen = scrn; + + return 0; +} + +int vlDestroyScreen +( + struct vlScreen *screen +) +{ + assert(screen); + + free(screen); + + return 0; +} + +struct vlDisplay* vlGetDisplay +( + struct vlScreen *screen +) +{ + assert(screen); + + return screen->display; +} + +struct pipe_screen* vlGetPipeScreen +( + struct vlScreen *screen +) +{ + assert(screen); + + return screen->pscreen; +} + +unsigned int vlGetMaxProfiles +( + struct vlScreen *screen +) +{ + assert(screen); + + return vlProfileCount; +} + +int vlQueryProfiles +( + struct vlScreen *screen, + enum vlProfile *profiles +) +{ + assert(screen); + assert(profiles); + + profiles[0] = vlProfileMpeg2Simple; + profiles[1] = vlProfileMpeg2Main; + + return 0; +} + +unsigned int vlGetMaxEntryPoints +( + struct vlScreen *screen +) +{ + assert(screen); + + return vlEntryPointCount; +} + +int vlQueryEntryPoints +( + struct vlScreen *screen, + enum vlProfile profile, + enum vlEntryPoint *entry_points +) +{ + assert(screen); + assert(entry_points); + + entry_points[0] = vlEntryPointIDCT; + entry_points[1] = vlEntryPointMC; + entry_points[2] = vlEntryPointCSC; + + return 0; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.h b/src/gallium/state_trackers/g3dvl/vl_screen.h new file mode 100644 index 0000000000..98f3d429b6 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_screen.h @@ -0,0 +1,63 @@ +#ifndef vl_screen_h +#define vl_screen_h + +#include "vl_types.h" + +struct pipe_screen; + +#ifdef VL_INTERNAL +struct vlScreen +{ + struct vlDisplay *display; + unsigned int ordinal; + struct pipe_screen *pscreen; +}; +#endif + +int vlCreateScreen +( + struct vlDisplay *display, + int screen, + struct pipe_screen *pscreen, + struct vlScreen **vl_screen +); + +int vlDestroyScreen +( + struct vlScreen *screen +); + +struct vlDisplay* vlGetDisplay +( + struct vlScreen *screen +); + +struct pipe_screen* vlGetPipeScreen +( + struct vlScreen *screen +); + +unsigned int vlGetMaxProfiles +( + struct vlScreen *screen +); + +int vlQueryProfiles +( + struct vlScreen *screen, + enum vlProfile *profiles +); + +unsigned int vlGetMaxEntryPoints +( + struct vlScreen *screen +); + +int vlQueryEntryPoints +( + struct vlScreen *screen, + enum vlProfile profile, + enum vlEntryPoint *entry_points +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c index 5f30e23ff8..51f1721a33 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.c +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -13,7 +13,7 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index decl.Semantic.SemanticIndex = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -27,7 +27,7 @@ struct tgsi_full_declaration vl_decl_interpolated_input ) { struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - + assert ( interpolation == TGSI_INTERPOLATE_CONSTANT || @@ -42,21 +42,21 @@ struct tgsi_full_declaration vl_decl_interpolated_input decl.Declaration.Interpolate = interpolation;; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last) { struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - + decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = name; decl.Semantic.SemanticIndex = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -70,7 +70,7 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde decl.Semantic.SemanticIndex = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -82,7 +82,7 @@ struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last decl.Declaration.File = TGSI_FILE_TEMPORARY; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -94,7 +94,7 @@ struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int l decl.Declaration.File = TGSI_FILE_SAMPLER; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -108,7 +108,7 @@ struct tgsi_full_instruction vl_inst2 ) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = dst_file; @@ -116,7 +116,7 @@ struct tgsi_full_instruction vl_inst2 inst.Instruction.NumSrcRegs = 1; inst.FullSrcRegisters[0].SrcRegister.File = src_file; inst.FullSrcRegisters[0].SrcRegister.Index = src_index; - + return inst; } @@ -132,7 +132,7 @@ struct tgsi_full_instruction vl_inst3 ) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = dst_file; @@ -142,7 +142,7 @@ struct tgsi_full_instruction vl_inst3 inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; inst.FullSrcRegisters[1].SrcRegister.File = src2_file; inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; - + return inst; } @@ -158,7 +158,7 @@ struct tgsi_full_instruction vl_tex ) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = dst_file; @@ -169,7 +169,7 @@ struct tgsi_full_instruction vl_tex inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; inst.FullSrcRegisters[1].SrcRegister.File = src2_file; inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; - + return inst; } @@ -187,7 +187,7 @@ struct tgsi_full_instruction vl_inst4 ) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = dst_file; @@ -199,18 +199,17 @@ struct tgsi_full_instruction vl_inst4 inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; inst.FullSrcRegisters[2].SrcRegister.File = src3_file; inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; - + return inst; } struct tgsi_full_instruction vl_end(void) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = TGSI_OPCODE_END; inst.Instruction.NumDstRegs = 0; inst.Instruction.NumSrcRegs = 0; - + return inst; } - diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h index 878d7e2c45..dc615cb156 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.h +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h @@ -59,4 +59,3 @@ struct tgsi_full_instruction vl_inst4 struct tgsi_full_instruction vl_end(void); #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 1386b1107c..ffc8122172 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -1,628 +1,177 @@ +#define VL_INTERNAL #include "vl_surface.h" #include #include -#include +#include +#include #include -#include #include #include +#include "vl_screen.h" #include "vl_context.h" -#include "vl_defs.h" +#include "vl_render.h" +#include "vl_csc.h" #include "vl_util.h" -/*#define DO_IDCT*/ - -#ifdef DO_IDCT -static int vlTransformBlock(short *src, short *dst, short bias) +int vlCreateSurface +( + struct vlScreen *screen, + unsigned int width, + unsigned int height, + enum vlFormat format, + struct vlSurface **surface +) { - static const float basis[8][8] = - { - {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, - {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, - {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, - {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, - {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, - {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, - {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, - {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} - }; - - unsigned int x, y; - short tmp[64]; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - tmp[y * VL_BLOCK_WIDTH + x] = (short) - ( - src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + - src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + - src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + - src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + - src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + - src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + - src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + - src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] - ); - - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - { - dst[y * VL_BLOCK_WIDTH + x] = bias + (short) - ( - tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + - tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + - tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + - tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + - tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + - tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + - tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + - tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] - ); - if (dst[y * VL_BLOCK_WIDTH + x] > 255) - dst[y * VL_BLOCK_WIDTH + x] = 255; - else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) - dst[y * VL_BLOCK_WIDTH + x] = 0; - } - return 0; -} -#endif + struct vlSurface *sfc; + struct pipe_texture template; -static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} + assert(screen); + assert(surface); -static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - dst += VL_BLOCK_HEIGHT * dst_pitch; - - for (; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} + sfc = calloc(1, sizeof(struct vlSurface)); -static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memset - ( - dst + y * dst_pitch, - 0, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} + if (!sfc) + return 1; -static int vlGrabBlocks -( - struct VL_CONTEXT *context, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - enum VL_SAMPLE_TYPE sample_type, - short *blocks -) -{ - struct pipe_surface *tex_surface; - short *texels; - unsigned int tex_pitch; - unsigned int tb, sb = 0; - - assert(context); - assert(blocks); - - tex_surface = context->pipe->screen->get_tex_surface - ( - context->pipe->screen, - context->states.mc.textures[0], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - for (tb = 0; tb < 4; ++tb) - { - if ((coded_block_pattern >> (5 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == VL_FULL_SAMPLE ? 128 : 0); -#endif - - if (dct_type == VL_DCT_FRAME_CODED) - vlGrabFrameCodedBlock - ( - cur_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - else - vlGrabFieldCodedBlock - ( - cur_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); - } - - pipe_surface_unmap(tex_surface); - - /* TODO: Implement 422, 444 */ - for (tb = 0; tb < 2; ++tb) - { - tex_surface = context->pipe->screen->get_tex_surface - ( - context->pipe->screen, - context->states.mc.textures[tb + 1], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - if ((coded_block_pattern >> (1 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == VL_FULL_SAMPLE ? 128 : 0); -#endif - - vlGrabFrameCodedBlock - ( - cur_block, - texels, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels, tex_pitch); - - pipe_surface_unmap(tex_surface); - } - - return 0; -} + sfc->screen = screen; + sfc->width = width; + sfc->height = height; + sfc->format = format; -int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) -{ - struct pipe_context *pipe; - struct pipe_texture template; - struct VL_SURFACE *sfc; - - assert(context); - assert(surface); - - pipe = context->pipe; - - sfc = calloc(1, sizeof(struct VL_SURFACE)); - - sfc->context = context; - sfc->width = vlRoundUpPOT(context->video_width); - sfc->height = vlRoundUpPOT(context->video_height); - sfc->format = context->video_format; - memset(&template, 0, sizeof(struct pipe_texture)); template.target = PIPE_TEXTURE_2D; template.format = PIPE_FORMAT_A8R8G8B8_UNORM; template.last_level = 0; - template.width[0] = sfc->width; - template.height[0] = sfc->height; + template.width[0] = vlRoundUpPOT(sfc->width); + template.height[0] = vlRoundUpPOT(sfc->height); template.depth[0] = 1; template.compressed = 0; pf_get_block(template.format, &template.block); - /* XXX: Needed? */ template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; - - sfc->texture = pipe->screen->texture_create(pipe->screen, &template); - + + sfc->texture = vlGetPipeScreen(screen)->texture_create(vlGetPipeScreen(screen), &template); + *surface = sfc; - + return 0; } -int vlDestroySurface(struct VL_SURFACE *surface) +int vlDestroySurface +( + struct vlSurface *surface +) { assert(surface); + pipe_texture_release(&surface->texture); free(surface); - + return 0; } -int vlRenderIMacroBlock +int vlRenderMacroBlocksMpeg2 ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *surface + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface ) { - struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vs_consts; - - assert(blocks); + assert(batch); assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != VL_FRAME_PICTURE) - return 0; - - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); - - pipe = surface->context->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - surface->context->states.mc.vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - - pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface + + surface->context->render->vlBegin(surface->context->render); + + surface->context->render->vlRenderMacroBlocksMpeg2 ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + surface->context->render, + batch, + surface ); - pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); - pipe->set_sampler_textures(pipe, 3, surface->context->states.mc.textures); - pipe->bind_sampler_states(pipe, 3, (void**)surface->context->states.mc.samplers); - pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); - pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + + surface->context->render->vlEnd(surface->context->render); + return 0; } -int vlRenderPMacroBlock +int vlPutPicture ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - enum VL_MC_TYPE mc_type, - struct VL_MOTION_VECTOR *motion_vector, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *ref_surface, - struct VL_SURFACE *surface + struct vlSurface *surface, + vlNativeDrawable drawable, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type ) { + struct vlCSC *csc; struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); + assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != VL_FRAME_PICTURE) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) - return 0; - - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - + assert(surface->context); + + csc = surface->context->csc; pipe = surface->context->pipe; - - vs_consts = pipe->winsys->buffer_map + + csc->vlResizeFrameBuffer(csc, destw, desth); + + csc->vlBegin(csc); + + csc->vlPutPicture ( - pipe->winsys, - surface->context->states.mc.vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE + csc, + surface, + srcx, + srcy, + srcw, + srch, + destx, + desty, + destw, + desth, + picture_type ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - - if (mc_type == VL_FIELD_MC) - { - vs_consts->denorm.x = (float)surface->width; - vs_consts->denorm.y = (float)surface->height; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->bottom_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->bottom_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[1]); - pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[0]); - pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface + + csc->vlEnd(csc); + + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + bind_pipe_drawable(pipe, drawable); + /* TODO: Need to take destx, desty into consideration */ + pipe->winsys->flush_frontbuffer ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + pipe->winsys, + csc->vlGetFrameBuffer(csc), + pipe->priv ); - pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); - - surface->context->states.mc.textures[3] = ref_surface->texture; - pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); - pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + return 0; } -int vlRenderBMacroBlock +struct vlScreen* vlSurfaceGetScreen ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - enum VL_MC_TYPE mc_type, - struct VL_MOTION_VECTOR *motion_vector, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *past_surface, - struct VL_SURFACE *future_surface, - struct VL_SURFACE *surface + struct vlSurface *surface ) { - struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != VL_FRAME_PICTURE) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) - return 0; - - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - - pipe = surface->context->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - surface->context->states.mc.vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[1].top_field.z = 0.0f; - vs_consts->mb_tc_trans[1].top_field.w = 0.0f; - - if (mc_type == VL_FIELD_MC) - { - vs_consts->denorm.x = (float)surface->width; - vs_consts->denorm.y = (float)surface->height; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].bottom_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].bottom_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].bottom_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].bottom_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[1]); - pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[0]); - pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); - - surface->context->states.mc.textures[3] = past_surface->texture; - surface->context->states.mc.textures[4] = future_surface->texture; - pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); - pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - return 0; + + return surface->screen; } -int vlPutSurface +struct vlContext* vlBindToContext ( - struct VL_SURFACE *surface, - Drawable drawable, - unsigned int srcx, - unsigned int srcy, - unsigned int srcw, - unsigned int srch, - unsigned int destx, - unsigned int desty, - unsigned int destw, - unsigned int desth, - enum VL_PICTURE picture_type + struct vlSurface *surface, + struct vlContext *context ) { - unsigned int create_fb = 0; - struct pipe_context *pipe; - struct VL_CSC_VS_CONSTS *vs_consts; - + struct vlContext *old; + assert(surface); - - pipe = surface->context->pipe; - - if (!surface->context->states.csc.framebuffer.cbufs[0]) - create_fb = 1; - else if - ( - surface->context->states.csc.framebuffer.width != destw || - surface->context->states.csc.framebuffer.height != desth - ) - { - pipe->winsys->surface_release - ( - pipe->winsys, - &surface->context->states.csc.framebuffer.cbufs[0] - ); - - create_fb = 1; - } - - if (create_fb) - { - surface->context->states.csc.viewport.scale[0] = destw; - surface->context->states.csc.viewport.scale[1] = desth; - surface->context->states.csc.viewport.scale[2] = 1; - surface->context->states.csc.viewport.scale[3] = 1; - surface->context->states.csc.viewport.translate[0] = 0; - surface->context->states.csc.viewport.translate[1] = 0; - surface->context->states.csc.viewport.translate[2] = 0; - surface->context->states.csc.viewport.translate[3] = 0; - - surface->context->states.csc.framebuffer.width = destw; - surface->context->states.csc.framebuffer.height = desth; - surface->context->states.csc.framebuffer.cbufs[0] = pipe->winsys->surface_alloc(pipe->winsys); - pipe->winsys->surface_alloc_storage - ( - pipe->winsys, - surface->context->states.csc.framebuffer.cbufs[0], - destw, - desth, - PIPE_FORMAT_A8R8G8B8_UNORM, - /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ - PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, - 0 - ); - } - - vlEndRender(surface->context); - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - surface->context->states.csc.vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->src_scale.x = srcw / (float)surface->width; - vs_consts->src_scale.y = srch / (float)surface->height; - vs_consts->src_scale.z = 1; - vs_consts->src_scale.w = 1; - vs_consts->src_trans.x = srcx / (float)surface->width; - vs_consts->src_trans.y = srcy / (float)surface->height; - vs_consts->src_trans.z = 0; - vs_consts->src_trans.w = 0; - - pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.csc.vs_const_buf.buffer); - - pipe->set_sampler_textures(pipe, 1, &surface->texture); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - bind_pipe_drawable(pipe, drawable); - /* TODO: Need to take destx, desty into consideration */ - pipe->winsys->flush_frontbuffer - ( - pipe->winsys, - surface->context->states.csc.framebuffer.cbufs[0], - pipe->priv - ); - - vlBeginRender(surface->context); - - return 0; -} + old = surface->context; + surface->context = context; + + return old; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.h b/src/gallium/state_trackers/g3dvl/vl_surface.h index 9f56b77e1e..b975e131fa 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.h +++ b/src/gallium/state_trackers/g3dvl/vl_surface.h @@ -1,81 +1,66 @@ #ifndef vl_surface_h #define vl_surface_h -#include #include "vl_types.h" +#ifdef VL_INTERNAL struct pipe_texture; -struct VL_SURFACE +struct vlSurface { - struct VL_CONTEXT *context; + struct vlScreen *screen; + struct vlContext *context; unsigned int width; unsigned int height; - enum VL_FORMAT format; + enum vlFormat format; struct pipe_texture *texture; }; +#endif -int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface); +int vlCreateSurface +( + struct vlScreen *screen, + unsigned int width, + unsigned int height, + enum vlFormat format, + struct vlSurface **surface +); -int vlDestroySurface(struct VL_SURFACE *surface); +int vlDestroySurface +( + struct vlSurface *surface +); -int vlRenderIMacroBlock +int vlRenderMacroBlocksMpeg2 ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *surface + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface ); -int vlRenderPMacroBlock +int vlPutPicture ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - enum VL_MC_TYPE mc_type, - struct VL_MOTION_VECTOR *motion_vector, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *ref_surface, - struct VL_SURFACE *surface + struct vlSurface *surface, + vlNativeDrawable drawable, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type ); -int vlRenderBMacroBlock +struct vlScreen* vlSurfaceGetScreen ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - enum VL_MC_TYPE mc_type, - struct VL_MOTION_VECTOR *motion_vector, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *past_surface, - struct VL_SURFACE *future_surface, - struct VL_SURFACE *surface + struct vlSurface *surface ); -int vlPutSurface +struct vlContext* vlBindToContext ( - struct VL_SURFACE *surface, - Drawable drawable, - unsigned int srcx, - unsigned int srcy, - unsigned int srcw, - unsigned int srch, - unsigned int destx, - unsigned int desty, - unsigned int destw, - unsigned int desth, - enum VL_PICTURE picture_type + struct vlSurface *surface, + struct vlContext *context ); #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h index 4d210c9e0a..504ba8ac81 100644 --- a/src/gallium/state_trackers/g3dvl/vl_types.h +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -1,102 +1,106 @@ #ifndef vl_types_h #define vl_types_h -enum VL_FORMAT -{ - VL_FORMAT_YCBCR_420, - VL_FORMAT_YCBCR_422, - VL_FORMAT_YCBCR_444 -}; +#if 1 /*#ifdef X11*/ +#include -enum VL_PICTURE -{ - VL_TOP_FIELD, - VL_BOTTOM_FIELD, - VL_FRAME_PICTURE -}; +typedef Display* vlNativeDisplay; +typedef Drawable vlNativeDrawable; +#endif + +struct vlDisplay; +struct vlScreen; +struct vlContext; +struct vlSurface; -enum VL_FIELD_ORDER +enum vlProfile { - VL_FIELD_FIRST, - VL_FIELD_SECOND + vlProfileMpeg2Simple, + vlProfileMpeg2Main, + + vlProfileCount }; -enum VL_DCT_TYPE +enum vlEntryPoint { - VL_DCT_FIELD_CODED, - VL_DCT_FRAME_CODED + vlEntryPointIDCT, + vlEntryPointMC, + vlEntryPointCSC, + + vlEntryPointCount }; -enum VL_SAMPLE_TYPE +enum vlFormat { - VL_FULL_SAMPLE, - VL_DIFFERENCE_SAMPLE + vlFormatYCbCr420, + vlFormatYCbCr422, + vlFormatYCbCr444 }; -enum VL_MC_TYPE +enum vlPictureType { - VL_FIELD_MC, - VL_FRAME_MC, - VL_DUAL_PRIME_MC, - VL_16x8_MC = VL_FRAME_MC + vlPictureTypeTopField, + vlPictureTypeBottomField, + vlPictureTypeFrame }; -struct VL_VERTEX4F +enum vlMotionType { - float x, y, z, w; + vlMotionTypeField, + vlMotionTypeFrame, + vlMotionTypeDualPrime, + vlMotionType16x8 }; -struct VL_VERTEX2F +enum vlFieldOrder { - float x, y; + vlFieldOrderFirst, + vlFieldOrderSecond }; -struct VL_TEXCOORD2F +enum vlDCTType { - float s, t; + vlDCTTypeFrameCoded, + vlDCTTypeFieldCoded }; -struct VL_MC_VS_CONSTS +struct vlVertex2f { - struct VL_VERTEX4F scale; - struct VL_VERTEX4F mb_pos_trans; - struct VL_VERTEX4F denorm; - struct - { - struct VL_VERTEX4F top_field; - struct VL_VERTEX4F bottom_field; - } mb_tc_trans[2]; + float x, y; }; -struct VL_MC_FS_CONSTS +struct vlVertex4f { - struct VL_VERTEX4F multiplier; - struct VL_VERTEX4F bias; - struct VL_VERTEX4F y_divider; + float x, y, z, w; }; -struct VL_CSC_VS_CONSTS +enum vlMacroBlockType { - struct VL_VERTEX4F src_scale; - struct VL_VERTEX4F src_trans; + vlMacroBlockTypeIntra, + vlMacroBlockTypeFwdPredicted, + vlMacroBlockTypeBkwdPredicted, + vlMacroBlockTypeBiPredicted }; -struct VL_CSC_FS_CONSTS +struct vlMpeg2MacroBlock { - struct VL_VERTEX4F bias; - float matrix[16]; + unsigned int mbx, mby; + enum vlMacroBlockType mb_type; + enum vlMotionType mo_type; + enum vlDCTType dct_type; + int PMV[2][2][2]; + unsigned int cbp; + short *blocks; }; -struct VL_MOTION_VECTOR +struct vlMpeg2MacroBlockBatch { - struct - { - int x, y; - } top_field, bottom_field; + struct vlSurface *past_surface; + struct vlSurface *future_surface; + enum vlPictureType picture_type; + enum vlFieldOrder field_order; + unsigned int num_macroblocks; + struct vlMpeg2MacroBlock *macroblocks; }; -struct VL_CONTEXT; -struct VL_SURFACE; - #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_util.c b/src/gallium/state_trackers/g3dvl/vl_util.c index 2421ae2210..50aa9af66f 100644 --- a/src/gallium/state_trackers/g3dvl/vl_util.c +++ b/src/gallium/state_trackers/g3dvl/vl_util.c @@ -4,14 +4,13 @@ unsigned int vlRoundUpPOT(unsigned int x) { unsigned int i; - + assert(x > 0); - + --x; - + for (i = 1; i < sizeof(unsigned int) * 8; i <<= 1) x |= x >> i; - + return x + 1; } - diff --git a/src/gallium/state_trackers/g3dvl/vl_util.h b/src/gallium/state_trackers/g3dvl/vl_util.h index e4b72c4f87..bc98e79df4 100644 --- a/src/gallium/state_trackers/g3dvl/vl_util.h +++ b/src/gallium/state_trackers/g3dvl/vl_util.h @@ -4,4 +4,3 @@ unsigned int vlRoundUpPOT(unsigned int x); #endif - diff --git a/src/libXvMC/block.c b/src/libXvMC/block.c index deca305bdc..328b035576 100644 --- a/src/libXvMC/block.c +++ b/src/libXvMC/block.c @@ -2,83 +2,78 @@ #include #include #include +#include +#include #include -/* - * XvMC defines 64 element blocks (8x8 elements). - * Elements are 8 bits when they represent color values, - * 9 bits when they reprecent DCT coefficients, we - * store them in 2 bytes in either case. - */ #define BLOCK_SIZE (64 * 2) Status XvMCCreateBlocks(Display *display, XvMCContext *context, unsigned int num_blocks, XvMCBlockArray *blocks) { - struct vl_context *vl_ctx; - + struct vlContext *vl_ctx; + assert(display); - + if (!context) return XvMCBadContext; if (num_blocks == 0) return BadValue; - + assert(blocks); - + vl_ctx = context->privData; - assert(display == vl_ctx->display); + assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx)))); blocks->context_id = context->context_id; blocks->num_blocks = num_blocks; blocks->blocks = malloc(BLOCK_SIZE * num_blocks); /* Since we don't have a VL type for blocks, set privData to the display so we can catch mismatches */ blocks->privData = display; - + return Success; } Status XvMCDestroyBlocks(Display *display, XvMCBlockArray *blocks) -{ +{ assert(display); assert(blocks); assert(display == blocks->privData); free(blocks->blocks); - + return Success; } Status XvMCCreateMacroBlocks(Display *display, XvMCContext *context, unsigned int num_blocks, XvMCMacroBlockArray *blocks) { - struct vl_context *vl_ctx; - + struct vlContext *vl_ctx; + assert(display); - + if (!context) return XvMCBadContext; if (num_blocks == 0) return BadValue; - + assert(blocks); - + vl_ctx = context->privData; - assert(display == vl_ctx->display); - + assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx)))); + blocks->context_id = context->context_id; blocks->num_blocks = num_blocks; blocks->macro_blocks = malloc(sizeof(XvMCMacroBlock) * num_blocks); /* Since we don't have a VL type for blocks, set privData to the display so we can catch mismatches */ blocks->privData = display; - + return Success; } Status XvMCDestroyMacroBlocks(Display *display, XvMCMacroBlockArray *blocks) -{ +{ assert(display); assert(blocks); assert(display == blocks->privData); free(blocks->macro_blocks); - + return Success; } - diff --git a/src/libXvMC/context.c b/src/libXvMC/context.c index 9cf654d6bb..760e012d1a 100644 --- a/src/libXvMC/context.c +++ b/src/libXvMC/context.c @@ -1,10 +1,23 @@ #include #include #include +#include +#include +#include #include #include -static Status Validate(Display *display, XvPortID port, int surface_type_id, unsigned int width, unsigned int height, int flags, int *chroma_format) +static Status Validate +( + Display *display, + XvPortID port, + int surface_type_id, + unsigned int width, + unsigned int height, + int flags, + int *chroma_format, + int *mc_type +) { unsigned int found_port = 0; unsigned int found_surface = 0; @@ -14,13 +27,13 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns unsigned int max_width, max_height; Status ret; unsigned int i, j, k; - + assert(display && chroma_format); - + ret = XvQueryAdaptors(display, XDefaultRootWindow(display), &num_adaptors, &adaptor_info); if (ret != Success) return ret; - + /* Scan through all adaptors looking for this port and surface */ for (i = 0; i < num_adaptors && !found_port; ++i) { @@ -31,10 +44,10 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns if (adaptor_info[i].base_id + j == port) { XvMCSurfaceInfo *surface_info; - + found_port = 1; surface_info = XvMCListSurfaceTypes(display, adaptor_info[i].base_id, &num_types); - + if (surface_info) { for (k = 0; k < num_types && !found_surface; ++k) @@ -45,9 +58,10 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns max_width = surface_info[k].max_width; max_height = surface_info[k].max_height; *chroma_format = surface_info[k].chroma_format; + *mc_type = surface_info[k].mc_type; } } - + XFree(surface_info); } else @@ -58,9 +72,9 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns } } } - + XvFreeAdaptorInfo(adaptor_info); - + if (!found_port) return XvBadPort; if (!found_surface) @@ -69,60 +83,86 @@ static Status Validate(Display *display, XvPortID port, int surface_type_id, uns return BadValue; if (flags != XVMC_DIRECT && flags != 0) return BadValue; - + return Success; } -static enum VL_FORMAT FormatToVL(int xvmc_format) +static enum vlProfile ProfileToVL(int xvmc_profile) +{ + if (xvmc_profile & XVMC_MPEG_1) + assert(0); + else if (xvmc_profile & XVMC_MPEG_2) + return vlProfileMpeg2Main; + else if (xvmc_profile & XVMC_H263) + assert(0); + else if (xvmc_profile & XVMC_MPEG_4) + assert(0); + else + assert(0); + + return -1; +} + +static enum vlEntryPoint EntryToVL(int xvmc_entry) +{ + return xvmc_entry & XVMC_IDCT ? vlEntryPointIDCT : vlEntryPointMC; +} + +static enum vlFormat FormatToVL(int xvmc_format) { - enum VL_FORMAT vl_format; - switch (xvmc_format) { case XVMC_CHROMA_FORMAT_420: - { - vl_format = VL_FORMAT_YCBCR_420; - break; - } + return vlFormatYCbCr420; case XVMC_CHROMA_FORMAT_422: - { - vl_format = VL_FORMAT_YCBCR_422; - break; - } + return vlFormatYCbCr422; case XVMC_CHROMA_FORMAT_444: - { - vl_format = VL_FORMAT_YCBCR_444; - break; - } + return vlFormatYCbCr444; default: assert(0); } - - return vl_format; + + return -1; } Status XvMCCreateContext(Display *display, XvPortID port, int surface_type_id, int width, int height, int flags, XvMCContext *context) { int chroma_format; + int mc_type; Status ret; - struct VL_CONTEXT *vl_ctx; + struct vlDisplay *vl_dpy; + struct vlScreen *vl_scrn; + struct vlContext *vl_ctx; struct pipe_context *pipe; - + assert(display); - + if (!context) return XvMCBadContext; - - ret = Validate(display, port, surface_type_id, width, height, flags, &chroma_format); + + ret = Validate(display, port, surface_type_id, width, height, flags, &chroma_format, &mc_type); if (ret != Success) return ret; - + + /* XXX: Assumes default screen, should check which screen port is on */ pipe = create_pipe_context(display, XDefaultScreen(display)); - + assert(pipe); - - vlCreateContext(display, pipe, width, height, FormatToVL(chroma_format), &vl_ctx); - + + vlCreateDisplay(display, &vl_dpy); + vlCreateScreen(vl_dpy, XDefaultScreen(display), pipe->screen, &vl_scrn); + vlCreateContext + ( + vl_scrn, + pipe, + width, + height, + FormatToVL(chroma_format), + ProfileToVL(mc_type), + EntryToVL(mc_type), + &vl_ctx + ); + context->context_id = XAllocID(display); context->surface_type_id = surface_type_id; context->width = width; @@ -130,89 +170,27 @@ Status XvMCCreateContext(Display *display, XvPortID port, int surface_type_id, i context->flags = flags; context->port = port; context->privData = vl_ctx; - + return Success; } Status XvMCDestroyContext(Display *display, XvMCContext *context) { - struct VL_CONTEXT *vl_ctx; + struct vlContext *vl_ctx; struct pipe_context *pipe; - + assert(display); - + if (!context) return XvMCBadContext; - + vl_ctx = context->privData; - - assert(display == vl_ctx->display); - - pipe = vl_ctx->pipe; - vlDestroyContext(vl_ctx); - destroy_pipe_context(pipe); - - return Success; -} -/* XXX: The following are here temporarily, need to be implemented in the DDX driver */ -/* TODO: Figure out which of these need to be in DDX, which are better off in DDX, which can stay */ + assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx)))); -Bool XvMCQueryExtension(Display *display, int *event_base, int *err_base) -{ - *event_base = 0; - *err_base = 0; - - return True; -} + pipe = vlGetPipeContext(vl_ctx); + vlDestroyContext(vl_ctx); + destroy_pipe_context(pipe); -Status XvMCQueryVersion(Display *display, int *major, int *minor) -{ - *major = 1; - *minor = 0; - return Success; } - -XvMCSurfaceInfo* XvMCListSurfaceTypes(Display *display, XvPortID port, int *num) -{ - XvMCSurfaceInfo *surface_info = calloc(1, sizeof(XvMCSurfaceInfo)); - - *num = 1; - - surface_info->chroma_format = XVMC_CHROMA_FORMAT_420; - surface_info->max_width = 2048; - surface_info->max_height = 2048; - surface_info->subpicture_max_width = 2048; - surface_info->subpicture_max_height = 2048; - surface_info->mc_type = XVMC_IDCT | XVMC_MPEG_2; - surface_info->surface_type_id = 123; /* FIXME: XAllocID(display)*/; - surface_info->flags = XVMC_INTRA_UNSIGNED | XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE; - - return surface_info; -} - -XvImageFormatValues* XvMCListSubpictureTypes(Display* display, XvPortID port, int surface_type_id, int *count_return) -{ - XvImageFormatValues *image_formats = calloc(1, sizeof(XvImageFormatValues)); - - *count_return = 1; - - image_formats[0].id = 123; - image_formats[0].type = XvRGB; - image_formats[0].byte_order = LSBFirst; - image_formats[0].bits_per_pixel = 8; - image_formats[0].format = XvPacked; - image_formats[0].num_planes = 1; - image_formats[0].depth = 8; - image_formats[0].red_mask = 0x0000FF; - image_formats[0].green_mask = 0x00FF00; - image_formats[0].blue_mask = 0xFF0000; - image_formats[0].component_order[0] = 'R'; - image_formats[0].component_order[0] = 'G'; - image_formats[0].component_order[0] = 'B'; - image_formats[0].scanline_order = XvTopToBottom; - - return image_formats; -} - diff --git a/src/libXvMC/surface.c b/src/libXvMC/surface.c index 1c07220e84..038befc297 100644 --- a/src/libXvMC/surface.c +++ b/src/libXvMC/surface.c @@ -1,90 +1,96 @@ #include #include #include +#include +#include #include #include +#include -static enum VL_PICTURE PictureToVL(int xvmc_pic) +static enum vlMacroBlockType TypeToVL(int xvmc_mb_type) +{ + if (xvmc_mb_type & XVMC_MB_TYPE_INTRA) + return vlMacroBlockTypeIntra; + if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == XVMC_MB_TYPE_MOTION_FORWARD) + return vlMacroBlockTypeFwdPredicted; + if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == XVMC_MB_TYPE_MOTION_BACKWARD) + return vlMacroBlockTypeBkwdPredicted; + if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) + return vlMacroBlockTypeBiPredicted; + + assert(0); + + return -1; +} + +static enum vlPictureType PictureToVL(int xvmc_pic) { - enum VL_PICTURE vl_pic; - switch (xvmc_pic) { case XVMC_TOP_FIELD: - { - vl_pic = VL_TOP_FIELD; - break; - } + return vlPictureTypeTopField; case XVMC_BOTTOM_FIELD: - { - vl_pic = VL_BOTTOM_FIELD; - break; - } + return vlPictureTypeBottomField; case XVMC_FRAME_PICTURE: - { - vl_pic = VL_FRAME_PICTURE; - break; - } + return vlPictureTypeFrame; default: assert(0); } - - return vl_pic; + + return -1; } -static enum VL_MC_TYPE MotionToVL(int xvmc_motion_type) +static enum vlMotionType MotionToVL(int xvmc_motion_type) { - enum VL_MC_TYPE vl_mc_type; - switch (xvmc_motion_type) { case XVMC_PREDICTION_FRAME: - { - vl_mc_type = VL_FRAME_MC; - break; - } + return vlMotionTypeFrame; case XVMC_PREDICTION_FIELD: - { - vl_mc_type = VL_FIELD_MC; - break; - } + return vlMotionTypeField; case XVMC_PREDICTION_DUAL_PRIME: - { - vl_mc_type = VL_DUAL_PRIME_MC; - break; - } + return vlMotionTypeDualPrime; default: assert(0); } - - return vl_mc_type; + + return -1; } Status XvMCCreateSurface(Display *display, XvMCContext *context, XvMCSurface *surface) { - struct VL_CONTEXT *vl_ctx; - struct VL_SURFACE *vl_sfc; - + struct vlContext *vl_ctx; + struct vlSurface *vl_sfc; + assert(display); - + if (!context) return XvMCBadContext; if (!surface) return XvMCBadSurface; - + vl_ctx = context->privData; - - assert(display == vl_ctx->display); - - vlCreateSurface(vl_ctx, &vl_sfc); - + + assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx)))); + + vlCreateSurface + ( + vlContextGetScreen(vl_ctx), + context->width, + context->height, + vlGetPictureFormat(vl_ctx), + &vl_sfc + ); + + vlBindToContext(vl_sfc, vl_ctx); + surface->surface_id = XAllocID(display); surface->context_id = context->context_id; surface->surface_type_id = context->surface_type_id; surface->width = context->width; surface->height = context->height; surface->privData = vl_sfc; - + return Success; } @@ -103,19 +109,21 @@ Status XvMCRenderSurface XvMCBlockArray *blocks ) { - struct VL_CONTEXT *vl_ctx; - struct VL_SURFACE *target_vl_surface; - struct VL_SURFACE *past_vl_surface; - struct VL_SURFACE *future_vl_surface; - unsigned int i; - + struct vlContext *vl_ctx; + struct vlSurface *target_vl_surface; + struct vlSurface *past_vl_surface; + struct vlSurface *future_vl_surface; + struct vlMpeg2MacroBlockBatch batch; + struct vlMpeg2MacroBlock vl_macroblocks[num_macroblocks]; + unsigned int i; + assert(display); - + if (!context) return XvMCBadContext; if (!target_surface) return XvMCBadSurface; - + if ( picture_structure != XVMC_TOP_FIELD && @@ -125,178 +133,94 @@ Status XvMCRenderSurface return BadValue; if (future_surface && !past_surface) return BadMatch; - + vl_ctx = context->privData; - - assert(display == vl_ctx->display); - + + assert(display == vlGetNativeDisplay(vlGetDisplay(vlContextGetScreen(vl_ctx)))); + target_vl_surface = target_surface->privData; past_vl_surface = past_surface ? past_surface->privData : NULL; future_vl_surface = future_surface ? future_surface->privData : NULL; - - assert(vl_ctx == target_vl_surface->context); - assert(!past_vl_surface || vl_ctx == past_vl_surface->context); - assert(!future_vl_surface || vl_ctx == future_vl_surface->context); - + + assert(context->context_id == target_surface->context_id); + assert(!past_surface || context->context_id == past_surface->context_id); + assert(!future_surface || context->context_id == future_surface->context_id); + assert(macroblocks); assert(blocks); - + assert(macroblocks->context_id == context->context_id); assert(blocks->context_id == context->context_id); - + assert(flags == 0 || flags == XVMC_SECOND_FIELD); - - /* TODO: Batch macroblocks by type (I,P,B) */ - - for (i = first_macroblock; i < first_macroblock + num_macroblocks; ++i) - if (macroblocks->macro_blocks[i].macroblock_type & XVMC_MB_TYPE_INTRA) - vlRenderIMacroBlock - ( - PictureToVL(picture_structure), - flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST, - macroblocks->macro_blocks[i].x, - macroblocks->macro_blocks[i].y, - macroblocks->macro_blocks[i].coded_block_pattern, - macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED, - blocks->blocks + (macroblocks->macro_blocks[i].index * 64), - target_vl_surface - ); - else if - ( - (macroblocks->macro_blocks[i].macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) - == XVMC_MB_TYPE_MOTION_FORWARD - ) - { - struct VL_MOTION_VECTOR motion_vector = - { - { - macroblocks->macro_blocks[i].PMV[0][0][0], - macroblocks->macro_blocks[i].PMV[0][0][1], - }, - { - macroblocks->macro_blocks[i].PMV[1][0][0], - macroblocks->macro_blocks[i].PMV[1][0][1], - } - }; - - vlRenderPMacroBlock - ( - PictureToVL(picture_structure), - flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST, - macroblocks->macro_blocks[i].x, - macroblocks->macro_blocks[i].y, - MotionToVL(macroblocks->macro_blocks[i].motion_type), - &motion_vector, - macroblocks->macro_blocks[i].coded_block_pattern, - macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED, - blocks->blocks + (macroblocks->macro_blocks[i].index * 64), - past_vl_surface, - target_vl_surface - ); - } - else if - ( - (macroblocks->macro_blocks[i].macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) - == XVMC_MB_TYPE_MOTION_BACKWARD - ) - { - struct VL_MOTION_VECTOR motion_vector = - { - { - macroblocks->macro_blocks[i].PMV[0][1][0], - macroblocks->macro_blocks[i].PMV[0][1][1], - }, - { - macroblocks->macro_blocks[i].PMV[1][1][0], - macroblocks->macro_blocks[i].PMV[1][1][1], - } - }; - - vlRenderPMacroBlock - ( - PictureToVL(picture_structure), - flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST, - macroblocks->macro_blocks[i].x, - macroblocks->macro_blocks[i].y, - MotionToVL(macroblocks->macro_blocks[i].motion_type), - &motion_vector, - macroblocks->macro_blocks[i].coded_block_pattern, - macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED, - blocks->blocks + (macroblocks->macro_blocks[i].index * 64), - future_vl_surface, - target_vl_surface - ); - } - else if - ( - (macroblocks->macro_blocks[i].macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) - == (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD) - ) - { - struct VL_MOTION_VECTOR motion_vector[2] = - { - { - { - macroblocks->macro_blocks[i].PMV[0][0][0], - macroblocks->macro_blocks[i].PMV[0][0][1], - }, - { - macroblocks->macro_blocks[i].PMV[1][0][0], - macroblocks->macro_blocks[i].PMV[1][0][1], - } - }, - { - { - macroblocks->macro_blocks[i].PMV[0][1][0], - macroblocks->macro_blocks[i].PMV[0][1][1], - }, - { - macroblocks->macro_blocks[i].PMV[1][1][0], - macroblocks->macro_blocks[i].PMV[1][1][1], - } - } - }; - - vlRenderBMacroBlock - ( - PictureToVL(picture_structure), - flags == XVMC_SECOND_FIELD ? VL_FIELD_SECOND : VL_FIELD_FIRST, - macroblocks->macro_blocks[i].x, - macroblocks->macro_blocks[i].y, - MotionToVL(macroblocks->macro_blocks[i].motion_type), - motion_vector, - macroblocks->macro_blocks[i].coded_block_pattern, - macroblocks->macro_blocks[i].dct_type == XVMC_DCT_TYPE_FIELD ? VL_DCT_FIELD_CODED : VL_DCT_FRAME_CODED, - blocks->blocks + (macroblocks->macro_blocks[i].index * 64), - past_vl_surface, - future_vl_surface, - target_vl_surface - ); - } - else - fprintf(stderr, "Unrecognized macroblock\n"); - + + batch.past_surface = past_vl_surface; + batch.future_surface = future_vl_surface; + batch.picture_type = PictureToVL(picture_structure); + batch.field_order = flags & XVMC_SECOND_FIELD ? vlFieldOrderSecond : vlFieldOrderFirst; + batch.num_macroblocks = num_macroblocks; + batch.macroblocks = vl_macroblocks; + + for (i = 0; i < num_macroblocks; ++i) + { + unsigned int j = first_macroblock + i; + + unsigned int k, l, m; + + batch.macroblocks[i].mbx = macroblocks->macro_blocks[j].x; + batch.macroblocks[i].mby = macroblocks->macro_blocks[j].y; + batch.macroblocks[i].mb_type = TypeToVL(macroblocks->macro_blocks[j].macroblock_type); + if (batch.macroblocks[i].mb_type != vlMacroBlockTypeIntra) + batch.macroblocks[i].mo_type = MotionToVL(macroblocks->macro_blocks[j].motion_type); + batch.macroblocks[i].dct_type = macroblocks->macro_blocks[j].dct_type & XVMC_DCT_TYPE_FIELD ? vlDCTTypeFieldCoded : vlDCTTypeFrameCoded; + + for (k = 0; k < 2; ++k) + for (l = 0; l < 2; ++l) + for (m = 0; m < 2; ++m) + batch.macroblocks[i].PMV[k][l][m] = macroblocks->macro_blocks[j].PMV[k][l][m]; + + batch.macroblocks[i].cbp = macroblocks->macro_blocks[j].coded_block_pattern; + batch.macroblocks[i].blocks = blocks->blocks + (macroblocks->macro_blocks[j].index * 64); + } + + vlRenderMacroBlocksMpeg2(&batch, target_vl_surface); + return Success; } Status XvMCFlushSurface(Display *display, XvMCSurface *surface) { + struct vlSurface *vl_sfc; + assert(display); - + if (!surface) return XvMCBadSurface; - - /* TODO: Check display & surface match */ + + vl_sfc = surface->privData; + + assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc)))); + + /* TODO */ + return Success; } Status XvMCSyncSurface(Display *display, XvMCSurface *surface) { + struct vlSurface *vl_sfc; + assert(display); - + if (!surface) return XvMCBadSurface; - + + vl_sfc = surface->privData; + + assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc)))); + + /* TODO */ + return Success; } @@ -321,92 +245,85 @@ Status XvMCPutSurface unsigned int width, height; unsigned int border_width; unsigned int depth; - struct VL_SURFACE *vl_sfc; - + struct vlSurface *vl_sfc; + assert(display); - + if (!surface) return XvMCBadSurface; - + if (XGetGeometry(display, drawable, &root, &x, &y, &width, &height, &border_width, &depth) == BadDrawable) return BadDrawable; - + assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE); - + /* TODO: Correct for negative srcx,srcy & destx,desty by clipping */ - + assert(srcx + srcw - 1 < surface->width); assert(srcy + srch - 1 < surface->height); assert(destx + destw - 1 < width); assert(desty + desth - 1 < height); - + vl_sfc = surface->privData; - - vlPutSurface(vl_sfc, drawable, srcx, srcy, srcw, srch, destx, desty, destw, desth, PictureToVL(flags)); - + + vlPutPicture(vl_sfc, drawable, srcx, srcy, srcw, srch, destx, desty, destw, desth, PictureToVL(flags)); + return Success; } Status XvMCGetSurfaceStatus(Display *display, XvMCSurface *surface, int *status) { - struct VL_CONTEXT *vl_ctx; - struct VL_SURFACE *vl_sfc; - + struct vlSurface *vl_sfc; + assert(display); - + if (!surface) return XvMCBadSurface; - + assert(status); - + vl_sfc = surface->privData; - vl_ctx = vl_sfc->context; - - assert(display == vl_ctx->display); - + + assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc)))); + /* TODO */ *status = 0; - + return Success; } Status XvMCDestroySurface(Display *display, XvMCSurface *surface) { - struct VL_CONTEXT *vl_ctx; - struct VL_SURFACE *vl_sfc; - + struct vlSurface *vl_sfc; + assert(display); - + if (!surface) return XvMCBadSurface; - + vl_sfc = surface->privData; - vl_ctx = vl_sfc->context; - - assert(display == vl_ctx->display); - + + assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc)))); + vlDestroySurface(vl_sfc); - + return Success; } Status XvMCHideSurface(Display *display, XvMCSurface *surface) { - struct VL_CONTEXT *vl_ctx; - struct VL_SURFACE *vl_sfc; - + struct vlSurface *vl_sfc; + assert(display); - + if (!surface) return XvMCBadSurface; - + vl_sfc = surface->privData; - vl_ctx = vl_sfc->context; - - assert(display == vl_ctx->display); - + + assert(display == vlGetNativeDisplay(vlGetDisplay(vlSurfaceGetScreen(vl_sfc)))); + /* No op, only for overlaid rendering */ - + return Success; } - -- cgit v1.2.3 From 7f100d04ddacf9f6517c9aff1e2de5257eb77fb0 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 18 Aug 2008 00:04:29 -0400 Subject: g3dvl: Use rotating buffers to avoid waiting for map(). --- src/driclient/src/Makefile | 2 +- src/gallium/state_trackers/g3dvl/Makefile | 4 +- src/gallium/state_trackers/g3dvl/vl_data.c | 76 ----------------------- src/gallium/state_trackers/g3dvl/vl_data.h | 27 -------- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c | 46 +++++++++----- src/gallium/winsys/g3dvl/nouveau/Makefile | 2 +- src/libXvMC/Makefile | 2 +- 7 files changed, 37 insertions(+), 122 deletions(-) delete mode 100644 src/gallium/state_trackers/g3dvl/vl_data.c delete mode 100644 src/gallium/state_trackers/g3dvl/vl_data.h (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/driclient/src/Makefile b/src/driclient/src/Makefile index 89ffb84477..5d913ad5fa 100644 --- a/src/driclient/src/Makefile +++ b/src/driclient/src/Makefile @@ -2,7 +2,7 @@ TARGET = libdriclient.a OBJECTS = driclient.o XF86dri.o DRMDIR ?= /usr -CFLAGS += -g -Wall -fPIC -Werror -I../include -I${DRMDIR}/include -I${DRMDIR}/include/drm +CFLAGS += -g -Wall -fPIC -I../include -I${DRMDIR}/include -I${DRMDIR}/include/drm ############################################# diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 9995c554ab..bd77c62bc5 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,9 +1,9 @@ TARGET = libg3dvl.a -OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_data.o vl_shader_build.o vl_util.o vl_basic_csc.o \ +OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \ vl_r16snorm_mc.o GALLIUMDIR = ../.. -CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl +CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl ############################################# diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c deleted file mode 100644 index f2476dbf1e..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ /dev/null @@ -1,76 +0,0 @@ -#include "vl_data.h" - -/* - * Represents 8 triangles (4 quads, 1 per block) in noormalized coords - * that render a macroblock. - * Need to be scaled to cover mbW*mbH macroblock pixels and translated into - * position on target surface. - */ -const struct vlVertex2f macroblock_verts[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, - {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, - - {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, - {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, - - {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 4 luma blocks arranged - * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. - */ -const struct vlVertex2f macroblock_luma_texcoords[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, - - {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, - {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, - - {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, - {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 1 chroma block. - * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. - */ -const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 2 chroma blocks arranged - * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. - * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. - */ -const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 4 chroma blocks. - * Same case as 4 luma blocks. - */ -const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; - -/* - * Used when rendering P and B macroblocks, multiplier is applied to the A channel, - * which is then added to the L channel, then the bias is subtracted from that to - * get back the differential. The differential is then added to the samples from the - * reference surface(s). - */ -#if 0 -const struct VL_MC_FS_CONSTS vl_mc_fs_consts = -{ - {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, - {0.5f, 2.0f, 0.0f, 0.0f} -}; -#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_data.h b/src/gallium/state_trackers/g3dvl/vl_data.h deleted file mode 100644 index f0de2e976c..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_data.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef vl_data_h -#define vl_data_h - -#include "vl_types.h" - -/* TODO: Needs to be rolled into the appropriate stage */ - -extern const struct vlVertex2f macroblock_verts[24]; -extern const struct vlVertex2f macroblock_luma_texcoords[24]; -extern const struct vlVertex2f *macroblock_chroma_420_texcoords; -extern const struct vlVertex2f *macroblock_chroma_422_texcoords; -extern const struct vlVertex2f *macroblock_chroma_444_texcoords; - -extern const struct vlVertex2f surface_verts[4]; -extern const struct vlVertex2f *surface_texcoords; - -/* -extern const struct VL_MC_FS_CONSTS vl_mc_fs_consts; - -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity; -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601; -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full; -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709; -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full; -*/ - -#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c index 4fae224431..80b09a6d1d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c @@ -15,6 +15,8 @@ #include "vl_types.h" #include "vl_defs.h" +#define NUM_BUFS 4 /* Number of rotating buffers to use */ + struct vlVertexShaderConsts { /*struct vlVertex4f scale; @@ -41,12 +43,13 @@ struct vlR16SnormMC unsigned int video_width, video_height; enum vlFormat video_format; + unsigned int cur_buf; struct pipe_context *pipe; struct pipe_viewport_state viewport; struct pipe_framebuffer_state render_target; struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[5]; + struct pipe_texture *textures[NUM_BUFS][5]; void *i_vs, *p_vs[2], *b_vs[2]; void *i_fs, *p_fs[2], *b_fs[2]; struct pipe_vertex_buffer vertex_bufs[3]; @@ -230,7 +233,7 @@ static int vlGrabBlocks tex_surface = mc->pipe->screen->get_tex_surface ( mc->pipe->screen, - mc->textures[0], + mc->textures[mc->cur_buf % NUM_BUFS][0], 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE ); @@ -276,7 +279,7 @@ static int vlGrabBlocks tex_surface = mc->pipe->screen->get_tex_surface ( mc->pipe->screen, - mc->textures[tb + 1], + mc->textures[mc->cur_buf % NUM_BUFS][tb + 1], 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE ); @@ -361,12 +364,14 @@ int vlRenderIMacroBlock 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ); pipe->set_framebuffer_state(pipe, &mc->render_target); - pipe->set_sampler_textures(pipe, 3, mc->textures); + pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUFS]); pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); pipe->bind_vs_state(pipe, mc->i_vs); pipe->bind_fs_state(pipe, mc->i_fs); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + mc->cur_buf++; return 0; } @@ -458,11 +463,13 @@ int vlRenderPMacroBlock ); pipe->set_framebuffer_state(pipe, &mc->render_target); - mc->textures[3] = ref_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures); + mc->textures[mc->cur_buf % NUM_BUFS][3] = ref_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUFS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + mc->cur_buf++; return 0; } @@ -567,12 +574,14 @@ int vlRenderBMacroBlock ); pipe->set_framebuffer_state(pipe, &mc->render_target); - mc->textures[3] = past_surface->texture; - mc->textures[4] = future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures); + mc->textures[mc->cur_buf % NUM_BUFS][3] = past_surface->texture; + mc->textures[mc->cur_buf % NUM_BUFS][4] = future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUFS]); pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + mc->cur_buf++; return 0; } @@ -724,8 +733,12 @@ int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[i].buffer); /* Textures 3 & 4 are not created directly, no need to release them here */ - for (i = 0; i < 3; ++i) - pipe_texture_release(&mc->textures[i]); + for (i = 0; i < NUM_BUFS; ++i) + { + pipe_texture_release(&mc->textures[i][0]); + pipe_texture_release(&mc->textures[i][1]); + pipe_texture_release(&mc->textures[i][2]); + } pipe->delete_vs_state(pipe, mc->i_vs); pipe->delete_fs_state(pipe, mc->i_fs); @@ -2252,7 +2265,8 @@ static int vlInit template.compressed = 0; pf_get_block(template.format, &template.block); - mc->textures[0] = pipe->screen->texture_create(pipe->screen, &template); + for (i = 0; i < NUM_BUFS; ++i) + mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); if (mc->video_format == vlFormatYCbCr420) template.height[0] = 8; @@ -2263,8 +2277,11 @@ static int vlInit else assert(0); - mc->textures[1] = pipe->screen->texture_create(pipe->screen, &template); - mc->textures[2] = pipe->screen->texture_create(pipe->screen, &template); + for (i = 0; i < NUM_BUFS; ++i) + { + mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); + mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); + } /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ @@ -2306,6 +2323,7 @@ int vlCreateR16SNormMC mc->pipe = pipe; mc->video_width = video_width; mc->video_height = video_height; + mc->cur_buf = 0; vlInit(mc); diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile index 2861bd7db4..7fa29d2f5f 100644 --- a/src/gallium/winsys/g3dvl/nouveau/Makefile +++ b/src/gallium/winsys/g3dvl/nouveau/Makefile @@ -9,7 +9,7 @@ OBJECTS = nouveau_bo.o nouveau_fence.o nouveau_swapbuffers.o nouveau_channel.o nouveau_pushbuf.o nouveau_resource.o nouveau_screen.o nv04_surface.o \ nv50_surface.o #nouveau_winsys_softpipe.o -CFLAGS += -g -Wall -Werror -fPIC \ +CFLAGS += -g -Wall -fPIC \ -I${GALLIUMDIR}/include \ -I${GALLIUMDIR}/winsys/g3dvl \ -I${DRMDIR}/include \ diff --git a/src/libXvMC/Makefile b/src/libXvMC/Makefile index c154e17dc3..b72bb16efb 100644 --- a/src/libXvMC/Makefile +++ b/src/libXvMC/Makefile @@ -8,7 +8,7 @@ ifeq (${DRIVER}, softpipe) OBJECTS += ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o endif -CFLAGS += -g -fPIC -Wall -Werror \ +CFLAGS += -g -fPIC -Wall \ -I${GALLIUMDIR}/state_trackers/g3dvl \ -I${GALLIUMDIR}/winsys/g3dvl \ -I${GALLIUMDIR}/include \ -- cgit v1.2.3 From 4d9d192672508eaa9b2a70f84e933f11108bf09f Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 28 Aug 2008 23:25:13 -0400 Subject: g3dvl: Buffer the entire frame before rendering. --- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 4 +- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c | 45 +- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h | 4 +- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 2341 ++++++++++++++++++++ .../state_trackers/g3dvl/vl_r16snorm_mc_buf.h | 18 + src/gallium/state_trackers/g3dvl/vl_render.h | 5 + src/gallium/state_trackers/g3dvl/vl_surface.c | 2 + 8 files changed, 2400 insertions(+), 21 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index bd77c62bc5..4f7a953484 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,6 +1,6 @@ TARGET = libg3dvl.a OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \ - vl_r16snorm_mc.o + vl_r16snorm_mc.o vl_r16snorm_mc_buf.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 56d360c05b..fe107e406d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -6,6 +6,7 @@ #include #include "vl_render.h" #include "vl_r16snorm_mc.h" +#include "vl_r16snorm_mc_buf.h" #include "vl_csc.h" #include "vl_basic_csc.h" @@ -126,7 +127,8 @@ int vlCreateContext vlInitCommon(ctx); - vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render); + /*vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render);*/ + vlCreateR16SNormBufferedMC(pipe, picture_width, picture_height, picture_format, &ctx->render); vlCreateBasicCSC(pipe, &ctx->csc); *context = ctx; diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c index 80b09a6d1d..3272220ef8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c @@ -57,7 +57,7 @@ struct vlR16SnormMC struct pipe_constant_buffer vs_const_buf, fs_const_buf; }; -int vlBegin +static int vlBegin ( struct vlRender *render ) @@ -312,7 +312,7 @@ static int vlGrabBlocks return 0; } -int vlRenderIMacroBlock +static int vlRenderIMacroBlock ( struct vlR16SnormMC *mc, enum vlPictureType picture_type, @@ -370,13 +370,13 @@ int vlRenderIMacroBlock pipe->bind_fs_state(pipe, mc->i_fs); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + mc->cur_buf++; return 0; } -int vlRenderPMacroBlock +static int vlRenderPMacroBlock ( struct vlR16SnormMC *mc, enum vlPictureType picture_type, @@ -468,13 +468,13 @@ int vlRenderPMacroBlock pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + mc->cur_buf++; return 0; } -int vlRenderBMacroBlock +static int vlRenderBMacroBlock ( struct vlR16SnormMC *mc, enum vlPictureType picture_type, @@ -580,13 +580,13 @@ int vlRenderBMacroBlock pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + mc->cur_buf++; return 0; } -int vlRenderMacroBlocksMpeg2R16Snorm +static int vlRenderMacroBlocksMpeg2R16Snorm ( struct vlRender *render, struct vlMpeg2MacroBlockBatch *batch, @@ -702,7 +702,17 @@ int vlRenderMacroBlocksMpeg2R16Snorm return 0; } -int vlEnd +static int vlEnd +( + struct vlRender *render +) +{ + assert(render); + + return 0; +} + +static int vlFlush ( struct vlRender *render ) @@ -712,7 +722,7 @@ int vlEnd return 0; } -int vlDestroy +static int vlDestroy ( struct vlRender *render ) @@ -765,7 +775,7 @@ int vlDestroy * Need to be scaled to cover mbW*mbH macroblock pixels and translated into * position on target surface. */ -const struct vlVertex2f macroblock_verts[24] = +static const struct vlVertex2f macroblock_verts[24] = { {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, @@ -785,7 +795,7 @@ const struct vlVertex2f macroblock_verts[24] = * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. */ -const struct vlVertex2f macroblock_luma_texcoords[24] = +static const struct vlVertex2f macroblock_luma_texcoords[24] = { {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, @@ -804,7 +814,7 @@ const struct vlVertex2f macroblock_luma_texcoords[24] = * Represents texcoords for the above for rendering 1 chroma block. * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. */ -const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; +static const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; /* * Represents texcoords for the above for rendering 2 chroma blocks arranged @@ -812,13 +822,13 @@ const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. */ -const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; +static const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; /* * Represents texcoords for the above for rendering 4 chroma blocks. * Same case as 4 luma blocks. */ -const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; +static const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; /* * Used when rendering P and B macroblocks, multiplier is applied to the A channel, @@ -826,7 +836,7 @@ const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texco * get back the differential. The differential is then added to the samples from the * reference surface(s). */ -const struct vlFragmentShaderConsts fs_consts = +static const struct vlFragmentShaderConsts fs_consts = { {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, {0.5f, 2.0f, 0.0f, 0.0f} @@ -2093,7 +2103,7 @@ static int vlCreateFragmentShaderFieldBMB return 0; } -int vlCreateDataBufs +static int vlCreateDataBufs ( struct vlR16SnormMC *mc ) @@ -2319,6 +2329,7 @@ int vlCreateR16SNormMC mc->base.vlBegin = &vlBegin; mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16Snorm; mc->base.vlEnd = &vlEnd; + mc->base.vlFlush = &vlFlush; mc->base.vlDestroy = &vlDestroy; mc->pipe = pipe; mc->video_width = video_width; diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h index a6eecf05b6..9842926bf7 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h @@ -1,5 +1,5 @@ -#ifndef vl_mc_h -#define vl_mc_h +#ifndef vl_r16snorm_mc_h +#define vl_r16snorm_mc_h #include "vl_types.h" diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c new file mode 100644 index 0000000000..fc383cb8f6 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -0,0 +1,2341 @@ +#define VL_INTERNAL +#include "vl_r16snorm_mc_buf.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vl_render.h" +#include "vl_shader_build.h" +#include "vl_surface.h" +#include "vl_util.h" +#include "vl_types.h" +#include "vl_defs.h" + +#define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */ + +enum vlMacroBlockTypeEx +{ + vlMacroBlockExTypeIntra, + vlMacroBlockExTypeFwdPredictedFrame, + vlMacroBlockExTypeFwdPredictedField, + vlMacroBlockExTypeBkwdPredictedFrame, + vlMacroBlockExTypeBkwdPredictedField, + vlMacroBlockExTypeBiPredictedFrame, + vlMacroBlockExTypeBiPredictedField, + + vlNumMacroBlockExTypes +}; + +struct vlVertexShaderConsts +{ + struct vlVertex4f denorm; +}; + +struct vlFragmentShaderConsts +{ + struct vlVertex4f multiplier; + struct vlVertex4f div; +}; + +struct vlR16SnormBufferedMC +{ + struct vlRender base; + + unsigned int video_width, video_height; + enum vlFormat video_format; + + unsigned int cur_buf; + struct vlSurface *buffered_surface; + struct vlSurface *past_surface, *future_surface; + struct vlVertex2f surface_tex_inv_size; + unsigned int num_macroblocks[vlNumMacroBlockExTypes]; + unsigned int total_num_macroblocks; + + struct pipe_context *pipe; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + struct pipe_sampler_state *samplers[5]; + struct pipe_texture *textures[NUM_BUF_SETS][5]; + void *i_vs, *p_vs[2], *b_vs[2]; + void *i_fs, *p_fs[2], *b_fs[2]; + struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][vlNumMacroBlockExTypes][3]; + struct pipe_vertex_element vertex_elems[5]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; +}; + +static int vlBegin +( + struct vlRender *render +) +{ + assert(render); + + return 0; +} + +static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memset + ( + dst + y * dst_pitch, + 0, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabBlocks +( + struct vlR16SnormBufferedMC *mc, + unsigned int mbx, + unsigned int mby, + enum vlDCTType dct_type, + unsigned int coded_block_pattern, + short *blocks +) +{ + struct pipe_surface *tex_surface; + short *texels; + unsigned int tex_pitch; + unsigned int x, y, tb = 0, sb = 0; + unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT; + + assert(mc); + assert(blocks); + + tex_surface = mc->pipe->screen->get_tex_surface + ( + mc->pipe->screen, + mc->textures[mc->cur_buf % NUM_BUF_SETS][0], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; + + texels += mbpy * tex_pitch + mbpx; + + for (y = 0; y < 2; ++y) + { + for (x = 0; x < 2; ++x, ++tb) + { + if ((coded_block_pattern >> (5 - tb)) & 1) + { + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + + if (dct_type == vlDCTTypeFrameCoded) + { + vlGrabFrameCodedBlock + ( + cur_block, + texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, + tex_pitch + ); + } + else + { + vlGrabFieldCodedBlock + ( + cur_block, + texels + y * tex_pitch + x * VL_BLOCK_WIDTH, + tex_pitch + ); + } + + ++sb; + } + else + vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch); + } + } + + pipe_surface_unmap(tex_surface); + + /* TODO: Implement 422, 444 */ + mbpx >>= 1; + mbpy >>= 1; + + for (tb = 0; tb < 2; ++tb) + { + tex_surface = mc->pipe->screen->get_tex_surface + ( + mc->pipe->screen, + mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; + + texels += mbpy * tex_pitch + mbpx; + + if ((coded_block_pattern >> (1 - tb)) & 1) + { + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + + vlGrabFrameCodedBlock + ( + cur_block, + texels, + tex_pitch + ); + + ++sb; + } + else + vlGrabNoBlock(texels, tex_pitch); + + pipe_surface_unmap(tex_surface); + } + + return 0; +} + +#if 0 +static int vlGrabMacroBlock +( + struct vlR16SnormBufferedMC *mc, + struct vlMpeg2MacroBlock *macroblock +) +{ + const struct vlVertex2f unit = + { + mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH, + mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT + }; + const struct vlVertex2f half = + { + mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2), + mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2) + }; + + struct vlVertex2f *vb; + enum vlMacroBlockTypeEx mb_type_ex; + struct vlVertex2f mo_vec[2]; + unsigned int i; + + assert(mc); + assert(macroblock); + + switch (macroblock->mb_type) + { + case vlMacroBlockTypeIntra: + { + mb_type_ex = vlMacroBlockExTypeIntra; + break; + } + case vlMacroBlockTypeFwdPredicted: + { + mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField; + break; + } + case vlMacroBlockTypeBkwdPredicted: + { + mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField; + break; + } + case vlMacroBlockTypeBiPredicted: + { + mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField; + break; + } + default: + assert(0); + } + + switch (macroblock->mb_type) + { + case vlMacroBlockTypeBiPredicted: + { + vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ) + mc->num_macroblocks[mb_type_ex] * 2 * 24; + + mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeFrame) + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer); + + /* fall-through */ + } + case vlMacroBlockTypeFwdPredicted: + case vlMacroBlockTypeBkwdPredicted: + { + vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ) + mc->num_macroblocks[mb_type_ex] * 2 * 24; + + if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted) + { + mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeField) + { + mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y; + } + } + else + { + mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeField) + { + mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y; + } + } + + if (macroblock->mo_type == vlMotionTypeFrame) + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer); + + /* fall-through */ + } + case vlMacroBlockTypeIntra: + { + vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ) + mc->num_macroblocks[mb_type_ex] * 24; + + vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y; + vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y; + vb[2].x = macroblock->mbx * unit.x + half.x; vb[2].y = macroblock->mby * unit.y; + + vb[3].x = macroblock->mbx * unit.x + half.x; vb[3].y = macroblock->mby * unit.y; + vb[4].x = macroblock->mbx * unit.x; vb[4].y = macroblock->mby * unit.y + half.y; + vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y + half.y; + + vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y; + vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y + half.y; + vb[8].x = macroblock->mbx * unit.x + unit.x; vb[8].y = macroblock->mby * unit.y; + + vb[9].x = macroblock->mbx * unit.x + unit.x; vb[9].y = macroblock->mby * unit.y; + vb[10].x = macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y; + vb[11].x = macroblock->mbx * unit.x + unit.x; vb[11].y = macroblock->mby * unit.y + half.y; + + vb[12].x = macroblock->mbx * unit.x; vb[12].y = macroblock->mby * unit.y + half.y; + vb[13].x = macroblock->mbx * unit.x; vb[13].y = macroblock->mby * unit.y + unit.y; + vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y; + + vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y; + vb[16].x = macroblock->mbx * unit.x; vb[16].y = macroblock->mby * unit.y + unit.y; + vb[17].x = macroblock->mbx * unit.x + half.x; vb[17].y = macroblock->mby * unit.y + unit.y; + + vb[18].x = macroblock->mbx * unit.x + half.x; vb[18].y = macroblock->mby * unit.y + half.y; + vb[19].x = macroblock->mbx * unit.x + half.x; vb[19].y = macroblock->mby * unit.y + unit.y; + vb[20].x = macroblock->mbx * unit.x + unit.x; vb[20].y = macroblock->mby * unit.y + half.y; + + vb[21].x = macroblock->mbx * unit.x + unit.x; vb[21].y = macroblock->mby * unit.y + half.y; + vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y; + vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y; + + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer); + + break; + } + default: + assert(0); + } + + vlGrabBlocks + ( + mc, + macroblock->mbx, + macroblock->mby, + macroblock->dct_type, + macroblock->cbp, + macroblock->blocks + ); + + mc->num_macroblocks[mb_type_ex]++; + mc->total_num_macroblocks++; + + return 0; +} +#else +static int vlGrabMacroBlock +( + struct vlR16SnormBufferedMC *mc, + struct vlMpeg2MacroBlock *macroblock +) +{ + const struct vlVertex2f unit = + { + mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH, + mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT + }; + const struct vlVertex2f half = + { + mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2), + mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2) + }; + + struct vlVertex2f *vb; + unsigned int mb_buf_id; + struct vlVertex2f mo_vec[2]; + unsigned int i; + + assert(mc); + assert(macroblock); + + switch (macroblock->mb_type) + { + case vlMacroBlockTypeIntra: + { + mb_buf_id = vlMacroBlockExTypeIntra; + break; + } + case vlMacroBlockTypeFwdPredicted: + { + mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField; + break; + } + case vlMacroBlockTypeBkwdPredicted: + { + mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField; + break; + } + case vlMacroBlockTypeBiPredicted: + { + mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField; + break; + } + default: + assert(0); + } + + vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][0].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ) + mc->num_macroblocks[mb_buf_id] * 24; + + vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y; + vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y; + vb[2].x = macroblock->mbx * unit.x + half.x; vb[2].y = macroblock->mby * unit.y; + + vb[3].x = macroblock->mbx * unit.x + half.x; vb[3].y = macroblock->mby * unit.y; + vb[4].x = macroblock->mbx * unit.x; vb[4].y = macroblock->mby * unit.y + half.y; + vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y + half.y; + + vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y; + vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y + half.y; + vb[8].x = macroblock->mbx * unit.x + unit.x; vb[8].y = macroblock->mby * unit.y; + + vb[9].x = macroblock->mbx * unit.x + unit.x; vb[9].y = macroblock->mby * unit.y; + vb[10].x = macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y; + vb[11].x = macroblock->mbx * unit.x + unit.x; vb[11].y = macroblock->mby * unit.y + half.y; + + vb[12].x = macroblock->mbx * unit.x; vb[12].y = macroblock->mby * unit.y + half.y; + vb[13].x = macroblock->mbx * unit.x; vb[13].y = macroblock->mby * unit.y + unit.y; + vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y; + + vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y; + vb[16].x = macroblock->mbx * unit.x; vb[16].y = macroblock->mby * unit.y + unit.y; + vb[17].x = macroblock->mbx * unit.x + half.x; vb[17].y = macroblock->mby * unit.y + unit.y; + + vb[18].x = macroblock->mbx * unit.x + half.x; vb[18].y = macroblock->mby * unit.y + half.y; + vb[19].x = macroblock->mbx * unit.x + half.x; vb[19].y = macroblock->mby * unit.y + unit.y; + vb[20].x = macroblock->mbx * unit.x + unit.x; vb[20].y = macroblock->mby * unit.y + half.y; + + vb[21].x = macroblock->mbx * unit.x + unit.x; vb[21].y = macroblock->mby * unit.y + half.y; + vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y; + vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y; + + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][0].buffer); + + if (macroblock->mb_type == vlMacroBlockTypeIntra) + { + vlGrabBlocks + ( + mc, + macroblock->mbx, + macroblock->mby, + macroblock->dct_type, + macroblock->cbp, + macroblock->blocks + ); + + mc->num_macroblocks[mb_buf_id]++; + mc->total_num_macroblocks++; + return 0; + } + + vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][1].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ) + mc->num_macroblocks[mb_buf_id] * 2 * 24; + + if (macroblock->mb_type == vlMacroBlockTypeFwdPredicted || macroblock->mb_type == vlMacroBlockTypeBiPredicted) + { + mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeField) + { + mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y; + } + } + else + { + mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeField) + { + mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y; + } + } + + if (macroblock->mo_type == vlMotionTypeFrame) + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][1].buffer); + + if (macroblock->mb_type != vlMacroBlockTypeBiPredicted) + { + vlGrabBlocks + ( + mc, + macroblock->mbx, + macroblock->mby, + macroblock->dct_type, + macroblock->cbp, + macroblock->blocks + ); + + mc->num_macroblocks[mb_buf_id]++; + mc->total_num_macroblocks++; + return 0; + } + + vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][2].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ) + mc->num_macroblocks[mb_buf_id] * 2 * 24; + + mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeFrame) + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][2].buffer); + + vlGrabBlocks + ( + mc, + macroblock->mbx, + macroblock->mby, + macroblock->dct_type, + macroblock->cbp, + macroblock->blocks + ); + + mc->num_macroblocks[mb_buf_id]++; + mc->total_num_macroblocks++; + + return 0; +} +#endif + +static int vlFlush +( + struct vlRender *render +) +{ + struct vlR16SnormBufferedMC *mc; + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(mc); + + mc = (struct vlR16SnormBufferedMC*)render; + pipe = mc->pipe; + + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + mc->buffered_surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + + pipe->set_framebuffer_state(pipe, &mc->render_target); + pipe->set_viewport_state(pipe, &mc->viewport); + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + mc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->denorm.x = mc->buffered_surface->texture->width[0]; + vs_consts->denorm.y = mc->buffered_surface->texture->height[0]; + + pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf); + + if (mc->num_macroblocks[vlMacroBlockExTypeIntra] > 0) + { + pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeIntra]); + pipe->set_vertex_elements(pipe, 1, mc->vertex_elems); + pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]); + pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->i_vs); + pipe->bind_fs_state(pipe, mc->i_fs); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeIntra] * 24); + } + + if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0) + { + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedFrame]); + pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); + pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->p_vs[0]); + pipe->bind_fs_state(pipe, mc->p_fs[0]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24); + } + + if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0) + { + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedField]); + pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); + pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->p_vs[1]); + pipe->bind_fs_state(pipe, mc->p_fs[1]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24); + } + + if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0) + { + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedFrame]); + pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); + pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->p_vs[0]); + pipe->bind_fs_state(pipe, mc->p_fs[0]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24); + } + + if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0) + { + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedField]); + pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); + pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->p_vs[1]); + pipe->bind_fs_state(pipe, mc->p_fs[1]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24); + } + + if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0) + { + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedFrame]); + pipe->set_vertex_elements(pipe, 5, mc->vertex_elems); + mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; + mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); + pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->b_vs[0]); + pipe->bind_fs_state(pipe, mc->b_fs[0]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24); + } + + if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0) + { + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedField]); + pipe->set_vertex_elements(pipe, 5, mc->vertex_elems); + mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; + mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); + pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->b_vs[1]); + pipe->bind_fs_state(pipe, mc->b_fs[1]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24); + } + + memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7); + mc->total_num_macroblocks = 0; + + return 0; +} + +static int vlRenderMacroBlocksMpeg2R16SnormBuffered +( + struct vlRender *render, + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface +) +{ + struct vlR16SnormBufferedMC *mc; + unsigned int i; + + assert(render); + + mc = (struct vlR16SnormBufferedMC*)render; + + if (mc->buffered_surface) + { + if + ( + mc->buffered_surface != surface /*|| + mc->past_surface != batch->past_surface || + mc->future_surface != batch->future_surface*/ + ) + { + vlFlush(&mc->base); + mc->buffered_surface = surface; + mc->past_surface = batch->past_surface; + mc->future_surface = batch->future_surface; + mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; + mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; + } + } + else + { + mc->buffered_surface = surface; + mc->past_surface = batch->past_surface; + mc->future_surface = batch->future_surface; + mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; + mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; + } + + for (i = 0; i < batch->num_macroblocks; ++i) + vlGrabMacroBlock(mc, &batch->macroblocks[i]); + + return 0; +} + +static int vlEnd +( + struct vlRender *render +) +{ + assert(render); + + return 0; +} + +static int vlDestroy +( + struct vlRender *render +) +{ + struct vlR16SnormBufferedMC *mc; + struct pipe_context *pipe; + unsigned int g, h, i; + + assert(render); + + mc = (struct vlR16SnormBufferedMC*)render; + pipe = mc->pipe; + + for (i = 0; i < 5; ++i) + pipe->delete_sampler_state(pipe, mc->samplers[i]); + + for (g = 0; g < NUM_BUF_SETS; ++g) + for (h = 0; h < 7; ++h) + for (i = 0; i < 3; ++i) + pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[g][h][i].buffer); + + /* Textures 3 & 4 are not created directly, no need to release them here */ + for (i = 0; i < NUM_BUF_SETS; ++i) + { + pipe_texture_release(&mc->textures[i][0]); + pipe_texture_release(&mc->textures[i][1]); + pipe_texture_release(&mc->textures[i][2]); + } + + pipe->delete_vs_state(pipe, mc->i_vs); + pipe->delete_fs_state(pipe, mc->i_fs); + + for (i = 0; i < 2; ++i) + { + pipe->delete_vs_state(pipe, mc->p_vs[i]); + pipe->delete_fs_state(pipe, mc->p_fs[i]); + pipe->delete_vs_state(pipe, mc->b_vs[i]); + pipe->delete_fs_state(pipe, mc->b_fs[i]); + } + + pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); + pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); + + free(mc); + + return 0; +} + +/* + * Muliplier renormalizes block samples from 16 bits to 12 bits. + * Divider is used when calculating Y % 2 for choosing top or bottom + * field for P or B macroblocks. + * TODO: Use immediates. + */ +static const struct vlFragmentShaderConsts fs_consts = +{ + {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 0.0f} +}; + +static int vlCreateVertexShaderIMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, luma & chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->i_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderIMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Luma/chroma texcoords */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->i_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFramePMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, luma/chroma texcoords + * decl i1 ; Ref surface top field texcoords + * decl i2 ; Ref surface bottom field texcoords (unused, packed in the same stream) + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + * decl o2 ; Ref macroblock texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o2, i0, i1 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, luma/chroma texcoords + * decl i1 ; Ref surface top field texcoords + * decl i2 ; Ref surface bottom field texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Texcoord denorm coefficients */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + * decl o2 ; Top field ref macroblock texcoords + * decl o3 ; Bottom field ref macroblock texcoords + * decl o4 ; Denormalized vertex pos + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + */ + for (i = 0; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i == 0 ? 0 : i - 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o2, i0, i1 ; Translate vertex pos by motion vec to form top field macroblock texcoords + * add o3, i0, i2 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o4, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFramePMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0, s1, s2 + * decl i1 ; Texcoords for s3 + */ + for (i = 0; i < 2; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i1, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 1, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0, s1, s2 + * decl i1 ; Texcoords for s3 + * decl i2 ; Texcoords for s3 + * decl i3 ; Denormalized vertex pos + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i1, s3 ; Read texel from ref macroblock top field + * tex2d t2, i2, s3 ; Read texel from ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i3.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFrameBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, luma/chroma texcoords + * decl i1 ; First ref surface top field texcoords + * decl i2 ; First ref surface bottom field texcoords (unused, packed in the same stream) + * decl i3 ; Second ref surface top field texcoords + * decl i4 ; Second ref surface bottom field texcoords (unused, packed in the same stream) + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + * decl o2 ; First ref macroblock texcoords + * decl o3 ; Second ref macroblock texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o2, i0, i1 ; Translate vertex pos by motion vec to form first ref macroblock texcoords + * add o3, i0, i3 ; Translate vertex pos by motion vec to form second ref macroblock texcoords + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i * 2 + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, Luma/chroma texcoords + * decl i1 ; First ref surface top field texcoords + * decl i2 ; First ref surface bottom field texcoords + * decl i3 ; Second ref surface top field texcoords + * decl i4 ; Second ref surface bottom field texcoords + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Denorm coefficients */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + * decl o2 ; Top field past ref macroblock texcoords + * decl o3 ; Bottom field past ref macroblock texcoords + * decl o4 ; Top field future ref macroblock texcoords + * decl o5 ; Bottom field future ref macroblock texcoords + * decl o6 ; Denormalized vertex pos + */ + for (i = 0; i < 7; i++) + { + decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + * mov o2, i1 ; Move past top field texcoords to output + * mov o3, i2 ; Move past bottom field texcoords to output + * mov o4, i3 ; Move future top field texcoords to output + * mov o5, i4 ; Move future bottom field texcoords to output + */ + for (i = 0; i < 6; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o2, i0, i1 ; Translate vertex pos by motion vec to form first top field macroblock texcoords + * add o3, i0, i2 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords + * add o4, i0, i3 ; Translate vertex pos by motion vec to form second top field macroblock texcoords + * add o5, i0, i4 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o6, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0, s1, s2 + * decl i1 ; Texcoords for s3 + * decl i2 ; Texcoords for s4 + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i1, s3 ; Read texel from past ref macroblock + * tex2d t2, i2, s4 ; Read texel from future ref macroblock + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->b_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0, s1, s2 + * decl i1 ; Texcoords for s3 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s4 + * decl i4 ; Texcoords for s4 + * decl i5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t5 */ + decl = vl_decl_temps(0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i1, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i2, s3 ; Read texel from past ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t4, i3, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i4, s4 ; Read texel from future ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->b_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateDataBufs +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int mbw = align(mc->video_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; + const unsigned int mbh = align(mc->video_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; + const unsigned int num_mb_per_frame = mbw * mbh; + + struct pipe_context *pipe; + unsigned int g, h, i; + + assert(mc); + + pipe = mc->pipe; + + for (g = 0; g < NUM_BUF_SETS; ++g) + { + for (h = 0; h < 7; ++h) + { + /* Create our vertex buffer and vertex buffer element */ + mc->vertex_bufs[g][h][0].pitch = sizeof(struct vlVertex2f); + mc->vertex_bufs[g][h][0].max_index = 24 * num_mb_per_frame - 1; + mc->vertex_bufs[g][h][0].buffer_offset = 0; + mc->vertex_bufs[g][h][0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 24 * num_mb_per_frame + ); + } + } + + /* Position & block luma, block chroma texcoord element */ + mc->vertex_elems[0].src_offset = 0; + mc->vertex_elems[0].vertex_buffer_index = 0; + mc->vertex_elems[0].nr_components = 2; + mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + for (g = 0; g < NUM_BUF_SETS; ++g) + { + for (h = 0; h < 7; ++h) + { + for (i = 1; i < 3; ++i) + { + mc->vertex_bufs[g][h][i].pitch = sizeof(struct vlVertex2f) * 2; + mc->vertex_bufs[g][h][i].max_index = 24 * num_mb_per_frame - 1; + mc->vertex_bufs[g][h][i].buffer_offset = 0; + mc->vertex_bufs[g][h][i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame + ); + } + } + } + + /* First ref surface top field texcoord element */ + mc->vertex_elems[1].src_offset = 0; + mc->vertex_elems[1].vertex_buffer_index = 1; + mc->vertex_elems[1].nr_components = 2; + mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface bottom field texcoord element */ + mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[2].vertex_buffer_index = 1; + mc->vertex_elems[2].nr_components = 2; + mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface top field texcoord element */ + mc->vertex_elems[3].src_offset = 0; + mc->vertex_elems[3].vertex_buffer_index = 2; + mc->vertex_elems[3].nr_components = 2; + mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface bottom field texcoord element */ + mc->vertex_elems[4].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[4].vertex_buffer_index = 2; + mc->vertex_elems[4].nr_components = 2; + mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Create our constant buffer */ + mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); + mc->vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + mc->vs_const_buf.size + ); + + mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); + mc->fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + mc->fs_const_buf.size + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &fs_consts, + sizeof(struct vlFragmentShaderConsts) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); + + return 0; +} + +static int vlInit +( + struct vlR16SnormBufferedMC *mc +) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int filters[5]; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + + /* For MC we render to textures, which are rounded up to nearest POT */ + mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width); + mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height); + mc->viewport.scale[2] = 1; + mc->viewport.scale[3] = 1; + mc->viewport.translate[0] = 0; + mc->viewport.translate[1] = 0; + mc->viewport.translate[2] = 0; + mc->viewport.translate[3] = 0; + + mc->render_target.width = vlRoundUpPOT(mc->video_width); + mc->render_target.height = vlRoundUpPOT(mc->video_height); + mc->render_target.num_cbufs = 1; + /* FB for MC stage is a vlSurface created by the user, set at render time */ + mc->render_target.zsbuf = NULL; + + filters[0] = PIPE_TEX_FILTER_NEAREST; + filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[2] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; + + for (i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler); + } + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_R16_SNORM; + template.last_level = 0; + template.width[0] = vlRoundUpPOT(mc->video_width); + template.height[0] = vlRoundUpPOT(mc->video_height); + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + for (i = 0; i < NUM_BUF_SETS; ++i) + mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); + + if (mc->video_format == vlFormatYCbCr420) + { + template.width[0] = vlRoundUpPOT(mc->video_width / 2); + template.height[0] = vlRoundUpPOT(mc->video_height / 2); + } + else if (mc->video_format == vlFormatYCbCr422) + template.height[0] = vlRoundUpPOT(mc->video_height / 2); + + for (i = 0; i < NUM_BUF_SETS; ++i) + { + mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); + mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); + } + + /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ + + vlCreateVertexShaderIMB(mc); + vlCreateFragmentShaderIMB(mc); + vlCreateVertexShaderFramePMB(mc); + vlCreateVertexShaderFieldPMB(mc); + vlCreateFragmentShaderFramePMB(mc); + vlCreateFragmentShaderFieldPMB(mc); + vlCreateVertexShaderFrameBMB(mc); + vlCreateVertexShaderFieldBMB(mc); + vlCreateFragmentShaderFrameBMB(mc); + vlCreateFragmentShaderFieldBMB(mc); + vlCreateDataBufs(mc); + + return 0; +} + +int vlCreateR16SNormBufferedMC +( + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum vlFormat video_format, + struct vlRender **render +) +{ + struct vlR16SnormBufferedMC *mc; + + assert(pipe); + assert(render); + + mc = calloc(1, sizeof(struct vlR16SnormBufferedMC)); + + mc->base.vlBegin = &vlBegin; + mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered; + mc->base.vlEnd = &vlEnd; + mc->base.vlFlush = &vlFlush; + mc->base.vlDestroy = &vlDestroy; + mc->pipe = pipe; + mc->video_width = video_width; + mc->video_height = video_height; + + mc->cur_buf = 0; + mc->buffered_surface = NULL; + mc->past_surface = NULL; + mc->future_surface = NULL; + memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7); + mc->total_num_macroblocks = 0; + + vlInit(mc); + + *render = &mc->base; + + return 0; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h new file mode 100644 index 0000000000..30f67db3e7 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h @@ -0,0 +1,18 @@ +#ifndef vl_r16snorm_mc_buf_h +#define vl_r16snorm_mc_buf_h + +#include "vl_types.h" + +struct pipe_context; +struct vlRender; + +int vlCreateR16SNormBufferedMC +( + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum vlFormat video_format, + struct vlRender **render +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_render.h b/src/gallium/state_trackers/g3dvl/vl_render.h index 63016b5cbe..166030b498 100644 --- a/src/gallium/state_trackers/g3dvl/vl_render.h +++ b/src/gallium/state_trackers/g3dvl/vl_render.h @@ -24,6 +24,11 @@ struct vlRender struct vlRender *render ); + int (*vlFlush) + ( + struct vlRender *render + ); + int (*vlDestroy) ( struct vlRender *render diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index ffc8122172..687fd1ec29 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -113,6 +113,8 @@ int vlPutPicture assert(surface); assert(surface->context); + surface->context->render->vlFlush(surface->context->render); + csc = surface->context->csc; pipe = surface->context->pipe; -- cgit v1.2.3 From db1021a37c29a60c70ce294077680ca3e98a6460 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 20 Dec 2008 16:31:29 -0500 Subject: g3dvl: Get rid of old unbuffered motion compensation code. --- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 2 - src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c | 2344 --------------------- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h | 18 - 4 files changed, 1 insertion(+), 2365 deletions(-) delete mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c delete mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 4f7a953484..84a0b2c6d8 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,6 +1,6 @@ TARGET = libg3dvl.a OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \ - vl_r16snorm_mc.o vl_r16snorm_mc_buf.o + vl_r16snorm_mc_buf.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index fe107e406d..5b7bb73b39 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -5,7 +5,6 @@ #include #include #include "vl_render.h" -#include "vl_r16snorm_mc.h" #include "vl_r16snorm_mc_buf.h" #include "vl_csc.h" #include "vl_basic_csc.h" @@ -127,7 +126,6 @@ int vlCreateContext vlInitCommon(ctx); - /*vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render);*/ vlCreateR16SNormBufferedMC(pipe, picture_width, picture_height, picture_format, &ctx->render); vlCreateBasicCSC(pipe, &ctx->csc); diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c deleted file mode 100644 index 3272220ef8..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c +++ /dev/null @@ -1,2344 +0,0 @@ -#define VL_INTERNAL -#include "vl_r16snorm_mc.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include "vl_render.h" -#include "vl_shader_build.h" -#include "vl_surface.h" -#include "vl_util.h" -#include "vl_types.h" -#include "vl_defs.h" - -#define NUM_BUFS 4 /* Number of rotating buffers to use */ - -struct vlVertexShaderConsts -{ - /*struct vlVertex4f scale; - struct vlVertex4f denorm;*/ - struct vlVertex4f scale; - struct vlVertex4f mb_pos_trans; - struct vlVertex4f denorm; - struct - { - struct vlVertex4f top_field; - struct vlVertex4f bottom_field; - } mb_tc_trans[2]; -}; - -struct vlFragmentShaderConsts -{ - struct vlVertex4f multiplier; - struct vlVertex4f div; -}; - -struct vlR16SnormMC -{ - struct vlRender base; - - unsigned int video_width, video_height; - enum vlFormat video_format; - unsigned int cur_buf; - - struct pipe_context *pipe; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state render_target; - struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[NUM_BUFS][5]; - void *i_vs, *p_vs[2], *b_vs[2]; - void *i_fs, *p_fs[2], *b_fs[2]; - struct pipe_vertex_buffer vertex_bufs[3]; - struct pipe_vertex_element vertex_elems[3]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; -}; - -static int vlBegin -( - struct vlRender *render -) -{ - struct vlR16SnormMC *mc; - struct pipe_context *pipe; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - pipe = mc->pipe; - - /* Frame buffer set in vlRender*Macroblock() */ - /* Shaders, samplers, textures set in vlRender*Macroblock() */ - pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs); - pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); - pipe->set_viewport_state(pipe, &mc->viewport); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf); - - return 0; -} - -/*static int vlGrabMacroBlock -( - struct vlR16SnormMC *mc, - struct vlMpeg2MacroBlock *macroblock -) -{ - assert(mc); - assert(macroblock); - - - - return 0; -}*/ - -/*#define DO_IDCT*/ - -#ifdef DO_IDCT -static int vlTransformBlock(short *src, short *dst, short bias) -{ - static const float basis[8][8] = - { - {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, - {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, - {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, - {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, - {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, - {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, - {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, - {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} - }; - - unsigned int x, y; - short tmp[64]; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - tmp[y * VL_BLOCK_WIDTH + x] = (short) - ( - src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + - src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + - src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + - src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + - src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + - src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + - src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + - src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] - ); - - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - { - dst[y * VL_BLOCK_WIDTH + x] = bias + (short) - ( - tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + - tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + - tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + - tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + - tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + - tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + - tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + - tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] - ); - if (dst[y * VL_BLOCK_WIDTH + x] > 255) - dst[y * VL_BLOCK_WIDTH + x] = 255; - else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) - dst[y * VL_BLOCK_WIDTH + x] = 0; - } - return 0; -} -#endif - -static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - dst += VL_BLOCK_HEIGHT * dst_pitch; - - for (; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memset - ( - dst + y * dst_pitch, - 0, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -enum vlSampleType -{ - vlSampleTypeFull, - vlSampleTypeDiff -}; - -static int vlGrabBlocks -( - struct vlR16SnormMC *mc, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - enum vlSampleType sample_type, - short *blocks -) -{ - struct pipe_surface *tex_surface; - short *texels; - unsigned int tex_pitch; - unsigned int tb, sb = 0; - - assert(mc); - assert(blocks); - - tex_surface = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUFS][0], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - for (tb = 0; tb < 4; ++tb) - { - if ((coded_block_pattern >> (5 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0); -#endif - - if (dct_type == vlDCTTypeFrameCoded) - vlGrabFrameCodedBlock - ( - cur_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - else - vlGrabFieldCodedBlock - ( - cur_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); - } - - pipe_surface_unmap(tex_surface); - - /* TODO: Implement 422, 444 */ - for (tb = 0; tb < 2; ++tb) - { - tex_surface = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUFS][tb + 1], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - if ((coded_block_pattern >> (1 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0); -#endif - - vlGrabFrameCodedBlock - ( - cur_block, - texels, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels, tex_pitch); - - pipe_surface_unmap(tex_surface); - } - - return 0; -} - -static int vlRenderIMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(blocks); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != vlPictureTypeFrame) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeFull, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); - pipe->bind_vs_state(pipe, mc->i_vs); - pipe->bind_fs_state(pipe, mc->i_fs); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - mc->cur_buf++; - - return 0; -} - -static int vlRenderPMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - enum vlMotionType mc_type, - short top_x, - short top_y, - short bottom_x, - short bottom_y, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *ref_surface, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != vlPictureTypeFrame) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - - if (mc_type == vlMotionTypeField) - { - vs_consts->denorm.x = (float)surface->texture->width[0]; - vs_consts->denorm.y = (float)surface->texture->height[0]; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, mc->p_vs[1]); - pipe->bind_fs_state(pipe, mc->p_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, mc->p_vs[0]); - pipe->bind_fs_state(pipe, mc->p_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - - mc->textures[mc->cur_buf % NUM_BUFS][3] = ref_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - mc->cur_buf++; - - return 0; -} - -static int vlRenderBMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - enum vlMotionType mc_type, - short top_past_x, - short top_past_y, - short bottom_past_x, - short bottom_past_y, - short top_future_x, - short top_future_y, - short bottom_future_x, - short bottom_future_y, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *past_surface, - struct vlSurface *future_surface, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != vlPictureTypeFrame) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_past_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_past_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_future_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_future_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[1].top_field.z = 0.0f; - vs_consts->mb_tc_trans[1].top_field.w = 0.0f; - - if (mc_type == vlMotionTypeField) - { - vs_consts->denorm.x = (float)surface->texture->width[0]; - vs_consts->denorm.y = (float)surface->texture->height[0]; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_past_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_past_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_future_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_future_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, mc->b_vs[1]); - pipe->bind_fs_state(pipe, mc->b_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, mc->b_vs[0]); - pipe->bind_fs_state(pipe, mc->b_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - - mc->textures[mc->cur_buf % NUM_BUFS][3] = past_surface->texture; - mc->textures[mc->cur_buf % NUM_BUFS][4] = future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - mc->cur_buf++; - - return 0; -} - -static int vlRenderMacroBlocksMpeg2R16Snorm -( - struct vlRender *render, - struct vlMpeg2MacroBlockBatch *batch, - struct vlSurface *surface -) -{ - struct vlR16SnormMC *mc; - unsigned int i; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - - /*for (i = 0; i < batch->num_macroblocks; ++i) - vlGrabMacroBlock(batch->macroblocks[i]);*/ - - for (i = 0; i < batch->num_macroblocks; ++i) - { - switch (batch->macroblocks[i].mb_type) - { - case vlMacroBlockTypeIntra: - { - vlRenderIMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - surface - ); - break; - } - case vlMacroBlockTypeFwdPredicted: - { - vlRenderPMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][0][0], - batch->macroblocks[i].PMV[0][0][1], - batch->macroblocks[i].PMV[1][0][0], - batch->macroblocks[i].PMV[1][0][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->past_surface, - surface - ); - break; - } - case vlMacroBlockTypeBkwdPredicted: - { - vlRenderPMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][1][0], - batch->macroblocks[i].PMV[0][1][1], - batch->macroblocks[i].PMV[1][1][0], - batch->macroblocks[i].PMV[1][1][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->future_surface, - surface - ); - break; - } - case vlMacroBlockTypeBiPredicted: - { - vlRenderBMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][0][0], - batch->macroblocks[i].PMV[0][0][1], - batch->macroblocks[i].PMV[1][0][0], - batch->macroblocks[i].PMV[1][0][1], - batch->macroblocks[i].PMV[0][1][0], - batch->macroblocks[i].PMV[0][1][1], - batch->macroblocks[i].PMV[1][1][0], - batch->macroblocks[i].PMV[1][1][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->past_surface, - batch->future_surface, - surface - ); - break; - } - default: - assert(0); - } - } - - return 0; -} - -static int vlEnd -( - struct vlRender *render -) -{ - assert(render); - - return 0; -} - -static int vlFlush -( - struct vlRender *render -) -{ - assert(render); - - return 0; -} - -static int vlDestroy -( - struct vlRender *render -) -{ - struct vlR16SnormMC *mc; - struct pipe_context *pipe; - unsigned int i; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - pipe = mc->pipe; - - for (i = 0; i < 5; ++i) - pipe->delete_sampler_state(pipe, mc->samplers[i]); - - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[i].buffer); - - /* Textures 3 & 4 are not created directly, no need to release them here */ - for (i = 0; i < NUM_BUFS; ++i) - { - pipe_texture_release(&mc->textures[i][0]); - pipe_texture_release(&mc->textures[i][1]); - pipe_texture_release(&mc->textures[i][2]); - } - - pipe->delete_vs_state(pipe, mc->i_vs); - pipe->delete_fs_state(pipe, mc->i_fs); - - for (i = 0; i < 2; ++i) - { - pipe->delete_vs_state(pipe, mc->p_vs[i]); - pipe->delete_fs_state(pipe, mc->p_fs[i]); - pipe->delete_vs_state(pipe, mc->b_vs[i]); - pipe->delete_fs_state(pipe, mc->b_fs[i]); - } - - pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); - pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); - - free(mc); - - return 0; -} - -/* - * Represents 8 triangles (4 quads, 1 per block) in noormalized coords - * that render a macroblock. - * Need to be scaled to cover mbW*mbH macroblock pixels and translated into - * position on target surface. - */ -static const struct vlVertex2f macroblock_verts[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, - {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, - - {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, - {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, - - {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 4 luma blocks arranged - * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. - */ -static const struct vlVertex2f macroblock_luma_texcoords[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, - - {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, - {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, - - {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, - {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 1 chroma block. - * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. - */ -static const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 2 chroma blocks arranged - * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. - * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. - */ -static const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 4 chroma blocks. - * Same case as 4 luma blocks. - */ -static const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; - -/* - * Used when rendering P and B macroblocks, multiplier is applied to the A channel, - * which is then added to the L channel, then the bias is subtracted from that to - * get back the differential. The differential is then added to the samples from the - * reference surface(s). - */ -static const struct vlFragmentShaderConsts fs_consts = -{ - {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, - {0.5f, 2.0f, 0.0f, 0.0f} -}; - -static int vlCreateVertexShaderIMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderIMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - */ - for (i = 0; i < 2; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul o0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFramePMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Ref macroblock texcoords - */ - for (i = 0; i < 4; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldPMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field ref macroblock texcoords - * decl o4 ; Bottom field ref macroblock texcoords - * decl o5 ; Denormalized vertex pos - */ - for (i = 0; i < 6; i++) - { - decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - mov o1, i1 ; Move input luma texcoords to output - mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock - add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o5, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFramePMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldPMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Denormalized vertex pos - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t4 */ - decl = vl_decl_temps(0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* XXX: Pos values off by 0.5? */ - /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFrameBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move past ref macroblock texcoords into position - * decl c4 ; Unused - * decl c5 ; Translation vector to move future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Past ref macroblock texcoords - * decl o4 ; Future ref macroblock texcoords - */ - for (i = 0; i < 5; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on past ref macroblock - add o4, t0, c5 ; Translate rect into position on future ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position - * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position - * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field past ref macroblock texcoords - * decl o4 ; Bottom field past ref macroblock texcoords - * decl o5 ; Top field future ref macroblock texcoords - * decl o6 ; Bottom field future ref macroblock texcoords - * decl o7 ; Denormalized vertex pos - */ - for (i = 0; i < 8; i++) - { - decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock - * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock - * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock - * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o7, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFrameBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s4 - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t2 */ - decl = vl_decl_temps(0, 2); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock - * tex2d t2, i3, s4 ; Read texel from future ref macroblock - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Texcoords for s4 - * decl i5 ; Texcoords for s4 - * decl i6 ; Denormalized vertex pos - */ - for (i = 0; i < 7; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels - * ; and for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t5 */ - decl = vl_decl_temps(0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* XXX: Pos values off by 0.5? */ - /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field - * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateDataBufs -( - struct vlR16SnormMC *mc -) -{ - struct pipe_context *pipe; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - - /* Create our vertex buffer and vertex buffer element */ - mc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); - mc->vertex_bufs[0].max_index = 23; - mc->vertex_bufs[0].buffer_offset = 0; - mc->vertex_bufs[0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 24 - ); - - mc->vertex_elems[0].src_offset = 0; - mc->vertex_elems[0].vertex_buffer_index = 0; - mc->vertex_elems[0].nr_components = 2; - mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Create our texcoord buffers and texcoord buffer elements */ - for (i = 1; i < 3; ++i) - { - mc->vertex_bufs[i].pitch = sizeof(struct vlVertex2f); - mc->vertex_bufs[i].max_index = 23; - mc->vertex_bufs[i].buffer_offset = 0; - mc->vertex_bufs[i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 24 - ); - - mc->vertex_elems[i].src_offset = 0; - mc->vertex_elems[i].vertex_buffer_index = i; - mc->vertex_elems[i].nr_components = 2; - mc->vertex_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; - } - - /* Fill buffers */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_verts, - sizeof(struct vlVertex2f) * 24 - ); - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_luma_texcoords, - sizeof(struct vlVertex2f) * 24 - ); - /* TODO: Accomodate 422, 444 */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_chroma_420_texcoords, - sizeof(struct vlVertex2f) * 24 - ); - - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_unmap(pipe->winsys, mc->vertex_bufs[i].buffer); - - /* Create our constant buffer */ - mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); - mc->vs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - mc->vs_const_buf.size - ); - - mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); - mc->fs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - mc->fs_const_buf.size - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &fs_consts, - sizeof(struct vlFragmentShaderConsts) - ); - - pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); - - return 0; -} - -static int vlInit -( - struct vlR16SnormMC *mc -) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - struct pipe_texture template; - unsigned int filters[5]; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - - /* For MC we render to textures, which are rounded up to nearest POT */ - mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width); - mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height); - mc->viewport.scale[2] = 1; - mc->viewport.scale[3] = 1; - mc->viewport.translate[0] = 0; - mc->viewport.translate[1] = 0; - mc->viewport.translate[2] = 0; - mc->viewport.translate[3] = 0; - - mc->render_target.width = vlRoundUpPOT(mc->video_width); - mc->render_target.height = vlRoundUpPOT(mc->video_height); - mc->render_target.num_cbufs = 1; - /* FB for MC stage is a vlSurface, set in vlSetRenderSurface() */ - mc->render_target.zsbuf = NULL; - - filters[0] = PIPE_TEX_FILTER_NEAREST; - filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[2] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[3] = PIPE_TEX_FILTER_LINEAR; - filters[4] = PIPE_TEX_FILTER_LINEAR; - - for (i = 0; i < 5; ++i) - { - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = filters[i]; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = filters[i]; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - sampler.min_lod = 0; - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler); - } - - memset(&template, 0, sizeof(struct pipe_texture)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_R16_SNORM; - template.last_level = 0; - template.width[0] = 8; - template.height[0] = 8 * 4; - template.depth[0] = 1; - template.compressed = 0; - pf_get_block(template.format, &template.block); - - for (i = 0; i < NUM_BUFS; ++i) - mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); - - if (mc->video_format == vlFormatYCbCr420) - template.height[0] = 8; - else if (mc->video_format == vlFormatYCbCr422) - template.height[0] = 8 * 2; - else if (mc->video_format == vlFormatYCbCr444) - template.height[0] = 8 * 4; - else - assert(0); - - for (i = 0; i < NUM_BUFS; ++i) - { - mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); - mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); - } - - /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ - - vlCreateVertexShaderIMB(mc); - vlCreateFragmentShaderIMB(mc); - vlCreateVertexShaderFramePMB(mc); - vlCreateVertexShaderFieldPMB(mc); - vlCreateFragmentShaderFramePMB(mc); - vlCreateFragmentShaderFieldPMB(mc); - vlCreateVertexShaderFrameBMB(mc); - vlCreateVertexShaderFieldBMB(mc); - vlCreateFragmentShaderFrameBMB(mc); - vlCreateFragmentShaderFieldBMB(mc); - vlCreateDataBufs(mc); - - return 0; -} - -int vlCreateR16SNormMC -( - struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum vlFormat video_format, - struct vlRender **render -) -{ - struct vlR16SnormMC *mc; - - assert(pipe); - assert(render); - - mc = calloc(1, sizeof(struct vlR16SnormMC)); - - mc->base.vlBegin = &vlBegin; - mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16Snorm; - mc->base.vlEnd = &vlEnd; - mc->base.vlFlush = &vlFlush; - mc->base.vlDestroy = &vlDestroy; - mc->pipe = pipe; - mc->video_width = video_width; - mc->video_height = video_height; - mc->cur_buf = 0; - - vlInit(mc); - - *render = &mc->base; - - return 0; -} diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h deleted file mode 100644 index 9842926bf7..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef vl_r16snorm_mc_h -#define vl_r16snorm_mc_h - -#include "vl_types.h" - -struct pipe_context; -struct vlRender; - -int vlCreateR16SNormMC -( - struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum vlFormat video_format, - struct vlRender **render -); - -#endif -- cgit v1.2.3 From 1e9c3efcc783cee46268cc227234ed118f0cc08b Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 20 Dec 2008 17:09:55 -0500 Subject: g3dvl: Use Gallium MALLOC wrappers. --- src/gallium/state_trackers/g3dvl/Makefile | 5 ++++- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 14 +++++++------- src/gallium/state_trackers/g3dvl/vl_context.c | 6 +++--- src/gallium/state_trackers/g3dvl/vl_display.c | 6 +++--- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 10 +++++----- src/gallium/state_trackers/g3dvl/vl_screen.c | 6 +++--- src/gallium/state_trackers/g3dvl/vl_surface.c | 6 +++--- src/libXvMC/Makefile | 2 +- src/libXvMC/block.c | 10 +++++----- 9 files changed, 34 insertions(+), 31 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl/Makefile') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 84a0b2c6d8..cddfca54fe 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -3,7 +3,10 @@ OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_r16snorm_mc_buf.o GALLIUMDIR = ../.. -CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl +CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/auxiliary \ + -I${GALLIUMDIR}/winsys/g3dvl \ ############################################# diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 626d23cd46..3ce93cf49d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -1,13 +1,13 @@ #define VL_INTERNAL #include "vl_basic_csc.h" #include -#include #include #include #include #include #include #include +#include #include "vl_csc.h" #include "vl_surface.h" #include "vl_shader_build.h" @@ -237,7 +237,7 @@ static int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vs_const_buf.buffer); pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->fs_const_buf.buffer); - free(basic_csc); + FREE(basic_csc); return 0; } @@ -369,7 +369,7 @@ static int vlCreateVertexShader assert(context); pipe = csc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); @@ -430,7 +430,7 @@ static int vlCreateVertexShader vs.tokens = tokens; csc->vertex_shader = pipe->create_vs_state(pipe, &vs); - free(tokens); + FREE(tokens); return 0; } @@ -456,7 +456,7 @@ static int vlCreateFragmentShader assert(context); pipe = csc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); @@ -517,7 +517,7 @@ static int vlCreateFragmentShader fs.tokens = tokens; csc->fragment_shader = pipe->create_fs_state(pipe, &fs); - free(tokens); + FREE(tokens); return 0; } @@ -691,7 +691,7 @@ int vlCreateBasicCSC assert(pipe); assert(csc); - basic_csc = calloc(1, sizeof(struct vlBasicCSC)); + basic_csc = CALLOC_STRUCT(vlBasicCSC); if (!basic_csc) return 1; diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 5b7bb73b39..fbea1363d8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -1,9 +1,9 @@ #define VL_INTERNAL #include "vl_context.h" #include -#include #include #include +#include #include "vl_render.h" #include "vl_r16snorm_mc_buf.h" #include "vl_csc.h" @@ -111,7 +111,7 @@ int vlCreateContext assert(context); assert(pipe); - ctx = calloc(1, sizeof(struct vlContext)); + ctx = CALLOC_STRUCT(vlContext); if (!ctx) return 1; @@ -152,7 +152,7 @@ int vlDestroyContext context->pipe->delete_rasterizer_state(context->pipe, context->raster); context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->dsa); - free(context); + FREE(context); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_display.c b/src/gallium/state_trackers/g3dvl/vl_display.c index af80faa7f5..dce06de758 100644 --- a/src/gallium/state_trackers/g3dvl/vl_display.c +++ b/src/gallium/state_trackers/g3dvl/vl_display.c @@ -1,7 +1,7 @@ #define VL_INTERNAL #include "vl_display.h" #include -#include +#include int vlCreateDisplay ( @@ -14,7 +14,7 @@ int vlCreateDisplay assert(native_display); assert(display); - dpy = calloc(1, sizeof(struct vlDisplay)); + dpy = CALLOC_STRUCT(vlDisplay); if (!dpy) return 1; @@ -32,7 +32,7 @@ int vlDestroyDisplay { assert(display); - free(display); + FREE(display); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index a31f5c58f4..fcea899ef9 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -1,7 +1,6 @@ #define VL_INTERNAL #include "vl_r16snorm_mc_buf.h" #include -#include #include #include #include @@ -10,6 +9,7 @@ #include #include #include +#include #include "vl_render.h" #include "vl_shader_build.h" #include "vl_surface.h" @@ -869,8 +869,8 @@ static int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); - free(mc->macroblocks); - free(mc); + FREE(mc->macroblocks); + FREE(mc); return 0; } @@ -1007,7 +1007,7 @@ static int vlCreateDataBufs pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); - mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture); + mc->macroblocks = MALLOC(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture); return 0; } @@ -1133,7 +1133,7 @@ int vlCreateR16SNormBufferedMC assert(pipe); assert(render); - mc = calloc(1, sizeof(struct vlR16SnormBufferedMC)); + mc = CALLOC_STRUCT(vlR16SnormBufferedMC); mc->base.vlBegin = &vlBegin; mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered; diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.c b/src/gallium/state_trackers/g3dvl/vl_screen.c index 484f63b0d4..ade8643a66 100644 --- a/src/gallium/state_trackers/g3dvl/vl_screen.c +++ b/src/gallium/state_trackers/g3dvl/vl_screen.c @@ -1,7 +1,7 @@ #define VL_INTERNAL #include "vl_screen.h" #include -#include +#include int vlCreateScreen ( @@ -17,7 +17,7 @@ int vlCreateScreen assert(pscreen); assert(vl_screen); - scrn = calloc(1, sizeof(struct vlScreen)); + scrn = CALLOC_STRUCT(vlScreen); if (!scrn) return 1; @@ -37,7 +37,7 @@ int vlDestroyScreen { assert(screen); - free(screen); + FREE(screen); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 076bd40d41..911469f966 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -1,11 +1,11 @@ #define VL_INTERNAL #include "vl_surface.h" #include -#include #include #include #include #include +#include #include #include "vl_screen.h" #include "vl_context.h" @@ -28,7 +28,7 @@ int vlCreateSurface assert(screen); assert(surface); - sfc = calloc(1, sizeof(struct vlSurface)); + sfc = CALLOC_STRUCT(vlSurface); if (!sfc) return 1; @@ -64,7 +64,7 @@ int vlDestroySurface assert(surface); pipe_texture_release(&surface->texture); - free(surface); + FREE(surface); return 0; } diff --git a/src/libXvMC/Makefile b/src/libXvMC/Makefile index 4de26e9620..7badcfd264 100644 --- a/src/libXvMC/Makefile +++ b/src/libXvMC/Makefile @@ -8,7 +8,7 @@ ifeq (${DRIVER}, softpipe) OBJECTS += ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o endif -CFLAGS += -g -fPIC -Wall \ +CFLAGS += -g -fPIC -Wall -Werror=implicit-function-declaration \ -I${GALLIUMDIR}/state_trackers/g3dvl \ -I${GALLIUMDIR}/winsys/g3dvl \ -I${GALLIUMDIR}/include \ diff --git a/src/libXvMC/block.c b/src/libXvMC/block.c index 328b035576..b38a89be09 100644 --- a/src/libXvMC/block.c +++ b/src/libXvMC/block.c @@ -1,7 +1,7 @@ #include -#include #include #include +#include #include #include #include @@ -26,7 +26,7 @@ Status XvMCCreateBlocks(Display *display, XvMCContext *context, unsigned int num blocks->context_id = context->context_id; blocks->num_blocks = num_blocks; - blocks->blocks = malloc(BLOCK_SIZE * num_blocks); + blocks->blocks = MALLOC(BLOCK_SIZE * num_blocks); /* Since we don't have a VL type for blocks, set privData to the display so we can catch mismatches */ blocks->privData = display; @@ -38,7 +38,7 @@ Status XvMCDestroyBlocks(Display *display, XvMCBlockArray *blocks) assert(display); assert(blocks); assert(display == blocks->privData); - free(blocks->blocks); + FREE(blocks->blocks); return Success; } @@ -61,7 +61,7 @@ Status XvMCCreateMacroBlocks(Display *display, XvMCContext *context, unsigned in blocks->context_id = context->context_id; blocks->num_blocks = num_blocks; - blocks->macro_blocks = malloc(sizeof(XvMCMacroBlock) * num_blocks); + blocks->macro_blocks = MALLOC(sizeof(XvMCMacroBlock) * num_blocks); /* Since we don't have a VL type for blocks, set privData to the display so we can catch mismatches */ blocks->privData = display; @@ -73,7 +73,7 @@ Status XvMCDestroyMacroBlocks(Display *display, XvMCMacroBlockArray *blocks) assert(display); assert(blocks); assert(display == blocks->privData); - free(blocks->macro_blocks); + FREE(blocks->macro_blocks); return Success; } -- cgit v1.2.3