diff options
author | Younes Manton <younes.m@gmail.com> | 2008-06-28 20:16:01 -0400 |
---|---|---|
committer | Younes Manton <younes.m@gmail.com> | 2008-06-30 10:11:02 -0400 |
commit | 3933fec6bd62285506fecdc3a254306648cfefb2 (patch) | |
tree | 62e9f0e1832a853d789a67c22f434a56cf5e69be /src/gallium | |
parent | 0a6aec8c0f2173cfb95ce95d12b66f090ea0ba1f (diff) |
g3dvl: Support for field and frame based MC for progressive pictures.
Adds motion compensation (MC) support for frame- and field-based motion
prediction. Also includes various bug fixes and cleanup.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/state_trackers/g3dvl/tests/.gitignore | 2 | ||||
-rw-r--r-- | src/gallium/state_trackers/g3dvl/tests/Makefile | 7 | ||||
-rw-r--r-- | src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c | 214 | ||||
-rw-r--r-- | src/gallium/state_trackers/g3dvl/vl_context.c | 1528 | ||||
-rw-r--r-- | src/gallium/state_trackers/g3dvl/vl_context.h | 4 | ||||
-rw-r--r-- | src/gallium/state_trackers/g3dvl/vl_data.c | 3 | ||||
-rw-r--r-- | src/gallium/state_trackers/g3dvl/vl_surface.c | 404 | ||||
-rw-r--r-- | src/gallium/state_trackers/g3dvl/vl_types.h | 12 |
8 files changed, 1946 insertions, 228 deletions
diff --git a/src/gallium/state_trackers/g3dvl/tests/.gitignore b/src/gallium/state_trackers/g3dvl/tests/.gitignore index 939666da9a..9b1ec4e212 100644 --- a/src/gallium/state_trackers/g3dvl/tests/.gitignore +++ b/src/gallium/state_trackers/g3dvl/tests/.gitignore @@ -2,5 +2,5 @@ test_context test_surface test_i_rendering test_p_rendering +test_pf_rendering test_b_rendering - diff --git a/src/gallium/state_trackers/g3dvl/tests/Makefile b/src/gallium/state_trackers/g3dvl/tests/Makefile index 8f983593c3..45cefa2e57 100644 --- a/src/gallium/state_trackers/g3dvl/tests/Makefile +++ b/src/gallium/state_trackers/g3dvl/tests/Makefile @@ -20,7 +20,7 @@ LIBS += -lg3dvl -lsoftpipe -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lutil .PHONY = all clean -all: test_context test_surface test_i_rendering test_p_rendering test_b_rendering +all: test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering test_context: test_context.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} @@ -34,9 +34,12 @@ test_i_rendering: test_i_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o test_p_rendering: test_p_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} +test_pf_rendering: test_pf_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} + test_b_rendering: test_b_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} clean: - rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_b_rendering + rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering diff --git a/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c new file mode 100644 index 0000000000..43586fc553 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c @@ -0,0 +1,214 @@ +#include <stdio.h> +#include <X11/Xlib.h> 
+#include <vl_context.h> +#include <vl_surface.h> +#include <xsp_winsys.h> + +static const unsigned short ycbcr16x16_420[8*8*6] = +{ + 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, + 
0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, + + 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, + + 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E +}; + +static const signed short ycbcr16x16_420_2[8*8*6] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + const unsigned int window_width = video_width * 2, window_height = video_height * 2; + int quit = 0; + Display *display; + Window root, window; + Pixmap framebuffer; + XEvent event; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + struct VL_SURFACE *sfc, *ref_sfc; + struct VL_MOTION_VECTOR motion_vector = + { + {0, 0}, {32, 32} + }; + + display = XOpenDisplay(NULL); + root = XDefaultRootWindow(display); + window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); + framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); + + XSelectInput(display, window, ExposureMask | KeyPressMask); + XMapWindow(display, window); + XSync(display, 0); + + pipe = create_pipe_context(display); + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlCreateSurface(ctx, &sfc); + vlCreateSurface(ctx, &ref_sfc); + + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderPMacroBlock + ( + VL_FRAME_PICTURE, + VL_FIELD_FIRST, + 0, + 0, + VL_FIELD_MC, + &motion_vector, + 0x3F, + VL_DCT_FRAME_CODED, + (short*)ycbcr16x16_420_2, + ref_sfc, + sfc + ); + 
vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); + + puts("Press any key to continue..."); + + while (!quit) + { + XNextEvent(display, &event); + switch (event.type) + { + case Expose: + { + XCopyArea + ( + display, + framebuffer, + window, + XDefaultGC(display, XDefaultScreen(display)), + 0, + 0, + window_width, + window_height, + 0, + 0 + ); + break; + } + case KeyPress: + { + quit = 1; + break; + } + } + } + + vlDestroySurface(sfc); + vlDestroySurface(ref_sfc); + vlDestroyContext(ctx); + + XFreePixmap(display, framebuffer); + XDestroyWindow(display, window); + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 3b9afabbb8..d2b1ad7948 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -36,8 +36,8 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) const unsigned int semantic_names[3] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ }; const unsigned int semantic_indexes[3] = {0, 1, 2}; const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; @@ -353,7 +353,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) return 0; } -static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) +static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; const unsigned int num_input_attribs = 3; @@ -361,15 +361,15 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) const unsigned int input_semantic_names[3] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ }; const unsigned int output_semantic_names[4] = { 
TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ + TGSI_SEMANTIC_GENERIC /* Ref surface texcoords */ }; const unsigned int input_semantic_indexes[3] = {0, 1, 2}; const unsigned int output_semantic_indexes[4] = {0, 1, 2, 3}; @@ -430,14 +430,15 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) /* Declare constant inputs */ /* C[0] scales the normalized MB to cover 16x16 pixels, C[1] translates the macroblock into position on the surface - C[2] translates the ref surface texcoords to the ref macroblock */ + C[2] unused + C[3] translates the ref surface texcoords to the ref macroblock */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = 0; decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 2; + decl.u.DeclarationRange.Last = 3; ti += tgsi_build_full_declaration ( &decl, @@ -526,7 +527,7 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) ); } - /* add o3, t0, c2 ; Translate texcoords into position */ + /* add o3, t0, c3 ; Translate texcoords into position */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.NumDstRegs = 1; @@ -536,6 +537,264 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 
0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); + + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int num_input_attribs = 3; + const unsigned int num_output_attribs = 6; + const unsigned int input_semantic_names[3] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ + }; + const unsigned int output_semantic_names[6] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Top field surface texcoords */ + TGSI_SEMANTIC_GENERIC, /* Bottom field surface texcoords */ + TGSI_SEMANTIC_POSITION /* Pos */ + }; + const unsigned int input_semantic_indexes[3] = {0, 1, 2}; + const unsigned int output_semantic_indexes[6] = {0, 1, 2, 3, 4, 5}; + const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_input_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = 
TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = input_semantic_names[i]; + decl.Semantic.SemanticIndex = input_semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant inputs */ + /* C[0] scales the normalized MB to cover 16x16 pixels, + C[1] translates the macroblock into position on the surface + C[2] denormalizes pos components + C[3] translates the ref surface top field texcoords to the ref macroblock + C[4] translates the ref surface bottom field texcoords to the ref macroblock */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 4; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_output_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = output_semantic_names[i]; + decl.Semantic.SemanticIndex = output_semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + 
inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t1, t0, c1 ; Translate vertex into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* + mov o1, i1 ; Move luma & chroma texcoords to output + mov o2, i2 + */ + for (i = 1; i < num_output_attribs - 1; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += 
tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* add o3, t0, c3 ; Translate top field texcoords into position + add o4, t0, c4 ; Translate bottom field texcoords into position */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i + 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul o5, t1, c2 ; Denorm pos for fragment shader */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 5; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; inst.FullSrcRegisters[1].SrcRegister.Index = 2; ti += tgsi_build_full_instruction ( @@ -560,14 +819,14 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) vs.tokens = tokens; - context->states.mc.p_vs = pipe->create_vs_state(pipe, &vs); + context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); free(tokens); return 0; } -static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) +static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; @@ -837,14 +1096,432 @@ static int 
vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) fs.tokens = tokens; - context->states.mc.p_fs = pipe->create_fs_state(pipe, &fs); + context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); + + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 200; + const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (texcoords) + I[0] Luma texcoords + I[1] Chroma texcoords + I[2] Ref top field surface texcoords + I[3] Ref bottom field surface texcoords + I[4] Denormalized texel pos */ + for (i = 0; i < 5; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = i + 1; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant input */ + /* C[0] is a multiplier to use when concatenating differential into a single channel + C[1] is a bias to get differential back to -1,1 range 
+ C[2] is constants 2 and 1/2 for Y%2 field selector */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 2; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare samplers */ + for (i = 0; i < 4; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + mov t1.x, t0.w ; Move high part from .w channel to .x + tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + mov t1.y, t0.w ; Move high part from .w channel to .y + tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + mov t1.z, t0.w ; Move high part from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst.Instruction.NumSrcRegs = 2; + 
inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value 
*/ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field + tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = i + 1; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + 
inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* XXX: Pos values off by 0.5 for rounding? */ + /* sub t4, i4.y, c2.x ; Sub 0.5 from position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 4; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mul t3, t4, c2.x ; Divide pos y coord by 2 (mul by 0.5) */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX 
= TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_FLOOR; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* sub t3, t4, t3 ; Subtract from y to get y % 2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = 
TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* lerp t1, t3, t1, t2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 
0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + fs.tokens = tokens; + + context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); free(tokens); return 0; } -static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) +static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; const unsigned int num_input_attribs = 3; @@ -922,15 +1599,17 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) /* Declare constant inputs */ /* C[0] scales the normalized MB to cover 16x16 pixels, C[1] translates the macroblock into position on the surface - C[2] translates the past surface texcoords to the ref macroblock - C[3] translates the future surface texcoords to the ref macroblock */ + C[2] unused + C[3] translates the past surface texcoords to the ref macroblock + C[4] unused + C[5] translates the future surface texcoords to the ref macroblock */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = 0; decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 3; + decl.u.DeclarationRange.Last = 5; ti += tgsi_build_full_declaration ( &decl, @@ -1019,8 +1698,8 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) ); } - /* add o3, t0, c2 ; Translate past surface texcoords into position - add o4, t0, c3 ; Repeat for future surface texcoords */ + /* add o3, t0, c3 ; Translate past surface texcoords into position + add o4, t0, c5 ; Repeat for future surface texcoords */ for (i = 0; i < 2; ++i) { inst = tgsi_default_full_instruction(); @@ -1032,7 +1711,252 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[0].SrcRegister.Index = 0; 
inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 2; + inst.FullSrcRegisters[1].SrcRegister.Index = i * 2 + 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); + + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int num_input_attribs = 3; + const unsigned int num_output_attribs = 8; + const unsigned int input_semantic_names[3] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ + }; + const unsigned int output_semantic_names[8] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Top field past surface texcoords */ + TGSI_SEMANTIC_GENERIC, /* Bottom field past surface texcoords */ + TGSI_SEMANTIC_GENERIC, /* Top field future surface texcoords */ + TGSI_SEMANTIC_GENERIC, /* Bottom field future surface texcoords */ + TGSI_SEMANTIC_POSITION /* Pos */ + }; + const unsigned int input_semantic_indexes[3] = {0, 1, 2}; + const unsigned int output_semantic_indexes[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + 
assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_input_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = input_semantic_names[i]; + decl.Semantic.SemanticIndex = input_semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant inputs */ + /* C[0] scales the normalized MB to cover 16x16 pixels, + C[1] translates the macroblock into position on the surface + C[2] denormalizes pos components + C[3] translates the past surface top field texcoords to the ref macroblock + C[4] translates the past surface bottom field texcoords to the ref macroblock + C[5] translates the future surface top field texcoords to the ref macroblock + C[6] translates the future surface bottom field texcoords to the ref macroblock */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 6; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_output_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; 
+ decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = output_semantic_names[i]; + decl.Semantic.SemanticIndex = output_semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t1, t0, c1 ; Translate vertex into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 1; + 
inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* + mov o1, i1 ; Move luma & chroma texcoords to output + mov o2, i2 + */ + for (i = 1; i < num_input_attribs; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* add o3, t0, c3 ; Translate top field past texcoords into position + add o4, t0, c4 ; Translate bottom field past texcoords into position + add o5, t0, c5 ; Translate top field future texcoords into position + add o6, t0, c6 ; Translate bottom field future texcoords into position */ + for (i = 0; i < 4; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i + 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; ti += tgsi_build_full_instruction ( &inst, @@ -1041,6 +1965,25 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) max_tokens - ti ); } + + /* mul o7, t1, c2 ; Denorm pos for fragment shader */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; +
inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 7; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); /* END */ inst = tgsi_default_full_instruction(); @@ -1057,14 +2000,14 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) vs.tokens = tokens; - context->states.mc.b_vs = pipe->create_vs_state(pipe, &vs); + context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); free(tokens); return 0; } -static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) +static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; @@ -1123,14 +2066,15 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) /* Declare constant input */ /* C[0] is a multiplier to use when concatenating differential into a single channel - C[0] is a bias to get differential back to -1,1 range*/ + C[1] is a bias to get differential back to -1,1 range + C[2] contains 0.5 in channel X for use as a weight to blend past and future samples */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = 0; decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 1; + decl.u.DeclarationRange.Last = 2; ti += tgsi_build_full_declaration ( &decl, @@ -1304,7 +2248,259 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) ); } - /* add t0, t0, t1 ; Add past and differential to form partial output */ + /* lerp t1, c2.x, t1, t2 ; Blend past and future 
texels */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[0].SrcRegister.Index = 2; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + fs.tokens = tokens; + + context->states.mc.b_fs[0] = 
pipe->create_fs_state(pipe, &fs); + + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 200; + const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (texcoords) + I[0] Luma texcoords + I[1] Chroma texcoords + I[2] Past top field surface texcoords + I[3] Past bottom field surface texcoords + I[4] Future top field surface texcoords + I[5] Future bottom field surface texcoords + I[6] Denormalized texel pos */ + for (i = 0; i < 7; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = i + 1; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant input */ + /* C[0] is a multiplier to use when concatenating differential into a single channel + C[1] is a bias to get differential back to -1,1 range + C[2] is constants 2 and 1/2 for Y%2 field selector */ + decl = 
tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 2; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare samplers */ + for (i = 0; i < 5; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + mov t1.x, t0.w ; Move high part from .w channel to .x + tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + mov t1.y, t0.w ; Move high part from .w channel to .y + tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + mov t1.z, t0.w ; Move high part from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + 
inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t1, t1, c0 ; Multiply high part by multiplier to get back its full value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ inst = tgsi_default_full_instruction();
inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.NumDstRegs = 1; @@ -1323,7 +2519,251 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) max_tokens - ti ); - /* add o0, t0, t2 ; Add future and differential to form final output */ + /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* XXX: Pos values off by 0.5 for rounding? */ + /* sub t4, i6.y, c2.x ; Sub 0.5 from position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 4; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 6; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + 
inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mul t3, t4, c2.x ; Divide pos y coord by 2 (mul by 0.5) */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_FLOOR; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = 
TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* sub t3, t4, t3 ; Subtract from y to get y % 2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* tex2d t1, i2, s3 ; Read texel from past macroblock top field + tex2d t2, i3, s3 ; Read texel from past macroblock bottom field */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = i + 1; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + 
&tokens[ti], + header, + max_tokens - ti + ); + } + + /* lerp t1, t3, t1, t2 ; Select past top/bottom field texel using y % 2 (t3) as the weight */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* tex2d t4, i4, s4 ; Read texel from future macroblock top field + tex2d t5, i5, s4 ; Read texel from future macroblock bottom field */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = i + 4; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 4; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* lerp t2, t3, t4, t5 ; Select future top/bottom field texel using y % 2 (t3) as the weight */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 2; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File =
TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 4; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 5; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[0].SrcRegister.Index = 2; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.NumDstRegs = 1; @@ -1333,7 +2773,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; ti +=
tgsi_build_full_instruction ( &inst, @@ -1357,7 +2797,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) fs.tokens = tokens; - context->states.mc.b_fs = pipe->create_fs_state(pipe, &fs); + context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); free(tokens); @@ -1491,7 +2931,6 @@ static int vlInitMC(struct VL_CONTEXT *context) context->states.mc.render_target.height = context->video_height; context->states.mc.render_target.num_cbufs = 1; /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */ - /*context->states.mc.render_target.cbufs[0] = ;*/ context->states.mc.render_target.zsbuf = NULL; filters[0] = PIPE_TEX_FILTER_NEAREST; @@ -1530,6 +2969,7 @@ static int vlInitMC(struct VL_CONTEXT *context) template.depth[0] = 1; template.compressed = 0; template.cpp = 2; + context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template); if (context->video_format == VL_FORMAT_YCBCR_420) @@ -1548,10 +2988,14 @@ static int vlInitMC(struct VL_CONTEXT *context) vlCreateVertexShaderIMC(context); vlCreateFragmentShaderIMC(context); - vlCreateVertexShaderPMC(context); - vlCreateFragmentShaderPMC(context); - vlCreateVertexShaderBMC(context); - vlCreateFragmentShaderBMC(context); + vlCreateVertexShaderFramePMC(context); + vlCreateVertexShaderFieldPMC(context); + vlCreateFragmentShaderFramePMC(context); + vlCreateFragmentShaderFieldPMC(context); + vlCreateVertexShaderFrameBMC(context); + vlCreateVertexShaderFieldBMC(context); + vlCreateFragmentShaderFrameBMC(context); + vlCreateFragmentShaderFieldBMC(context); vlCreateDataBufsMC(context); return 0; @@ -1575,10 +3019,14 @@ static int vlDestroyMC(struct VL_CONTEXT *context) context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs); context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs); - context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs); - context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs); - 
context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs); - context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs); + + for (i = 0; i < 2; ++i) + { + context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]); + context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]); + context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]); + context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]); + } context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer); context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer); @@ -1982,7 +3430,7 @@ static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) /* TODO: Refactor this into a seperate function, - allow changing the csc matrix at runtime to switch between regular & full versions + allow changing the CSC matrix at runtime to switch between regular & full versions */ memcpy ( @@ -2166,7 +3614,7 @@ static int vlDestroy(struct VL_CONTEXT *context) { assert(context); - /* Must unbind shaders before we can delete them for some reason */ + /* XXX: Must unbind shaders before we can delete them for some reason */ context->pipe->bind_vs_state(context->pipe, NULL); context->pipe->bind_fs_state(context->pipe, NULL); diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index f26a4c5b6a..8a12318073 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -34,8 +34,8 @@ struct VL_CONTEXT struct pipe_framebuffer_state render_target; struct pipe_sampler_state *samplers[5]; struct pipe_texture *textures[5]; - struct pipe_shader_state *i_vs, *p_vs, *b_vs; - struct pipe_shader_state *i_fs, *p_fs, *b_fs; + struct pipe_shader_state *i_vs, *p_vs[2], *b_vs[2]; + struct pipe_shader_state *i_fs, *p_fs[2], *b_fs[2]; struct pipe_vertex_buffer vertex_bufs[3]; struct 
pipe_vertex_element vertex_buf_elems[3]; struct pipe_constant_buffer vs_const_buf, fs_const_buf; diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c index 27893aee95..7e6ee8ac12 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -87,7 +87,8 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*) const struct VL_MC_FS_CONSTS vl_mc_fs_consts = { {256.0f, 256.0f, 256.0f, 0.0f}, - {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f} + {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 0.0f} }; /* diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 6451e54953..d2220d7abf 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -8,6 +8,85 @@ #include "vl_context.h" #include "vl_defs.h" +static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFrameCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + return 0; +} + +static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFieldCodedDiffBlock(short *src, short 
*dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + return 0; +} + +static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch + x] = 0x100; + + return 0; +} + static int vlGrabBlocks ( struct VL_CONTEXT *context, @@ -19,7 +98,7 @@ static int vlGrabBlocks { struct pipe_surface *tex_surface; short *texels; - unsigned int b, x, y, y2; + unsigned int tb, sb = 0; assert(context); assert(blocks); @@ -33,134 +112,81 @@ static int vlGrabBlocks texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - for (b = 0; b < 4; ++b) + for (tb = 0; tb < 4; ++tb) { - if ((coded_block_pattern >> b) & 1) + if ((coded_block_pattern >> (5 - tb)) & 1) { if (dct_type == VL_DCT_FRAME_CODED) - { if (sample_type == VL_FULL_SAMPLE) - { - for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) - memcpy - ( - texels + y * tex_surface->pitch, - blocks + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - } + vlGrabFrameCodedFullBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, + tex_surface->pitch + ); else - { - for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = - blocks[y * VL_BLOCK_WIDTH + x] + 0x100; - } - } + vlGrabFrameCodedDiffBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, + tex_surface->pitch + ); else - { if (sample_type == VL_FULL_SAMPLE) - { - for + vlGrabFieldCodedFullBlock ( - y = 
VL_BLOCK_HEIGHT * (b % 2), y2 = VL_BLOCK_HEIGHT * b; - y < VL_BLOCK_HEIGHT * ((b % 2) + 1); - y += 2, ++y2 - ) - memcpy - ( - texels + y * tex_surface->pitch, - blocks + y2 * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - for - ( - y = VL_BLOCK_HEIGHT * ((b % 2) + 2); - y < VL_BLOCK_HEIGHT * (((b % 2) + 2) + 1); - y += 2, ++y2 - ) - memcpy - ( - texels + y * tex_surface->pitch, - blocks + y2 * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - } + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + (tb % 2) * tex_surface->pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_surface->pitch, + tex_surface->pitch + ); else - { - for + vlGrabFieldCodedDiffBlock ( - y = VL_BLOCK_HEIGHT * (b % 2), y2 = VL_BLOCK_HEIGHT * b; - y < VL_BLOCK_HEIGHT * ((b % 2) + 1); - y += 2, ++y2 - ) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = - blocks[y2 * VL_BLOCK_WIDTH + x] + 0x100; - for - ( - y = VL_BLOCK_HEIGHT * ((b % 2) + 2); - y < VL_BLOCK_HEIGHT * (((b % 2) + 2) + 1); - y += 2, ++y2 - ) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = - blocks[y2 * VL_BLOCK_WIDTH + x] + 0x100; - } - } + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + (tb % 2) * tex_surface->pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_surface->pitch, + tex_surface->pitch + ); + ++sb; } else - { - for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) - { - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = 0x100; - } - } + vlGrabNoBlock(texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, tex_surface->pitch); } pipe_surface_unmap(tex_surface); /* TODO: Implement 422, 444 */ - for (b = 0; b < 2; ++b) + for (tb = 0; tb < 2; ++tb) { tex_surface = context->pipe->screen->get_tex_surface - ( - context->pipe->screen, - context->states.mc.textures[b + 1], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); + ( + context->pipe->screen, + context->states.mc.textures[tb + 1], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); texels = 
pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - if ((coded_block_pattern >> (b + 4)) & 1) - { + if ((coded_block_pattern >> (1 - tb)) & 1) + { if (sample_type == VL_FULL_SAMPLE) - { - for (y = 0; y < tex_surface->height; ++y) - memcpy - ( - texels + y * tex_surface->pitch, - blocks + VL_BLOCK_SIZE * (b + 4) + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - } + vlGrabFrameCodedFullBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels, + tex_surface->pitch + ); else - { - for (y = 0; y < tex_surface->height; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = - blocks[VL_BLOCK_SIZE * (b + 4) + y * VL_BLOCK_WIDTH + x] + 0x100; - } + vlGrabFrameCodedDiffBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels, + tex_surface->pitch + ); + + ++sb; } else - { - for (y = 0; y < tex_surface->height; ++y) - { - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = 0x100; - } - } + vlGrabNoBlock(texels, tex_surface->pitch); pipe_surface_unmap(tex_surface); } @@ -229,41 +255,35 @@ int vlRenderIMacroBlock ) { struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vscbdata; + struct VL_MC_VS_CONSTS *vs_consts; assert(blocks); assert(surface); /* TODO: Implement interlaced rendering */ - /*assert(picture_type == VL_FRAME_PICTURE);*/ if (picture_type != VL_FRAME_PICTURE) - { - /*fprintf(stderr, "field picture (I) unimplemented, ignoring\n");*/ return 0; - } pipe = surface->context->pipe; - vscbdata = pipe->winsys->buffer_map + vs_consts = pipe->winsys->buffer_map ( pipe->winsys, surface->context->states.mc.vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE ); - vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vscbdata->scale.z = 1.0f; - vscbdata->scale.w = 1.0f; - vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / 
(float)surface->height; - vscbdata->mb_pos_trans.z = 0.0f; - vscbdata->mb_pos_trans.w = 0.0f; + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface ( pipe->screen, @@ -276,6 +296,8 @@ int vlRenderIMacroBlock pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); return 0; @@ -297,7 +319,7 @@ int vlRenderPMacroBlock ) { struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vscbdata; + struct VL_MC_VS_CONSTS *vs_consts; assert(motion_vectors); assert(blocks); @@ -305,46 +327,55 @@ int vlRenderPMacroBlock assert(surface); /* TODO: Implement interlaced rendering */ - /*assert(picture_type == VL_FRAME_PICTURE);*/ if (picture_type != VL_FRAME_PICTURE) - { - /*fprintf(stderr, "field picture (P) unimplemented, ignoring\n");*/ return 0; - } - /* TODO: Implement field based motion compensation */ - /*assert(mc_type == VL_FRAME_MC);*/ - if (mc_type != VL_FRAME_MC) - { - /*fprintf(stderr, "field MC (P) unimplemented, ignoring\n");*/ + /* TODO: Implement other MC types */ + if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) return 0; - } pipe = surface->context->pipe; - vscbdata = pipe->winsys->buffer_map + vs_consts = 
pipe->winsys->buffer_map ( pipe->winsys, surface->context->states.mc.vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE ); - vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vscbdata->scale.z = 1.0f; - vscbdata->scale.w = 1.0f; - vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vscbdata->mb_pos_trans.z = 0.0f; - vscbdata->mb_pos_trans.w = 0.0f; - vscbdata->mb_tc_trans[0].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width; - vscbdata->mb_tc_trans[0].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height; - vscbdata->mb_tc_trans[0].z = 0.0f; - vscbdata->mb_tc_trans[0].w = 0.0f; + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + + if (mc_type == VL_FIELD_MC) + { + vs_consts->denorm.x = (float)surface->width; + vs_consts->denorm.y = (float)surface->height; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * 
VL_MACROBLOCK_HEIGHT + motion_vector->bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[1]); + pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[0]); + pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[0]); + } pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface ( pipe->screen, @@ -356,8 +387,8 @@ int vlRenderPMacroBlock surface->context->states.mc.textures[3] = ref_surface->texture; pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); - pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs); - pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs); + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); @@ -381,7 +412,7 @@ int vlRenderBMacroBlock ) { struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vscbdata; + struct VL_MC_VS_CONSTS *vs_consts; assert(motion_vectors); assert(blocks); @@ -389,50 +420,63 @@ int vlRenderBMacroBlock assert(surface); /* TODO: Implement interlaced rendering */ - /*assert(picture_type == VL_FRAME_PICTURE);*/ if (picture_type != VL_FRAME_PICTURE) - { - /*fprintf(stderr, "field picture (B) unimplemented, ignoring\n");*/ return 0; - } - /* TODO: Implement field based motion compensation */ - /*assert(mc_type == VL_FRAME_MC);*/ - if (mc_type != VL_FRAME_MC) - { - /*fprintf(stderr, "field MC (B) unimplemented, ignoring\n");*/ + /* TODO: Implement other MC types 
*/ + if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) return 0; - } pipe = surface->context->pipe; - vscbdata = pipe->winsys->buffer_map + vs_consts = pipe->winsys->buffer_map ( pipe->winsys, surface->context->states.mc.vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE ); - vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vscbdata->scale.z = 1.0f; - vscbdata->scale.w = 1.0f; - vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vscbdata->mb_pos_trans.z = 0.0f; - vscbdata->mb_pos_trans.w = 0.0f; - vscbdata->mb_tc_trans[0].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width; - vscbdata->mb_tc_trans[0].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height; - vscbdata->mb_tc_trans[0].z = 0.0f; - vscbdata->mb_tc_trans[0].w = 0.0f; - vscbdata->mb_tc_trans[1].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width; - vscbdata->mb_tc_trans[1].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height; - vscbdata->mb_tc_trans[1].z = 0.0f; - vscbdata->mb_tc_trans[1].w = 0.0f; + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + 
motion_vector[0].top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[1].top_field.z = 0.0f; + vs_consts->mb_tc_trans[1].top_field.w = 0.0f; + + if (mc_type == VL_FIELD_MC) + { + vs_consts->denorm.x = (float)surface->width; + vs_consts->denorm.y = (float)surface->height; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[1]); + pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[0]); + pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[0]); + } pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface ( pipe->screen, @@ 
-445,8 +489,8 @@ int vlRenderBMacroBlock surface->context->states.mc.textures[4] = future_surface->texture; pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); - pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs); - pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs); + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); @@ -513,7 +557,7 @@ int vlPutSurface destw, desth, PIPE_FORMAT_A8R8G8B8_UNORM, - /*XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ + /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, 0 ); diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h index 7040b74503..97753699db 100644 --- a/src/gallium/state_trackers/g3dvl/vl_types.h +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -36,7 +36,9 @@ enum VL_SAMPLE_TYPE enum VL_MC_TYPE { VL_FIELD_MC, - VL_FRAME_MC + VL_FRAME_MC, + VL_DUAL_PRIME_MC, + VL_16x8_MC = VL_FRAME_MC }; struct VL_VERTEX4F @@ -58,13 +60,19 @@ struct VL_MC_VS_CONSTS { struct VL_VERTEX4F scale; struct VL_VERTEX4F mb_pos_trans; - struct VL_VERTEX4F mb_tc_trans[2]; + struct VL_VERTEX4F denorm; + struct + { + struct VL_VERTEX4F top_field; + struct VL_VERTEX4F bottom_field; + } mb_tc_trans[2]; }; struct VL_MC_FS_CONSTS { struct VL_VERTEX4F multiplier; struct VL_VERTEX4F bias; + struct VL_VERTEX4F y_divider; }; struct VL_CSC_FS_CONSTS |