summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_context.c248
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_data.c3
-rw-r--r--src/gallium/state_trackers/g3dvl/vl_surface.c178
3 files changed, 153 insertions, 276 deletions
diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c
index 638900b3f4..542ba996a7 100644
--- a/src/gallium/state_trackers/g3dvl/vl_context.c
+++ b/src/gallium/state_trackers/g3dvl/vl_context.c
@@ -365,7 +365,7 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context)
static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
{
- const unsigned int max_tokens = 50;
+ const unsigned int max_tokens = 100;
struct pipe_context *pipe;
struct pipe_shader_state fs;
@@ -402,11 +402,19 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
}
+
+ /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
/* decl o0 ; Fragment color */
decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+ /* decl t0, t1 */
+ decl = vl_decl_temps(0, 1);
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
/*
* decl s0 ; Sampler for luma texture
* decl s1 ; Sampler for chroma Cb texture
@@ -419,16 +427,30 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
}
/*
- * tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel
- * tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel
- * tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel
- */
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
+ */
for (i = 0; i < 3; ++i)
{
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
}
+
+ /* mul o0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+ ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* end */
inst = vl_end();
@@ -701,10 +723,7 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context)
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
}
- /*
- * decl c0 ; Multiplier to shift 9th bit of differential into place
- * decl c1 ; Bias to get differential back to a signed value
- */
+ /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
@@ -729,38 +748,29 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context)
}
/*
- * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels
- * mov t1.x, t0.w ; Move 9th bit from .w channel to .x
- * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels
- * mov t1.y, t0.w ; Move 9th bit from .w channel to .y
- * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels
- * mov t1.z, t0.w ; Move 9th bit from .w channel to .z
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
*/
for (i = 0; i < 3; ++i)
{
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
- inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0);
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
}
- /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */
- inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* sub t0, t0, c1 ; Subtract bias to get back signed values */
- inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+ /* mul t0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* tex2d t1, i2, s3 ; Read texel from ref macroblock */
@@ -826,11 +836,10 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
}
/*
- * decl c0 ; Multiplier to shift 9th bit of differential into place
- * decl c1 ; Bias to get differential back to a signed value
- * decl c2 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
+ * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+ * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
*/
- decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2);
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
/* decl o0 ; Fragment color */
@@ -854,42 +863,35 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
}
/*
- * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels
- * mov t1.x, t0.w ; Move 9th bit from .w channel to .x
- * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels
- * mov t1.y, t0.w ; Move 9th bit from .w channel to .y
- * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels
- * mov t1.z, t0.w ; Move 9th bit from .w channel to .z
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
*/
for (i = 0; i < 3; ++i)
{
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
- inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0);
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
}
- /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */
- inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* sub t0, t0, c1 ; Subtract bias to get back signed values */
- inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+ /* mul t0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field
- tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */
+ /*
+ * tex2d t1, i2, s3 ; Read texel from ref macroblock top field
+ * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field
+ */
for (i = 0; i < 2; ++i)
{
inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
@@ -897,8 +899,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
}
/* XXX: Pos values off by 0.5? */
- /* sub t4, i4.y, c2.x ; Sub 0.5 from denormalized pos */
- inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_INPUT, 2);
+ /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */
+ inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1);
inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
@@ -909,8 +911,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2);
+ /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -921,8 +923,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- /* mul t3, t3, c2.y ; Multiply by 2 */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2);
+ /* mul t3, t3, c1.y ; Multiply by 2 */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
@@ -1224,11 +1226,10 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
}
/*
- * decl c0 ; Multiplier to shift 9th bit of differential into place
- * decl c1 ; Bias to get differential back to a signed value
- * decl c2 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
+ * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+ * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
*/
- decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2);
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
/* decl o0 ; Fragment color */
@@ -1253,38 +1254,29 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
}
/*
- * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels
- * mov t1.x, t0.w ; Move 9th bit from .w channel to .x
- * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels
- * mov t1.y, t0.w ; Move 9th bit from .w channel to .y
- * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels
- * mov t1.z, t0.w ; Move 9th bit from .w channel to .z
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
*/
for (i = 0; i < 3; ++i)
{
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
- inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0);
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
}
- /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */
- inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* sub t0, t0, c1 ; Subtract bias to get back signed values */
- inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+ /* mul t0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/*
@@ -1297,8 +1289,8 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
}
- /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */
- inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+ inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -1366,12 +1358,11 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
}
/*
- * decl c0 ; Multiplier to shift 9th bit of differential into place
- * decl c1 ; Bias to get differential back to a signed value
- * decl c2 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
+ * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+ * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
* ; and for Y-mod-2 top/bottom field selection
*/
- decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2);
+ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
/* decl o0 ; Fragment color */
@@ -1396,43 +1387,34 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
}
/*
- * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels
- * mov t1.x, t0.w ; Move 9th bit from .w channel to .x
- * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels
- * mov t1.y, t0.w ; Move 9th bit from .w channel to .y
- * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels
- * mov t1.z, t0.w ; Move 9th bit from .w channel to .z
+ * tex2d t1, i0, s0 ; Read texel from luma texture
+ * mov t0.x, t1.x ; Move luma sample into .x component
+ * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+ * mov t0.y, t1.x ; Move Cb sample into .y component
+ * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
+ * mov t0.z, t1.x ; Move Cr sample into .z component
*/
for (i = 0; i < 3; ++i)
{
- inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
- inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W;
+ inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0);
+ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+ inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
- inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
}
- /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */
- inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
- ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
- /* sub t0, t0, c1 ; Subtract bias to get back signed values */
- inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
+ /* mul t0, t0, c0 ; Rescale texel to correct range */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
/* XXX: Pos values off by 0.5? */
- /* sub t4, i6.y, c2.x ; Sub 0.5 from denormalized pos */
- inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 2);
+ /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */
+ inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1);
inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
@@ -1443,8 +1425,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */
- inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2);
+ /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -1455,8 +1437,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- /* mul t3, t3, c2.y ; Multiply by 2 */
- inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2);
+ /* mul t3, t3, c1.y ; Multiply by 2 */
+ inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
@@ -1497,8 +1479,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
- /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */
- inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+ /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+ inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
@@ -1678,7 +1660,7 @@ static int vlInitMC(struct VL_CONTEXT *context)
memset(&template, 0, sizeof(struct pipe_texture));
template.target = PIPE_TEXTURE_2D;
- template.format = PIPE_FORMAT_A8L8_UNORM;
+ template.format = PIPE_FORMAT_R16_SNORM;
template.last_level = 0;
template.width[0] = 8;
template.height[0] = 8 * 4;
diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c
index 7e6ee8ac12..0e5c8c77f9 100644
--- a/src/gallium/state_trackers/g3dvl/vl_data.c
+++ b/src/gallium/state_trackers/g3dvl/vl_data.c
@@ -86,8 +86,7 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)
*/
const struct VL_MC_FS_CONSTS vl_mc_fs_consts =
{
- {256.0f, 256.0f, 256.0f, 0.0f},
- {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f},
+ {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
{0.5f, 2.0f, 0.0f, 0.0f}
};
diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c
index 9b91ab4e22..6d4e14b95c 100644
--- a/src/gallium/state_trackers/g3dvl/vl_surface.c
+++ b/src/gallium/state_trackers/g3dvl/vl_surface.c
@@ -63,7 +63,7 @@ static int vlTransformBlock(short *src, short *dst, short bias)
return 0;
}
-static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch)
+static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
{
unsigned int y;
@@ -78,18 +78,7 @@ static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pi
return 0;
}
-static int vlGrabFrameCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch)
-{
- unsigned int x, y;
-
- for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
- for (x = 0; x < VL_BLOCK_WIDTH; ++x)
- dst[y * dst_pitch + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100;
-
- return 0;
-}
-
-static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pitch)
+static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
{
unsigned int y;
@@ -114,30 +103,17 @@ static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pi
return 0;
}
-static int vlGrabFieldCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch)
-{
- unsigned int x, y;
-
- for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y)
- for (x = 0; x < VL_BLOCK_WIDTH; ++x)
- dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100;
-
- dst += VL_BLOCK_HEIGHT * dst_pitch;
-
- for (; y < VL_BLOCK_HEIGHT; ++y)
- for (x = 0; x < VL_BLOCK_WIDTH; ++x)
- dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100;
-
- return 0;
-}
-
static int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
{
- unsigned int x, y;
+ unsigned int y;
for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
- for (x = 0; x < VL_BLOCK_WIDTH; ++x)
- dst[y * dst_pitch + x] = 0x100;
+ memset
+ (
+ dst + y * dst_pitch,
+ 0,
+ VL_BLOCK_WIDTH * 2
+ );
return 0;
}
@@ -156,7 +132,6 @@ static int vlGrabBlocks
unsigned int tex_pitch;
unsigned int tb, sb = 0;
- const int do_idct = 1;
short temp_block[64];
assert(context);
@@ -176,80 +151,26 @@ static int vlGrabBlocks
{
if ((coded_block_pattern >> (5 - tb)) & 1)
{
+ if (sample_type == VL_FULL_SAMPLE)
+ vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
+ else
+ vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
+
if (dct_type == VL_DCT_FRAME_CODED)
- if (sample_type == VL_FULL_SAMPLE)
- if (do_idct)
- {
- vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
- vlGrabFrameCodedFullBlock
- (
- temp_block,
- texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
- tex_pitch
- );
- }
- else
- vlGrabFrameCodedFullBlock
- (
- blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
- texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
- tex_pitch
- );
- else
- if (do_idct)
- {
- vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
- vlGrabFrameCodedDiffBlock
- (
- temp_block,
- texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
- tex_pitch
- );
- }
- else
- vlGrabFrameCodedDiffBlock
- (
- blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
- texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
- tex_pitch
- );
+ vlGrabFrameCodedBlock
+ (
+ temp_block,
+ texels + tb * tex_pitch * VL_BLOCK_HEIGHT,
+ tex_pitch
+ );
else
- if (sample_type == VL_FULL_SAMPLE)
- if (do_idct)
- {
- vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
- vlGrabFieldCodedFullBlock
- (
- temp_block,
- texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
- tex_pitch
- );
- }
- else
- vlGrabFieldCodedFullBlock
- (
- blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
- texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
- tex_pitch
- );
- else
- if (do_idct)
- {
- vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
- vlGrabFieldCodedDiffBlock
- (
- temp_block,
- texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
- tex_pitch
- );
- }
- else
- vlGrabFieldCodedDiffBlock
- (
- blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
- texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
- tex_pitch
- );
+ vlGrabFieldCodedBlock
+ (
+ blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
+ texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch,
+ tex_pitch
+ );
+
++sb;
}
else
@@ -272,43 +193,18 @@ static int vlGrabBlocks
tex_pitch = tex_surface->stride / tex_surface->block.size;
if ((coded_block_pattern >> (1 - tb)) & 1)
- {
+ {
if (sample_type == VL_FULL_SAMPLE)
- if (do_idct)
- {
- vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
- vlGrabFrameCodedFullBlock
- (
- temp_block,
- texels,
- tex_pitch
- );
- }
- else
- vlGrabFrameCodedFullBlock
- (
- blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
- texels,
- tex_pitch
- );
+ vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128);
else
- if (do_idct)
- {
- vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
- vlGrabFrameCodedDiffBlock
- (
- temp_block,
- texels,
- tex_pitch
- );
- }
- else
- vlGrabFrameCodedDiffBlock
- (
- blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
- texels,
- tex_pitch
- );
+ vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0);
+
+ vlGrabFrameCodedBlock
+ (
+ temp_block,
+ texels,
+ tex_pitch
+ );
++sb;
}