diff options
author | Younes Manton <younes.m@gmail.com> | 2008-09-04 04:10:43 -0400 |
---|---|---|
committer | Younes Manton <younes.m@gmail.com> | 2008-09-07 15:47:44 -0400 |
commit | 2f41095e04036654259bc2efc3324a92f32c8ca1 (patch) | |
tree | 57c874fe47aedbb1900b64a2343370c88f0927d3 /src/gallium/state_trackers/g3dvl | |
parent | 0bfbe834d35946fe75eb991d03ed777e115f418d (diff) |
g3dvl: Zero-block optimization.
For blocks whose contents are derived completely from ref surfaces, don't
bother zero-ing the corresponding block in the luma/chroma textures, except
for the first such luma, chroma Cb, and chroma Cr each frame. All later
zero blocks are textured from that first zero block.
Diffstat (limited to 'src/gallium/state_trackers/g3dvl')
-rw-r--r-- | src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 92 |
1 files changed, 54 insertions, 38 deletions
diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index b5aa79cdb4..93839e8aa9 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -63,6 +63,7 @@ struct vlR16SnormBufferedMC struct vlSurface *buffered_surface; struct vlSurface *past_surface, *future_surface; struct vlVertex2f surface_tex_inv_size; + struct vlVertex2f zero_block[3]; unsigned int num_macroblocks; struct vlMpeg2MacroBlock *macroblocks; @@ -203,8 +204,13 @@ static inline int vlGrabBlocks ++sb; } - else + else if (mc->zero_block[0].x < 0.0f) + { vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch); + + mc->zero_block[0].x = (mbpx + x * 8) * mc->surface_tex_inv_size.x; + mc->zero_block[0].y = (mbpy + y * 8) * mc->surface_tex_inv_size.y; + } } } @@ -241,9 +247,14 @@ static inline int vlGrabBlocks ++sb; } - else + else if (mc->zero_block[tb + 1].x < 0.0f) + { vlGrabNoBlock(texels, tex_pitch); + mc->zero_block[tb + 1].x = (mbpx << 1) * mc->surface_tex_inv_size.x; + mc->zero_block[tb + 1].y = (mbpy << 1) * mc->surface_tex_inv_size.y; + } + pipe_surface_unmap(tex_surface); } @@ -315,7 +326,7 @@ static inline int vlGrabMacroBlock return 0; } -#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zx, zy) \ +#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \ (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \ (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \ @@ -323,62 +334,62 @@ static inline int vlGrabMacroBlock (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \ \ - /*if ((cbp) & (lm)) \ - {*/ \ + if ((cbp) & (lm)) \ + { \ (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \ (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \ (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \ (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - /*} \ + } \ else \ { \ - (vb)[0].luma_tc.x = (zx); (vb)[0].luma_tc.y = (zy); \ - (vb)[1].luma_tc.x = (zx); (vb)[1].luma_tc.y = (zy) + (hy); \ - (vb)[2].luma_tc.x = (zx) + (hx); (vb)[2].luma_tc.y = (zy); \ - (vb)[3].luma_tc.x = (zx) + (hx); (vb)[3].luma_tc.y = (zy); \ - (vb)[4].luma_tc.x = (zx); (vb)[4].luma_tc.y = (zy) + (hy); \ - (vb)[5].luma_tc.x = ((zx) + (hx); (vb)[5].luma_tc.y = (zy) + (hy); \ - }*/ \ + (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \ + (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \ + (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \ + (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \ + } \ \ - /*if ((cbp) & (cbm)) \ - {*/ \ + if ((cbp) & (cbm)) \ + { \ (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \ (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \ (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \ (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - /*} \ + } \ else \ { \ - (vb)[0].cb_tc.x = (zx); (vb)[0].cb_tc.y = (zy); \ - (vb)[1].cb_tc.x = (zx); (vb)[1].cb_tc.y = (zy) + (hy); \ - (vb)[2].cb_tc.x = (zx) + (hx); (vb)[2].cb_tc.y = (zy); \ - (vb)[3].cb_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy); \ - (vb)[4].cb_tc.x = (zx); (vb)[4].cb_tc.y = (zy) + (hy); \ - (vb)[5].cb_tc.x = ((zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \ - }*/ \ + (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \ + (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \ + (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \ + (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \ + } \ \ - /*if ((cbp) & (crm)) \ - {*/ \ + if ((cbp) & (crm)) \ + { \ (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \ (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \ (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \ (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - /*} \ + } \ else \ { \ - (vb)[0].cr_tc.x = (zx); (vb)[0].cb_tc.y = (zy); \ - (vb)[1].cr_tc.x = (zx); (vb)[1].cb_tc.y = (zy) + (hy); \ - (vb)[2].cr_tc.x = (zx) + (hx); (vb)[2].cb_tc.y = (zy); \ - (vb)[3].cr_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy); \ - (vb)[4].cr_tc.x = (zx); (vb)[4].cb_tc.y = (zy) + (hy); \ - (vb)[5].cr_tc.x = ((zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \ - }*/ + (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \ + (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \ + (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \ + (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ + } static inline int vlGrabMacroBlockVB ( @@ -526,7 +537,7 @@ static inline int vlGrabMacroBlockVB vb, macroblock->cbp, macroblock->mbx, macroblock->mby, unit.x, unit.y, 0, 0, half.x, half.y, - 32, 2, 1, mc->zero_block.x, mc->zero_block.y + 32, 2, 1, mc->zero_block ); SET_BLOCK @@ -534,7 +545,7 @@ static inline int vlGrabMacroBlockVB vb + 6, macroblock->cbp, macroblock->mbx, macroblock->mby, unit.x, unit.y, half.x, 0, half.x, half.y, - 16, 2, 1, mc->zero_block.x, mc->zero_block.y + 16, 2, 1, mc->zero_block ); SET_BLOCK @@ -542,7 +553,7 @@ static inline int vlGrabMacroBlockVB vb + 12, macroblock->cbp, macroblock->mbx, macroblock->mby, unit.x, unit.y, 0, half.y, half.x, half.y, - 8, 2, 1, mc->zero_block.x, mc->zero_block.y + 8, 2, 1, mc->zero_block ); SET_BLOCK @@ -550,7 +561,7 @@ static inline int vlGrabMacroBlockVB vb + 18, macroblock->cbp, macroblock->mbx, macroblock->mby, unit.x, unit.y, half.x, half.y, half.x, half.y, - 4, 2, 1, mc->zero_block.x, mc->zero_block.y + 4, 2, 1, mc->zero_block ); mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer); @@ -725,6 +736,8 @@ static int vlFlush vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24; } + for (i = 0; i < 3; ++i) + mc->zero_block[i].x = -1.0f; mc->num_macroblocks = 0; mc->cur_buf++; @@ -2277,7 +2290,8 @@ int vlCreateR16SNormBufferedMC struct vlRender **render ) { - struct vlR16SnormBufferedMC *mc; + struct vlR16SnormBufferedMC *mc; + unsigned int i; assert(pipe); assert(render); @@ -2297,6 +2311,8 @@ int vlCreateR16SNormBufferedMC mc->buffered_surface = NULL; mc->past_surface = NULL; mc->future_surface = NULL; + for (i = 0; i < 3; ++i) + mc->zero_block[i].x = -1.0f; mc->num_macroblocks = 0; vlInit(mc); |