From e24ea786faad502da63cc4d59b0c30e3f1915c45 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Jan 2010 19:04:53 -0700 Subject: llvmpipe: consolidate lp_scene_alloc_aligned() calls Use just one call instead of four. Good for a few more fps. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 54 ++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers/llvmpipe/lp_setup_tri.c') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 15534756c4..e5e64c3e5c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -181,18 +181,8 @@ static void setup_tri_coefficients( struct setup_context *setup, const float (*v3)[4], boolean frontface) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); unsigned slot; - /* Allocate space for the a0, dadx and dady arrays - */ - { - unsigned bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float); - tri->inputs.a0 = lp_scene_alloc_aligned( scene, bytes, 16 ); - tri->inputs.dadx = lp_scene_alloc_aligned( scene, bytes, 16 ); - tri->inputs.dady = lp_scene_alloc_aligned( scene, bytes, 16 ); - } - /* The internal position input is in slot zero: */ setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); @@ -243,6 +233,41 @@ static inline int subpixel_snap( float a ) } + +/** + * Alloc space for a new triangle plus the input.a0/dadx/dady arrays + * immediately after it. + * The memory is allocated from the per-scene pool, not per-tile. 
+ * \param nr_inputs number of fragment shader inputs (each array holds nr_inputs + 1 slots) + * \param tri_size returns total number of bytes allocated (triangle plus the three arrays) + * \return pointer to triangle space + */ +static INLINE struct lp_rast_triangle * +alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) +{ + unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + struct lp_rast_triangle *tri; + unsigned bytes; + char *inputs; + + assert(sizeof(*tri) % 16 == 0); + + bytes = sizeof(*tri) + (3 * input_array_sz); + + tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + + inputs = (char *) (tri + 1); + tri->inputs.a0 = (float (*)[4]) inputs; + tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); + tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + + *tri_size = bytes; + + return tri; +} + + + /** * Do basic setup for triangle rasterization and determine which * framebuffer tiles are touched. Put the triangle in the scene's @@ -264,10 +289,13 @@ do_triangle_ccw(struct setup_context *setup, const int y3 = subpixel_snap(v3[0][1]); struct lp_scene *scene = lp_setup_get_current_scene(setup); - struct lp_rast_triangle *tri = lp_scene_alloc_aligned( scene, sizeof *tri, 16 ); + struct lp_rast_triangle *tri; int area; float oneoverarea; int minx, maxx, miny, maxy; + unsigned tri_bytes; + + tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; @@ -286,7 +314,7 @@ do_triangle_ccw(struct setup_context *setup, * XXX: subject to overflow?? */ if (area <= 0) { - lp_scene_putback_data( scene, sizeof *tri ); + lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); return; } @@ -306,7 +334,7 @@ do_triangle_ccw(struct setup_context *setup, if (miny == maxy || minx == maxx) { - lp_scene_putback_data( scene, sizeof *tri ); + lp_scene_putback_data( scene, tri_bytes ); LP_COUNT(nr_culled_tris); return; } -- cgit v1.2.3