From 98f3ff8f4a761d579ee9b42ee3090635519213a5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Aug 2010 15:45:25 +0100 Subject: llvmpipe: more rasterization counters --- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 43f72d8ca8..70a4b64c8d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -102,6 +102,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, assert((partial_mask & inmask) == 0); + LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask))); + /* Iterate over partials: */ while (partial_mask) { @@ -114,6 +116,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, partial_mask &= ~(1 << i); + LP_COUNT(nr_partially_covered_4); + for (j = 0; j < NR_PLANES; j++) cx[j] = (c[j] - plane[j].dcdx * ix @@ -133,6 +137,7 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, inmask &= ~(1 << i); + LP_COUNT(nr_fully_covered_4); block_full_4(task, tri, px, py); } } @@ -190,6 +195,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, assert((partial_mask & inmask) == 0); + LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask))); + /* Iterate over partials: */ while (partial_mask) { -- cgit v1.2.3 From 5286dd701640976ffc328e8e85fb3830746851a1 Mon Sep 17 00:00:00 2001 From: Hui Qi Tay Date: Mon, 19 Jul 2010 15:23:09 +0100 Subject: llvmpipe: native rasterization for lines Rasterize lines directly by treating them as 4-sided polygons. Still need to check the exact pixel rasterization. 
--- src/gallium/drivers/llvmpipe/lp_context.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast.h | 4 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 + src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 2 +- src/gallium/drivers/llvmpipe/lp_setup.c | 7 + src/gallium/drivers/llvmpipe/lp_setup.h | 4 + src/gallium/drivers/llvmpipe/lp_setup_context.h | 38 ++ src/gallium/drivers/llvmpipe/lp_setup_line.c | 617 ++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup_point.c | 4 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 23 +- src/gallium/drivers/llvmpipe/lp_state_rasterizer.c | 2 + 11 files changed, 685 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h') diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 086a2d5898..a6873abbee 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -157,7 +157,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) /* convert points and lines into triangles: */ draw_wide_point_threshold(llvmpipe->draw, 0.0); - draw_wide_line_threshold(llvmpipe->draw, 0.0); + draw_wide_line_threshold(llvmpipe->draw, 10000.0); #if USE_DRAW_STAGE_PSTIPPLE /* Do polygon stipple w/ texture map + frag prog? 
*/ diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 102e902d02..b4564ef33b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -120,7 +120,7 @@ struct lp_rast_triangle { float v[3][2]; #endif - struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */ + struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */ }; @@ -236,6 +236,8 @@ void lp_rast_triangle_6( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); void lp_rast_triangle_7( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void lp_rast_triangle_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); void lp_rast_shade_tile( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 673f67386b..8d729c7481 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -157,6 +157,10 @@ build_mask_linear(int c, int dcdx, int dcdy) #define NR_PLANES 7 #include "lp_rast_tri_tmp.h" +#define TAG(x) x##_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + /* Special case for 3 plane triangle which is contained entirely * within a 16x16 block. diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 70a4b64c8d..0def5f7243 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -32,7 +32,7 @@ /** - * Prototype for a 7 plane rasterizer function. Will codegenerate + * Prototype for a 8 plane rasterizer function. Will codegenerate * several of these. * * XXX: Varients for more/fewer planes. 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 9aa6c4bf38..4c8275665e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -485,7 +485,14 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, } } +void +lp_setup_set_line_state( struct lp_setup_context *setup, + float line_width) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + setup->line_width = line_width; +} void lp_setup_set_fs_inputs( struct lp_setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index a41bb8863b..693550b8c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -100,6 +100,10 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup, boolean scissor, boolean gl_rasterization_rules ); +void +lp_setup_set_line_state( struct lp_setup_context *setup, + float line_width); + void lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *interp, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 1a147e0353..a4838d59a5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -91,6 +91,7 @@ struct lp_setup_context boolean scissor_test; unsigned cullmode; float pixel_offset; + float line_width; struct pipe_framebuffer_state fb; struct u_rect framebuffer; @@ -170,5 +171,42 @@ lp_setup_print_vertex(struct lp_setup_context *setup, const char *name, const float (*v)[4]); +/** shared code between lp_setup_line and lp_setup_tri */ +extern lp_rast_cmd lp_rast_tri_tab[]; + +void +do_triangle_ccw_whole_tile(struct lp_setup_context *setup, + struct lp_scene *scene, + struct lp_rast_triangle *tri, + int x, int y, + boolean opaque, + int *is_blit); + + +void +lp_setup_tri_coefficients( struct lp_setup_context *setup, + struct 
lp_rast_triangle *tri, + float oneoverarea, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + boolean frontface); + +struct lp_rast_triangle * +lp_setup_alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size); + +void +lp_setup_fragcoord_coef(struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + const float (*v3)[4], + unsigned usage_mask); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index be41c44e6f..930207ae33 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -29,19 +29,624 @@ * Binning code for lines */ +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" #include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state_fs.h" -static void line_nop( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4] ) +#define NUM_CHANNELS 4 + + +static const int step_scissor_minx[16] = { + 0, 1, 0, 1, + 2, 3, 2, 3, + 0, 1, 0, 1, + 2, 3, 2, 3 +}; + +static const int step_scissor_maxx[16] = { + 0, -1, 0, -1, + -2, -3, -2, -3, + 0, -1, 0, -1, + -2, -3, -2, -3 +}; + +static const int step_scissor_miny[16] = { + 0, 0, 1, 1, + 0, 0, 1, 1, + 2, 2, 3, 3, + 2, 2, 3, 3 +}; + +static const int step_scissor_maxy[16] = { + 0, 0, -1, -1, + 0, 0, -1, -1, + -2, -2, -3, -3, + -2, -2, -3, -3 +}; + + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + */ +static void constant_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + unsigned slot, + const float value, + unsigned i ) +{ + tri->inputs.a0[slot][i] = value; + tri->inputs.dadx[slot][i] = 0.0f; + tri->inputs.dady[slot][i] = 0.0f; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. 
+ */ +static void linear_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + unsigned vert_attr, + unsigned i) +{ + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + + float da21 = a1 - a2; + float dadx = da21 * tri->dx * oneoverarea; + float dady = da21 * tri->dy * oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line (two vertices, v1 and v2). + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + unsigned slot, + const float (*v1)[4], + const float (*v2)[4], + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + + float da21 = a1 - a2; + float dadx = da21 * tri->dx * oneoverarea; + float dady = da21 * tri->dy * oneoverarea; + + tri->inputs.dadx[slot][i] = dadx; + tri->inputs.dady[slot][i] = dady; + + tri->inputs.a0[slot][i] = (a1 - + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); +} + +/** + * Compute the tri->inputs dadx, dady, a0 values for all shader inputs. 
+ */ +static void setup_line_coefficients( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + float oneoverarea, + const float (*v1)[4], + const float (*v2)[4]) { + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < setup->fs.nr_inputs; slot++) { + unsigned vert_attr = setup->fs.input[slot].src_index; + unsigned usage_mask = setup->fs.input[slot].usage_mask; + unsigned i; + + switch (setup->fs.input[slot].interp) { + case LP_INTERP_CONSTANT: + if (setup->flatshade_first) { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); + } + else { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, tri, slot+1, v2[vert_attr][i], i); + } + break; + + case LP_INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i); + break; + + case LP_INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so we need to ensure that the usage mask covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + default: + assert(0); + } + } + + /* The internal position input is in slot zero: + */ + lp_setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v2, + fragcoord_usage_mask); } -void -lp_setup_choose_line( struct lp_setup_context *setup ) + +static INLINE int subpixel_snap( float a ) { - setup->line = line_nop; + return util_iround(FIXED_ONE * a); +} + + +/** + * Print line vertex attribs (for debug). 
+ */ +static void +print_line(struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4]) +{ + uint i; + + debug_printf("llvmpipe line\n"); + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + debug_printf(" v1[%d]: %f %f %f %f\n", i, + v1[i][0], v1[i][1], v1[i][2], v1[i][3]); + } + for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + debug_printf(" v2[%d]: %f %f %f %f\n", i, + v2[i][0], v2[i][1], v2[i][2], v2[i][3]); + } +} + + +static void +lp_setup_line( struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4]) +{ + struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_rast_triangle *line; + float oneoverarea; + float half_width = setup->line_width / 2; + int minx, maxx, miny, maxy; + int ix0, ix1, iy0, iy1; + unsigned tri_bytes; + int x[4]; + int y[4]; + int i; + int nr_planes = 4; + boolean opaque; + + if (0) + print_line(setup, v1, v2); + + if (setup->scissor_test) { + nr_planes = 8; + } + else { + nr_planes = 4; + } + + line = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); + if (!line) + return; + +#ifdef DEBUG + line->v[0][0] = v1[0][0]; + line->v[1][0] = v2[0][0]; + line->v[0][1] = v1[0][1]; + line->v[1][1] = v2[0][1]; +#endif + + /* pre-calculation(based on given vertices) to determine if line is + * more horizontal or more vertical + */ + line->dx = v1[0][0] - v2[0][0]; + line->dy = v1[0][1] - v2[0][1]; + + /* x-major line */ + if (fabsf(line->dx) >= fabsf(line->dy)) { + if (line->dx < 0) { + /* if v2 is to the right of v1, swap pointers */ + const float (*temp)[4] = v1; + v1 = v2; + v2 = temp; + line->dx = -line->dx; + line->dy = -line->dy; + } + + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); + x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[3] = subpixel_snap(v1[0][0] - setup->pixel_offset); + + y[0] = subpixel_snap(v1[0][1] - half_width - 
setup->pixel_offset); + y[1] = subpixel_snap(v2[0][1] - half_width - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] + half_width - setup->pixel_offset); + y[3] = subpixel_snap(v1[0][1] + half_width - setup->pixel_offset); + } + else{ + /* y-major line */ + if (line->dy > 0) { + /* if v2 is on top of v1, swap pointers */ + const float (*temp)[4] = v1; + v1 = v2; + v2 = temp; + line->dx = -line->dx; + line->dy = -line->dy; + } + + x[0] = subpixel_snap(v1[0][0] - half_width - setup->pixel_offset); + x[1] = subpixel_snap(v2[0][0] - half_width - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] + half_width - setup->pixel_offset); + x[3] = subpixel_snap(v1[0][0] + half_width - setup->pixel_offset); + + y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); + y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); + y[3] = subpixel_snap(v1[0][1] - setup->pixel_offset); + } + + /* calculate the deltas */ + line->plane[0].dcdy = x[0] - x[1]; + line->plane[1].dcdy = x[1] - x[2]; + line->plane[2].dcdy = x[2] - x[3]; + line->plane[3].dcdy = x[3] - x[0]; + + line->plane[0].dcdx = y[0] - y[1]; + line->plane[1].dcdx = y[1] - y[2]; + line->plane[2].dcdx = y[2] - y[3]; + line->plane[3].dcdx = y[3] - y[0]; + + + LP_COUNT(nr_tris); + + + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 
1 : 0; + minx = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + maxx = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + miny = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + maxy = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + } + + if (setup->scissor_test) { + minx = MAX2(minx, setup->scissor.current.minx); + maxx = MIN2(maxx, setup->scissor.current.maxx); + miny = MAX2(miny, setup->scissor.current.miny); + maxy = MIN2(maxy, setup->scissor.current.maxy); + } + else { + minx = MAX2(minx, 0); + miny = MAX2(miny, 0); + maxx = MIN2(maxx, scene->fb.width); + maxy = MIN2(maxy, scene->fb.height); + } + + + if (miny >= maxy || minx >= maxx) { + lp_scene_putback_data( scene, tri_bytes ); + return; + } + + oneoverarea = 1.0f / (line->dx * line->dx + line->dy * line->dy); + + /* Setup parameter interpolants: + */ + setup_line_coefficients( setup, line, oneoverarea, v1, v2); + + for (i = 0; i < 4; i++) { + struct lp_rast_plane *plane = &line->plane[i]; + + /* half-edge constants, will be iterated over the whole render + * target. + */ + plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. 
+ */ + if (plane->dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane->c++; + } + else if (plane->dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane->dcdy > 0) plane->c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane->dcdy < 0) plane->c++; + } + } + + plane->dcdx *= FIXED_ONE; + plane->dcdy *= FIXED_ONE; + + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane->eo = 0; + if (plane->dcdx < 0) plane->eo -= plane->dcdx; + if (plane->dcdy > 0) plane->eo += plane->dcdy; + + /* Calculate trivial accept offsets from the above. + */ + plane->ei = plane->dcdy - plane->dcdx - plane->eo; + + plane->step = line->step[i]; + + /* Fill in the inputs.step[][] arrays. + * We've manually unrolled some loops here. + */ +#define SETUP_STEP(j, x, y) \ + line->step[i][j] = y * plane->dcdy - x * plane->dcdx + + SETUP_STEP(0, 0, 0); + SETUP_STEP(1, 1, 0); + SETUP_STEP(2, 0, 1); + SETUP_STEP(3, 1, 1); + + SETUP_STEP(4, 2, 0); + SETUP_STEP(5, 3, 0); + SETUP_STEP(6, 2, 1); + SETUP_STEP(7, 3, 1); + + SETUP_STEP(8, 0, 2); + SETUP_STEP(9, 1, 2); + SETUP_STEP(10, 0, 3); + SETUP_STEP(11, 1, 3); + + SETUP_STEP(12, 2, 2); + SETUP_STEP(13, 3, 2); + SETUP_STEP(14, 2, 3); + SETUP_STEP(15, 3, 3); +#undef SETUP_STEP + } + + + /* + * When rasterizing scissored tris, use the intersection of the + * triangle bounding box and the scissor rect to generate the + * scissor planes. + * + * This permits us to cut off the triangle "tails" that are present + * in the intermediate recursive levels caused when two of the + * triangles edges don't diverge quickly enough to trivially reject + * exterior blocks from the triangle. 
+ * + * It's not really clear if it's worth worrying about these tails, + * but since we generate the planes for each scissored tri, it's + * free to trim them in this case. + * + * Note that otherwise, the scissor planes only vary in 'C' value, + * and even then only on state-changes. Could alternatively store + * these planes elsewhere. + */ + if (nr_planes == 8) { + line->plane[4].step = step_scissor_maxx; + line->plane[4].dcdx = 1; + line->plane[4].dcdy = 0; + line->plane[4].c = maxx; + line->plane[4].ei = -1; + line->plane[4].eo = 0; + + line->plane[5].step = step_scissor_miny; + line->plane[5].dcdx = 0; + line->plane[5].dcdy = 1; + line->plane[5].c = 1-miny; + line->plane[5].ei = 0; + line->plane[5].eo = 1; + + line->plane[6].step = step_scissor_maxy; + line->plane[6].dcdx = 0; + line->plane[6].dcdy = -1; + line->plane[6].c = maxy; + line->plane[6].ei = -1; + line->plane[6].eo = 0; + + line->plane[7].step = step_scissor_minx; + line->plane[7].dcdx = -1; + line->plane[7].dcdy = 0; + line->plane[7].c = 1-minx; + line->plane[7].ei = 0; + line->plane[7].eo = 1; + } + + + /* + * All fields of 'tri' are now set. The remaining code here is + * concerned with binning. 
+ */ + + /* Convert to tile coordinates, and inclusive ranges: + */ + ix0 = minx / TILE_SIZE; + iy0 = miny / TILE_SIZE; + ix1 = (maxx-1) / TILE_SIZE; + iy1 = (maxy-1) / TILE_SIZE; + + /* + * Clamp to framebuffer size + */ + assert(ix0 == MAX2(ix0, 0)); + assert(iy0 == MAX2(iy0, 0)); + assert(ix1 == MIN2(ix1, scene->tiles_x - 1)); + assert(iy1 == MIN2(iy1, scene->tiles_y - 1)); + + /* Determine which tile(s) intersect the triangle's bounding box + */ + if (iy0 == iy1 && ix0 == ix1) + { + /* Triangle is contained in a single tile: + */ + lp_scene_bin_command( scene, ix0, iy0, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(line, (1<plane[i].c + + line->plane[i].dcdy * iy0 * TILE_SIZE - + line->plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = line->plane[i].ei << TILE_ORDER; + eo[i] = line->plane[i].eo << TILE_ORDER; + xstep[i] = -(line->plane[i].dcdx << TILE_ORDER); + ystep[i] = line->plane[i].dcdy << TILE_ORDER; + } + + + + /* Test tile-sized blocks against the triangle. + * Discard blocks fully outside the tri. If the block is fully + * contained inside the tri, bin an lp_rast_shade_tile command. + * Else, bin a lp_rast_triangle command. + */ + for (y = iy0; y <= iy1; y++) + { + boolean in = FALSE; /* are we inside the triangle? 
*/ + int cx[8]; + + for (i = 0; i < nr_planes; i++) + cx[i] = c[i]; + + for (x = ix0; x <= ix1; x++) + { + int out = 0; + int partial = 0; + + for (i = 0; i < nr_planes; i++) { + int planeout = cx[i] + eo[i]; + int planepartial = cx[i] + ei[i] - 1; + out |= (planeout >> 31); + partial |= (planepartial >> 31) & (1<line = lp_setup_line; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 9f69e6c5ce..709c3e2fd2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -31,7 +31,7 @@ #include "lp_setup_context.h" -static void point_nop( struct lp_setup_context *setup, +static void lp_setup_point( struct lp_setup_context *setup, const float (*v0)[4] ) { } @@ -40,7 +40,7 @@ static void point_nop( struct lp_setup_context *setup, void lp_setup_choose_point( struct lp_setup_context *setup ) { - setup->point = point_nop; + setup->point = lp_setup_point; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index d86fb8652a..212bb3ab90 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -68,11 +68,11 @@ fixed_to_float(int a) * \param nr_inputs number of fragment shader inputs * \return pointer to triangle space */ -static INLINE struct lp_rast_triangle * -alloc_triangle(struct lp_scene *scene, - unsigned nr_inputs, - unsigned nr_planes, - unsigned *tri_size) +struct lp_rast_triangle * +lp_setup_alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); struct lp_rast_triangle *tri; @@ -160,7 +160,7 @@ lp_setup_print_triangle(struct lp_setup_context *setup, } -lp_rast_cmd lp_rast_tri_tab[8] = { +lp_rast_cmd lp_rast_tri_tab[9] = { NULL, /* should be impossible */ lp_rast_triangle_1, lp_rast_triangle_2, @@ -168,7 +168,8 @@ lp_rast_cmd 
lp_rast_tri_tab[8] = { lp_rast_triangle_4, lp_rast_triangle_5, lp_rast_triangle_6, - lp_rast_triangle_7 + lp_rast_triangle_7, + lp_rast_triangle_8 }; /** @@ -254,10 +255,10 @@ do_triangle_ccw(struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); - tri = alloc_triangle(scene, - setup->fs.nr_inputs, - nr_planes, - &tri_bytes); + tri = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); if (!tri) return; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index afd3e0b21c..67b985aa24 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -73,6 +73,8 @@ llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) llvmpipe->rasterizer->gl_rasterization_rules); lp_setup_set_flatshade_first( llvmpipe->setup, llvmpipe->rasterizer->flatshade_first); + lp_setup_set_line_state( llvmpipe->setup, + llvmpipe->rasterizer->line_width); } llvmpipe->dirty |= LP_NEW_RASTERIZER; -- cgit v1.2.3 From 0aa3a09ced07e150901cd0f7a7917556a018c252 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 22 Aug 2010 22:56:54 +0100 Subject: llvmpipe: combine linear mask calculation --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 73 ++++++++++++++++++++++++-- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 26 +++++---- 2 files changed, 84 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5b3ad6e0a7..bdb8d131cc 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -128,11 +128,71 @@ build_mask_linear(int c, int dcdx, int dcdy) return mask; } + + +static INLINE void +build_masks(int c, + int cdiff, + int dcdx, + int dcdy, + unsigned *outmask, + unsigned *partmask) +{ + *outmask |= 
build_mask_linear(c, dcdx, dcdy); + *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy); +} + #else #include #include "util/u_sse.h" +static INLINE void +build_masks(int c, + int cdiff, + int dcdx, + int dcdy, + unsigned *outmask, + unsigned *partmask) +{ + __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); + __m128i xdcdy = _mm_set1_epi32(dcdy); + + /* Get values across the quad + */ + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + { + __m128i cstep01, cstep23, result; + + cstep01 = _mm_packs_epi32(cstep0, cstep1); + cstep23 = _mm_packs_epi32(cstep2, cstep3); + result = _mm_packs_epi16(cstep01, cstep23); + + *outmask |= _mm_movemask_epi8(result); + } + + + { + __m128i cio4 = _mm_set1_epi32(cdiff); + __m128i cstep01, cstep23, result; + + cstep0 = _mm_add_epi32(cstep0, cio4); + cstep1 = _mm_add_epi32(cstep1, cio4); + cstep2 = _mm_add_epi32(cstep2, cio4); + cstep3 = _mm_add_epi32(cstep3, cio4); + + cstep01 = _mm_packs_epi32(cstep0, cstep1); + cstep23 = _mm_packs_epi32(cstep2, cstep3); + result = _mm_packs_epi16(cstep01, cstep23); + + *partmask |= _mm_movemask_epi8(result); + } +} + + static INLINE unsigned build_mask_linear(int c, int dcdx, int dcdy) { @@ -263,11 +323,14 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, { const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; - const int cox = c[j] + plane[j].eo * 4; - const int cio = c[j] + plane[j].ei * 4 - 1; - - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + const int cox = plane[j].eo * 4; + const int cio = plane[j].ei * 4 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 
0def5f7243..99a0bae45d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -81,11 +81,14 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, for (j = 0; j < NR_PLANES; j++) { const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; - const int cox = c[j] + plane[j].eo * 4; - const int cio = c[j] + plane[j].ei * 4 - 1; - - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + const int cox = plane[j].eo * 4; + const int cio = plane[j].ei * 4 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ } if (outmask == 0xffff) @@ -171,11 +174,14 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, { const int dcdx = -plane[j].dcdx * 16; const int dcdy = plane[j].dcdy * 16; - const int cox = c[j] + plane[j].eo * 16; - const int cio = c[j] + plane[j].ei * 16 - 1; - - outmask |= build_mask_linear(cox, dcdx, dcdy); - partmask |= build_mask_linear(cio, dcdx, dcdy); + const int cox = plane[j].eo * 16; + const int cio = plane[j].ei * 16 - 1; + + build_masks(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ } j++; -- cgit v1.2.3