diff options
Diffstat (limited to 'src')
60 files changed, 1431 insertions, 1308 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index b5241fa64c..a7335c340c 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -90,6 +90,9 @@ struct cso_context { void *fragment_shader, *fragment_shader_saved, *geometry_shader; void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved; + struct pipe_clip_state clip; + struct pipe_clip_state clip_saved; + struct pipe_framebuffer_state fb, fb_saved; struct pipe_viewport_state vp, vp_saved; struct pipe_blend_color blend_color; @@ -1126,3 +1129,54 @@ void cso_restore_geometry_shader(struct cso_context *ctx) } ctx->geometry_shader_saved = NULL; } + + +/* clip state */ + +static INLINE void +clip_state_cpy(struct pipe_clip_state *dst, + const struct pipe_clip_state *src) +{ + dst->nr = src->nr; + if (src->nr) { + memcpy(dst->ucp, src->ucp, src->nr * sizeof(src->ucp[0])); + } +} + +static INLINE int +clip_state_cmp(const struct pipe_clip_state *a, + const struct pipe_clip_state *b) +{ + if (a->nr != b->nr) { + return 1; + } + if (a->nr) { + return memcmp(a->ucp, b->ucp, a->nr * sizeof(a->ucp[0])); + } + return 0; +} + +void +cso_set_clip(struct cso_context *ctx, + const struct pipe_clip_state *clip) +{ + if (clip_state_cmp(&ctx->clip, clip)) { + clip_state_cpy(&ctx->clip, clip); + ctx->pipe->set_clip_state(ctx->pipe, clip); + } +} + +void +cso_save_clip(struct cso_context *ctx) +{ + clip_state_cpy(&ctx->clip_saved, &ctx->clip); +} + +void +cso_restore_clip(struct cso_context *ctx) +{ + if (clip_state_cmp(&ctx->clip, &ctx->clip_saved)) { + clip_state_cpy(&ctx->clip, &ctx->clip_saved); + ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved); + } +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 707b3c2cee..251a9a644f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -180,6 +180,19 @@ void cso_save_stencil_ref(struct cso_context *cso); void cso_restore_stencil_ref(struct cso_context *cso); +/* clip state */ + +void +cso_set_clip(struct cso_context *cso, + const struct pipe_clip_state *clip); + +void +cso_save_clip(struct cso_context *cso); + +void +cso_restore_clip(struct cso_context *cso); + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index d5ddc4a6a9..bb0988543f 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -128,9 +128,7 @@ void draw_set_rasterizer_state( struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->rasterizer = raster; - draw->bypass_clipping = - ((draw->rasterizer && draw->rasterizer->bypass_vs_clip_and_viewport) || - draw->driver.bypass_clipping); + draw->bypass_clipping = draw->driver.bypass_clipping; } @@ -140,9 +138,7 @@ void draw_set_driver_clipping( struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->driver.bypass_clipping = bypass_clipping; - draw->bypass_clipping = - ((draw->rasterizer && draw->rasterizer->bypass_vs_clip_and_viewport) || - draw->driver.bypass_clipping); + draw->bypass_clipping = draw->driver.bypass_clipping; } diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 341353f628..6d90a6c42f 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -87,9 +87,7 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_CLIPTEST; } - if (!draw->rasterizer->bypass_vs_clip_and_viewport) { - opt |= PT_SHADE; - } + opt |= PT_SHADE; } if (opt == 0) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index c5dfbcfa3c..1aecb51077 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -100,8 +100,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->key.nr_elements = MAX2(fse->key.nr_outputs, /* outputs - translate to hw format */ fse->key.nr_inputs); /* inputs - fetch from api format */ - fse->key.viewport = (!draw->rasterizer->bypass_vs_clip_and_viewport && - !draw->identity_viewport); + fse->key.viewport = !draw->identity_viewport; fse->key.clip = !draw->bypass_clipping; fse->key.const_vbuffers = 0; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 56b69354b2..da5106463a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -96,8 +96,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, */ draw_pt_post_vs_prepare( fpme->post_vs, (boolean)draw->bypass_clipping, - (boolean)(draw->identity_viewport || - draw->rasterizer->bypass_vs_clip_and_viewport), + (boolean)draw->identity_viewport, (boolean)draw->rasterizer->gl_rasterization_rules, (draw->vs.edgeflag_output ? true : false) ); @@ -154,9 +153,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, (char *)pipeline_verts ); /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. If there is no shader, eg if - * bypass_vs_clip_and_viewport, then the inputs == outputs, and are - * already in the correct place. + * the pipeline verts. */ if (opt & PT_SHADE) { @@ -239,9 +236,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, (char *)pipeline_verts ); /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. If there is no shader, ie if - * bypass_vs_clip_and_viewport, then the inputs == outputs, and are - * already in the correct place. + * the pipeline verts. */ if (opt & PT_SHADE) { @@ -319,9 +314,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle (char *)pipeline_verts ); /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. If there is no shader, ie if - * bypass_vs_clip_and_viewport, then the inputs == outputs, and are - * already in the correct place. + * the pipeline verts. */ if (opt & PT_SHADE) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 4cf28a9f93..ae866243a9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -52,6 +52,7 @@ #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_tgsi.h" +#include "lp_bld_debug.h" #define LP_MAX_TEMPS 256 @@ -81,6 +82,21 @@ #define QUAD_BOTTOM_LEFT 2 #define QUAD_BOTTOM_RIGHT 3 +#define LP_TGSI_MAX_NESTING 16 + +struct lp_exec_mask { + struct lp_build_context *bld; + + boolean has_mask; + + LLVMTypeRef int_vec_type; + + LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING]; + int cond_stack_size; + LLVMValueRef cond_mask; + + LLVMValueRef exec_mask; +}; struct lp_build_tgsi_soa_context { @@ -97,9 +113,9 @@ struct lp_build_tgsi_soa_context LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; struct lp_build_mask_context *mask; + struct lp_exec_mask exec_mask; }; - static const unsigned char swizzle_left[4] = { QUAD_TOP_LEFT, QUAD_TOP_LEFT, @@ -124,6 +140,73 @@ swizzle_bottom[4] = { QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT }; +static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) +{ + mask->bld = bld; + mask->has_mask = FALSE; + mask->cond_stack_size = 0; + + mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); +} + +static void lp_exec_mask_update(struct lp_exec_mask *mask) +{ + mask->exec_mask = mask->cond_mask; + if (mask->cond_stack_size > 0) + mask->has_mask = TRUE; +} + +static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, + LLVMValueRef val) +{ + mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; + mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val, + mask->int_vec_type, ""); + + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) +{ + LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; + LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder, + mask->cond_mask, ""); + + /* means that we didn't have any mask before and that + * we were fully enabled */ + if (mask->cond_stack_size <= 1) { + prev_mask = LLVMConstAllOnes(mask->int_vec_type); + } + + mask->cond_mask = LLVMBuildAnd(mask->bld->builder, + inv_mask, + prev_mask, ""); + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) +{ + mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; + lp_exec_mask_update(mask); +} + +static void lp_exec_mask_store(struct lp_exec_mask *mask, + LLVMValueRef val, + LLVMValueRef dst) +{ + if (mask->has_mask) { + LLVMValueRef real_val, dst_val; + + dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); + real_val = lp_build_select(mask->bld, + mask->exec_mask, + val, dst_val); + + LLVMBuildStore(mask->bld->builder, real_val, dst); + } else + LLVMBuildStore(mask->bld->builder, val, dst); +} + static LLVMValueRef emit_ddx(struct lp_build_tgsi_soa_context *bld, @@ -287,13 +370,13 @@ emit_store( switch( reg->Register.File ) { case TGSI_FILE_OUTPUT: - LLVMBuildStore(bld->base.builder, value, - bld->outputs[reg->Register.Index][chan_index]); + lp_exec_mask_store(&bld->exec_mask, value, + bld->outputs[reg->Register.Index][chan_index]); break; case TGSI_FILE_TEMPORARY: - LLVMBuildStore(bld->base.builder, value, - bld->temps[reg->Register.Index][chan_index]); + lp_exec_mask_store(&bld->exec_mask, value, + bld->temps[reg->Register.Index][chan_index]); break; case TGSI_FILE_ADDRESS: @@ -1272,8 +1355,8 @@ emit_instruction( break; case TGSI_OPCODE_IF: - /* FIXME */ - return 0; + tmp0 = emit_fetch(bld, inst, 0, CHAN_X); + lp_exec_mask_cond_push(&bld->exec_mask, tmp0); break; case TGSI_OPCODE_BGNFOR: @@ -1289,13 +1372,11 @@ emit_instruction( break; case TGSI_OPCODE_ELSE: - /* FIXME */ - return 0; + lp_exec_mask_cond_invert(&bld->exec_mask); break; case TGSI_OPCODE_ENDIF: - /* FIXME */ - return 0; + lp_exec_mask_cond_pop(&bld->exec_mask); break; case TGSI_OPCODE_ENDFOR: @@ -1458,6 +1539,8 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, bld.consts_ptr = consts_ptr; bld.sampler = sampler; + lp_exec_mask_init(&bld.exec_mask, &bld.base); + tgsi_parse_init( &parse, tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 68566d3093..f853ea2820 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -110,6 +110,42 @@ micro_ceil(union tgsi_exec_channel *dst, } static void +micro_clamp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; +} + +static void +micro_cmp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; + dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; + dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; + dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; +} + +static void +micro_cnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; + dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; + dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; + dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3]; +} + +static void micro_cos(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -2653,15 +2689,7 @@ exec_instruction( break; case TGSI_OPCODE_CND: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DP2A: @@ -2673,16 +2701,7 @@ exec_instruction( break; case TGSI_OPCODE_CLAMP: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - micro_max(&r[0], &r[0], &r[1]); - FETCH(&r[1], 2, chan_index); - micro_min(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_FLR: @@ -3076,15 +3095,7 @@ exec_instruction( break; case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SCS: diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index f0bc58a558..0b263a9db5 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -62,6 +62,7 @@ struct blit_state struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; struct pipe_viewport_state viewport; + struct pipe_clip_state clip; void *vs; void *fs[TGSI_WRITEMASK_XYZW + 1]; @@ -101,7 +102,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); ctx->rasterizer.front_winding = PIPE_WINDING_CW; ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; - ctx->rasterizer.bypass_vs_clip_and_viewport = 1; ctx->rasterizer.gl_rasterization_rules = 1; /* samplers */ @@ -114,7 +114,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->sampler.mag_img_filter = 0; /* set later */ ctx->sampler.normalized_coords = 1; - /* vertex shader - still required to provide the linkage between * fragment shader input semantics and vertex_element/buffers. */ @@ -266,7 +265,6 @@ regions_overlap(int srcX0, int srcY0, * \param writemask controls which channels in the dest surface are sourced * from the src surface. Disabled channels are sourced * from (0,0,0,1). - * XXX what about clipping??? * XXX need some control over blitting Z and/or stencil. */ void @@ -407,14 +405,17 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_save_rasterizer(ctx->cso); cso_save_samplers(ctx->cso); cso_save_sampler_textures(ctx->cso); + cso_save_viewport(ctx->cso); cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); + cso_save_clip(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_clip(ctx->cso, &ctx->clip); /* sampler */ ctx->sampler.min_img_filter = filter; @@ -422,6 +423,17 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); + /* viewport */ + ctx->viewport.scale[0] = 0.5f * dst->width; + ctx->viewport.scale[1] = 0.5f * dst->height; + ctx->viewport.scale[2] = 0.5f; + ctx->viewport.scale[3] = 1.0f; + ctx->viewport.translate[0] = 0.5f * dst->width; + ctx->viewport.translate[1] = 0.5f * dst->height; + ctx->viewport.translate[2] = 0.5f; + ctx->viewport.translate[3] = 0.0f; + cso_set_viewport(ctx->cso, &ctx->viewport); + /* texture */ cso_set_sampler_textures(ctx->cso, 1, &tex); @@ -444,8 +456,10 @@ util_blit_pixels_writemask(struct blit_state *ctx, /* draw quad */ offset = setup_vertex_data_tex(ctx, - (float) dstX0, (float) dstY0, - (float) dstX1, (float) dstY1, + (float) dstX0 / dst->width * 2.0f - 1.0f, + (float) dstY0 / dst->height * 2.0f - 1.0f, + (float) dstX1 / dst->width * 2.0f - 1.0f, + (float) dstY1 / dst->height * 2.0f - 1.0f, s0, t0, s1, t1, z); @@ -461,9 +475,11 @@ util_blit_pixels_writemask(struct blit_state *ctx, cso_restore_rasterizer(ctx->cso); cso_restore_samplers(ctx->cso); cso_restore_sampler_textures(ctx->cso); + cso_restore_viewport(ctx->cso); cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); + cso_restore_clip(ctx->cso); pipe_texture_reference(&tex, NULL); } @@ -547,11 +563,13 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); + cso_save_clip(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_clip(ctx->cso, &ctx->clip); /* sampler */ ctx->sampler.min_img_filter = filter; @@ -559,6 +577,17 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); + /* viewport */ + ctx->viewport.scale[0] = 0.5f * dst->width; + ctx->viewport.scale[1] = 0.5f * dst->height; + ctx->viewport.scale[2] = 0.5f; + ctx->viewport.scale[3] = 1.0f; + ctx->viewport.translate[0] = 0.5f * dst->width; + ctx->viewport.translate[1] = 0.5f * dst->height; + ctx->viewport.translate[2] = 0.5f; + ctx->viewport.translate[3] = 0.0f; + cso_set_viewport(ctx->cso, &ctx->viewport); + /* texture */ cso_set_sampler_textures(ctx->cso, 1, &tex); @@ -576,8 +605,10 @@ util_blit_pixels_tex(struct blit_state *ctx, /* draw quad */ offset = setup_vertex_data_tex(ctx, - (float) dstX0, (float) dstY0, - (float) dstX1, (float) dstY1, + (float) dstX0 / dst->width * 2.0f - 1.0f, + (float) dstY0 / dst->height * 2.0f - 1.0f, + (float) dstX1 / dst->width * 2.0f - 1.0f, + (float) dstY1 / dst->height * 2.0f - 1.0f, s0, t0, s1, t1, z); @@ -596,4 +627,5 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); + cso_restore_clip(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 18f8606818..0ba09d33bf 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -93,6 +93,12 @@ struct blitter_context_priv /* Rasterizer state. */ void *rs_state; + + /* Viewport state. */ + struct pipe_viewport_state viewport; + + /* Clip state. */ + struct pipe_clip_state clip; }; struct blitter_context *util_blitter_create(struct pipe_context *pipe) @@ -160,7 +166,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&rs_state, 0, sizeof(rs_state)); rs_state.front_winding = PIPE_WINDING_CW; rs_state.cull_mode = PIPE_WINDING_NONE; - rs_state.bypass_vs_clip_and_viewport = 1; rs_state.gl_rasterization_rules = 1; rs_state.flatshade = 1; ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state); @@ -263,6 +268,9 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref); + pipe->set_viewport_state(pipe, &ctx->blitter.saved_viewport); + pipe->set_clip_state(pipe, &ctx->blitter.saved_clip); + /* restore the state objects which are required to be saved before copy/fill */ if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) { @@ -288,25 +296,40 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) static void blitter_set_rectangle(struct blitter_context_priv *ctx, unsigned x1, unsigned y1, unsigned x2, unsigned y2, + unsigned width, unsigned height, float depth) { int i; /* set vertex positions */ - ctx->vertices[0][0][0] = x1; /*v0.x*/ - ctx->vertices[0][0][1] = y1; /*v0.y*/ + ctx->vertices[0][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v0.x*/ + ctx->vertices[0][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v0.y*/ - ctx->vertices[1][0][0] = x2; /*v1.x*/ - ctx->vertices[1][0][1] = y1; /*v1.y*/ + ctx->vertices[1][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v1.x*/ + ctx->vertices[1][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v1.y*/ - ctx->vertices[2][0][0] = x2; /*v2.x*/ - ctx->vertices[2][0][1] = y2; /*v2.y*/ + ctx->vertices[2][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v2.x*/ + ctx->vertices[2][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v2.y*/ - ctx->vertices[3][0][0] = x1; /*v3.x*/ - ctx->vertices[3][0][1] = y2; /*v3.y*/ + ctx->vertices[3][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v3.x*/ + ctx->vertices[3][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v3.y*/ for (i = 0; i < 4; i++) ctx->vertices[i][0][2] = depth; /*z*/ + + /* viewport */ + ctx->viewport.scale[0] = 0.5f * width; + ctx->viewport.scale[1] = 0.5f * height; + ctx->viewport.scale[2] = 1.0f; + ctx->viewport.scale[3] = 1.0f; + ctx->viewport.translate[0] = 0.5f * width; + ctx->viewport.translate[1] = 0.5f * height; + ctx->viewport.translate[2] = 0.0f; + ctx->viewport.translate[3] = 0.0f; + ctx->pipe->set_viewport_state(ctx->pipe, &ctx->viewport); + + /* clip */ + ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip); } static void blitter_set_clear_color(struct blitter_context_priv *ctx, @@ -550,7 +573,7 @@ void util_blitter_clear(struct blitter_context *blitter, pipe->bind_vs_state(pipe, ctx->vs_col); blitter_set_clear_color(ctx, rgba); - blitter_set_rectangle(ctx, 0, 0, width, height, depth); + blitter_set_rectangle(ctx, 0, 0, width, height, width, height, depth); blitter_draw_quad(ctx); blitter_restore_CSOs(ctx); } @@ -633,7 +656,7 @@ static void util_blitter_do_copy(struct blitter_context *blitter, assert(0); } - blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); + blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, dst->width, dst->height, 0); blitter_draw_quad(ctx); } @@ -794,7 +817,7 @@ void util_blitter_fill(struct blitter_context *blitter, pipe->set_framebuffer_state(pipe, &fb_state); blitter_set_clear_color(ctx, rgba); - blitter_set_rectangle(ctx, 0, 0, width, height, 0); + blitter_set_rectangle(ctx, 0, 0, width, height, dst->width, dst->height, 0); blitter_draw_quad(ctx); blitter_restore_CSOs(ctx); } diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index a2f17073ac..92008fce99 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -48,6 +48,8 @@ struct blitter_context struct pipe_framebuffer_state saved_fb_state; /**< framebuffer state */ struct pipe_stencil_ref saved_stencil_ref; /**< stencil ref */ + struct pipe_viewport_state saved_viewport; + struct pipe_clip_state saved_clip; int saved_num_sampler_states; void *saved_sampler_states[32]; @@ -206,6 +208,20 @@ void util_blitter_save_framebuffer(struct blitter_context *blitter, } static INLINE +void util_blitter_save_viewport(struct blitter_context *blitter, + struct pipe_viewport_state *state) +{ + blitter->saved_viewport = *state; +} + +static INLINE +void util_blitter_save_clip(struct blitter_context *blitter, + struct pipe_clip_state *state) +{ + blitter->saved_clip = *state; +} + +static INLINE void util_blitter_save_fragment_sampler_states( struct blitter_context *blitter, int num_sampler_states, diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index eaf4ec90f2..ae7afd7311 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -318,7 +318,6 @@ util_dump_rasterizer_state(struct os_stream *stream, const struct pipe_rasterize util_dump_member(stream, uint, state, line_stipple_factor); util_dump_member(stream, uint, state, line_stipple_pattern); util_dump_member(stream, bool, state, line_last_pixel); - util_dump_member(stream, bool, state, bypass_vs_clip_and_viewport); util_dump_member(stream, bool, state, flatshade_first); util_dump_member(stream, bool, state, gl_rasterization_rules); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 9390e8636f..fc027e48e4 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -61,6 +61,7 @@ struct gen_mipmap_state struct pipe_depth_stencil_alpha_state depthstencil; struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_clip_state clip; void *vs; void *fs2d, *fsCube; @@ -1296,7 +1297,6 @@ util_create_gen_mipmap(struct pipe_context *pipe, memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); ctx->rasterizer.front_winding = PIPE_WINDING_CW; ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; - ctx->rasterizer.bypass_vs_clip_and_viewport = 1; ctx->rasterizer.gl_rasterization_rules = 1; /* sampler state */ @@ -1361,25 +1361,25 @@ get_next_slot(struct gen_mipmap_state *ctx) static unsigned set_vertex_data(struct gen_mipmap_state *ctx, enum pipe_texture_target tex_target, - uint face, float width, float height) + uint face) { unsigned offset; /* vert[0].position */ - ctx->vertices[0][0][0] = 0.0f; /*x*/ - ctx->vertices[0][0][1] = 0.0f; /*y*/ + ctx->vertices[0][0][0] = -1.0f; /*x*/ + ctx->vertices[0][0][1] = -1.0f; /*y*/ /* vert[1].position */ - ctx->vertices[1][0][0] = width; - ctx->vertices[1][0][1] = 0.0f; + ctx->vertices[1][0][0] = 1.0f; + ctx->vertices[1][0][1] = -1.0f; /* vert[2].position */ - ctx->vertices[2][0][0] = width; - ctx->vertices[2][0][1] = height; + ctx->vertices[2][0][0] = 1.0f; + ctx->vertices[2][0][1] = 1.0f; /* vert[3].position */ - ctx->vertices[3][0][0] = 0.0f; - ctx->vertices[3][0][1] = height; + ctx->vertices[3][0][0] = -1.0f; + ctx->vertices[3][0][1] = 1.0f; /* Setup vertex texcoords. This is a little tricky for cube maps. */ if (tex_target == PIPE_TEXTURE_CUBE) { @@ -1499,11 +1499,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); + cso_save_viewport(ctx->cso); + cso_save_clip(ctx->cso); /* bind our state */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_clip(ctx->cso, &ctx->clip); cso_set_fragment_shader_handle(ctx->cso, fs); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); @@ -1522,6 +1525,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, */ for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; + struct pipe_viewport_state vp; struct pipe_surface *surf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, @@ -1535,6 +1539,17 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, fb.height = u_minify(pt->height0, dstLevel); cso_set_framebuffer(ctx->cso, &fb); + /* viewport */ + vp.scale[0] = 0.5f * fb.width; + vp.scale[1] = 0.5f * fb.height; + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * fb.width; + vp.translate[1] = 0.5f * fb.height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(ctx->cso, &vp); + /* * Setup sampler state * Note: we should only have to set the min/max LOD clamps to ensure @@ -1549,12 +1564,10 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_set_sampler_textures(ctx->cso, 1, &pt); - /* quad coords in window coords (bypassing vs, clip and viewport) */ + /* quad coords in clip coords */ offset = set_vertex_data(ctx, pt->target, - face, - (float) u_minify(pt->width0, dstLevel), - (float) u_minify(pt->height0, dstLevel)); + face); util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, @@ -1578,4 +1591,6 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); + cso_restore_viewport(ctx->cso); + cso_restore_clip(ctx->cso); } diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst index 24cc78c68d..ccd9136a2e 100644 --- a/src/gallium/docs/source/cso/rasterizer.rst +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -10,18 +10,6 @@ multisample state, scissoring and flat/smooth shading. Members ------- -bypass_vs_clip_and_viewport -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Whether the entire TCL pipeline should be bypassed. This implies that -vertices are pre-transformed for the viewport, and will not be run -through the vertex shader. - -.. note:: - - Implementations may still clip away vertices that are not in the viewport - when this is set. - flatshade ^^^^^^^^^ diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index d2ab530d98..51f0ef12ba 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -244,6 +244,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + assert(tex); OUT_BATCH(BUF_3D_ID_DEPTH | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ ztile); diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 9120226de0..e31ae6a3fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -183,7 +183,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - llvmpipe->setup = lp_setup_create( screen, + llvmpipe->setup = lp_setup_create( &llvmpipe->pipe, llvmpipe->draw ); if (!llvmpipe->setup) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 82c006d78b..0f83eea685 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -42,114 +42,60 @@ #include "lp_scene.h" -/** - * Begin the rasterization phase. - * Map the framebuffer surfaces. Initialize the 'rast' state. +/* Begin rasterizing a scene: */ static boolean lp_rast_begin( struct lp_rasterizer *rast, - const struct pipe_framebuffer_state *fb, - boolean write_color, - boolean write_zstencil ) + struct lp_scene *scene ) { - struct pipe_screen *screen = rast->screen; - struct pipe_surface *cbuf, *zsbuf; + const struct pipe_framebuffer_state *fb = &scene->fb; + boolean write_color = fb->nr_cbufs != 0; + boolean write_zstencil = fb->zsbuf != NULL; int i; - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + rast->curr_scene = scene; - util_copy_framebuffer_state(&rast->state.fb, fb); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + rast->state.nr_cbufs = scene->fb.nr_cbufs; rast->state.write_zstencil = write_zstencil; rast->state.write_color = write_color; - - rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || - fb->height % TILE_SIZE != 0); - - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - cbuf = rast->state.fb.cbufs[i]; - if (cbuf) { - rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen, - cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - cbuf->width, - cbuf->height); - if (!rast->cbuf_transfer[i]) - goto fail; - - rast->cbuf_map[i] = screen->transfer_map(rast->screen, - rast->cbuf_transfer[i]); - if (!rast->cbuf_map[i]) - goto fail; - } + for (i = 0; i < rast->state.nr_cbufs; i++) { + rast->cbuf[i].map = scene->cbuf_map[i]; + rast->cbuf[i].format = scene->cbuf_transfer[i]->texture->format; + rast->cbuf[i].width = scene->cbuf_transfer[i]->width; + rast->cbuf[i].height = scene->cbuf_transfer[i]->height; + rast->cbuf[i].stride = scene->cbuf_transfer[i]->stride; } - zsbuf = rast->state.fb.zsbuf; - if (zsbuf) { - rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, - zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - zsbuf->width, - zsbuf->height); - if (!rast->zsbuf_transfer) - goto fail; - - rast->zsbuf_map = screen->transfer_map(rast->screen, - rast->zsbuf_transfer); - if (!rast->zsbuf_map) - goto fail; + if (write_zstencil) { + rast->zsbuf.map = scene->zsbuf_map; + rast->zsbuf.stride = scene->zsbuf_transfer->stride; + rast->zsbuf.blocksize = + util_format_get_blocksize(scene->zsbuf_transfer->texture->format); } + lp_scene_bin_iter_begin( scene ); + return TRUE; - -fail: - /* Unmap and release transfers? - */ - return FALSE; } -/** - * Finish the rasterization phase. - * Unmap framebuffer surfaces. - */ static void lp_rast_end( struct lp_rasterizer *rast ) { - struct pipe_screen *screen = rast->screen; - unsigned i; - - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - if (rast->cbuf_map[i]) - screen->transfer_unmap(screen, rast->cbuf_transfer[i]); - - if (rast->cbuf_transfer[i]) - screen->tex_transfer_destroy(rast->cbuf_transfer[i]); - - rast->cbuf_transfer[i] = NULL; - rast->cbuf_map[i] = NULL; - } + int i; - if (rast->zsbuf_map) - screen->transfer_unmap(screen, rast->zsbuf_transfer); + lp_scene_reset( rast->curr_scene ); - if (rast->zsbuf_transfer) - screen->tex_transfer_destroy(rast->zsbuf_transfer); + for (i = 0; i < rast->state.nr_cbufs; i++) + rast->cbuf[i].map = NULL; - rast->zsbuf_transfer = NULL; - rast->zsbuf_map = NULL; + rast->zsbuf.map = NULL; + rast->curr_scene = NULL; } - /** * Begining rasterization of a tile. * \param x window X position of the tile, in pixels @@ -189,7 +135,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + for (i = 0; i < rast->state.nr_cbufs; i++) { memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } @@ -200,7 +146,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task, * works. */ const unsigned chunk = TILE_SIZE / 4; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + for (i = 0; i < rast->state.nr_cbufs; i++) { uint8_t *c = color_tile[i]; unsigned j; for (j = 0; j < 4 * TILE_SIZE; j++) { @@ -232,17 +178,17 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, struct lp_rasterizer *rast = task->rast; const unsigned tile_x = task->x; const unsigned tile_y = task->y; - const unsigned height = TILE_SIZE/TILE_VECTOR_HEIGHT; - const unsigned width = TILE_SIZE*TILE_VECTOR_HEIGHT; - unsigned block_size = util_format_get_blocksize(rast->zsbuf_transfer->texture->format); + const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; + const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; + unsigned block_size = rast->zsbuf.blocksize; uint8_t *dst; - unsigned dst_stride = rast->zsbuf_transfer->stride*TILE_VECTOR_HEIGHT; + unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; unsigned i, j; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); - assert(rast->zsbuf_map); - if (!rast->zsbuf_map) + assert(rast->zsbuf.map); + if (!rast->zsbuf.map) return; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -278,8 +224,8 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task, } break; default: - assert(0); - break; + assert(0); + break; } } @@ -298,25 +244,16 @@ lp_rast_load_color(struct lp_rasterizer_task *task, LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - struct pipe_transfer *transfer = rast->cbuf_transfer[i]; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x >= transfer->width || y >= transfer->height) + for (i = 0; i < rast->state.nr_cbufs; i++) { + if (x >= rast->cbuf[i].width || y >= rast->cbuf[i].height) continue; - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); - - lp_tile_read_4ub(transfer->texture->format, + lp_tile_read_4ub(rast->cbuf[i].format, task->tile.color[i], - rast->cbuf_map[i], - transfer->stride, + rast->cbuf[i].map, + rast->cbuf[i].stride, x, y, - w, h); + TILE_SIZE, TILE_SIZE); LP_COUNT(nr_color_tile_load); } @@ -366,7 +303,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; /* depth buffer */ @@ -405,7 +342,6 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, unsigned ix, iy; int block_offset; -#ifdef DEBUG assert(state); /* Sanity checks */ @@ -414,7 +350,6 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, assert((x % 4) == 0); assert((y % 4) == 0); -#endif ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -423,22 +358,19 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task, block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; /* depth buffer */ depth = lp_rast_depth_pointer(rast, x, y); - -#ifdef DEBUG assert(lp_check_alignment(tile->color[0], 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); assert(lp_check_alignment(inputs->step[0], 16)); assert(lp_check_alignment(inputs->step[1], 16)); assert(lp_check_alignment(inputs->step[2], 16)); -#endif /* run shader */ state->jit_function[1]( &state->jit_context, @@ -516,51 +448,33 @@ lp_rast_store_color(struct lp_rasterizer_task *task) const unsigned x = task->x, y = task->y; unsigned i; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - struct pipe_transfer *transfer = rast->cbuf_transfer[i]; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x >= transfer->width) + for (i = 0; i < rast->state.nr_cbufs; i++) { + if (x >= rast->cbuf[i].width) continue; - if (y >= transfer->height) + if (y >= rast->cbuf[i].height) continue; - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, - task->thread_index, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d\n", __FUNCTION__, + task->thread_index, x, y); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) outline_subtiles(task->tile.color[i]); else if (LP_DEBUG & DEBUG_SHOW_TILES) outline_tile(task->tile.color[i]); - lp_tile_write_4ub(transfer->texture->format, + lp_tile_write_4ub(rast->cbuf[i].format, task->tile.color[i], - rast->cbuf_map[i], - transfer->stride, + rast->cbuf[i].map, + rast->cbuf[i].stride, x, y, - w, h); + TILE_SIZE, TILE_SIZE); LP_COUNT(nr_color_tile_store); } } -/** - * Write the rasterizer's tiles to the framebuffer. - */ -static void -lp_rast_end_tile(struct lp_rasterizer_task *task) -{ - struct lp_rasterizer *rast = task->rast; - - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - if (rast->state.write_color) - lp_rast_store_color(task); -} - /** * Signal on a fence. This is called during bin execution/rasterization. @@ -586,23 +500,6 @@ lp_rast_fence(struct lp_rasterizer_task *task, } -/** - * When all the threads are done rasterizing a scene, one thread will - * call this function to reset the scene and put it onto the empty queue. - */ -static void -release_scene( struct lp_rasterizer *rast, - struct lp_scene *scene ) -{ - util_unreference_framebuffer_state( &scene->fb ); - - lp_scene_reset( scene ); - - assert(lp_scene_is_empty(scene)); - - lp_scene_enqueue( rast->empty_scenes, scene ); - rast->curr_scene = NULL; -} /** @@ -620,7 +517,7 @@ rasterize_bin(struct lp_rasterizer_task *task, struct cmd_block *block; unsigned k; - lp_rast_start_tile( task, x, y ); + lp_rast_start_tile( task, x * TILE_SIZE, y * TILE_SIZE ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { @@ -629,7 +526,14 @@ rasterize_bin(struct lp_rasterizer_task *task, } } - lp_rast_end_tile( task ); + /* Write the rasterizer's tiles to the framebuffer. + */ + if (task->rast->state.write_color) + lp_rast_store_color(task); + + /* Free data for this bin. + */ + lp_scene_bin_reset( task->rast->curr_scene, x, y); } @@ -714,8 +618,7 @@ is_empty_bin( const struct cmd_bin *bin ) */ static void rasterize_scene(struct lp_rasterizer_task *task, - struct lp_scene *scene, - bool write_depth) + struct lp_scene *scene) { /* loop over scene bins, rasterize each */ #if 0 @@ -724,7 +627,7 @@ rasterize_scene(struct lp_rasterizer_task *task, for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - rasterize_bin(task, bin, i * TILE_SIZE, j * TILE_SIZE); + rasterize_bin(task, bin, i, j); } } } @@ -736,7 +639,7 @@ rasterize_scene(struct lp_rasterizer_task *task, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { if (!is_empty_bin( bin )) - rasterize_bin(task, bin, x * TILE_SIZE, y * TILE_SIZE); + rasterize_bin(task, bin, x, y); } } #endif @@ -747,44 +650,20 @@ rasterize_scene(struct lp_rasterizer_task *task, * Called by setup module when it has something for us to render. */ void -lp_rasterize_scene( struct lp_rasterizer *rast, - struct lp_scene *scene, - const struct pipe_framebuffer_state *fb, - bool write_depth ) +lp_rast_queue_scene( struct lp_rasterizer *rast, + struct lp_scene *scene) { - boolean debug = false; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (debug) { - unsigned x, y; - debug_printf("rasterize scene:\n"); - debug_printf(" data size: %u\n", lp_scene_data_size(scene)); - for (y = 0; y < scene->tiles_y; y++) { - for (x = 0; x < scene->tiles_x; x++) { - debug_printf(" bin %u, %u size: %u\n", x, y, - lp_scene_bin_size(scene, x, y)); - } - } - } - - /* save framebuffer state in the bin */ - util_copy_framebuffer_state(&scene->fb, fb); - scene->write_depth = write_depth; - if (rast->num_threads == 0) { /* no threading */ - lp_rast_begin( rast, fb, - fb->nr_cbufs != 0, /* always write color if cbufs present */ - fb->zsbuf != NULL && write_depth ); + lp_rast_begin( rast, scene ); - lp_scene_bin_iter_begin( scene ); - rasterize_scene( &rast->tasks[0], scene, write_depth ); + rasterize_scene( &rast->tasks[0], scene ); - release_scene( rast, scene ); - - lp_rast_end( rast ); + lp_scene_reset( scene ); + rast->curr_scene = NULL; } else { /* threaded rendering! */ @@ -796,14 +675,26 @@ lp_rasterize_scene( struct lp_rasterizer *rast, for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_signal(&rast->tasks[i].work_ready); } + } + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + +void +lp_rast_finish( struct lp_rasterizer *rast ) +{ + if (rast->num_threads == 0) { + /* nothing to do */ + } + else { + int i; /* wait for work to complete */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_wait(&rast->tasks[i].work_done); } } - - LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -834,19 +725,8 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) * - get next scene to rasterize * - map the framebuffer surfaces */ - const struct pipe_framebuffer_state *fb; - boolean write_depth; - - rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE ); - - lp_scene_bin_iter_begin( rast->curr_scene ); - - fb = &rast->curr_scene->fb; - write_depth = rast->curr_scene->write_depth; - - lp_rast_begin( rast, fb, - fb->nr_cbufs != 0, - fb->zsbuf != NULL && write_depth ); + lp_rast_begin( rast, + lp_scene_dequeue( rast->full_scenes, TRUE ) ); } /* Wait for all threads to get here so that threads[1+] don't @@ -857,25 +737,23 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); + rasterize_scene(task, - rast->curr_scene, - rast->curr_scene->write_depth); + rast->curr_scene); /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); + /* XXX: shouldn't be necessary: + */ if (task->thread_index == 0) { - /* thread[0]: - * - release the scene object - * - unmap the framebuffer surfaces - */ - release_scene( rast, rast->curr_scene ); lp_rast_end( rast ); } /* signal done with work */ if (debug) debug_printf("thread %d done working\n", task->thread_index); + pipe_semaphore_signal(&task->work_done); } @@ -918,7 +796,7 @@ create_rast_threads(struct lp_rasterizer *rast) * processing them. */ struct lp_rasterizer * -lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) +lp_rast_create( void ) { struct lp_rasterizer *rast; unsigned i, cbuf; @@ -927,9 +805,6 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) if(!rast) return NULL; - rast->screen = screen; - - rast->empty_scenes = empty; rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { @@ -957,8 +832,6 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) { unsigned i, cbuf; - util_unreference_framebuffer_state(&rast->state.fb); - for (i = 0; i < Elements(rast->tasks); i++) { for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) align_free(rast->tasks[i].tile.color[cbuf]); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 1ed2700191..d91c45eb2b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -43,10 +43,8 @@ struct lp_rasterizer; struct lp_scene; -struct lp_scene_queue; struct lp_fence; struct cmd_bin; -struct pipe_screen; /** For sub-pixel positioning */ #define FIXED_ORDER 4 @@ -129,18 +127,21 @@ struct lp_rast_triangle { -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, - struct lp_scene_queue *empty ); +struct lp_rasterizer * +lp_rast_create( void ); -void lp_rast_destroy( struct lp_rasterizer * ); +void +lp_rast_destroy( struct lp_rasterizer * ); -unsigned lp_rast_get_num_threads( struct lp_rasterizer * ); +unsigned +lp_rast_get_num_threads( struct lp_rasterizer * ); -void lp_rasterize_scene( struct lp_rasterizer *rast, - struct lp_scene *scene, - const struct pipe_framebuffer_state *fb, - bool write_depth ); +void +lp_rast_queue_scene( struct lp_rasterizer *rast, + struct lp_scene *scene ); +void +lp_rast_finish( struct lp_rasterizer *rast ); union lp_rast_cmd_arg { diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index abc5a9ad89..39bf2c2587 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -38,8 +38,6 @@ #define MAX_THREADS 8 /* XXX probably temporary here */ -struct pipe_transfer; -struct pipe_screen; struct lp_rasterizer; @@ -82,20 +80,26 @@ struct lp_rasterizer_task */ struct lp_rasterizer { - boolean clipped_tile; - boolean check_for_clipped_tiles; boolean exit_flag; /* Framebuffer stuff */ - struct pipe_screen *screen; - struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; - struct pipe_transfer *zsbuf_transfer; - void *cbuf_map[PIPE_MAX_COLOR_BUFS]; - uint8_t *zsbuf_map; + struct { + void *map; + unsigned stride; + unsigned width; + unsigned height; + enum pipe_format format; + } cbuf[PIPE_MAX_COLOR_BUFS]; + + struct { + uint8_t *map; + unsigned stride; + unsigned blocksize; + } zsbuf; struct { - struct pipe_framebuffer_state fb; + unsigned nr_cbufs; boolean write_color; boolean write_zstencil; unsigned clear_color; @@ -105,7 +109,14 @@ struct lp_rasterizer /** The incoming queue of scenes ready to rasterize */ struct lp_scene_queue *full_scenes; - /** The outgoing queue of processed scenes to return to setup modulee */ + + /** + * The outgoing queue of processed scenes to return to setup module + * + * XXX: while scenes are per-context but the rasterizer is + * (potentially) shared, these empty scenes should be returned to + * the context which created them rather than retained here. + */ struct lp_scene_queue *empty_scenes; /** The scene currently being rasterized by the threads */ @@ -137,17 +148,18 @@ lp_rast_depth_pointer( struct lp_rasterizer *rast, unsigned x, unsigned y ) { void * depth; + assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); - if(!rast->zsbuf_map) + + if (!rast->zsbuf.map) return NULL; - assert(rast->zsbuf_transfer); - depth = rast->zsbuf_map + - y*rast->zsbuf_transfer->stride + - TILE_VECTOR_HEIGHT*x*util_format_get_blocksize(rast->zsbuf_transfer->texture->format); -#ifdef DEBUG + + depth = (rast->zsbuf.map + + rast->zsbuf.stride * y + + rast->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT); + assert(lp_check_alignment(depth, 16)); -#endif return depth; } @@ -175,7 +187,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16; /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; depth = lp_rast_depth_pointer(rast, x, y); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index cba0e21298..72492c0f0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -29,44 +29,67 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_simple_list.h" +#include "util/u_surface.h" #include "lp_scene.h" +#include "lp_scene_queue.h" +#include "lp_debug.h" struct lp_scene * -lp_scene_create(void) +lp_scene_create( struct pipe_context *pipe, + struct lp_scene_queue *queue ) { + unsigned i, j; struct lp_scene *scene = CALLOC_STRUCT(lp_scene); - if (scene) - lp_scene_init(scene); + if (!scene) + return NULL; + + scene->pipe = pipe; + scene->empty_queue = queue; + + for (i = 0; i < TILES_X; i++) { + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + } + } + + scene->data.head = + scene->data.tail = CALLOC_STRUCT(data_block); + + make_empty_list(&scene->textures); + + pipe_mutex_init(scene->mutex); + return scene; } +/** + * Free all data associated with the given scene, and free(scene). + */ void lp_scene_destroy(struct lp_scene *scene) { - lp_scene_reset(scene); - lp_scene_free_bin_data(scene); - FREE(scene); -} + unsigned i, j; + lp_scene_reset(scene); -void -lp_scene_init(struct lp_scene *scene) -{ - unsigned i, j; for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + assert(bin->commands.head == bin->commands.tail); + FREE(bin->commands.head); + bin->commands.head = NULL; + bin->commands.tail = NULL; } - scene->data.head = - scene->data.tail = CALLOC_STRUCT(data_block); + FREE(scene->data.head); + scene->data.head = NULL; - make_empty_list(&scene->textures); + pipe_mutex_destroy(scene->mutex); - pipe_mutex_init(scene->mutex); + FREE(scene); } @@ -92,6 +115,9 @@ lp_scene_is_empty(struct lp_scene *scene ) } +/* Free data for one particular bin. May be called from the + * rasterizer thread(s). + */ void lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) { @@ -115,7 +141,8 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) /** - * Set scene to empty state. + * Free all the temporary data in a scene. May be called from the + * rasterizer thread(s). */ void lp_scene_reset(struct lp_scene *scene ) @@ -162,40 +189,8 @@ lp_scene_reset(struct lp_scene *scene ) } -/** - * Free all data associated with the given bin, but don't free(scene). - */ -void -lp_scene_free_bin_data(struct lp_scene *scene) -{ - unsigned i, j; - - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - /* lp_reset_scene() should have been already called */ - assert(bin->commands.head == bin->commands.tail); - FREE(bin->commands.head); - bin->commands.head = NULL; - bin->commands.tail = NULL; - } - - FREE(scene->data.head); - scene->data.head = NULL; - - pipe_mutex_destroy(scene->mutex); -} - -void -lp_scene_set_framebuffer_size( struct lp_scene *scene, - unsigned width, unsigned height ) -{ - assert(lp_scene_is_empty(scene)); - scene->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; - scene->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; -} void @@ -393,3 +388,158 @@ end: pipe_mutex_unlock(scene->mutex); return bin; } + + +/** + * Prepare this scene for the rasterizer. + * Map the framebuffer surfaces. Initialize the 'rast' state. + */ +static boolean +lp_scene_map_buffers( struct lp_scene *scene ) +{ + struct pipe_screen *screen = scene->pipe->screen; + struct pipe_surface *cbuf, *zsbuf; + int i; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + + /* Map all color buffers + */ + for (i = 0; i < scene->fb.nr_cbufs; i++) { + cbuf = scene->fb.cbufs[i]; + if (cbuf) { + scene->cbuf_transfer[i] = screen->get_tex_transfer(screen, + cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + cbuf->width, + cbuf->height); + if (!scene->cbuf_transfer[i]) + goto fail; + + scene->cbuf_map[i] = screen->transfer_map(screen, + scene->cbuf_transfer[i]); + if (!scene->cbuf_map[i]) + goto fail; + } + } + + /* Map the zsbuffer + */ + zsbuf = scene->fb.zsbuf; + if (zsbuf) { + scene->zsbuf_transfer = screen->get_tex_transfer(screen, + zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, + zsbuf->width, + zsbuf->height); + if (!scene->zsbuf_transfer) + goto fail; + + scene->zsbuf_map = screen->transfer_map(screen, + scene->zsbuf_transfer); + if (!scene->zsbuf_map) + goto fail; + } + + return TRUE; + +fail: + /* Unmap and release transfers? + */ + return FALSE; +} + + + +/** + * Called after rasterizer as finished rasterizing a scene. + * + * We want to call this from the pipe_context's current thread to + * avoid having to have mutexes on the transfer functions. + */ +static void +lp_scene_unmap_buffers( struct lp_scene *scene ) +{ + struct pipe_screen *screen = scene->pipe->screen; + unsigned i; + + for (i = 0; i < scene->fb.nr_cbufs; i++) { + if (scene->cbuf_map[i]) + screen->transfer_unmap(screen, scene->cbuf_transfer[i]); + + if (scene->cbuf_transfer[i]) + screen->tex_transfer_destroy(scene->cbuf_transfer[i]); + + scene->cbuf_transfer[i] = NULL; + scene->cbuf_map[i] = NULL; + } + + if (scene->zsbuf_map) + screen->transfer_unmap(screen, scene->zsbuf_transfer); + + if (scene->zsbuf_transfer) + screen->tex_transfer_destroy(scene->zsbuf_transfer); + + scene->zsbuf_transfer = NULL; + scene->zsbuf_map = NULL; + + util_unreference_framebuffer_state( &scene->fb ); +} + + +void lp_scene_begin_binning( struct lp_scene *scene, + struct pipe_framebuffer_state *fb ) +{ + assert(lp_scene_is_empty(scene)); + + util_copy_framebuffer_state(&scene->fb, fb); + + scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE; + scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE; +} + + +void lp_scene_rasterize( struct lp_scene *scene, + struct lp_rasterizer *rast, + boolean write_depth ) +{ + if (0) { + unsigned x, y; + debug_printf("rasterize scene:\n"); + debug_printf(" data size: %u\n", lp_scene_data_size(scene)); + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + debug_printf(" bin %u, %u size: %u\n", x, y, + lp_scene_bin_size(scene, x, y)); + } + } + } + + + scene->write_depth = (scene->fb.zsbuf != NULL && + write_depth); + + lp_scene_map_buffers( scene ); + + /* Enqueue the scene for rasterization, then immediately wait for + * it to finish. + */ + lp_rast_queue_scene( rast, scene ); + + /* Currently just wait for the rasterizer to finish. Some + * threading interactions need to be worked out, particularly once + * transfers become per-context: + */ + lp_rast_finish( rast ); + lp_scene_unmap_buffers( scene ); + lp_scene_enqueue( scene->empty_queue, scene ); +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 8d725cd437..739ac22908 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -39,6 +39,7 @@ #include "lp_tile_soa.h" #include "lp_rast.h" +struct lp_scene_queue; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -112,8 +113,14 @@ struct texture_ref { * scenes: */ struct lp_scene { - struct cmd_bin tile[TILES_X][TILES_Y]; - struct data_block_list data; + struct pipe_context *pipe; + struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; + struct pipe_transfer *zsbuf_transfer; + + /* Scene's buffers are mapped at the time the scene is enqueued: + */ + void *cbuf_map[PIPE_MAX_COLOR_BUFS]; + uint8_t *zsbuf_map; /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; @@ -131,25 +138,28 @@ struct lp_scene { int curr_x, curr_y; /**< for iterating over bins */ pipe_mutex mutex; + + /* Where to place this scene once it has been rasterized: + */ + struct lp_scene_queue *empty_queue; + + struct cmd_bin tile[TILES_X][TILES_Y]; + struct data_block_list data; }; -struct lp_scene *lp_scene_create(void); +struct lp_scene *lp_scene_create(struct pipe_context *pipe, + struct lp_scene_queue *empty_queue); void lp_scene_destroy(struct lp_scene *scene); -void lp_scene_init(struct lp_scene *scene); boolean lp_scene_is_empty(struct lp_scene *scene ); void lp_scene_reset(struct lp_scene *scene ); -void lp_scene_free_bin_data(struct lp_scene *scene); - -void lp_scene_set_framebuffer_size( struct lp_scene *scene, - unsigned width, unsigned height ); void lp_bin_new_data_block( struct data_block_list *list ); @@ -296,5 +306,13 @@ lp_scene_bin_iter_begin( struct lp_scene *scene ); struct cmd_bin * lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); +void +lp_scene_rasterize( struct lp_scene *scene, + struct lp_rasterizer *rast, + boolean write_depth ); + +void +lp_scene_begin_binning( struct lp_scene *scene, + struct pipe_framebuffer_state *fb ); #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3aec9de373..aebed85fbb 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -66,11 +66,8 @@ lp_setup_get_current_scene(struct setup_context *setup) assert(lp_scene_is_empty(setup->scene)); - if(0)lp_scene_reset( setup->scene ); /* XXX temporary? */ - - lp_scene_set_framebuffer_size(setup->scene, - setup->fb.width, - setup->fb.height); + lp_scene_begin_binning(setup->scene, + &setup->fb ); } return setup->scene; } @@ -135,13 +132,12 @@ static void reset_context( struct setup_context *setup ) /** Rasterize all scene's bins */ static void lp_setup_rasterize_scene( struct setup_context *setup, - boolean write_depth ) + boolean write_depth ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_rasterize_scene(setup->rast, - scene, - &setup->fb, + lp_scene_rasterize(scene, + setup->rast, write_depth); reset_context( setup ); @@ -246,19 +242,16 @@ void lp_setup_bind_framebuffer( struct setup_context *setup, const struct pipe_framebuffer_state *fb ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + /* Flush any old scene. + */ set_scene_state( setup, SETUP_FLUSHED ); - /* re-get scene pointer, may have a new scene after flushing */ - (void) scene; - scene = lp_setup_get_current_scene(setup); - + /* Set new state. This will be picked up later when we next need a + * scene. + */ util_copy_framebuffer_state(&setup->fb, fb); - - lp_scene_set_framebuffer_size(scene, setup->fb.width, setup->fb.height); } @@ -683,7 +676,7 @@ lp_setup_destroy( struct setup_context *setup ) * it. */ struct setup_context * -lp_setup_create( struct pipe_screen *screen, +lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ) { unsigned i; @@ -698,7 +691,9 @@ lp_setup_create( struct pipe_screen *screen, if (!setup->empty_scenes) goto fail; - setup->rast = lp_rast_create( screen, setup->empty_scenes ); + /* XXX: move this to the screen and share between contexts: + */ + setup->rast = lp_rast_create(); if (!setup->rast) goto fail; @@ -711,7 +706,8 @@ lp_setup_create( struct pipe_screen *screen, /* create some empty scenes */ for (i = 0; i < MAX_SCENES; i++) { - setup->scenes[i] = lp_scene_create(); + setup->scenes[i] = lp_scene_create( pipe, setup->empty_scenes ); + lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 0e155a7dc3..17c112b528 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -62,7 +62,7 @@ struct lp_fragment_shader; struct lp_jit_context; struct setup_context * -lp_setup_create( struct pipe_screen *screen, +lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ); void diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index ca3d6aca7f..ea259aadf3 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -101,7 +101,6 @@ struct nv30_blend_state { struct nv30_state { unsigned scissor_enabled; unsigned stipple_enabled; - unsigned viewport_bypass; unsigned fp_samplers; uint64_t dirty; diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c index 2d7781292b..6fccd6b60e 100644 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -5,55 +5,25 @@ nv30_state_viewport_validate(struct nv30_context *nv30) { struct pipe_viewport_state *vpt = &nv30->viewport; struct nouveau_stateobj *so; - unsigned bypass; - - if (/*nv30->render_mode == HW &&*/ - !nv30->rasterizer->pipe.bypass_vs_clip_and_viewport) - bypass = 0; - else - bypass = 1; if (nv30->state.hw[NV30_STATE_VIEWPORT] && - (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)) && - nv30->state.viewport_bypass == bypass) + !(nv30->dirty & NV30_NEW_VIEWPORT)) return FALSE; - nv30->state.viewport_bypass = bypass; so = so_new(3, 10, 0); - if (!bypass) { - so_method(so, nv30->screen->rankine, - NV34TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); -/* so_method(so, nv30->screen->rankine, 0x1d78, 1); - so_data (so, 1); -*/ } else { - so_method(so, nv30->screen->rankine, - NV34TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - so_data (so, fui(1.0)); - so_data (so, fui(1.0)); - so_data (so, fui(0.0)); - /* Not entirely certain what this is yet. The DDX uses this - * value also as it fixes rendering when you pass - * pre-transformed vertices to the GPU. My best gusss is that - * this bypasses some culling/clipping stage. Might be worth - * noting that points/lines are uneffected by whatever this - * value fixes, only filled polygons are effected. - */ -/* so_method(so, nv30->screen->rankine, 0x1d78, 1); - so_data (so, 0x110); -*/ } + so_method(so, nv30->screen->rankine, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); +/* so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 1); +*/ /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */ so_method(so, nv30->screen->rankine, 0x1d78, 1); so_data (so, 1); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 4861924dac..97fb6a2ef9 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -101,7 +101,6 @@ struct nv40_blend_state { struct nv40_state { unsigned scissor_enabled; unsigned stipple_enabled; - unsigned viewport_bypass; unsigned fp_samplers; uint64_t dirty; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 9919ba1d0b..3aacb00f99 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -5,55 +5,24 @@ nv40_state_viewport_validate(struct nv40_context *nv40) { struct pipe_viewport_state *vpt = &nv40->viewport; struct nouveau_stateobj *so; - unsigned bypass; - - if (nv40->render_mode == HW && - !nv40->rasterizer->pipe.bypass_vs_clip_and_viewport) - bypass = 0; - else - bypass = 1; if (nv40->state.hw[NV40_STATE_VIEWPORT] && - (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) && - nv40->state.viewport_bypass == bypass) + !(nv40->dirty & NV40_NEW_VIEWPORT)) return FALSE; - nv40->state.viewport_bypass = bypass; so = so_new(2, 9, 0); - if (!bypass) { - so_method(so, nv40->screen->curie, - NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - so_method(so, nv40->screen->curie, 0x1d78, 1); - so_data (so, 1); - } else { - so_method(so, nv40->screen->curie, - NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - so_data (so, fui(1.0)); - so_data (so, fui(1.0)); - so_data (so, fui(0.0)); - /* Not entirely certain what this is yet. The DDX uses this - * value also as it fixes rendering when you pass - * pre-transformed vertices to the GPU. My best gusss is that - * this bypasses some culling/clipping stage. Might be worth - * noting that points/lines are uneffected by whatever this - * value fixes, only filled polygons are effected. - */ - so_method(so, nv40->screen->curie, 0x1d78, 1); - so_data (so, 0x110); - } + so_method(so, nv40->screen->curie, + NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + so_method(so, nv40->screen->curie, 0x1d78, 1); + so_data (so, 1); so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); so_ref(NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index b4de3e2ba5..c540594b94 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -127,7 +127,6 @@ struct nv50_state { struct nouveau_stateobj *scissor; unsigned scissor_enabled; struct nouveau_stateobj *viewport; - unsigned viewport_bypass; struct nouveau_stateobj *tsc_upload; struct nouveau_stateobj *tic_upload; unsigned miptree_nr[PIPE_SHADER_TYPES]; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index efab94cab7..a91b31015e 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -375,50 +375,32 @@ nv50_state_validate(struct nv50_context *nv50) scissor_uptodate: if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) { - unsigned bypass; - - if (!nv50->rasterizer->pipe.bypass_vs_clip_and_viewport) - bypass = 0; - else - bypass = 1; - if (nv50->state.viewport && - (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) && - nv50->state.viewport_bypass == bypass) + !(nv50->dirty & NV50_NEW_VIEWPORT)) goto viewport_uptodate; - nv50->state.viewport_bypass = bypass; so = so_new(5, 9, 0); - if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); - so_data (so, fui(nv50->viewport.translate[0])); - so_data (so, fui(nv50->viewport.translate[1])); - so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); - so_data (so, fui(nv50->viewport.scale[0])); - so_data (so, fui(nv50->viewport.scale[1])); - so_data (so, fui(nv50->viewport.scale[2])); - - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); - so_data (so, 1); - /* 0x0000 = remove whole primitive only (xyz) - * 0x1018 = remove whole primitive only (xy), clamp z - * 0x1080 = clip primitive (xyz) - * 0x1098 = clip primitive (xy), clamp z - */ - so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); - so_data (so, 0x1080); - /* no idea what 0f90 does */ - so_method(so, tesla, 0x0f90, 1); - so_data (so, 0); - } else { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); - so_data (so, 0); - so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); - so_data (so, 0x0000); - so_method(so, tesla, 0x0f90, 1); - so_data (so, 1); - } + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); + so_data (so, fui(nv50->viewport.translate[0])); + so_data (so, fui(nv50->viewport.translate[1])); + so_data (so, fui(nv50->viewport.translate[2])); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); + so_data (so, fui(nv50->viewport.scale[0])); + so_data (so, fui(nv50->viewport.scale[1])); + so_data (so, fui(nv50->viewport.scale[2])); + + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); + so_data (so, 1); + /* 0x0000 = remove whole primitive only (xyz) + * 0x1018 = remove whole primitive only (xy), clamp z + * 0x1080 = clip primitive (xyz) + * 0x1098 = clip primitive (xy), clamp z + */ + so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); + so_data (so, 0x1080); + /* no idea what 0f90 does */ + so_method(so, tesla, 0x0f90, 1); + so_data (so, 0); so_ref(so, &nv50->state.viewport); so_ref(NULL, &so); diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 0e2d6c5b42..513cc0f5d4 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -34,6 +34,8 @@ static void r300_blitter_save_states(struct r300_context* r300) util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); + util_blitter_save_viewport(r300->blitter, &r300->viewport); + util_blitter_save_clip(r300->blitter, &r300->clip); } /* Clear currently bound buffers. */ diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index b98fe347b8..0d1518a05b 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -356,12 +356,14 @@ struct r300_context { struct pipe_stencil_ref stencil_ref; + struct pipe_clip_state clip; + + struct pipe_viewport_state viewport; + /* Bitmask of dirty state objects. */ uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; - /* Whether the TCL engine should be in bypass mode. */ - boolean tcl_bypass; /* Whether polygon offset is enabled. */ boolean polygon_offset_enabled; /* Z buffer bit depth. */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 17d55ba3b4..addb28bded 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -175,23 +175,15 @@ static const float * get_shader_constant( break; case RC_STATE_R300_VIEWPORT_SCALE: - if (r300->tcl_bypass) { - vec[0] = 1; - vec[1] = 1; - vec[2] = 1; - } else { - vec[0] = viewport->xscale; - vec[1] = viewport->yscale; - vec[2] = viewport->zscale; - } + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; break; case RC_STATE_R300_VIEWPORT_OFFSET: - if (!r300->tcl_bypass) { - vec[0] = viewport->xoffset; - vec[1] = viewport->yoffset; - vec[2] = viewport->zoffset; - } + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; break; default: @@ -946,22 +938,16 @@ void r300_emit_viewport_state(struct r300_context* r300, struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - if (r300->tcl_bypass) { - BEGIN_CS(2); /* XXX tcl_bypass will be removed in gallium anyway */ - OUT_CS_REG(R300_VAP_VTE_CNTL, 0); - END_CS; - } else { - BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - END_CS; - } + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } void r300_emit_ztop_state(struct r300_context* r300, diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 6c23045561..a983ec7b5b 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -376,6 +376,8 @@ static void r300_set_clip_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); + r300->clip = *state; + if (r300_screen(pipe->screen)->caps->has_tcl) { memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); r300->clip_state.size = 29; @@ -714,8 +716,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* If bypassing TCL, or if no TCL engine is present, turn off the HW TCL. * Else, enable HW TCL and force Draw's TCL off. */ - if (state->bypass_vs_clip_and_viewport || - !r300screen->caps->has_tcl) { + if (!r300screen->caps->has_tcl) { rs->vap_control_status |= R300_VAP_TCL_BYPASS; } @@ -811,11 +812,9 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } if (rs) { - r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw; r300->rs_state.dirty = TRUE; } else { - r300->tcl_bypass = FALSE; r300->polygon_offset_enabled = FALSE; } @@ -982,6 +981,8 @@ static void r300_set_viewport_state(struct pipe_context* pipe, struct r300_viewport_state* viewport = (struct r300_viewport_state*)r300->viewport_state.state; + r300->viewport = *state; + /* Do the transform in HW. */ viewport->vte_control = R300_VTX_W0_FMT; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 6eb7f2bfd1..9c8e907fdf 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -119,13 +119,7 @@ static void r300_vertex_psc(struct r300_context* r300) memset(vformat, 0, sizeof(struct r300_vertex_stream_state)); - /* If TCL is bypassed, map vertex streams to equivalent VS output - * locations. */ - if (r300->tcl_bypass) { - stream_tab = vs->stream_loc_notcl; - } else { - stream_tab = identity; - } + stream_tab = identity; /* Vertex shaders have no semantics on their inputs, * so PSC should just route stuff based on the vertex elements, diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index 272ad40a90..242aaac466 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -176,7 +176,6 @@ init_pipe_state(struct sp_mpeg12_context *ctx) rast.line_stipple_factor = 0; rast.line_stipple_pattern = 0; rast.line_last_pixel = 0; - rast.bypass_vs_clip_and_viewport = 0; rast.line_width = 1; rast.point_smooth = 0; rast.point_quad_rasterization = 0; diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index 7b12c66f94..b710914acd 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -120,174 +120,153 @@ static int emit_viewport( struct svga_context *svga, float fb_width = svga->curr.framebuffer.width; float fb_height = svga->curr.framebuffer.height; - memset( &prescale, 0, sizeof(prescale) ); - - if (svga->curr.rast->templ.bypass_vs_clip_and_viewport) { - - /* Avoid POSITIONT as it has a non trivial implementation outside the D3D - * API. Always generate a vertex shader. - */ - rect.x = 0; - rect.y = 0; - rect.w = svga->curr.framebuffer.width; - rect.h = svga->curr.framebuffer.height; - - prescale.scale[0] = 2.0 / (float)rect.w; - prescale.scale[1] = - 2.0 / (float)rect.h; - prescale.scale[2] = 1.0; - prescale.scale[3] = 1.0; - prescale.translate[0] = -1.0f; - prescale.translate[1] = 1.0f; - prescale.translate[2] = 0; - prescale.translate[3] = 0; - prescale.enabled = TRUE; - } else { - - /* Examine gallium viewport transformation and produce a screen - * rectangle and possibly vertex shader pre-transformation to - * get the same results. - */ - float fx = viewport->scale[0] * -1.0 + viewport->translate[0]; - float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1]; - float fw = viewport->scale[0] * 2; - float fh = flip * viewport->scale[1] * 2; - - SVGA_DBG(DEBUG_VIEWPORT, - "\ninitial %f,%f %fx%f\n", - fx, - fy, - fw, - fh); - - prescale.scale[0] = 1.0; - prescale.scale[1] = 1.0; - prescale.scale[2] = 1.0; - prescale.scale[3] = 1.0; - prescale.translate[0] = 0; - prescale.translate[1] = 0; - prescale.translate[2] = 0; - prescale.translate[3] = 0; - prescale.enabled = TRUE; - - - - if (fw < 0) { - prescale.scale[0] *= -1.0; - prescale.translate[0] += -fw; - fw = -fw; - fx = viewport->scale[0] * 1.0 + viewport->translate[0]; - } + float fx = viewport->scale[0] * -1.0 + viewport->translate[0]; + float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1]; + float fw = viewport->scale[0] * 2; + float fh = flip * viewport->scale[1] * 2; - if (fh < 0) { - prescale.scale[1] *= -1.0; - prescale.translate[1] += -fh; - fh = -fh; - fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1]; - } + memset( &prescale, 0, sizeof(prescale) ); - if (fx < 0) { - prescale.translate[0] += fx; - prescale.scale[0] *= fw / (fw + fx); - fw += fx; - fx = 0; - } + /* Examine gallium viewport transformation and produce a screen + * rectangle and possibly vertex shader pre-transformation to + * get the same results. + */ - if (fy < 0) { - prescale.translate[1] += fy; - prescale.scale[1] *= fh / (fh + fy); - fh += fy; - fy = 0; - } + SVGA_DBG(DEBUG_VIEWPORT, + "\ninitial %f,%f %fx%f\n", + fx, + fy, + fw, + fh); + + prescale.scale[0] = 1.0; + prescale.scale[1] = 1.0; + prescale.scale[2] = 1.0; + prescale.scale[3] = 1.0; + prescale.translate[0] = 0; + prescale.translate[1] = 0; + prescale.translate[2] = 0; + prescale.translate[3] = 0; + prescale.enabled = TRUE; + + + + if (fw < 0) { + prescale.scale[0] *= -1.0; + prescale.translate[0] += -fw; + fw = -fw; + fx = viewport->scale[0] * 1.0 + viewport->translate[0]; + } - if (fx + fw > fb_width) { - prescale.scale[0] *= fw / (fb_width - fx); - prescale.translate[0] -= fx * (fw / (fb_width - fx)); - prescale.translate[0] += fx; - fw = fb_width - fx; - - } + if (fh < 0) { + prescale.scale[1] *= -1.0; + prescale.translate[1] += -fh; + fh = -fh; + fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1]; + } - if (fy + fh > fb_height) { - prescale.scale[1] *= fh / (fb_height - fy); - prescale.translate[1] -= fy * (fh / (fb_height - fy)); - prescale.translate[1] += fy; - fh = fb_height - fy; - } + if (fx < 0) { + prescale.translate[0] += fx; + prescale.scale[0] *= fw / (fw + fx); + fw += fx; + fx = 0; + } - if (fw < 0 || fh < 0) { - fw = fh = fx = fy = 0; - degenerate = TRUE; - goto out; - } + if (fy < 0) { + prescale.translate[1] += fy; + prescale.scale[1] *= fh / (fh + fy); + fh += fy; + fy = 0; + } + if (fx + fw > fb_width) { + prescale.scale[0] *= fw / (fb_width - fx); + prescale.translate[0] -= fx * (fw / (fb_width - fx)); + prescale.translate[0] += fx; + fw = fb_width - fx; + + } - /* D3D viewport is integer space. Convert fx,fy,etc. to - * integers. - * - * TODO: adjust pretranslate correct for any subpixel error - * introduced converting to integers. - */ - rect.x = fx; - rect.y = fy; - rect.w = fw; - rect.h = fh; + if (fy + fh > fb_height) { + prescale.scale[1] *= fh / (fb_height - fy); + prescale.translate[1] -= fy * (fh / (fb_height - fy)); + prescale.translate[1] += fy; + fh = fb_height - fy; + } - SVGA_DBG(DEBUG_VIEWPORT, - "viewport error %f,%f %fx%f\n", - fabs((float)rect.x - fx), - fabs((float)rect.y - fy), - fabs((float)rect.w - fw), - fabs((float)rect.h - fh)); + if (fw < 0 || fh < 0) { + fw = fh = fx = fy = 0; + degenerate = TRUE; + goto out; + } - SVGA_DBG(DEBUG_VIEWPORT, - "viewport %d,%d %dx%d\n", - rect.x, - rect.y, - rect.w, - rect.h); - - /* Finally, to get GL rasterization rules, need to tweak the - * screen-space coordinates slightly relative to D3D which is - * what hardware implements natively. - */ - if (svga->curr.rast->templ.gl_rasterization_rules) { - float adjust_x = 0.0; - float adjust_y = 0.0; - - switch (svga->curr.reduced_prim) { - case PIPE_PRIM_LINES: - adjust_x = -0.5; - adjust_y = 0; - break; - case PIPE_PRIM_POINTS: - case PIPE_PRIM_TRIANGLES: - adjust_x = -0.375; - adjust_y = -0.5; - break; - } - - prescale.translate[0] += adjust_x; - prescale.translate[1] += adjust_y; - prescale.translate[2] = 0.5; /* D3D clip space */ - prescale.scale[2] = 0.5; /* D3D clip space */ + /* D3D viewport is integer space. Convert fx,fy,etc. to + * integers. + * + * TODO: adjust pretranslate correct for any subpixel error + * introduced converting to integers. + */ + rect.x = fx; + rect.y = fy; + rect.w = fw; + rect.h = fh; + + SVGA_DBG(DEBUG_VIEWPORT, + "viewport error %f,%f %fx%f\n", + fabs((float)rect.x - fx), + fabs((float)rect.y - fy), + fabs((float)rect.w - fw), + fabs((float)rect.h - fh)); + + SVGA_DBG(DEBUG_VIEWPORT, + "viewport %d,%d %dx%d\n", + rect.x, + rect.y, + rect.w, + rect.h); + + + /* Finally, to get GL rasterization rules, need to tweak the + * screen-space coordinates slightly relative to D3D which is + * what hardware implements natively. + */ + if (svga->curr.rast->templ.gl_rasterization_rules) { + float adjust_x = 0.0; + float adjust_y = 0.0; + + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_LINES: + adjust_x = -0.5; + adjust_y = 0; + break; + case PIPE_PRIM_POINTS: + case PIPE_PRIM_TRIANGLES: + adjust_x = -0.375; + adjust_y = -0.5; + break; } + prescale.translate[0] += adjust_x; + prescale.translate[1] += adjust_y; + prescale.translate[2] = 0.5; /* D3D clip space */ + prescale.scale[2] = 0.5; /* D3D clip space */ + } - range_min = viewport->scale[2] * -1.0 + viewport->translate[2]; - range_max = viewport->scale[2] * 1.0 + viewport->translate[2]; - /* D3D (and by implication SVGA) doesn't like dealing with zmax - * less than zmin. Detect that case, flip the depth range and - * invert our z-scale factor to achieve the same effect. - */ - if (range_min > range_max) { - float range_tmp; - range_tmp = range_min; - range_min = range_max; - range_max = range_tmp; - prescale.scale[2] = -prescale.scale[2]; - } + range_min = viewport->scale[2] * -1.0 + viewport->translate[2]; + range_max = viewport->scale[2] * 1.0 + viewport->translate[2]; + + /* D3D (and by implication SVGA) doesn't like dealing with zmax + * less than zmin. Detect that case, flip the depth range and + * invert our z-scale factor to achieve the same effect. + */ + if (range_min > range_max) { + float range_tmp; + range_tmp = range_min; + range_min = range_max; + range_max = range_tmp; + prescale.scale[2] = -prescale.scale[2]; } if (prescale.enabled) { diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index d3644fecad..d774e3e504 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -129,8 +129,7 @@ static int update_need_pipeline( struct svga_context *svga, /* SVGA_NEW_CLIP */ - if (!svga->curr.rast->templ.bypass_vs_clip_and_viewport && - svga->curr.clip.nr) { + if (svga->curr.clip.nr) { SVGA_DBG(DEBUG_SWTNL, "%s: userclip\n", __FUNCTION__); need_pipeline = TRUE; } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 6da186a655..f97d963dba 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -112,7 +112,6 @@ void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) trace_dump_member(uint, state, line_stipple_factor); trace_dump_member(uint, state, line_stipple_pattern); trace_dump_member(bool, state, line_last_pixel); - trace_dump_member(bool, state, bypass_vs_clip_and_viewport); trace_dump_member(bool, state, flatshade_first); trace_dump_member(bool, state, gl_rasterization_rules); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 5ac5c87813..02558520bf 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -114,16 +114,6 @@ struct pipe_rasterizer_state unsigned line_last_pixel:1; /** - * Vertex coordinates are pre-transformed to screen space. Skip - * the vertex shader, clipping and viewport processing. Note that - * a vertex shader is still needed though, to indicate the mapping - * from vertex elements to fragment shader input semantics. - * - * XXX: considered for removal. - */ - unsigned bypass_vs_clip_and_viewport:1; - - /** * Use the first vertex of a primitive as the provoking vertex for * flat shading. */ diff --git a/src/gallium/state_trackers/wgl/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c index e5fa6ac8eb..472a2a5379 100644 --- a/src/gallium/state_trackers/wgl/stw_device.c +++ b/src/gallium/state_trackers/wgl/stw_device.c @@ -47,7 +47,6 @@ #ifdef WIN32_THREADS extern _glthread_Mutex OneTimeLock; -extern void FreeAllTSD(void); #endif @@ -183,7 +182,8 @@ stw_cleanup(void) #ifdef WIN32_THREADS _glthread_DESTROY_MUTEX(OneTimeLock); - FreeAllTSD(); + + _glapi_destroy_multithread(); #endif #ifdef DEBUG diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c index f635138d62..52e067165c 100644 --- a/src/glx/glx_pbuffer.c +++ b/src/glx/glx_pbuffer.c @@ -379,8 +379,8 @@ CreateDrawable(Display * dpy, const __GLcontextModes * fbconfig, req->glxwindow = (GLXWindow) XAllocID(dpy); req->numAttribs = (CARD32) i; - assert(attrib_list); - memcpy(data, attrib_list, 8 * i); + if (attrib_list) + memcpy(data, attrib_list, 8 * i); UnlockDisplay(dpy); SyncHandle(); diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 65e16e70c7..42b41f3210 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -30,6 +30,7 @@ #include "brw_defines.h" #include "brw_util.h" #include "intel_batchbuffer.h" +#include "main/macros.h" struct brw_blend_state_key { GLboolean color_blend, alpha_enabled; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 767ad8c9d2..ec379a77ac 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -891,7 +891,7 @@ intelMakeCurrent(__DRIcontext * driContextPriv, intel->driDrawable = driDrawPriv; driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; - intel_prepare_render(&intel->ctx); + intel_prepare_render(intel); } else { _mesa_make_current(NULL, NULL, NULL); diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 1d7a94cbb2..1929b7cc12 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -829,11 +829,10 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer #if MESA_BIG_ENDIAN if (mesa_ind_buf->type == GL_UNSIGNED_INT) - { #else if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) - { #endif + { const GLvoid *src_ptr; GLvoid *dst_ptr; GLboolean mapped_named_bo = GL_FALSE; @@ -872,6 +871,14 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer } } +static GLboolean check_fallbacks(GLcontext *ctx) +{ + if (ctx->RenderMode != GL_RENDER) + return GL_TRUE; + + return GL_FALSE; +} + static GLboolean r700TryDrawPrims(GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, @@ -888,6 +895,9 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, if (ctx->NewState) _mesa_update_state( ctx ); + if (check_fallbacks(ctx)) + return GL_FALSE; + _tnl_UpdateFixedFunctionProgram(ctx); r700SetVertexFormat(ctx, arrays, max_index + 1); /* shaders need to be updated before buffers are validated */ @@ -983,8 +993,10 @@ static void r700DrawPrims(GLcontext *ctx, retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); /* If failed run tnl pipeline - it should take care of fallbacks */ - if (!retval) + if (!retval) { + _swsetup_Wakeup(ctx); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + } } void r700InitDraw(GLcontext *ctx) diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index d31e4e47dd..22499bc38d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -184,6 +184,8 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n", __FUNCTION__, size, rmesa->dma.minimum_size); + if (!is_empty_list(&rmesa->dma.reserved)) + radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); if (is_empty_list(&rmesa->dma.free) || last_elem(&rmesa->dma.free)->bo->size < size) { @@ -211,7 +213,7 @@ again_alloc: rmesa->dma.current_used = 0; rmesa->dma.current_vertexptr = 0; - + if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs, first_elem(&rmesa->dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0)) @@ -221,6 +223,7 @@ again_alloc: /* Cmd buff have been flushed in radeon_revalidate_bos */ goto again_alloc; } + radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1); } /* Allocates a region from rmesa->dma.current. If there isn't enough @@ -332,6 +335,10 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) /* request updated cs processing information from kernel */ legacy_track_pending(rmesa->radeonScreen->bom, 0); } + + if (!is_empty_list(&rmesa->dma.reserved)) + radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo); + /* move waiting bos to free list. wait list provides gpu time to handle data before reuse */ foreach_s(dma_bo, temp, &rmesa->dma.wait) { @@ -349,8 +356,11 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) FREE(dma_bo); continue; } - if (!radeon_bo_is_idle(dma_bo->bo)) + if (!radeon_bo_is_idle(dma_bo->bo)) { + if (rmesa->radeonScreen->driScreen->dri2.enabled) + break; continue; + } remove_from_list(dma_bo); dma_bo->expire_counter = expire_at; insert_at_tail(&rmesa->dma.free, dma_bo); @@ -388,7 +398,7 @@ void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); struct radeon_dma *dma = &rmesa->dma; - + if (RADEON_DEBUG & RADEON_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); dma->flush = NULL; diff --git a/src/mesa/drivers/windows/gdi/mesa.def b/src/mesa/drivers/windows/gdi/mesa.def index baa592f136..b537b3460c 100644 --- a/src/mesa/drivers/windows/gdi/mesa.def +++ b/src/mesa/drivers/windows/gdi/mesa.def @@ -867,7 +867,6 @@ EXPORTS _glapi_get_proc_address _mesa_add_soft_renderbuffers _mesa_add_renderbuffer - _mesa_bzero _mesa_check_conditional_render _mesa_choose_tex_format _mesa_create_framebuffer diff --git a/src/mesa/glapi/gen/gl_x86-64_asm.py b/src/mesa/glapi/gen/gl_x86-64_asm.py index 31c1a2b93a..8ac57ab7eb 100644 --- a/src/mesa/glapi/gen/gl_x86-64_asm.py +++ b/src/mesa/glapi/gen/gl_x86-64_asm.py @@ -138,7 +138,7 @@ class PrintGenericStubs(gl_XML.gl_print_base): print '# define GL_PREFIX(n) GLNAME(CONCAT(gl,n))' print '# endif' print '' - print '#if defined(PTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)' + print '#if defined(PTHREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)' print '# define THREADS' print '#endif' print '' diff --git a/src/mesa/glapi/gen/gl_x86_asm.py b/src/mesa/glapi/gen/gl_x86_asm.py index d210f3a248..a48724ee61 100644 --- a/src/mesa/glapi/gen/gl_x86_asm.py +++ b/src/mesa/glapi/gen/gl_x86_asm.py @@ -79,7 +79,7 @@ class PrintGenericStubs(gl_XML.gl_print_base): print '#define GLOBL_FN(x) GLOBL x' print '#endif' print '' - print '#if defined(PTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)' + print '#if defined(PTHREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)' print '# define THREADS' print '#endif' print '' diff --git a/src/mesa/glapi/glapi.c b/src/mesa/glapi/glapi.c index 2fa673d307..13de594aaf 100644 --- a/src/mesa/glapi/glapi.c +++ b/src/mesa/glapi/glapi.c @@ -59,7 +59,6 @@ #endif #include "glapi/glapi.h" -#include "glapi/glapioffsets.h" #include "glapi/glapitable.h" extern _glapi_proc __glapi_noop_table[]; @@ -106,39 +105,23 @@ PUBLIC __thread void * _glapi_tls_Context __attribute__((tls_model("initial-exec"))); PUBLIC const struct _glapi_table *_glapi_Dispatch = NULL; + PUBLIC const void *_glapi_Context = NULL; #else #if defined(THREADS) -#ifdef WIN32_THREADS -/* _glthread_DECLARE_STATIC_MUTEX is broken on windows. There will be race! */ -#define CHECK_MULTITHREAD_LOCK() -#define CHECK_MULTITHREAD_UNLOCK() -#else -_glthread_DECLARE_STATIC_MUTEX(ThreadCheckMutex); -#define CHECK_MULTITHREAD_LOCK() _glthread_LOCK_MUTEX(ThreadCheckMutex) -#define CHECK_MULTITHREAD_UNLOCK() _glthread_UNLOCK_MUTEX(ThreadCheckMutex) -#endif - static GLboolean ThreadSafe = GL_FALSE; /**< In thread-safe mode? */ + _glthread_TSD _gl_DispatchTSD; /**< Per-thread dispatch pointer */ -static _glthread_TSD ContextTSD; /**< Per-thread context pointer */ -#if defined(WIN32_THREADS) -void FreeTSD(_glthread_TSD *p); -void FreeAllTSD(void) -{ - FreeTSD(&_gl_DispatchTSD); - FreeTSD(&ContextTSD); -} -#endif /* defined(WIN32_THREADS) */ +static _glthread_TSD ContextTSD; /**< Per-thread context pointer */ #endif /* defined(THREADS) */ -PUBLIC struct _glapi_table *_glapi_Dispatch = - (struct _glapi_table *) __glapi_noop_table; +PUBLIC struct _glapi_table *_glapi_Dispatch = (struct _glapi_table *) __glapi_noop_table; + PUBLIC void *_glapi_Context = NULL; #endif /* defined(GLX_USE_TLS) */ @@ -146,6 +129,36 @@ PUBLIC void *_glapi_Context = NULL; +#if defined(THREADS) && !defined(GLX_USE_TLS) + +void +_glapi_init_multithread(void) +{ + _glthread_InitTSD(&_gl_DispatchTSD); + _glthread_InitTSD(&ContextTSD); +} + +void +_glapi_destroy_multithread(void) +{ +#ifdef WIN32_THREADS + _glthread_DestroyTSD(&_gl_DispatchTSD); + _glthread_DestroyTSD(&ContextTSD); +#endif +} + +/** + * Mutex for multithread check. + */ +#ifdef WIN32_THREADS +/* _glthread_DECLARE_STATIC_MUTEX is broken on windows. There will be race! */ +#define CHECK_MULTITHREAD_LOCK() +#define CHECK_MULTITHREAD_UNLOCK() +#else +_glthread_DECLARE_STATIC_MUTEX(ThreadCheckMutex); +#define CHECK_MULTITHREAD_LOCK() _glthread_LOCK_MUTEX(ThreadCheckMutex) +#define CHECK_MULTITHREAD_UNLOCK() _glthread_UNLOCK_MUTEX(ThreadCheckMutex) +#endif /** * We should call this periodically from a function such as glXMakeCurrent @@ -154,7 +167,6 @@ PUBLIC void *_glapi_Context = NULL; PUBLIC void _glapi_check_multithread(void) { -#if defined(THREADS) && !defined(GLX_USE_TLS) static unsigned long knownID; static GLboolean firstCall = GL_TRUE; @@ -163,9 +175,7 @@ _glapi_check_multithread(void) CHECK_MULTITHREAD_LOCK(); if (firstCall) { - /* initialize TSDs */ - (void) _glthread_GetTSD(&ContextTSD); - (void) _glthread_GetTSD(&_gl_DispatchTSD); + _glapi_init_multithread(); knownID = _glthread_GetID(); firstCall = GL_FALSE; @@ -176,9 +186,21 @@ _glapi_check_multithread(void) _glapi_set_context(NULL); } CHECK_MULTITHREAD_UNLOCK(); -#endif } +#else + +void +_glapi_init_multithread(void) { } + +void +_glapi_destroy_multithread(void) { } + +PUBLIC void +_glapi_check_multithread(void) { } + +#endif + /** @@ -212,198 +234,12 @@ _glapi_get_context(void) #if defined(GLX_USE_TLS) return _glapi_tls_Context; #elif defined(THREADS) - if (ThreadSafe) { - return _glthread_GetTSD(&ContextTSD); - } - else { - return _glapi_Context; - } + return (ThreadSafe) ? _glthread_GetTSD(&ContextTSD) : _glapi_Context; #else return _glapi_Context; #endif } -#ifdef USE_X86_ASM - -#if defined( GLX_USE_TLS ) -extern GLubyte gl_dispatch_functions_start[]; -extern GLubyte gl_dispatch_functions_end[]; -#else -extern const GLubyte gl_dispatch_functions_start[]; -#endif - -#endif /* USE_X86_ASM */ - - -#if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS) -# define DISPATCH_FUNCTION_SIZE 16 -#elif defined(USE_X86_ASM) -# if defined(THREADS) && !defined(GLX_USE_TLS) -# define DISPATCH_FUNCTION_SIZE 32 -# else -# define DISPATCH_FUNCTION_SIZE 16 -# endif -#endif - -#ifdef USE_SPARC_ASM -#ifdef GLX_USE_TLS -extern unsigned int __glapi_sparc_tls_stub; -#else -extern unsigned int __glapi_sparc_pthread_stub; -#endif -#endif - -#if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server) && !defined(XGLServer) -# define NEED_FUNCTION_POINTER -#endif - -#if defined(PTHREADS) || defined(GLX_USE_TLS) -/** - * Perform platform-specific GL API entry-point fixups. - */ -static void -init_glapi_relocs( void ) -{ -#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT) - extern unsigned long _x86_get_dispatch(void); - char run_time_patch[] = { - 0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */ - }; - GLuint *offset = (GLuint *) &run_time_patch[2]; /* 32-bits for x86/32 */ - const GLubyte * const get_disp = (const GLubyte *) run_time_patch; - GLubyte * curr_func = (GLubyte *) gl_dispatch_functions_start; - - *offset = _x86_get_dispatch(); - while ( curr_func != (GLubyte *) gl_dispatch_functions_end ) { - (void) memcpy( curr_func, get_disp, sizeof(run_time_patch)); - curr_func += DISPATCH_FUNCTION_SIZE; - } -#endif -#ifdef USE_SPARC_ASM - extern void __glapi_sparc_icache_flush(unsigned int *); - static const unsigned int template[] = { -#ifdef GLX_USE_TLS - 0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */ - 0x8730e00a, /* srl %g3, 10, %g3 */ - 0x8410a000, /* or %g2, %lo(_glapi_tls_Dispatch), %g2 */ -#ifdef __arch64__ - 0xc259c002, /* ldx [%g7 + %g2], %g1 */ - 0xc2584003, /* ldx [%g1 + %g3], %g1 */ -#else - 0xc201c002, /* ld [%g7 + %g2], %g1 */ - 0xc2004003, /* ld [%g1 + %g3], %g1 */ -#endif - 0x81c04000, /* jmp %g1 */ - 0x01000000, /* nop */ -#else -#ifdef __arch64__ - 0x03000000, /* 64-bit 0x00 --> sethi %hh(_glapi_Dispatch), %g1 */ - 0x05000000, /* 64-bit 0x04 --> sethi %lm(_glapi_Dispatch), %g2 */ - 0x82106000, /* 64-bit 0x08 --> or %g1, %hm(_glapi_Dispatch), %g1 */ - 0x8730e00a, /* 64-bit 0x0c --> srl %g3, 10, %g3 */ - 0x83287020, /* 64-bit 0x10 --> sllx %g1, 32, %g1 */ - 0x82004002, /* 64-bit 0x14 --> add %g1, %g2, %g1 */ - 0xc2586000, /* 64-bit 0x18 --> ldx [%g1 + %lo(_glapi_Dispatch)], %g1 */ -#else - 0x03000000, /* 32-bit 0x00 --> sethi %hi(_glapi_Dispatch), %g1 */ - 0x8730e00a, /* 32-bit 0x04 --> srl %g3, 10, %g3 */ - 0xc2006000, /* 32-bit 0x08 --> ld [%g1 + %lo(_glapi_Dispatch)], %g1 */ -#endif - 0x80a06000, /* --> cmp %g1, 0 */ - 0x02800005, /* --> be +4*5 */ - 0x01000000, /* --> nop */ -#ifdef __arch64__ - 0xc2584003, /* 64-bit --> ldx [%g1 + %g3], %g1 */ -#else - 0xc2004003, /* 32-bit --> ld [%g1 + %g3], %g1 */ -#endif - 0x81c04000, /* --> jmp %g1 */ - 0x01000000, /* --> nop */ -#ifdef __arch64__ - 0x9de3bf80, /* 64-bit --> save %sp, -128, %sp */ -#else - 0x9de3bfc0, /* 32-bit --> save %sp, -64, %sp */ -#endif - 0xa0100003, /* --> mov %g3, %l0 */ - 0x40000000, /* --> call _glapi_get_dispatch */ - 0x01000000, /* --> nop */ - 0x82100008, /* --> mov %o0, %g1 */ - 0x86100010, /* --> mov %l0, %g3 */ - 0x10bffff7, /* --> ba -4*9 */ - 0x81e80000, /* --> restore */ -#endif - }; -#ifdef GLX_USE_TLS - extern unsigned long __glapi_sparc_get_dispatch(void); - unsigned int *code = &__glapi_sparc_tls_stub; - unsigned long dispatch = __glapi_sparc_get_dispatch(); -#else - unsigned int *code = &__glapi_sparc_pthread_stub; - unsigned long dispatch = (unsigned long) &_glapi_Dispatch; - unsigned long call_dest = (unsigned long ) &_glapi_get_dispatch; - int idx; -#endif - -#if defined(GLX_USE_TLS) - code[0] = template[0] | (dispatch >> 10); - code[1] = template[1]; - __glapi_sparc_icache_flush(&code[0]); - code[2] = template[2] | (dispatch & 0x3ff); - code[3] = template[3]; - __glapi_sparc_icache_flush(&code[2]); - code[4] = template[4]; - code[5] = template[5]; - __glapi_sparc_icache_flush(&code[4]); - code[6] = template[6]; - __glapi_sparc_icache_flush(&code[6]); -#else -#if defined(__arch64__) - code[0] = template[0] | (dispatch >> (32 + 10)); - code[1] = template[1] | ((dispatch & 0xffffffff) >> 10); - __glapi_sparc_icache_flush(&code[0]); - code[2] = template[2] | ((dispatch >> 32) & 0x3ff); - code[3] = template[3]; - __glapi_sparc_icache_flush(&code[2]); - code[4] = template[4]; - code[5] = template[5]; - __glapi_sparc_icache_flush(&code[4]); - code[6] = template[6] | (dispatch & 0x3ff); - idx = 7; -#else - code[0] = template[0] | (dispatch >> 10); - code[1] = template[1]; - __glapi_sparc_icache_flush(&code[0]); - code[2] = template[2] | (dispatch & 0x3ff); - idx = 3; -#endif - code[idx + 0] = template[idx + 0]; - __glapi_sparc_icache_flush(&code[idx - 1]); - code[idx + 1] = template[idx + 1]; - code[idx + 2] = template[idx + 2]; - __glapi_sparc_icache_flush(&code[idx + 1]); - code[idx + 3] = template[idx + 3]; - code[idx + 4] = template[idx + 4]; - __glapi_sparc_icache_flush(&code[idx + 3]); - code[idx + 5] = template[idx + 5]; - code[idx + 6] = template[idx + 6]; - __glapi_sparc_icache_flush(&code[idx + 5]); - code[idx + 7] = template[idx + 7]; - code[idx + 8] = template[idx + 8] | - (((call_dest - ((unsigned long) &code[idx + 8])) - >> 2) & 0x3fffffff); - __glapi_sparc_icache_flush(&code[idx + 7]); - code[idx + 9] = template[idx + 9]; - code[idx + 10] = template[idx + 10]; - __glapi_sparc_icache_flush(&code[idx + 9]); - code[idx + 11] = template[idx + 11]; - code[idx + 12] = template[idx + 12]; - __glapi_sparc_icache_flush(&code[idx + 11]); - code[idx + 13] = template[idx + 13]; - __glapi_sparc_icache_flush(&code[idx + 13]); -#endif -#endif -} -#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */ /** @@ -414,17 +250,15 @@ init_glapi_relocs( void ) PUBLIC void _glapi_set_dispatch(struct _glapi_table *dispatch) { -#if defined(PTHREADS) || defined(GLX_USE_TLS) - static pthread_once_t once_control = PTHREAD_ONCE_INIT; - pthread_once( & once_control, init_glapi_relocs ); -#endif + init_glapi_relocs_once(); - if (!dispatch) { + if (dispatch == NULL) { /* use the no-op functions */ dispatch = (struct _glapi_table *) __glapi_noop_table; } #ifdef DEBUG else { + _glapi_check_table_not_null(dispatch); _glapi_check_table(dispatch); } #endif @@ -434,9 +268,9 @@ _glapi_set_dispatch(struct _glapi_table *dispatch) #elif defined(THREADS) _glthread_SetTSD(&_gl_DispatchTSD, (void *) dispatch); _glapi_Dispatch = (ThreadSafe) ? NULL : dispatch; -#else /*THREADS*/ +#else _glapi_Dispatch = dispatch; -#endif /*THREADS*/ +#endif } @@ -447,17 +281,15 @@ _glapi_set_dispatch(struct _glapi_table *dispatch) PUBLIC struct _glapi_table * _glapi_get_dispatch(void) { - struct _glapi_table * api; #if defined(GLX_USE_TLS) - api = _glapi_tls_Dispatch; + return _glapi_tls_Dispatch; #elif defined(THREADS) - api = (ThreadSafe) + return (ThreadSafe) ? (struct _glapi_table *) _glthread_GetTSD(&_gl_DispatchTSD) : _glapi_Dispatch; #else - api = _glapi_Dispatch; + return _glapi_Dispatch; #endif - return api; } @@ -488,75 +320,15 @@ _glapi_get_dispatch_table_size(void) * Intended for debugging purposes. */ void -_glapi_check_table(const struct _glapi_table *table) +_glapi_check_table_not_null(const struct _glapi_table *table) { -#if 0 /* Enable this for extra DEBUG */ +#if 0 /* enable this for extra DEBUG */ const GLuint entries = _glapi_get_dispatch_table_size(); const void **tab = (const void **) table; GLuint i; for (i = 1; i < entries; i++) { assert(tab[i]); } - - /* Do some spot checks to be sure that the dispatch table - * slots are assigned correctly. - */ - { - GLuint BeginOffset = _glapi_get_proc_offset("glBegin"); - char *BeginFunc = (char*) &table->Begin; - GLuint offset = (BeginFunc - (char *) table) / sizeof(void *); - assert(BeginOffset == _gloffset_Begin); - assert(BeginOffset == offset); - } - { - GLuint viewportOffset = _glapi_get_proc_offset("glViewport"); - char *viewportFunc = (char*) &table->Viewport; - GLuint offset = (viewportFunc - (char *) table) / sizeof(void *); - assert(viewportOffset == _gloffset_Viewport); - assert(viewportOffset == offset); - } - { - GLuint VertexPointerOffset = _glapi_get_proc_offset("glVertexPointer"); - char *VertexPointerFunc = (char*) &table->VertexPointer; - GLuint offset = (VertexPointerFunc - (char *) table) / sizeof(void *); - assert(VertexPointerOffset == _gloffset_VertexPointer); - assert(VertexPointerOffset == offset); - } - { - GLuint ResetMinMaxOffset = _glapi_get_proc_offset("glResetMinmax"); - char *ResetMinMaxFunc = (char*) &table->ResetMinmax; - GLuint offset = (ResetMinMaxFunc - (char *) table) / sizeof(void *); - assert(ResetMinMaxOffset == _gloffset_ResetMinmax); - assert(ResetMinMaxOffset == offset); - } - { - GLuint blendColorOffset = _glapi_get_proc_offset("glBlendColor"); - char *blendColorFunc = (char*) &table->BlendColor; - GLuint offset = (blendColorFunc - (char *) table) / sizeof(void *); - assert(blendColorOffset == _gloffset_BlendColor); - assert(blendColorOffset == offset); - } - { - GLuint secondaryColor3fOffset = _glapi_get_proc_offset("glSecondaryColor3fEXT"); - char *secondaryColor3fFunc = (char*) &table->SecondaryColor3fEXT; - GLuint offset = (secondaryColor3fFunc - (char *) table) / sizeof(void *); - assert(secondaryColor3fOffset == _gloffset_SecondaryColor3fEXT); - assert(secondaryColor3fOffset == offset); - } - { - GLuint pointParameterivOffset = _glapi_get_proc_offset("glPointParameterivNV"); - char *pointParameterivFunc = (char*) &table->PointParameterivNV; - GLuint offset = (pointParameterivFunc - (char *) table) / sizeof(void *); - assert(pointParameterivOffset == _gloffset_PointParameterivNV); - assert(pointParameterivOffset == offset); - } - { - GLuint setFenceOffset = _glapi_get_proc_offset("glSetFenceNV"); - char *setFenceFunc = (char*) &table->SetFenceNV; - GLuint offset = (setFenceFunc - (char *) table) / sizeof(void *); - assert(setFenceOffset == _gloffset_SetFenceNV); - assert(setFenceOffset == offset); - } #else (void) table; #endif diff --git a/src/mesa/glapi/glapi.h b/src/mesa/glapi/glapi.h index f802a61d3b..2eae6d5c43 100644 --- a/src/mesa/glapi/glapi.h +++ b/src/mesa/glapi/glapi.h @@ -44,8 +44,6 @@ #ifndef _GLAPI_H #define _GLAPI_H -#define GL_GLEXT_PROTOTYPES - #include "glthread.h" @@ -59,40 +57,61 @@ typedef void (*_glapi_proc)(void); /* generic function pointer */ #define _glapi_get_dispatch _mglapi_get_dispatch #define _glapi_set_context _mglapi_set_context #define _glapi_get_context _mglapi_get_context -#define _glapi_Context _mglapi_Context #define _glapi_Dispatch _mglapi_Dispatch +#define _glapi_Context _mglapi_Context #endif -/* - * Number of extension functions which we can dynamically add at runtime. - */ -#define MAX_EXTENSION_FUNCS 300 +#if defined(__GNUC__) && (__GNUC__ >= 3) +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#else +# define likely(x) (x) +# define unlikely(x) (x) +#endif /** - ** Define the GET_CURRENT_CONTEXT() macro. + ** Define the GET_DISPATCH() and GET_CURRENT_CONTEXT() macros. + ** ** \param C local variable which will hold the current context. **/ #if defined (GLX_USE_TLS) -extern const void *_glapi_Context; extern const struct _glapi_table *_glapi_Dispatch; +extern const void *_glapi_Context; + +extern __thread struct _glapi_table * _glapi_tls_Dispatch + __attribute__((tls_model("initial-exec"))); + extern __thread void * _glapi_tls_Context __attribute__((tls_model("initial-exec"))); +# define GET_DISPATCH() _glapi_tls_Dispatch + # define GET_CURRENT_CONTEXT(C) GLcontext *C = (GLcontext *) _glapi_tls_Context #else -extern void *_glapi_Context; extern struct _glapi_table *_glapi_Dispatch; +extern void *_glapi_Context; + # ifdef THREADS -# define GET_CURRENT_CONTEXT(C) GLcontext *C = (GLcontext *) (_glapi_Context ? _glapi_Context : _glapi_get_context()) + +# define GET_DISPATCH() \ + (likely(_glapi_Dispatch) ? _glapi_Dispatch : _glapi_get_dispatch()) + +# define GET_CURRENT_CONTEXT(C) GLcontext *C = (GLcontext *) \ + (likely(_glapi_Context) ? _glapi_Context : _glapi_get_context()) + # else + +# define GET_DISPATCH() _glapi_Dispatch + # define GET_CURRENT_CONTEXT(C) GLcontext *C = (GLcontext *) _glapi_Context + # endif #endif /* defined (GLX_USE_TLS) */ @@ -103,6 +122,14 @@ extern struct _glapi_table *_glapi_Dispatch; **/ extern void +_glapi_init_multithread(void); + + +extern void +_glapi_destroy_multithread(void); + + +extern void _glapi_check_multithread(void); @@ -122,26 +149,10 @@ extern struct _glapi_table * _glapi_get_dispatch(void); -extern int -_glapi_begin_dispatch_override(struct _glapi_table *override); - - -extern void -_glapi_end_dispatch_override(int layer); - - -struct _glapi_table * -_glapi_get_override_dispatch(int layer); - - extern unsigned int _glapi_get_dispatch_table_size(void); -extern void -_glapi_check_table(const struct _glapi_table *table); - - extern int _glapi_add_dispatch( const char * const * function_names, const char * parameter_signature ); @@ -154,8 +165,29 @@ extern _glapi_proc _glapi_get_proc_address(const char *funcName); +/** + * GL API local functions and defines + */ + +extern void +init_glapi_relocs_once(void); + +extern void +_glapi_check_table_not_null(const struct _glapi_table *table); + + +extern void +_glapi_check_table(const struct _glapi_table *table); + + extern const char * _glapi_get_proc_name(unsigned int offset); +/* + * Number of extension functions which we can dynamically add at runtime. + */ +#define MAX_EXTENSION_FUNCS 300 + + #endif diff --git a/src/mesa/glapi/glapi_getproc.c b/src/mesa/glapi/glapi_getproc.c index 69eb42c79e..a6dbf173e8 100644 --- a/src/mesa/glapi/glapi_getproc.c +++ b/src/mesa/glapi/glapi_getproc.c @@ -43,27 +43,6 @@ #include "glapi/glapitable.h" -static void -fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset); - - -/** - * strdup() is actually not a standard ANSI C or POSIX routine. - * Irix will not define it if ANSI mode is in effect. - */ -static char * -str_dup(const char *str) -{ - char *copy; - copy = (char*) malloc(strlen(str) + 1); - if (!copy) - return NULL; - strcpy(copy, str); - return copy; -} - - - #if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS) # define DISPATCH_FUNCTION_SIZE 16 #elif defined(USE_X86_ASM) @@ -121,7 +100,6 @@ get_static_proc_offset(const char *funcName) } -#if !defined(XFree86Server) && !defined(XGLServer) #ifdef USE_X86_ASM #if defined( GLX_USE_TLS ) @@ -134,6 +112,8 @@ extern const GLubyte gl_dispatch_functions_start[]; #endif /* USE_X86_ASM */ +#if !defined(XFree86Server) && !defined(XGLServer) + /** * Return dispatch function address for the named static (built-in) function. * Return NULL if function not found. @@ -182,6 +162,172 @@ get_static_proc_name( GLuint offset ) +#if defined(PTHREADS) || defined(GLX_USE_TLS) + +/** + * Perform platform-specific GL API entry-point fixups. + */ +static void +init_glapi_relocs( void ) +{ +#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT) + extern unsigned long _x86_get_dispatch(void); + char run_time_patch[] = { + 0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */ + }; + GLuint *offset = (GLuint *) &run_time_patch[2]; /* 32-bits for x86/32 */ + const GLubyte * const get_disp = (const GLubyte *) run_time_patch; + GLubyte * curr_func = (GLubyte *) gl_dispatch_functions_start; + + *offset = _x86_get_dispatch(); + while ( curr_func != (GLubyte *) gl_dispatch_functions_end ) { + (void) memcpy( curr_func, get_disp, sizeof(run_time_patch)); + curr_func += DISPATCH_FUNCTION_SIZE; + } +#endif +#ifdef USE_SPARC_ASM + extern void __glapi_sparc_icache_flush(unsigned int *); + static const unsigned int template[] = { +#ifdef GLX_USE_TLS + 0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */ + 0x8730e00a, /* srl %g3, 10, %g3 */ + 0x8410a000, /* or %g2, %lo(_glapi_tls_Dispatch), %g2 */ +#ifdef __arch64__ + 0xc259c002, /* ldx [%g7 + %g2], %g1 */ + 0xc2584003, /* ldx [%g1 + %g3], %g1 */ +#else + 0xc201c002, /* ld [%g7 + %g2], %g1 */ + 0xc2004003, /* ld [%g1 + %g3], %g1 */ +#endif + 0x81c04000, /* jmp %g1 */ + 0x01000000, /* nop */ +#else +#ifdef __arch64__ + 0x03000000, /* 64-bit 0x00 --> sethi %hh(_glapi_Dispatch), %g1 */ + 0x05000000, /* 64-bit 0x04 --> sethi %lm(_glapi_Dispatch), %g2 */ + 0x82106000, /* 64-bit 0x08 --> or %g1, %hm(_glapi_Dispatch), %g1 */ + 0x8730e00a, /* 64-bit 0x0c --> srl %g3, 10, %g3 */ + 0x83287020, /* 64-bit 0x10 --> sllx %g1, 32, %g1 */ + 0x82004002, /* 64-bit 0x14 --> add %g1, %g2, %g1 */ + 0xc2586000, /* 64-bit 0x18 --> ldx [%g1 + %lo(_glapi_Dispatch)], %g1 */ +#else + 0x03000000, /* 32-bit 0x00 --> sethi %hi(_glapi_Dispatch), %g1 */ + 0x8730e00a, /* 32-bit 0x04 --> srl %g3, 10, %g3 */ + 0xc2006000, /* 32-bit 0x08 --> ld [%g1 + %lo(_glapi_Dispatch)], %g1 */ +#endif + 0x80a06000, /* --> cmp %g1, 0 */ + 0x02800005, /* --> be +4*5 */ + 0x01000000, /* --> nop */ +#ifdef __arch64__ + 0xc2584003, /* 64-bit --> ldx [%g1 + %g3], %g1 */ +#else + 0xc2004003, /* 32-bit --> ld [%g1 + %g3], %g1 */ +#endif + 0x81c04000, /* --> jmp %g1 */ + 0x01000000, /* --> nop */ +#ifdef __arch64__ + 0x9de3bf80, /* 64-bit --> save %sp, -128, %sp */ +#else + 0x9de3bfc0, /* 32-bit --> save %sp, -64, %sp */ +#endif + 0xa0100003, /* --> mov %g3, %l0 */ + 0x40000000, /* --> call _glapi_get_dispatch */ + 0x01000000, /* --> nop */ + 0x82100008, /* --> mov %o0, %g1 */ + 0x86100010, /* --> mov %l0, %g3 */ + 0x10bffff7, /* --> ba -4*9 */ + 0x81e80000, /* --> restore */ +#endif + }; +#ifdef GLX_USE_TLS + extern unsigned int __glapi_sparc_tls_stub; + extern unsigned long __glapi_sparc_get_dispatch(void); + unsigned int *code = &__glapi_sparc_tls_stub; + unsigned long dispatch = __glapi_sparc_get_dispatch(); +#else + extern unsigned int __glapi_sparc_pthread_stub; + unsigned int *code = &__glapi_sparc_pthread_stub; + unsigned long dispatch = (unsigned long) &_glapi_Dispatch; + unsigned long call_dest = (unsigned long ) &_glapi_get_dispatch; + int idx; +#endif + +#if defined(GLX_USE_TLS) + code[0] = template[0] | (dispatch >> 10); + code[1] = template[1]; + __glapi_sparc_icache_flush(&code[0]); + code[2] = template[2] | (dispatch & 0x3ff); + code[3] = template[3]; + __glapi_sparc_icache_flush(&code[2]); + code[4] = template[4]; + code[5] = template[5]; + __glapi_sparc_icache_flush(&code[4]); + code[6] = template[6]; + __glapi_sparc_icache_flush(&code[6]); +#else +#if defined(__arch64__) + code[0] = template[0] | (dispatch >> (32 + 10)); + code[1] = template[1] | ((dispatch & 0xffffffff) >> 10); + __glapi_sparc_icache_flush(&code[0]); + code[2] = template[2] | ((dispatch >> 32) & 0x3ff); + code[3] = template[3]; + __glapi_sparc_icache_flush(&code[2]); + code[4] = template[4]; + code[5] = template[5]; + __glapi_sparc_icache_flush(&code[4]); + code[6] = template[6] | (dispatch & 0x3ff); + idx = 7; +#else + code[0] = template[0] | (dispatch >> 10); + code[1] = template[1]; + __glapi_sparc_icache_flush(&code[0]); + code[2] = template[2] | (dispatch & 0x3ff); + idx = 3; +#endif + code[idx + 0] = template[idx + 0]; + __glapi_sparc_icache_flush(&code[idx - 1]); + code[idx + 1] = template[idx + 1]; + code[idx + 2] = template[idx + 2]; + __glapi_sparc_icache_flush(&code[idx + 1]); + code[idx + 3] = template[idx + 3]; + code[idx + 4] = template[idx + 4]; + __glapi_sparc_icache_flush(&code[idx + 3]); + code[idx + 5] = template[idx + 5]; + code[idx + 6] = template[idx + 6]; + __glapi_sparc_icache_flush(&code[idx + 5]); + code[idx + 7] = template[idx + 7]; + code[idx + 8] = template[idx + 8] | + (((call_dest - ((unsigned long) &code[idx + 8])) + >> 2) & 0x3fffffff); + __glapi_sparc_icache_flush(&code[idx + 7]); + code[idx + 9] = template[idx + 9]; + code[idx + 10] = template[idx + 10]; + __glapi_sparc_icache_flush(&code[idx + 9]); + code[idx + 11] = template[idx + 11]; + code[idx + 12] = template[idx + 12]; + __glapi_sparc_icache_flush(&code[idx + 11]); + code[idx + 13] = template[idx + 13]; + __glapi_sparc_icache_flush(&code[idx + 13]); +#endif +#endif +} + +void +init_glapi_relocs_once( void ) +{ + static pthread_once_t once_control = PTHREAD_ONCE_INIT; + pthread_once( & once_control, init_glapi_relocs ); +} + +#else + +void +init_glapi_relocs_once( void ) { } + +#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */ + + + /********************************************************************** * Extension function management. */ @@ -239,6 +385,9 @@ static GLuint NumExtEntryPoints = 0; extern void __glapi_sparc_icache_flush(unsigned int *); #endif +static void +fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset); + /** * Generate a dispatch function (entrypoint) which jumps through * the given slot number (offset) in the current dispatch table. @@ -263,7 +412,9 @@ generate_entrypoint(GLuint functionOffset) } return (_glapi_proc) code; -#elif defined(USE_SPARC_ASM) && (defined(PTHREADS) || defined(GLX_USE_TLS)) +#elif defined(USE_SPARC_ASM) + +#if defined(PTHREADS) || defined(GLX_USE_TLS) static const unsigned int template[] = { 0x07000000, /* sethi %hi(0), %g3 */ 0x8210000f, /* mov %o7, %g1 */ @@ -289,6 +440,8 @@ generate_entrypoint(GLuint functionOffset) __glapi_sparc_icache_flush(&code[2]); } return (_glapi_proc) code; +#endif + #else (void) functionOffset; return NULL; @@ -333,6 +486,22 @@ fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset) /** + * strdup() is actually not a standard ANSI C or POSIX routine. + * Irix will not define it if ANSI mode is in effect. + */ +static char * +str_dup(const char *str) +{ + char *copy; + copy = (char*) malloc(strlen(str) + 1); + if (!copy) + return NULL; + strcpy(copy, str); + return copy; +} + + +/** * Generate new entrypoint * * Use a temporary dispatch offset of ~0 (i.e. -1). Later, when the driver @@ -594,3 +763,74 @@ _glapi_get_proc_name(GLuint offset) } return NULL; } + + + +/** + * Do some spot checks to be sure that the dispatch table + * slots are assigned correctly. For debugging only. + */ +void +_glapi_check_table(const struct _glapi_table *table) +{ +#if 0 /* enable this for extra DEBUG */ + { + GLuint BeginOffset = _glapi_get_proc_offset("glBegin"); + char *BeginFunc = (char*) &table->Begin; + GLuint offset = (BeginFunc - (char *) table) / sizeof(void *); + assert(BeginOffset == _gloffset_Begin); + assert(BeginOffset == offset); + } + { + GLuint viewportOffset = _glapi_get_proc_offset("glViewport"); + char *viewportFunc = (char*) &table->Viewport; + GLuint offset = (viewportFunc - (char *) table) / sizeof(void *); + assert(viewportOffset == _gloffset_Viewport); + assert(viewportOffset == offset); + } + { + GLuint VertexPointerOffset = _glapi_get_proc_offset("glVertexPointer"); + char *VertexPointerFunc = (char*) &table->VertexPointer; + GLuint offset = (VertexPointerFunc - (char *) table) / sizeof(void *); + assert(VertexPointerOffset == _gloffset_VertexPointer); + assert(VertexPointerOffset == offset); + } + { + GLuint ResetMinMaxOffset = _glapi_get_proc_offset("glResetMinmax"); + char *ResetMinMaxFunc = (char*) &table->ResetMinmax; + GLuint offset = (ResetMinMaxFunc - (char *) table) / sizeof(void *); + assert(ResetMinMaxOffset == _gloffset_ResetMinmax); + assert(ResetMinMaxOffset == offset); + } + { + GLuint blendColorOffset = _glapi_get_proc_offset("glBlendColor"); + char *blendColorFunc = (char*) &table->BlendColor; + GLuint offset = (blendColorFunc - (char *) table) / sizeof(void *); + assert(blendColorOffset == _gloffset_BlendColor); + assert(blendColorOffset == offset); + } + { + GLuint secondaryColor3fOffset = _glapi_get_proc_offset("glSecondaryColor3fEXT"); + char *secondaryColor3fFunc = (char*) &table->SecondaryColor3fEXT; + GLuint offset = (secondaryColor3fFunc - (char *) table) / sizeof(void *); + assert(secondaryColor3fOffset == _gloffset_SecondaryColor3fEXT); + assert(secondaryColor3fOffset == offset); + } + { + GLuint pointParameterivOffset = _glapi_get_proc_offset("glPointParameterivNV"); + char *pointParameterivFunc = (char*) &table->PointParameterivNV; + GLuint offset = (pointParameterivFunc - (char *) table) / sizeof(void *); + assert(pointParameterivOffset == _gloffset_PointParameterivNV); + assert(pointParameterivOffset == offset); + } + { + GLuint setFenceOffset = _glapi_get_proc_offset("glSetFenceNV"); + char *setFenceFunc = (char*) &table->SetFenceNV; + GLuint offset = (setFenceFunc - (char *) table) / sizeof(void *); + assert(setFenceOffset == _gloffset_SetFenceNV); + assert(setFenceOffset == offset); + } +#else + (void) table; +#endif +} diff --git a/src/mesa/glapi/glthread.c b/src/mesa/glapi/glthread.c index be4e2f754d..17b6bb2b6a 100644 --- a/src/mesa/glapi/glthread.c +++ b/src/mesa/glapi/glthread.c @@ -116,91 +116,15 @@ _glthread_SetTSD(_glthread_TSD *tsd, void *ptr) /* - * Solaris/Unix International Threads -- Use only if POSIX threads - * aren't available on your Unix platform. Solaris 2.[34] are examples - * of platforms where this is the case. Be sure to use -mt and/or - * -D_REENTRANT when compiling. - */ -#ifdef SOLARIS_THREADS -#define USE_LOCK_FOR_KEY /* undef this to try a version without - lock for the global key... */ - -PUBLIC unsigned long -_glthread_GetID(void) -{ - abort(); /* XXX not implemented yet */ - return (unsigned long) 0; -} - - -void -_glthread_InitTSD(_glthread_TSD *tsd) -{ - if ((errno = mutex_init(&tsd->keylock, 0, NULL)) != 0 || - (errno = thr_keycreate(&(tsd->key), free)) != 0) { - perror(INIT_TSD_ERROR); - exit(-1); - } - tsd->initMagic = INIT_MAGIC; -} - - -void * -_glthread_GetTSD(_glthread_TSD *tsd) -{ - void* ret; - if (tsd->initMagic != INIT_MAGIC) { - _glthread_InitTSD(tsd); - } -#ifdef USE_LOCK_FOR_KEY - mutex_lock(&tsd->keylock); - thr_getspecific(tsd->key, &ret); - mutex_unlock(&tsd->keylock); -#else - if ((errno = thr_getspecific(tsd->key, &ret)) != 0) { - perror(GET_TSD_ERROR); - exit(-1); - } -#endif - return ret; -} - - -void -_glthread_SetTSD(_glthread_TSD *tsd, void *ptr) -{ - if (tsd->initMagic != INIT_MAGIC) { - _glthread_InitTSD(tsd); - } - if ((errno = thr_setspecific(tsd->key, ptr)) != 0) { - perror(SET_TSD_ERROR); - exit(-1); - } -} - -#undef USE_LOCK_FOR_KEY -#endif /* SOLARIS_THREADS */ - - - -/* * Win32 Threads. The only available option for Windows 95/NT. * Be sure that you compile using the Multithreaded runtime, otherwise * bad things will happen. */ #ifdef WIN32_THREADS -void FreeTSD(_glthread_TSD *p) -{ - if (p->initMagic==INIT_MAGIC) { - TlsFree(p->key); - p->initMagic=0; - } -} - -void InsteadOf_exit(int nCode) +static void InsteadOf_exit(int nCode) { - DWORD dwErr=GetLastError(); + DWORD dwErr = GetLastError(); } PUBLIC unsigned long @@ -222,6 +146,17 @@ _glthread_InitTSD(_glthread_TSD *tsd) } +void +_glthread_DestroyTSD(_glthread_TSD *tsd) +{ + if (tsd->initMagic != INIT_MAGIC) { + return; + } + TlsFree(tsd->key); + tsd->initMagic = 0x0; +} + + void * _glthread_GetTSD(_glthread_TSD *tsd) { diff --git a/src/mesa/glapi/glthread.h b/src/mesa/glapi/glthread.h index 8ec933a851..e5193aaf98 100644 --- a/src/mesa/glapi/glthread.h +++ b/src/mesa/glapi/glthread.h @@ -64,21 +64,12 @@ #define GLTHREAD_H -#if defined(USE_MGL_NAMESPACE) -#define _glapi_Dispatch _mglapi_Dispatch +#if defined(PTHREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS) +#ifndef THREADS +#define THREADS #endif - - - -#if (defined(PTHREADS) || defined(SOLARIS_THREADS) ||\ - defined(WIN32_THREADS) || defined(BEOS_THREADS)) \ - && !defined(THREADS) -# define THREADS #endif -#ifdef VMS -#include <GL/vms_x_fix.h> -#endif /* * POSIX threads. This should be your choice in the Unix world @@ -121,7 +112,7 @@ typedef pthread_cond_t _glthread_Cond; #define _glthread_DECLARE_STATIC_COND(name) \ static _glthread_Cond name = PTHREAD_COND_INITIALIZER -#define _glthread_INIT_COND(cond) \ +#define _glthread_INIT_COND(cond) \ pthread_cond_init(&(cond), NULL) #define _glthread_DESTROY_COND(name) \ @@ -143,7 +134,7 @@ typedef unsigned int _glthread_Cond; #define _glthread_DECLARE_STATIC_COND(name) \ // #warning Condition variables not implemented. -#define _glthread_INIT_COND(cond) \ +#define _glthread_INIT_COND(cond) \ ASSERT(0); #define _glthread_DESTROY_COND(name) \ @@ -158,38 +149,7 @@ typedef unsigned int _glthread_Cond; #define _glthread_COND_BROADCAST(cond) \ ASSERT(0); -#endif - - -/* - * Solaris threads. Use only up to Solaris 2.4. - * Solaris 2.5 and higher provide POSIX threads. - * Be sure to compile with -mt on the Solaris compilers, or - * use -D_REENTRANT if using gcc. - */ -#ifdef SOLARIS_THREADS -#include <thread.h> - -typedef struct { - thread_key_t key; - mutex_t keylock; - int initMagic; -} _glthread_TSD; - -typedef thread_t _glthread_Thread; - -typedef mutex_t _glthread_Mutex; - -/* XXX need to really implement mutex-related macros */ -#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 -#define _glthread_INIT_MUTEX(name) (void) name -#define _glthread_DESTROY_MUTEX(name) (void) name -#define _glthread_LOCK_MUTEX(name) (void) name -#define _glthread_UNLOCK_MUTEX(name) (void) name - -#endif /* SOLARIS_THREADS */ - - +#endif /* PTHREADS */ /* @@ -209,11 +169,20 @@ typedef HANDLE _glthread_Thread; typedef CRITICAL_SECTION _glthread_Mutex; -#define _glthread_DECLARE_STATIC_MUTEX(name) /*static*/ _glthread_Mutex name = {0,0,0,0,0,0} -#define _glthread_INIT_MUTEX(name) InitializeCriticalSection(&name) -#define _glthread_DESTROY_MUTEX(name) DeleteCriticalSection(&name) -#define _glthread_LOCK_MUTEX(name) EnterCriticalSection(&name) -#define _glthread_UNLOCK_MUTEX(name) LeaveCriticalSection(&name) +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + /* static */ _glthread_Mutex name = { 0, 0, 0, 0, 0, 0 } + +#define _glthread_INIT_MUTEX(name) \ + InitializeCriticalSection(&name) + +#define _glthread_DESTROY_MUTEX(name) \ + DeleteCriticalSection(&name) + +#define _glthread_LOCK_MUTEX(name) \ + EnterCriticalSection(&name) + +#define _glthread_UNLOCK_MUTEX(name) \ + LeaveCriticalSection(&name) #endif /* WIN32_THREADS */ @@ -252,12 +221,26 @@ typedef struct { } benaphore; typedef benaphore _glthread_Mutex; -#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = { 0, 0 } -#define _glthread_INIT_MUTEX(name) name.sem = create_sem(0, #name"_benaphore"), name.lock = 0 -#define _glthread_DESTROY_MUTEX(name) delete_sem(name.sem), name.lock = 0 -#define _glthread_LOCK_MUTEX(name) if (name.sem == 0) _glthread_INIT_MUTEX(name); \ - if (atomic_add(&(name.lock), 1) >= 1) acquire_sem(name.sem) -#define _glthread_UNLOCK_MUTEX(name) if (atomic_add(&(name.lock), -1) > 1) release_sem(name.sem) +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name = { 0, 0 } + +#define _glthread_INIT_MUTEX(name) \ + name.sem = create_sem(0, #name"_benaphore"), \ + name.lock = 0 + +#define _glthread_DESTROY_MUTEX(name) \ + delete_sem(name.sem), \ + name.lock = 0 + +#define _glthread_LOCK_MUTEX(name) \ + if (name.sem == 0) \ + _glthread_INIT_MUTEX(name); \ + if (atomic_add(&(name.lock), 1) >= 1) \ + acquire_sem(name.sem) + +#define _glthread_UNLOCK_MUTEX(name) \ + if (atomic_add(&(name.lock), -1) > 1) \ + release_sem(name.sem) #endif /* BEOS_THREADS */ @@ -301,6 +284,10 @@ extern void _glthread_InitTSD(_glthread_TSD *); +extern void +_glthread_DestroyTSD(_glthread_TSD *); /* WIN32 only */ + + extern void * _glthread_GetTSD(_glthread_TSD *); @@ -308,26 +295,5 @@ _glthread_GetTSD(_glthread_TSD *); extern void _glthread_SetTSD(_glthread_TSD *, void *); -#if !defined __GNUC__ || __GNUC__ < 3 -# define __builtin_expect(x, y) x -#endif - -#if defined(GLX_USE_TLS) - -extern __thread struct _glapi_table * _glapi_tls_Dispatch - __attribute__((tls_model("initial-exec"))); - -#define GET_DISPATCH() _glapi_tls_Dispatch - -#elif !defined(GL_CALL) -# if defined(THREADS) -# define GET_DISPATCH() \ - ((__builtin_expect( _glapi_Dispatch != NULL, 1 )) \ - ? _glapi_Dispatch : _glapi_get_dispatch()) -# else -# define GET_DISPATCH() _glapi_Dispatch -# endif /* defined(THREADS) */ -#endif /* ndef GL_CALL */ - #endif /* THREADS_H */ diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 21ee317a31..e2efe81a8f 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -1269,6 +1269,8 @@ make_1d_stack_mipmap(GLenum datatype, GLuint comps, GLint border, if (border) { /* copy left-most pixel from source */ + assert(dstPtr); + assert(srcPtr); memcpy(dstPtr, srcPtr, bpt); /* copy right-most pixel from source */ memcpy(dstPtr + (dstWidth - 1) * bpt, diff --git a/src/mesa/shader/slang/slang_builtin.c b/src/mesa/shader/slang/slang_builtin.c index 5533cc5b65..f0659a8d8b 100644 --- a/src/mesa/shader/slang/slang_builtin.c +++ b/src/mesa/shader/slang/slang_builtin.c @@ -142,6 +142,7 @@ lookup_statevar(const char *var, GLint index1, GLint index2, const char *field, tokens[1] = index1; } else if (strcmp(var, "gl_Point") == 0) { + assert(field); if (strcmp(field, "size") == 0) { tokens[0] = STATE_POINT_SIZE; *swizzleOut = SWIZZLE_XXXX; diff --git a/src/mesa/state_tracker/st_atom_clip.c b/src/mesa/state_tracker/st_atom_clip.c index 23d709b814..80c0e92139 100644 --- a/src/mesa/state_tracker/st_atom_clip.c +++ b/src/mesa/state_tracker/st_atom_clip.c @@ -35,6 +35,8 @@ #include "pipe/p_context.h" #include "st_atom.h" +#include "cso_cache/cso_context.h" + /* Second state atom for user clip planes: */ @@ -56,7 +58,7 @@ static void update_clip( struct st_context *st ) if (memcmp(&clip, &st->state.clip, sizeof(clip)) != 0) { st->state.clip = clip; - st->pipe->set_clip_state(st->pipe, &clip); + cso_set_clip(st->cso_context, &clip); } } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 898c32293d..9e66eed363 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -62,11 +62,9 @@ st_init_clear(struct st_context *st) { struct pipe_context *pipe = st->pipe; - memset(&st->clear.raster, 0, sizeof(st->clear.raster)); - st->clear.raster.gl_rasterization_rules = 1; + memset(&st->clear, 0, sizeof(st->clear)); - /* rasterizer state: bypass vertex shader, clipping and viewport */ - st->clear.raster.bypass_vs_clip_and_viewport = 1; + st->clear.raster.gl_rasterization_rules = 1; /* fragment shader state: color pass-through program */ st->clear.fs = @@ -104,9 +102,7 @@ st_destroy_clear(struct st_context *st) /** * Draw a screen-aligned quadrilateral. - * Coords are window coords with y=0=bottom. These will be passed - * through unmodified to the rasterizer as we have set - * rasterizer->bypass_vs_clip_and_viewport. + * Coords are clip coords with y=0=bottom. */ static void draw_quad(GLcontext *ctx, @@ -192,18 +188,13 @@ clear_with_quad(GLcontext *ctx, GLboolean color, GLboolean depth, GLboolean stencil) { struct st_context *st = ctx->st; - const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin; - const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax; - GLfloat y0, y1; - - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { - y0 = (GLfloat) (ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax); - y1 = (GLfloat) (ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin); - } - else { - y0 = (GLfloat) ctx->DrawBuffer->_Ymin; - y1 = (GLfloat) ctx->DrawBuffer->_Ymax; - } + const struct gl_framebuffer *fb = ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat) fb->Width; + const GLfloat fb_height = (GLfloat) fb->Height; + const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f; + const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f; + const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f; + const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f; /* printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, @@ -218,6 +209,8 @@ clear_with_quad(GLcontext *ctx, cso_save_stencil_ref(st->cso_context); cso_save_depth_stencil_alpha(st->cso_context); cso_save_rasterizer(st->cso_context); + cso_save_viewport(st->cso_context); + cso_save_clip(st->cso_context); cso_save_fragment_shader(st->cso_context); cso_save_vertex_shader(st->cso_context); @@ -273,6 +266,22 @@ clear_with_quad(GLcontext *ctx, cso_set_rasterizer(st->cso_context, &st->clear.raster); + /* viewport state: viewport matching window dims */ + { + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * fb_width; + vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f); + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * fb_width; + vp.translate[1] = 0.5f * fb_height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(st->cso_context, &vp); + } + + cso_set_clip(st->cso_context, &st->clear.clip); cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); @@ -284,9 +293,10 @@ clear_with_quad(GLcontext *ctx, cso_restore_stencil_ref(st->cso_context); cso_restore_depth_stencil_alpha(st->cso_context); cso_restore_rasterizer(st->cso_context); + cso_restore_viewport(st->cso_context); + cso_restore_clip(st->cso_context); cso_restore_fragment_shader(st->cso_context); cso_restore_vertex_shader(st->cso_context); - } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 13b7b0e22d..045c029c30 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -166,6 +166,7 @@ struct st_context struct { struct pipe_rasterizer_state raster; struct pipe_viewport_state viewport; + struct pipe_clip_state clip; void *vs; void *fs; float vertices[4][2][4]; /**< vertex pos + color */ diff --git a/src/mesa/x86-64/glapi_x86-64.S b/src/mesa/x86-64/glapi_x86-64.S index 4c9eab882b..bd5a657e19 100644 --- a/src/mesa/x86-64/glapi_x86-64.S +++ b/src/mesa/x86-64/glapi_x86-64.S @@ -45,7 +45,7 @@ # define GL_PREFIX(n) GLNAME(CONCAT(gl,n)) # endif -#if defined(PTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS) +#if defined(PTHREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS) # define THREADS #endif diff --git a/src/mesa/x86/glapi_x86.S b/src/mesa/x86/glapi_x86.S index 13270ef35d..ae5dd2b0d1 100644 --- a/src/mesa/x86/glapi_x86.S +++ b/src/mesa/x86/glapi_x86.S @@ -52,7 +52,7 @@ #define GLOBL_FN(x) GLOBL x #endif -#if defined(PTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS) +#if defined(PTHREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS) # define THREADS #endif |