diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
33 files changed, 596 insertions, 174 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 02fb93486e..9e4ff112dc 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -20,6 +20,7 @@ DRIVER_SOURCES = \ intel_pixel_bitmap.c \ intel_state.c \ intel_tex.c \ + intel_tex_layout.c \ intel_tex_validate.c \ brw_aub.c \ brw_aub_playback.c \ @@ -83,8 +84,10 @@ C_SOURCES = \ ASM_SOURCES = - +DRIVER_DEFINES = -I../intel include ../Makefile.template +intel_tex_layout.o: ../intel/intel_tex_layout.c + symlinks: diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 0e8591aaa8..3bec153075 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -62,6 +62,8 @@ static void compile_clip_prog( struct brw_context *brw, */ brw_init_compile(&c.func); + c.func.single_program_flow = 1; + c.key = *key; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 263110bf5e..6faee65542 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -154,6 +154,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw_ProgramCacheInit( ctx ); + brw_FrameBufferTexInit( brw ); { const char *filename = getenv("INTEL_REPLAY"); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0a61926ee8..08fdc54520 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -478,7 +478,7 @@ struct brw_context */ struct brw_state_pointers attribs; struct gl_vertex_program *vp; - struct gl_fragment_program *fp; + struct gl_fragment_program *fp, *fp_tex; struct gl_buffer_object *vbo; @@ -486,6 +486,8 @@ struct brw_context struct intel_region *saved_depth_region; GLuint restore_draw_mask; + struct gl_fragment_program *restore_fp; + GLboolean active; } metaops; @@ -496,8 +498,8 @@ struct brw_context /* Active vertex program: */ - struct gl_vertex_program *vertex_program; - struct gl_fragment_program *fragment_program; + const struct gl_vertex_program *vertex_program; + const struct gl_fragment_program *fragment_program; /* For populating the gtt: @@ -590,6 +592,7 @@ struct brw_context struct { struct brw_wm_prog_data *prog_data; + struct brw_wm_compile *compile_data; /* Input sizes, calculated from active vertex program: */ @@ -665,6 +668,8 @@ void brw_destroy_state( struct brw_context *brw ); */ void brwUpdateTextureState( struct intel_context *intel ); void brwInitTextureFuncs( struct dd_function_table *functions ); +void brw_FrameBufferTexInit( struct brw_context *brw ); +void brw_FrameBufferTexDestroy( struct brw_context *brw ); /*====================================================================== * brw_metaops.c diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index dfb598acdf..90637d16ea 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -411,7 +411,7 @@ GLboolean brw_upload_vertices( struct brw_context *brw, */ while (tmp) { - GLuint i = ffs(tmp)-1; + GLuint i = ffsll(tmp)-1; struct brw_vertex_element *input = &brw->vb.inputs[i]; tmp &= ~(1<<i); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 1afa0f816b..d4dbcf38a7 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -104,6 +104,7 @@ struct brw_compile { struct brw_instruction *current; GLuint flag_value; + GLboolean single_program_flow; }; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6425c91450..9992b47d8a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -464,7 +464,6 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, return insn; } - /* EU takes the value from the flag register and pushes it onto some * sort of a stack (presumably merging with any flag value already on * the stack). Within an if block, the flags at the top of the stack @@ -482,7 +481,16 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, */ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_IF); + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } /* Override the defaults for this instruction: */ @@ -504,7 +512,13 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *if_insn) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ELSE); + struct brw_instruction *insn; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); @@ -516,11 +530,17 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, /* Patch the if instruction to point at this instruction. */ - assert(if_insn->header.opcode == BRW_OPCODE_IF); + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); - if_insn->bits3.if_else.jump_count = insn - if_insn; - if_insn->bits3.if_else.pop_count = 1; - if_insn->bits3.if_else.pad0 = 0; + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pad0 = 0; + } return insn; } @@ -528,63 +548,76 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, void brw_ENDIF(struct brw_compile *p, struct brw_instruction *patch_insn) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_d(0x0)); + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = patch_insn->header.execution_size; - insn->header.mask_control = BRW_MASK_ENABLE; + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); - assert(patch_insn->bits3.if_else.jump_count == 0); - - /* Patch the if or else instructions to point at this or the next - * instruction respectively. - */ - if (patch_insn->header.opcode == BRW_OPCODE_IF) { - /* Automagically turn it into an IFF: + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. */ - patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; - patch_insn->bits3.if_else.pop_count = 0; - patch_insn->bits3.if_else.pad0 = 0; + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; } - else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; - patch_insn->bits3.if_else.pop_count = 1; - patch_insn->bits3.if_else.pad0 = 0; - } - else { - assert(0); - } - - /* Also pop item off the stack in the endif instruction: - */ - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; } /* DO/WHILE loop: */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); - /* Override the defaults for this instruction: - */ - brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = execute_size; -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ - return insn; + return insn; + } } @@ -592,19 +625,31 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) void brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WHILE); + struct brw_instruction *insn; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); brw_set_dest(insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = do_insn->header.execution_size; - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn; - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } /* insn->header.mask_control = BRW_MASK_ENABLE; */ diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 7d3f9dd5e3..9066e42252 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -66,7 +66,9 @@ static void compile_gs_prog( struct brw_context *brw, /* Begin the compilation: */ brw_init_compile(&c.func); - + + c.func.single_program_flow = 1; + /* For some reason the thread is spawned with only 4 channels * unmasked. */ diff --git a/src/mesa/drivers/dri/i965/brw_metaops.c b/src/mesa/drivers/dri/i965/brw_metaops.c index 2d4c84f612..1728fc8f56 100644 --- a/src/mesa/drivers/dri/i965/brw_metaops.c +++ b/src/mesa/drivers/dri/i965/brw_metaops.c @@ -27,6 +27,7 @@ /* * Authors: * Keith Whitwell <keith@tungstengraphics.com> + * frame buffer texture by Gary Wong <gtw@gnu.org> */ @@ -143,6 +144,15 @@ static const char *fp_prog = "MOV result.color, fragment.color;\n" "END\n"; +static const char *fp_tex_prog = + "!!ARBfp1.0\n" + "TEMP a;\n" + "ADD a, fragment.position, program.local[0];\n" + "MUL a, a, program.local[1];\n" + "TEX result.color, a, texture[0], 2D;\n" + "MOV result.depth.z, fragment.position;\n" + "END\n"; + /* Derived values of importance: * * FragmentProgram->_Current @@ -169,6 +179,9 @@ static void init_metaops_state( struct brw_context *brw ) brw->metaops.fp = (struct gl_fragment_program *) ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 ); + brw->metaops.fp_tex = (struct gl_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 ); + brw->metaops.vp = (struct gl_vertex_program *) ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 1 ); @@ -176,6 +189,10 @@ static void init_metaops_state( struct brw_context *brw ) fp_prog, strlen(fp_prog), brw->metaops.fp); + _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB, + fp_tex_prog, strlen(fp_tex_prog), + brw->metaops.fp_tex); + _mesa_parse_arb_vertex_program(ctx, GL_VERTEX_PROGRAM_ARB, vp_prog, strlen(vp_prog), brw->metaops.vp); @@ -266,7 +283,76 @@ static void meta_color_mask( struct intel_context *intel, GLboolean state ) static void meta_no_texture( struct intel_context *intel ) { - /* Nothing to do */ + struct brw_context *brw = brw_context(&intel->ctx); + + brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp; + + brw->metaops.attribs.Texture->CurrentUnit = 0; + brw->metaops.attribs.Texture->_EnabledUnits = 0; + brw->metaops.attribs.Texture->_EnabledCoordUnits = 0; + brw->metaops.attribs.Texture->Unit[ 0 ].Enabled = 0; + brw->metaops.attribs.Texture->Unit[ 0 ]._ReallyEnabled = 0; + + brw->state.dirty.mesa |= _NEW_TEXTURE | _NEW_PROGRAM; +} + +static void meta_texture_blend_replace(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + brw->metaops.attribs.Texture->CurrentUnit = 0; + brw->metaops.attribs.Texture->_EnabledUnits = 1; + brw->metaops.attribs.Texture->_EnabledCoordUnits = 1; + brw->metaops.attribs.Texture->Unit[ 0 ].Enabled = TEXTURE_2D_BIT; + brw->metaops.attribs.Texture->Unit[ 0 ]._ReallyEnabled = TEXTURE_2D_BIT; + brw->metaops.attribs.Texture->Unit[ 0 ].Current2D = + intel->frame_buffer_texobj; + brw->metaops.attribs.Texture->Unit[ 0 ]._Current = + intel->frame_buffer_texobj; + + brw->state.dirty.mesa |= _NEW_TEXTURE | _NEW_PROGRAM; +} + +static void meta_import_pixel_state(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + RESTORE(brw, Color, _NEW_COLOR); + RESTORE(brw, Depth, _NEW_DEPTH); + RESTORE(brw, Fog, _NEW_FOG); + RESTORE(brw, Scissor, _NEW_SCISSOR); + RESTORE(brw, Stencil, _NEW_STENCIL); + RESTORE(brw, Texture, _NEW_TEXTURE); + RESTORE(brw, FragmentProgram, _NEW_PROGRAM); +} + +static void meta_frame_buffer_texture( struct intel_context *intel, + GLint xoff, GLint yoff ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + struct intel_region *region = intel_drawbuf_region( intel ); + + INSTALL(brw, FragmentProgram, _NEW_PROGRAM); + + brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp_tex; + /* This is unfortunate, but seems to be necessary, since later on we + will end up calling _mesa_load_state_parameters to lookup the + local params (below), and that will want to look in ctx.FragmentProgram + instead of brw->attribs.FragmentProgram. */ + intel->ctx.FragmentProgram.Current = brw->metaops.fp_tex; + + brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 0 ] = xoff; + brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 1 ] = yoff; + brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 2 ] = 0.0; + brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 3 ] = 0.0; + brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 0 ] = + 1.0 / region->pitch; + brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 1 ] = + -1.0 / region->height; + brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 2 ] = 0.0; + brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 3 ] = 1.0; + + brw->state.dirty.mesa |= _NEW_PROGRAM; } @@ -408,9 +494,11 @@ static void install_meta_state( struct intel_context *intel ) } install_attribs(brw); + meta_no_texture(&brw->intel); meta_flat_shade(&brw->intel); brw->metaops.restore_draw_mask = ctx->DrawBuffer->_ColorDrawBufferMask[0]; + brw->metaops.restore_fp = ctx->FragmentProgram.Current; /* This works without adjusting refcounts. Fix later? */ @@ -429,6 +517,7 @@ static void leave_meta_state( struct intel_context *intel ) restore_attribs(brw); ctx->DrawBuffer->_ColorDrawBufferMask[0] = brw->metaops.restore_draw_mask; + ctx->FragmentProgram.Current = brw->metaops.restore_fp; brw->state.draw_region = brw->metaops.saved_draw_region; brw->state.depth_region = brw->metaops.saved_depth_region; @@ -455,10 +544,11 @@ void brw_init_metaops( struct brw_context *brw ) brw->intel.vtbl.meta_depth_replace = meta_depth_replace; brw->intel.vtbl.meta_color_mask = meta_color_mask; brw->intel.vtbl.meta_no_texture = meta_no_texture; + brw->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state; + brw->intel.vtbl.meta_frame_buffer_texture = meta_frame_buffer_texture; brw->intel.vtbl.meta_draw_region = meta_draw_region; brw->intel.vtbl.meta_draw_quad = meta_draw_quad; - -/* brw->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace; */ + brw->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace; /* brw->intel.vtbl.meta_tex_rect_source = meta_tex_rect_source; */ /* brw->intel.vtbl.meta_draw_format = set_draw_format; */ } @@ -471,5 +561,6 @@ void brw_destroy_metaops( struct brw_context *brw ) ctx->Driver.DeleteBuffer( ctx, brw->metaops.vbo ); /* ctx->Driver.DeleteProgram( ctx, brw->metaops.fp ); */ +/* ctx->Driver.DeleteProgram( ctx, brw->metaops.fp_tex ); */ /* ctx->Driver.DeleteProgram( ctx, brw->metaops.vp ); */ } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 6a6c4503c7..d5779680ff 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -88,10 +88,10 @@ static void upload_drawing_rect(struct brw_context *brw) if (brw->intel.numClipRects > 1) return; - x1 = dPriv->x; - y1 = dPriv->y; - x2 = dPriv->x + dPriv->w; - y2 = dPriv->y + dPriv->h; + x1 = brw->intel.pClipRects[0].x1; + y1 = brw->intel.pClipRects[0].y1; + x2 = brw->intel.pClipRects[0].x2; + y2 = brw->intel.pClipRects[0].y2; if (x1 < 0) x1 = 0; if (y1 < 0) y1 = 0; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 25acdcfe94..10fee944e8 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -519,7 +519,22 @@ struct thread3 struct brw_clip_unit_state { struct thread0 thread0; - struct thread1 thread1; + struct + { + GLuint pad0:7; + GLuint sw_exception_enable:1; + GLuint pad1:3; + GLuint mask_stack_exception_enable:1; + GLuint pad2:1; + GLuint illegal_op_exception_enable:1; + GLuint pad3:2; + GLuint floating_point_mode:1; + GLuint thread_priority:1; + GLuint binding_table_entry_count:8; + GLuint pad4:5; + GLuint single_program_flow:1; + } thread1; + struct thread2 thread2; struct thread3 thread3; @@ -532,8 +547,8 @@ struct brw_clip_unit_state GLuint pad1:1; GLuint urb_entry_allocation_size:5; GLuint pad2:1; - GLuint max_threads:6; /* may be less */ - GLuint pad3:1; + GLuint max_threads:1; /* may be less */ + GLuint pad3:6; } thread4; struct @@ -1322,6 +1337,7 @@ struct brw_instruction GLuint end_of_thread:1; } generic; + GLint d; GLuint ud; } bits3; }; diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 8332d869e1..c3ffa9e657 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -36,11 +36,14 @@ #include "simple_list.h" #include "enums.h" #include "image.h" +#include "teximage.h" #include "texstore.h" #include "texformat.h" #include "texmem.h" +#include "intel_context.h" #include "intel_ioctl.h" +#include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" @@ -179,3 +182,32 @@ void brwInitTextureFuncs( struct dd_function_table *functions ) { functions->ChooseTextureFormat = brwChooseTextureFormat; } + +void brw_FrameBufferTexInit( struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + struct intel_region *region = intel->front_region; + struct gl_texture_object *obj; + struct gl_texture_image *img; + + intel->frame_buffer_texobj = obj = + ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D ); + + obj->MinFilter = GL_NEAREST; + obj->MagFilter = GL_NEAREST; + + img = ctx->Driver.NewTextureImage( ctx ); + + _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img, + region->pitch, region->height, 1, 0, + region->cpp == 4 ? GL_RGBA : GL_RGB ); + + _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img ); +} + +void brw_FrameBufferTexDestroy( struct brw_context *brw ) +{ + brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, + brw->intel.frame_buffer_texobj ); +} diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 1353325aff..af1ad0f1ef 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -34,21 +34,15 @@ */ #include "intel_mipmap_tree.h" +#include "intel_tex_layout.h" #include "macros.h" -static GLuint minify( GLuint d ) -{ - return MAX2(1, d>>1); -} - GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ) { /* XXX: these vary depending on image format: */ /* GLint align_w = 4; */ - GLint align_h = 2; - switch (mt->target) { case GL_TEXTURE_CUBE_MAP: @@ -107,53 +101,10 @@ GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ) break; } - default: { - GLuint level; - GLuint x = 0; - GLuint y = 0; - GLuint width = mt->width0; - GLuint height = mt->height0; - - mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp; - mt->total_height = 0; - - for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { - GLuint img_height; - - intel_miptree_set_level_info(mt, level, 1, - x, y, - width, - mt->compressed ? height/4 : height, 1); - - if (mt->compressed) - img_height = MAX2(1, height/4); - else - img_height = MAX2(align_h, height); - - - /* Because the images are packed better, the final offset - * might not be the maximal one: - */ - mt->total_height = MAX2(mt->total_height, y + img_height); - - /* Layout_below: step right after second mipmap. - */ - if (level == mt->first_level + 1) { - x += mt->pitch / 2; - x = (x + 3) & ~ 3; - } - else { - y += img_height; - y += align_h - 1; - y &= ~(align_h - 1); - } - - width = minify(width); - height = minify(height); - } + default: + i945_miptree_layout_2d(mt); break; } - } DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, mt->pitch, mt->total_height, diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index dc580998e3..0d61092247 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -114,7 +114,7 @@ static GLuint translate_texgen( GLboolean enabled, GLenum mode ) static void make_state_key( GLcontext *ctx, struct state_key *key ) { struct brw_context *brw = brw_context(ctx); - struct gl_fragment_program *fp = brw->fragment_program; + const struct gl_fragment_program *fp = brw->fragment_program; GLuint i; /* This now relies on texenvprogram.c being active: @@ -402,7 +402,7 @@ static struct ureg register_const4f( struct tnl_program *p, values[1] = s1; values[2] = s2; values[3] = s3; - idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values ); + idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4 ); return make_ureg(PROGRAM_STATE_VAR, idx); } @@ -1625,7 +1625,7 @@ const struct brw_tracked_state brw_tnl_vertprog = { static void update_active_vertprog( struct brw_context *brw ) { - struct gl_vertex_program *prev = brw->vertex_program; + const struct gl_vertex_program *prev = brw->vertex_program; /* NEW_PROGRAM */ if (brw->attribs.VertexProgram->_Enabled) { diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index a5738e5774..786f30e641 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -67,6 +67,7 @@ static void brw_destroy_context( struct intel_context *intel ) brw_draw_destroy( brw ); brw_ProgramCacheDestroy( ctx ); + brw_FrameBufferTexDestroy( brw ); } /* called from intelDrawBuffer() diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 3e2f2d06b8..0f842d289d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -138,64 +138,75 @@ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) { - struct brw_wm_compile c; + struct brw_wm_compile *c; const GLuint *program; GLuint program_size; - memset(&c, 0, sizeof(c)); - memcpy(&c.key, key, sizeof(*key)); + c = brw->wm.compile_data; + if (c == NULL) { + brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + c = brw->wm.compile_data; + } else { + memset(c, 0, sizeof(*brw->wm.compile_data)); + } + memcpy(&c->key, key, sizeof(*key)); - c.fp = fp; - c.env_param = brw->intel.ctx.FragmentProgram.Parameters; + c->fp = fp; + c->env_param = brw->intel.ctx.FragmentProgram.Parameters; /* Augment fragment program. Add instructions for pre- and * post-fragment-program tasks such as interpolation and fogging. */ - brw_wm_pass_fp(&c); + brw_wm_pass_fp(c); /* Translate to intermediate representation. Build register usage * chains. */ - brw_wm_pass0(&c); + brw_wm_pass0(c); /* Dead code removal. */ - brw_wm_pass1(&c); + brw_wm_pass1(c); /* Hal optimization */ - brw_wm_pass_hal (&c); + brw_wm_pass_hal (c); /* Register allocation. */ - c.grf_limit = BRW_WM_MAX_GRF/2; + c->grf_limit = BRW_WM_MAX_GRF/2; /* This is where we start emitting gen4 code: */ - brw_init_compile(&c.func); + brw_init_compile(&c->func); - brw_wm_pass2(&c); + brw_wm_pass2(c); - c.prog_data.total_grf = c.max_wm_grf; - c.prog_data.total_scratch = c.last_scratch ? c.last_scratch + 0x40 : 0; + c->prog_data.total_grf = c->max_wm_grf; + if (c->last_scratch) { + c->prog_data.total_scratch = + c->last_scratch + 0x40; + } else { + c->prog_data.total_scratch = 0; + } /* Emit GEN4 code. */ - brw_wm_emit(&c); + brw_wm_emit(c); /* get the program */ - program = brw_get_program(&c.func, &program_size); + program = brw_get_program(&c->func, &program_size); /* */ brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], - &c.key, - sizeof(c.key), + &c->key, + sizeof(c->key), program, program_size, - &c.prog_data, + &c->prog_data, &brw->wm.prog_data ); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 04c7555b9b..bb0aa35615 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -432,7 +432,7 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, return src_reg(PROGRAM_STATE_VAR, idx); } - idx = _mesa_add_unnamed_constant( paramList, values ); + idx = _mesa_add_unnamed_constant( paramList, values, 4 ); return src_reg(PROGRAM_STATE_VAR, idx); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 4707a709e7..ff5cb31bdd 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -122,7 +122,7 @@ static void upload_wm_unit(struct brw_context *brw ) /* BRW_NEW_FRAGMENT_PROGRAM */ { - struct gl_fragment_program *fp = brw->fragment_program; + const struct gl_fragment_program *fp = brw->fragment_program; if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS)) wm.wm5.program_uses_depth = 1; /* as far as we can tell */ @@ -168,7 +168,7 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.line_stipple = 1; } - if (INTEL_DEBUG & DEBUG_STATS) + if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm) wm.wm4.stats_enable = 1; brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 5c7dc500ca..d24c618a66 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -239,6 +239,12 @@ static void upload_wm_surfaces(struct brw_context *brw ) brw->wm.bind.surf_ss_offset[i+1] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); brw->wm.nr_surfaces = i+2; } + else if( texUnit->_ReallyEnabled && + texUnit->_Current == intel->frame_buffer_texobj ) + { + brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0]; + brw->wm.nr_surfaces = i+2; + } else { brw->wm.bind.surf_ss_offset[i+1] = 0; } diff --git a/src/mesa/drivers/dri/i965/bufmgr.h b/src/mesa/drivers/dri/i965/bufmgr.h index 6932522d3d..e748c0d6d0 100644 --- a/src/mesa/drivers/dri/i965/bufmgr.h +++ b/src/mesa/drivers/dri/i965/bufmgr.h @@ -199,9 +199,11 @@ void *bmFindVirtual( struct intel_context *intel, * For now they can stay, but will likely change/move before final: */ unsigned bmSetFence( struct intel_context * ); +unsigned bmSetFenceLock( struct intel_context * ); unsigned bmLockAndFence( struct intel_context *intel ); int bmTestFence( struct intel_context *, unsigned fence ); void bmFinishFence( struct intel_context *, unsigned fence ); +void bmFinishFenceLock( struct intel_context *, unsigned fence ); void bm_fake_NotifyContendedLockTake( struct intel_context * ); diff --git a/src/mesa/drivers/dri/i965/bufmgr_fake.c b/src/mesa/drivers/dri/i965/bufmgr_fake.c index ed88ab3797..24ee11edd8 100644 --- a/src/mesa/drivers/dri/i965/bufmgr_fake.c +++ b/src/mesa/drivers/dri/i965/bufmgr_fake.c @@ -338,7 +338,6 @@ static int evict_mru( struct intel_context *intel, GLuint *pool ) } - static int check_fenced( struct intel_context *intel ) { struct bufmgr *bm = intel->bm; @@ -1328,11 +1327,21 @@ unsigned bmSetFence( struct intel_context *intel ) return intel->bm->last_fence; } +unsigned bmSetFenceLock( struct intel_context *intel ) +{ + unsigned last; + LOCK(intel->bm); + last = bmSetFence(intel); + UNLOCK(intel->bm); + return last; +} unsigned bmLockAndFence( struct intel_context *intel ) { if (intel->bm->need_fence) { LOCK_HARDWARE(intel); + LOCK(intel->bm); bmSetFence(intel); + UNLOCK(intel->bm); UNLOCK_HARDWARE(intel); } @@ -1350,7 +1359,12 @@ void bmFinishFence( struct intel_context *intel, unsigned fence ) check_fenced(intel); } - +void bmFinishFenceLock( struct intel_context *intel, unsigned fence ) +{ + LOCK(intel->bm); + bmFinishFence(intel, fence); + UNLOCK(intel->bm); +} /* Specifically ignore texture memory sharing. diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 0974f1f80a..173d1d5b6c 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -66,7 +66,7 @@ void intelCopyBuffer( const __DRIdrawablePrivate *dPriv, intelFlush( &intel->ctx ); - bmFinishFence(intel, intel->last_swap_fence); + bmFinishFenceLock(intel, intel->last_swap_fence); /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets * should work regardless. @@ -155,7 +155,7 @@ void intelCopyBuffer( const __DRIdrawablePrivate *dPriv, intel_batchbuffer_flush( intel->batch ); intel->second_last_swap_fence = intel->last_swap_fence; - intel->last_swap_fence = bmSetFence( intel ); + intel->last_swap_fence = bmSetFenceLock( intel ); UNLOCK_HARDWARE( intel ); if (!rect) @@ -221,6 +221,29 @@ void intelEmitFillBlit( struct intel_context *intel, ADVANCE_BATCH(); } +static GLuint translate_raster_op(GLenum logicop) +{ + switch(logicop) { + case GL_CLEAR: return 0x00; + case GL_AND: return 0x88; + case GL_AND_REVERSE: return 0x44; + case GL_COPY: return 0xCC; + case GL_AND_INVERTED: return 0x22; + case GL_NOOP: return 0xAA; + case GL_XOR: return 0x66; + case GL_OR: return 0xEE; + case GL_NOR: return 0x11; + case GL_EQUIV: return 0x99; + case GL_INVERT: return 0x55; + case GL_OR_REVERSE: return 0xDD; + case GL_COPY_INVERTED: return 0x33; + case GL_OR_INVERTED: return 0xBB; + case GL_NAND: return 0x77; + case GL_SET: return 0xFF; + default: return 0; + } +} + /* Copy BitBlt */ @@ -236,7 +259,8 @@ void intelEmitCopyBlit( struct intel_context *intel, GLboolean dst_tiled, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, - GLshort w, GLshort h ) + GLshort w, GLshort h, + GLenum logic_op ) { GLuint CMD, BR13; int dst_y2 = dst_y + h; @@ -244,12 +268,15 @@ void intelEmitCopyBlit( struct intel_context *intel, BATCH_LOCALS; - DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d\n", + DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", __FUNCTION__, src_buffer, src_pitch, src_x, src_y, dst_buffer, dst_pitch, dst_x, dst_y, - w,h); + w,h,logic_op); + assert( logic_op - GL_CLEAR >= 0 ); + assert( logic_op - GL_CLEAR < 0x10 ); + src_pitch *= cpp; dst_pitch *= cpp; @@ -257,11 +284,12 @@ void intelEmitCopyBlit( struct intel_context *intel, case 1: case 2: case 3: - BR13 = (0xCC << 16) | (1<<24); + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); CMD = XY_SRC_COPY_BLT_CMD; break; case 4: - BR13 = (0xCC << 16) | (1<<24) | (1<<25); + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | + (1<<25); CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | XY_SRC_COPY_BLT_WRITE_RGB); break; diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h index b15fb1c2b7..8b0cc65243 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.h +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -49,7 +49,8 @@ extern void intelEmitCopyBlit( struct intel_context *intel, GLboolean dst_tiled, GLshort srcx, GLshort srcy, GLshort dstx, GLshort dsty, - GLshort w, GLshort h ); + GLshort w, GLshort h, + GLenum logic_op ); extern void intelEmitFillBlit( struct intel_context *intel, GLuint cpp, diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c index c4c5488cbb..60fcf95892 100644 --- a/src/mesa/drivers/dri/i965/intel_context.c +++ b/src/mesa/drivers/dri/i965/intel_context.c @@ -33,6 +33,7 @@ #include "extensions.h" #include "framebuffer.h" #include "imports.h" +#include "points.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -70,6 +71,7 @@ int INTEL_DEBUG = (0); #define need_GL_ARB_vertex_buffer_object #define need_GL_ARB_vertex_program #define need_GL_ARB_window_pos +#define need_GL_ARB_occlusion_query #define need_GL_EXT_blend_color #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate @@ -182,7 +184,8 @@ const struct dri_extension card_extensions[] = { NULL, NULL } }; - +static const struct dri_extension arb_oc_extension = + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}; static const struct dri_debug_control debug_control[] = { @@ -241,6 +244,36 @@ void intelFinish( GLcontext *ctx ) bmFinishFence(intel, bmLockAndFence(intel)); } +static void +intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) +{ + struct intel_context *intel = intel_context( ctx ); + GLuint64EXT tmp = 0; + drmI830MMIO io = { + .read_write = MMIO_WRITE, + .reg = MMIO_REGS_PS_DEPTH_COUNT, + .data = &tmp + }; + intel->stats_wm = GL_TRUE; + intelFinish(&intel->ctx); + drmCommandWrite(intel->driFd, DRM_I830_MMIO, &io, sizeof(io)); +} + +static void +intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) +{ + struct intel_context *intel = intel_context( ctx ); + drmI830MMIO io = { + .read_write = MMIO_READ, + .reg = MMIO_REGS_PS_DEPTH_COUNT, + .data = &q->Result + }; + intelFinish(&intel->ctx); + drmCommandRead(intel->driFd, DRM_I830_MMIO, &io, sizeof(io)); + q->Ready = GL_TRUE; + intel->stats_wm = GL_FALSE; +} + void intelInitDriverFunctions( struct dd_function_table *functions ) { @@ -250,6 +283,8 @@ void intelInitDriverFunctions( struct dd_function_table *functions ) functions->Finish = intelFinish; functions->GetString = intelGetString; functions->UpdateState = intelInvalidateState; + functions->BeginQuery = intelBeginQuery; + functions->EndQuery = intelEndQuery; /* CopyPixels can be accelerated even with the current memory * manager: @@ -320,6 +355,11 @@ GLboolean intelInitContext( struct intel_context *intel, ctx->Const.MaxPointSizeAA = 3.0; ctx->Const.PointSizeGranularity = 1.0; + /* reinitialize the context point state. + * It depend on constants in __GLcontextRec::Const + */ + _mesa_init_point(ctx); + /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext( ctx ); _vbo_CreateContext( ctx ); @@ -373,6 +413,9 @@ GLboolean intelInitContext( struct intel_context *intel, driInitExtensions( ctx, card_extensions, GL_TRUE ); + if (intel->intelScreen->drmMinor >= 8) + driInitSingleExtension (ctx, &arb_oc_extension); + INTEL_DEBUG = driParseDebugString( getenv( "INTEL_DEBUG" ), debug_control ); @@ -398,7 +441,7 @@ GLboolean intelInitContext( struct intel_context *intel, intelScreen->cpp, intelScreen->front.pitch / intelScreen->cpp, intelScreen->height, - GL_FALSE); + intelScreen->front.tiled != 0); /* 0: LINEAR */ intel->back_region = @@ -409,7 +452,7 @@ GLboolean intelInitContext( struct intel_context *intel, intelScreen->cpp, intelScreen->back.pitch / intelScreen->cpp, intelScreen->height, - (INTEL_DEBUG & DEBUG_TILE) ? 0 : 1); + intelScreen->back.tiled != 0); /* Still assuming front.cpp == depth.cpp * @@ -425,7 +468,7 @@ GLboolean intelInitContext( struct intel_context *intel, intelScreen->cpp, intelScreen->depth.pitch / intelScreen->cpp, intelScreen->height, - (INTEL_DEBUG & DEBUG_TILE) ? 0 : 1); + intelScreen->depth.tiled != 0); intel_bufferobj_init( intel ); intel->batch = intel_batchbuffer_alloc( intel ); diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h index 2df8faef28..fe7ee382a1 100644 --- a/src/mesa/drivers/dri/i965/intel_context.h +++ b/src/mesa/drivers/dri/i965/intel_context.h @@ -86,7 +86,6 @@ struct intel_texture_object - struct intel_context { GLcontext ctx; /* the parent class */ @@ -148,9 +147,14 @@ struct intel_context void (*meta_depth_replace)( struct intel_context *intel ); + void (*meta_texture_blend_replace) (struct intel_context * intel); + void (*meta_no_stencil_write)( struct intel_context *intel ); void (*meta_no_depth_write)( struct intel_context *intel ); void (*meta_no_texture)( struct intel_context *intel ); + void (*meta_import_pixel_state) (struct intel_context * intel); + void (*meta_frame_buffer_texture)( struct intel_context *intel, + GLint xoff, GLint yoff ); void (*meta_draw_quad)(struct intel_context *intel, GLfloat x0, GLfloat x1, @@ -173,6 +177,7 @@ struct intel_context GLuint second_last_swap_fence; GLboolean aub_wrap; + GLboolean stats_wm; struct intel_batchbuffer *batch; @@ -218,6 +223,7 @@ struct intel_context int drawY; GLuint numClipRects; /* cliprects for that buffer */ drm_clip_rect_t *pClipRects; + struct gl_texture_object *frame_buffer_texobj; GLboolean scissor; drm_clip_rect_t draw_rect; diff --git a/src/mesa/drivers/dri/i965/intel_ioctl.c b/src/mesa/drivers/dri/i965/intel_ioctl.c index d1f2e3f27c..4da31277ea 100644 --- a/src/mesa/drivers/dri/i965/intel_ioctl.c +++ b/src/mesa/drivers/dri/i965/intel_ioctl.c @@ -43,6 +43,26 @@ #include "drm.h" #include "bufmgr.h" +static int intelWaitIdleLocked( struct intel_context *intel ) +{ + static int in_wait_idle = 0; + unsigned int fence; + + if (!in_wait_idle) { + if (INTEL_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "waiting for idle\n"); + } + + in_wait_idle = 1; + fence = bmSetFence(intel); + intelWaitIrq(intel, fence); + in_wait_idle = 0; + + return bmTestFence(intel, fence); + } else { + return 1; + } +} int intelEmitIrqLocked( struct intel_context *intel ) { @@ -75,7 +95,7 @@ void intelWaitIrq( struct intel_context *intel, int seq ) { if (!intel->no_hw) { drmI830IrqWait iw; - int ret; + int ret, lastdispatch; if (0) fprintf(stderr, "%s %d\n", __FUNCTION__, seq ); @@ -83,11 +103,12 @@ void intelWaitIrq( struct intel_context *intel, int seq ) iw.irq_seq = seq; do { + lastdispatch = intel->sarea->last_dispatch; ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw) ); /* This seems quite often to return before it should!?! */ - } while (ret == -EAGAIN || ret == -EINTR || (ret == 0 && seq > intel->sarea->last_dispatch)); + } while (ret == -EAGAIN || ret == -EINTR || (ret == -EBUSY && lastdispatch != intel->sarea->last_dispatch) || (ret == 0 && seq > intel->sarea->last_dispatch)); if ( ret ) { @@ -139,7 +160,11 @@ void intel_batch_ioctl( struct intel_context *intel, UNLOCK_HARDWARE(intel); exit(1); } - } + + if (INTEL_DEBUG & DEBUG_SYNC) { + intelWaitIdleLocked(intel); + } + } } void intel_cmd_ioctl( struct intel_context *intel, @@ -171,5 +196,9 @@ void intel_cmd_ioctl( struct intel_context *intel, UNLOCK_HARDWARE(intel); exit(1); } - } + + if (INTEL_DEBUG & DEBUG_SYNC) { + intelWaitIdleLocked(intel); + } + } } diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c index d5d4899452..58dc49505f 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c @@ -80,8 +80,6 @@ intel_check_blit_fragment_ops(GLcontext * ctx) if (ctx->NewState) _mesa_update_state(ctx); - /* Could do logicop with the blitter: - */ return !(ctx->_ImageTransferState || ctx->RenderMode != GL_RENDER || ctx->Color.AlphaEnabled || @@ -92,12 +90,112 @@ intel_check_blit_fragment_ops(GLcontext * ctx) !ctx->Color.ColorMask[1] || !ctx->Color.ColorMask[2] || !ctx->Color.ColorMask[3] || /* can do this! */ - ctx->Color.ColorLogicOpEnabled || /* can do this! */ ctx->Texture._EnabledUnits || - ctx->FragmentProgram._Enabled); + ctx->FragmentProgram._Enabled || + ctx->Color.BlendEnabled); } +/* Doesn't work for overlapping regions. Could do a double copy or + * just fallback. + */ +static GLboolean +do_texture_copypixels(GLcontext * ctx, + GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty, GLenum type) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_region *dst = intel_drawbuf_region(intel); + struct intel_region *src = copypix_src_region(intel, type); + GLenum src_format; + GLenum src_type; + + DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, + srcx, srcy, width, height, dstx, dsty); + + if (!src || !dst || type != GL_COLOR || + ctx->_ImageTransferState || + ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F || + ctx->RenderMode != GL_RENDER || + ctx->Texture._EnabledUnits || + ctx->FragmentProgram._Enabled || + src != dst ) + return GL_FALSE; + + /* Can't handle overlapping regions. Don't have sufficient control + * over rasterization to pull it off in-place. Punt on these for + * now. + * + * XXX: do a copy to a temporary. + */ + if (src->buffer == dst->buffer) { + drm_clip_rect_t srcbox; + drm_clip_rect_t dstbox; + drm_clip_rect_t tmp; + + srcbox.x1 = srcx; + srcbox.y1 = srcy; + srcbox.x2 = srcx + width - 1; + srcbox.y2 = srcy + height - 1; + + dstbox.x1 = dstx; + dstbox.y1 = dsty; + dstbox.x2 = dstx + width - 1; + dstbox.y2 = dsty + height - 1; + + DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2); + DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2, + width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY); + + if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) { + DBG("%s: regions overlap\n", __FUNCTION__); + return GL_FALSE; + } + } + intelFlush(&intel->ctx); + + intel->vtbl.install_meta_state(intel); + + /* Is this true? Also will need to turn depth testing on according + * to state: + */ + intel->vtbl.meta_no_stencil_write(intel); + intel->vtbl.meta_no_depth_write(intel); + + /* Set the 3d engine to draw into the destination region: + */ + intel->vtbl.meta_draw_region(intel, dst, intel->depth_region); + + intel->vtbl.meta_import_pixel_state(intel); + + if (src->cpp == 2) { + src_format = GL_RGB; + src_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + src_format = GL_BGRA; + src_type = GL_UNSIGNED_BYTE; + } + + /* Set the frontbuffer up as a large rectangular texture. + */ + intel->vtbl.meta_frame_buffer_texture( intel, srcx - dstx, srcy - dsty ); + + intel->vtbl.meta_texture_blend_replace(intel); + + if (intel->driDrawable->numClipRects) + intel->vtbl.meta_draw_quad( intel, + dstx, dstx + width, + dsty, dsty + height, + ctx->Current.RasterPos[ 2 ], + 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0 ); + + intel->vtbl.leave_meta_state( intel ); + + DBG("%s: success\n", __FUNCTION__); + return GL_TRUE; +} /** * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. @@ -210,7 +308,9 @@ do_blit_copypixels(GLcontext * ctx, rect.x1 + delta_x, rect.y1 + delta_y, /* srcx, srcy */ rect.x1, rect.y1, /* dstx, dsty */ - rect.x2 - rect.x1, rect.y2 - rect.y1); + rect.x2 - rect.x1, rect.y2 - rect.y1, + ctx->Color.ColorLogicOpEnabled ? + ctx->Color.LogicOp : GL_COPY); } intel->need_flush = GL_TRUE; @@ -233,6 +333,9 @@ intelCopyPixels(GLcontext * ctx, if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) return; + if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) + return; + if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("fallback to _swrast_CopyPixels\n"); diff --git a/src/mesa/drivers/dri/i965/intel_regions.c b/src/mesa/drivers/dri/i965/intel_regions.c index 53f0561237..398b0a0a3b 100644 --- a/src/mesa/drivers/dri/i965/intel_regions.c +++ b/src/mesa/drivers/dri/i965/intel_regions.c @@ -269,7 +269,8 @@ void intel_region_copy( struct intel_context *intel, dst->pitch, dst->buffer, dst_offset, dst->tiled, srcx, srcy, dstx, dsty, - width, height); + width, height, + GL_COPY ); } /* Fill a rectangular sub-region. Need better logic about when to diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 56e6a792fa..8269deba66 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -230,16 +230,19 @@ intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, intelScreen->front.pitch = sarea->pitch * intelScreen->cpp; intelScreen->front.handle = sarea->front_handle; intelScreen->front.size = sarea->front_size; + intelScreen->front.tiled = sarea->front_tiled; intelScreen->back.offset = sarea->back_offset; intelScreen->back.pitch = sarea->pitch * intelScreen->cpp; intelScreen->back.handle = sarea->back_handle; intelScreen->back.size = sarea->back_size; - + intelScreen->back.tiled = sarea->back_tiled; + intelScreen->depth.offset = sarea->depth_offset; intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp; intelScreen->depth.handle = sarea->depth_handle; intelScreen->depth.size = sarea->depth_size; + intelScreen->depth.tiled = sarea->depth_tiled; intelScreen->tex.offset = sarea->tex_offset; intelScreen->logTextureGranularity = sarea->log_tex_granularity; @@ -249,6 +252,7 @@ intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, intelScreen->rotated.offset = sarea->rotated_offset; intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp; intelScreen->rotated.size = sarea->rotated_size; + intelScreen->rotated.tiled = sarea->rotated_tiled; intelScreen->current_rotation = sarea->rotation; #if 0 matrix23Rotate(&intelScreen->rotMatrix, diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 094158afd8..bf9a716082 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -42,6 +42,7 @@ typedef struct { char *map; /* memory map */ int offset; /* from start of video mem, in bytes */ int pitch; /* row stride, in pixels */ + unsigned int tiled; } intelRegion; typedef struct diff --git a/src/mesa/drivers/dri/i965/intel_tex_layout.c b/src/mesa/drivers/dri/i965/intel_tex_layout.c new file mode 120000 index 0000000000..fe61b44194 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex_layout.c @@ -0,0 +1 @@ +../intel/intel_tex_layout.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index 91ae0970a0..cb23b9dd87 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -133,6 +133,9 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel, GLuint nr_faces = 0; struct gl_texture_image *firstImage; + if( tObj == intel->frame_buffer_texobj ) + return GL_FALSE; + /* We know/require this is true by now: */ assert(intelObj->base.Complete); diff --git a/src/mesa/drivers/dri/i965/server/i830_common.h b/src/mesa/drivers/dri/i965/server/i830_common.h index e3bbdc7907..f320378c2a 100644 --- a/src/mesa/drivers/dri/i965/server/i830_common.h +++ b/src/mesa/drivers/dri/i965/server/i830_common.h @@ -52,6 +52,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define DRM_I830_INIT_HEAP 0x0a #define DRM_I830_CMDBUFFER 0x0b #define DRM_I830_DESTROY_HEAP 0x0c +#define DRM_I830_MMIO 0x10 typedef struct { enum { @@ -199,5 +200,23 @@ typedef struct { int region; } drmI830MemDestroyHeap; +#define MMIO_READ 0 +#define MMIO_WRITE 1 + +#define MMIO_REGS_IA_PRIMATIVES_COUNT 0 +#define MMIO_REGS_IA_VERTICES_COUNT 1 +#define MMIO_REGS_VS_INVOCATION_COUNT 2 +#define MMIO_REGS_GS_PRIMITIVES_COUNT 3 +#define MMIO_REGS_GS_INVOCATION_COUNT 4 +#define MMIO_REGS_CL_PRIMITIVES_COUNT 5 +#define MMIO_REGS_CL_INVOCATION_COUNT 6 +#define MMIO_REGS_PS_INVOCATION_COUNT 7 +#define MMIO_REGS_PS_DEPTH_COUNT 8 + +typedef struct { + unsigned int read_write:1; + unsigned int reg:31; + void __user *data; +} drmI830MMIO; #endif /* _I830_DRM_H_ */ |