diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 38 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_curbe.c | 76 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 84 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_misc_state.c | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 25 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_dump.c | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs.h | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_constval.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_emit.c | 289 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_state.c | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vtbl.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_fp.c | 13 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 178 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_pass0.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 320 |
18 files changed, 801 insertions, 285 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 01e07c967f..a0b3b06309 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -141,7 +141,8 @@ struct brw_context; #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ #define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_SURFACES 0x40000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -159,6 +160,7 @@ struct brw_state_flags { struct brw_vertex_program { struct gl_vertex_program program; GLuint id; + dri_bo *const_buffer; /** Program constant buffer/surface */ }; @@ -168,8 +170,7 @@ struct brw_fragment_program { GLuint id; /**< serial no. to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ - /** Program constant buffer/surface */ - dri_bo *const_buffer; + dri_bo *const_buffer; /** Program constant buffer/surface */ }; @@ -186,7 +187,7 @@ struct brw_wm_prog_data { GLuint total_grf; GLuint total_scratch; - GLuint nr_params; + GLuint nr_params; /**< number of float params/constants */ GLboolean error; /* Pointer to tracked values (only valid once @@ -225,6 +226,7 @@ struct brw_vs_prog_data { GLuint urb_read_length; GLuint total_grf; GLuint outputs_written; + GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -245,12 +247,31 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_TEX_UNIT 16 /** - * Size of our surface binding table. + * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture - * objects and shader constant buffer (+1). + * objects and shader constant buffers (+2). */ #define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) +/** + * Helpers to convert drawing buffers, textures and constant buffers + * to surface binding table indexes, for WM. + */ +#define SURF_INDEX_DRAW(d) (d) +#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. + */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 + enum brw_cache_id { BRW_CC_VP, @@ -557,6 +578,11 @@ struct brw_context dri_bo *prog_bo; dri_bo *state_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; } vs; struct { diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index a6bfb7507e..18b187ed1d 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -45,17 +45,21 @@ #include "brw_util.h" -/* Partition the CURBE between the various users of constant values: +/** + * Partition the CURBE between the various users of constant values: + * Note that vertex and fragment shaders can now fetch constants out + * of constant buffers. We no longer allocatea block of the GRF for + * constants. That greatly reduces the demand for space in the CURBE. + * Some of the comments within are dated... */ static void calculate_curbe_offsets( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* CACHE_NEW_WM_PROG */ - GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; + const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -248,7 +252,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = vp->program.Base.Parameters->NumParameters; + GLuint nr = brw->vs.prog_data->nr_params / 4; _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); @@ -333,37 +337,69 @@ static void prepare_constant_buffer(struct brw_context *brw) /** - * Vertex/fragment shader constants are stored in a pseudo 1D texture. - * This function updates the constants in that buffer. + * Copy Mesa program parameters into given constant buffer. */ static void -update_texture_constant_buffer(struct brw_context *brw) +update_constant_buffer(struct brw_context *brw, + const struct gl_program_parameter_list *params, + dri_bo *const_buffer) { - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - const struct gl_program_parameter_list *params = fp->program.Base.Parameters; const int size = params->NumParameters * 4 * sizeof(GLfloat); - assert(fp->const_buffer); - assert(fp->const_buffer->size >= size); - - /* copy constants into the buffer */ - if (size > 0) { + /* copy Mesa program constants into the buffer */ + if (const_buffer && size > 0) { GLubyte *map; - dri_bo_map(fp->const_buffer, GL_TRUE); - map = fp->const_buffer->virtual; + + assert(const_buffer); + assert(const_buffer->size >= size); + + dri_bo_map(const_buffer, GL_TRUE); + map = const_buffer->virtual; memcpy(map, params->ParameterValues, size); - dri_bo_unmap(fp->const_buffer); + dri_bo_unmap(const_buffer); + + if (0) { + int i; + for (i = 0; i < params->NumParameters; i++) { + float *p = params->ParameterValues[i]; + printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); + } + } } } +/** Copy current vertex program's parameters into the constant buffer */ +static void +update_vertex_constant_buffer(struct brw_context *brw) +{ + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + if (0) { + printf("update VS constants in buffer %p\n", vp->const_buffer); + printf("program %u\n", vp->program.Base.Id); + } + update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); +} + + +/** Copy current fragment program's parameters into the constant buffer */ +static void +update_fragment_constant_buffer(struct brw_context *brw) +{ + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); +} + + static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - update_texture_constant_buffer(brw); + update_vertex_constant_buffer(brw); + update_fragment_constant_buffer(brw); BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index d05f2e6c41..62c98bd8bb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -862,9 +862,17 @@ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, GLboolean relAddr, - GLuint scratch_offset, + GLuint location, GLuint bind_table_index ); +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 21ce8369db..60ea44f7a9 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p, /** * Read a float[4] vector from the data port Data Cache (const buffer). - * Scratch offset should be a multiple of 16. + * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. * If relAddr is true, we'll do an indirect fetch using the address register. */ @@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, GLboolean relAddr, - GLuint scratch_offset, + GLuint location, GLuint bind_table_index ) { { @@ -969,18 +969,20 @@ void brw_dp_READ_4( struct brw_compile *p, brw_set_mask_control(p, BRW_MASK_DISABLE); /* set message header global offset field (reg 0, element 2) */ + /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), - brw_imm_d(scratch_offset)); + brw_imm_d(location)); brw_pop_insn_state(p); } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.predicate_control = 0; /* XXX */ + insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); @@ -989,7 +991,7 @@ void brw_dp_READ_4( struct brw_compile *p, brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); brw_set_dp_read_message(insn, - bind_table_index, /* binding table index (255=stateless) */ + bind_table_index, 0, /* msg_control (0 means 1 Oword) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 0, /* source cache = data cache */ @@ -1000,6 +1002,78 @@ void brw_dp_READ_4( struct brw_compile *p, } +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 5c94a49f60..9bc5c35139 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = { static void prepare_binding_table_pointers(struct brw_context *brw) { + brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); } @@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_BATCH(0); /* vs */ + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0); + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 457bc2fc7f..bac69187c1 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -95,6 +95,12 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { + if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); + dri_bo_unreference(brw_fprog->const_buffer); + } + _mesa_delete_program( ctx, prog ); } @@ -111,7 +117,6 @@ static void brwProgramStringNotify( GLcontext *ctx, struct gl_program *prog ) { struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = &brw->intel; if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; @@ -128,24 +133,6 @@ static void brwProgramStringNotify( GLcontext *ctx, brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; newFP->isGLSL = brw_wm_is_glsl(fprog); - - /* alloc constant buffer/surface */ - { - const struct gl_program_parameter_list *params = prog->Parameters; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - /* free old const buffer if too small */ - if (newFP->const_buffer && newFP->const_buffer->size < size) { - dri_bo_unreference(newFP->const_buffer); - newFP->const_buffer = NULL; - } - - if (!newFP->const_buffer) { - newFP->const_buffer = drm_intel_bo_alloc(intel->bufmgr, - "fp_const_buffer", - size, 64); - } - } } else if (target == GL_VERTEX_PROGRAM_ARB) { struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 5d332d010c..a713262269 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -84,6 +84,19 @@ get_965_surfacetype(unsigned int surfacetype) } } +static const char * +get_965_surface_format(unsigned int surface_format) +{ + switch (surface_format) { + case 0x000: return "r32g32b32a32_float"; + case 0x0c1: return "b8g8r8a8_unorm"; + case 0x100: return "b5g6r5_unorm"; + case 0x102: return "b5g5r5a1_unorm"; + case 0x104: return "b4g4r4a4_unorm"; + default: return "unknown"; + } +} + static void dump_wm_surface_state(struct brw_context *brw) { int i; @@ -95,7 +108,7 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, "WM SS%d: NULL\n", i); + fprintf(stderr, " WM SS%d: NULL\n", i); continue; } dri_bo_map(surf_bo, GL_FALSE); @@ -103,8 +116,9 @@ static void dump_wm_surface_state(struct brw_context *brw) surf = (struct brw_surface_state *)(surf_bo->virtual); sprintf(name, "WM SS%d", i); - state_out(name, surf, surfoff, 0, "%s\n", - get_965_surfacetype(surf->ss0.surface_type)); + state_out(name, surf, surfoff, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); state_out(name, surf, surfoff, 1, "offset\n"); state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 99d0e93722..d20cf78b8a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -75,6 +75,13 @@ struct brw_vs_compile { struct brw_reg userplane[6]; + /** using a real constant buffer? */ + GLboolean use_const_buffer; + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index d29eb17f8c..2637344b48 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -96,7 +96,7 @@ static GLubyte get_active( struct tracker *t, struct prog_src_register src ) { GLuint i; - GLubyte active = src.NegateBase; /* NOTE! */ + GLubyte active = src.Negate; /* NOTE! */ if (src.RelAddr) return 0xf; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 0d6c6ab9a8..524f1211ce 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -38,14 +38,44 @@ #include "brw_vs.h" +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} -/* Do things as simply as possible. Allocate and populate all regs +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +/** + * Preallocate GRF register before code emit. + * Do things as simply as possible. Allocate and populate all regs * ahead of time. */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; - GLuint nr_params; + +#if 0 + if (c->vp->program.Base.Parameters->NumParameters >= 6) + c->use_const_buffer = 1; + else +#endif + c->use_const_buffer = GL_FALSE; + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ /* r0 -- reserved as usual */ @@ -66,13 +96,22 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ - nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params + 1) / 2; + if (c->use_const_buffer) { + /* get constants from a real constant buffer */ + c->prog_data.curb_read_length = 0; + c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + } + else { + /* use a section of the GRF for constants */ + GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; + for (i = 0; i < nr_params; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params + 1) / 2; + c->prog_data.curb_read_length = reg - 1; - c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = nr_params * 4; + } /* Allocate input regs: */ @@ -133,6 +172,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } + if (c->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + for (i = 0; i < 128; i++) { if (c->output_regs[i].used_in_src) { c->output_regs[i].reg = brw_vec8_grf(reg, 0); @@ -165,28 +212,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; -} - - /** * If an instruction uses a temp reg both as a src and the dest, we * sometimes need to allocate an intermediate temporary. @@ -633,6 +658,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c, } brw_ENDIF(p, if_insn); + + release_tmp(c, tmp); } static void emit_lrp_noalias(struct brw_vs_compile *c, @@ -673,13 +700,83 @@ static void emit_nrm( struct brw_vs_compile *c, } +static struct brw_reg +get_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg; + struct brw_reg const2_reg; + + assert(argIndex < 3); + + if (c->current_const[argIndex].index != src->Index || src->RelAddr) { + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + c->current_const[argIndex].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + /* need to fetch the constant now */ + brw_dp_READ_4_vs(p, + c->current_const[argIndex].reg,/* writeback dest */ + 0, /* oword */ + src->RelAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + + if (src->RelAddr) { + /* second read */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + src->RelAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + } + } + + const_reg = c->current_const[argIndex].reg; + + if (src->RelAddr) { + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); + } + else { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } + + return const_reg; +} + + + /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, gl_register_file file, GLuint index ) { - switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: @@ -708,13 +805,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Indirect addressing: get reg[[arg] + offset]. + */ static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset) { struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); @@ -735,10 +836,67 @@ static struct brw_reg deref( struct brw_vs_compile *c, brw_pop_insn_state(p); } + /* NOTE: tmp not released */ return vec8(tmp); } +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const GLuint file = inst->SrcReg[argIndex].File; + const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + if (c->use_const_buffer) { + return get_constant(c, inst, argIndex); + } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } + else { + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; + } + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -750,30 +908,31 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); } -/* Will return mangled results for SWZ op. The emit_swz() function +/** + * Return the brw reg for the given instruction's src argument. + * Will return mangled results for SWZ op. The emit_swz() function * ignores this result and recalculates taking extended swizzles into * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register *src ) + const struct prog_instruction *inst, + GLuint argIndex ) { + const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_reg reg; if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else - reg = get_reg(c, src->File, src->Index); + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -784,16 +943,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, /* Note this is ok for non-swizzle instructions: */ - reg.negate = src->NegateBase ? 1 : 0; + reg.negate = src->Negate ? 1 : 0; return reg; } +/** + * Get brw register for the given program dest register. + */ static struct brw_reg get_dst( struct brw_vs_compile *c, struct prog_dst_register dst ) { - struct brw_reg reg = get_reg(c, dst.File, dst.Index); + struct brw_reg reg; + + switch (dst.File) { + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + assert(c->regs[dst.File][dst.Index].nr != 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_ADDRESS: + assert(dst.Index == 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_UNDEFINED: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } reg.dw1.bits.writemask = dst.WriteMask; @@ -803,14 +984,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, - struct prog_src_register src ) + const struct prog_instruction *inst) { + const GLuint argIndex = 0; + const struct prog_src_register src = inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; GLuint zeros_mask = 0; GLuint ones_mask = 0; GLuint src_mask = 0; GLubyte src_swz[4]; - GLboolean need_tmp = (src.NegateBase && + GLboolean need_tmp = (src.Negate && dst.file != BRW_GENERAL_REGISTER_FILE); struct brw_reg tmp = dst; GLuint i; @@ -844,10 +1027,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_reg(c, src.File, src.Index); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], @@ -862,8 +1042,8 @@ static void emit_swz( struct brw_vs_compile *c, if (ones_mask) brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - if (src.NegateBase) - brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); if (need_tmp) { brw_MOV(p, dst, tmp); @@ -1043,6 +1223,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct brw_reg args[3], dst; GLuint i; +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + /* Get argument regs. SWZ is special and does this itself. */ if (inst->Opcode != OPCODE_SWZ) @@ -1053,7 +1238,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) args[i] = c->output_regs[index].reg; else - args[i] = get_arg(c, src); + args[i] = get_arg(c, inst, i); } /* Get dest regs. Note that it is possible for a reg to be both @@ -1181,7 +1366,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ - emit_swz(c, dst, inst->SrcReg[0] ); + emit_swz(c, dst, inst); break; case OPCODE_TRUNC: /* round toward zero */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 1a63766ea1..3d29538843 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -44,6 +44,8 @@ struct brw_vs_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; }; static void @@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) key->nr_urb_entries = brw->urb.nr_vs_entries; key->urb_size = brw->urb.vsize; + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence @@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; + vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; @@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 960bbb311e..ba03afd6c1 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); + dri_bo_release(&brw->vs.bind_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index d65b1332c6..72fc21d2eb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -742,7 +742,7 @@ static void emit_tex( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, (inst->tex_shadow ? @@ -791,7 +791,7 @@ static void emit_txb( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index a7f5f1b9a2..1798d842c7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -80,9 +80,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx) reg.Index = idx; reg.Swizzle = SWIZZLE_NOOP; reg.RelAddr = 0; - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; reg.Abs = 0; - reg.NegateAbs = 0; return reg; } @@ -569,7 +568,7 @@ static void precalc_dst( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase &= ~NEGATE_X; + swz->SrcReg[0].Negate &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -604,7 +603,7 @@ static void precalc_lit( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase = 0; + swz->SrcReg[0].Negate = NEGATE_NONE; } if (dst.WriteMask & WRITEMASK_YZ) { @@ -651,7 +650,7 @@ static void precalc_tex( struct brw_wm_compile *c, src0, src_undef(), src_undef()); - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ @@ -1050,14 +1049,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_ABS: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; case OPCODE_SUB: out = emit_insn(c, inst); out->Opcode = OPCODE_ADD; - out->SrcReg[1].NegateBase ^= 0xf; + out->SrcReg[1].Negate ^= NEGATE_XYZW; break; case OPCODE_SCS: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 575cd45d57..22e17622c6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -56,7 +56,7 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, * Examine instruction's write mask to find index of first component * enabled for writing. */ -static int get_scalar_dst_index(struct prog_instruction *inst) +static int get_scalar_dst_index(const struct prog_instruction *inst) { int i; for (i = 0; i < 4; i++) @@ -254,14 +254,15 @@ static void prealloc_reg(struct brw_wm_compile *c) * XXX alloc these on demand! */ if (c->use_const_buffer) { - c->current_const[0].reg = alloc_tmp(c); - c->current_const[1].reg = alloc_tmp(c); - c->current_const[2].reg = alloc_tmp(c); + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = alloc_tmp(c); + } } - /* +#if 0 printf("USE CONST BUFFER? %d\n", c->use_const_buffer); printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); - */ +#endif } @@ -283,14 +284,12 @@ static void fetch_constants(struct brw_wm_compile *c, src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM) { if (c->current_const[i].index != src->Index) { - c->current_const[i].index = src->Index; - /*c->current_const[i].reg = alloc_tmp(c);*/ - /* +#if 0 printf(" fetch const[%d] for arg %d into reg %d\n", src->Index, i, c->current_const[i].reg.nr); - */ +#endif /* need to fetch the constant now */ brw_dp_READ_4(p, @@ -298,28 +297,8 @@ static void fetch_constants(struct brw_wm_compile *c, 1, /* msg_reg */ src->RelAddr, /* relative indexing? */ 16 * src->Index, /* byte offset */ - BRW_WM_MAX_SURF - 1 /* binding table index */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ ); - -#if 0 - /* dependency stall */ - { - int response_length = 1; - int mark = mark_tmps( c ); - struct brw_reg src = c->current_const[i].reg; - struct brw_reg tmp = alloc_tmp(c); - - /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } - */ - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, tmp, src); - brw_MOV(p, src, tmp); - brw_pop_insn_state(p); - - release_tmps( c, mark ); - } -#endif } } } @@ -361,18 +340,18 @@ get_src_reg_const(struct brw_wm_compile *c, const_reg = stride(const_reg, 0, 1, 0); const_reg.subnr = component * 4; - if (src->NegateBase) + if (src->Negate & (1 << component)) const_reg = negate(const_reg); if (src->Abs) const_reg = brw_abs(const_reg); - /* - printf(" form const[%d] for arg %d, comp %d, reg %d\n", +#if 0 + printf(" form const[%d].%d for arg %d, reg %d\n", c->current_const[srcRegIndex].index, - srcRegIndex, component, + srcRegIndex, const_reg.nr); - */ +#endif return const_reg; } @@ -398,7 +377,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, else { /* other type of source register */ return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + src->Negate, src->Abs); } } @@ -423,11 +402,13 @@ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; - if (src->NegateBase) + if (src->Negate & (1 << channel)) value = -value; if (src->Abs) value = FABSF(value); - /*printf(" form imm reg %f\n", value);*/ +#if 0 + printf(" form immed value %f for chan %d\n", value, channel); +#endif return brw_imm_f(value); } else { @@ -501,7 +482,7 @@ static void invoke_subroutine( struct brw_wm_compile *c, } static void emit_abs( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -518,7 +499,7 @@ static void emit_abs( struct brw_wm_compile *c, } static void emit_trunc( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -536,7 +517,7 @@ static void emit_trunc( struct brw_wm_compile *c, } static void emit_mov( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -546,7 +527,9 @@ static void emit_mov( struct brw_wm_compile *c, if (mask & (1<<i)) { struct brw_reg src, dst; dst = get_dst_reg(c, inst, i); - src = get_src_reg_imm(c, inst, 0, i); + /* XXX some moves from immediate value don't work reliably!!! */ + /*src = get_src_reg_imm(c, inst, 0, i);*/ + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -554,7 +537,7 @@ static void emit_mov( struct brw_wm_compile *c, } static void emit_pixel_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -584,7 +567,7 @@ static void emit_pixel_xy(struct brw_wm_compile *c, } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; @@ -644,7 +627,7 @@ static void fire_fb_write( struct brw_wm_compile *c, } static void emit_fb_write(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; @@ -713,7 +696,7 @@ static void emit_fb_write(struct brw_wm_compile *c, } static void emit_pixel_w( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -743,7 +726,7 @@ static void emit_pixel_w( struct brw_wm_compile *c, } static void emit_linterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -772,7 +755,7 @@ static void emit_linterp(struct brw_wm_compile *c, } static void emit_cinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -798,7 +781,7 @@ static void emit_cinterp(struct brw_wm_compile *c, } static void emit_pinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -832,7 +815,7 @@ static void emit_pinterp(struct brw_wm_compile *c, /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ static void emit_frontfacing(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); @@ -861,7 +844,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, } static void emit_xpd(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -886,7 +869,7 @@ static void emit_xpd(struct brw_wm_compile *c, } static void emit_dp3(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; @@ -905,7 +888,7 @@ static void emit_dp3(struct brw_wm_compile *c, } static void emit_dp4(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; @@ -924,7 +907,7 @@ static void emit_dp4(struct brw_wm_compile *c, } static void emit_dph(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; @@ -948,7 +931,7 @@ static void emit_dph(struct brw_wm_compile *c, * register's X, Y, Z and W channels (subject to writemasking of course). */ static void emit_math1(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint func) + const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst, tmp; @@ -985,43 +968,43 @@ static void emit_math1(struct brw_wm_compile *c, } static void emit_rcp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1040,7 +1023,7 @@ static void emit_add(struct brw_wm_compile *c, } static void emit_arl(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, addr_reg; @@ -1053,7 +1036,7 @@ static void emit_arl(struct brw_wm_compile *c, } static void emit_sub(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1072,7 +1055,7 @@ static void emit_sub(struct brw_wm_compile *c, } static void emit_mul(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1091,7 +1074,7 @@ static void emit_mul(struct brw_wm_compile *c, } static void emit_frc(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1110,7 +1093,7 @@ static void emit_frc(struct brw_wm_compile *c, } static void emit_flr(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1177,7 +1160,7 @@ static void emit_min_max(struct brw_wm_compile *c, } static void emit_pow(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; @@ -1199,7 +1182,7 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1252,7 +1235,7 @@ static void emit_kil(struct brw_wm_compile *c) } static void emit_mad(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1275,7 +1258,7 @@ static void emit_mad(struct brw_wm_compile *c, } static void emit_sop(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint cond) + const struct prog_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1299,43 +1282,43 @@ static void emit_sop(struct brw_wm_compile *c, } static void emit_slt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_L); } static void emit_sle(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_LE); } static void emit_sgt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_G); } static void emit_sge(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_GE); } static void emit_seq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_EQ); } static void emit_sne(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } static void emit_ddx(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1362,7 +1345,7 @@ static void emit_ddx(struct brw_wm_compile *c, } static void emit_ddy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1505,7 +1488,7 @@ static void noise1_sub( struct brw_wm_compile *c ) { } static void emit_noise1( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src, param, dst; @@ -1675,7 +1658,7 @@ static void noise2_sub( struct brw_wm_compile *c ) { } static void emit_noise2( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, param0, param1, dst; @@ -1978,7 +1961,7 @@ static void noise3_sub( struct brw_wm_compile *c ) { } static void emit_noise3( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, param0, param1, param2, dst; @@ -2401,7 +2384,7 @@ static void noise4_sub( struct brw_wm_compile *c ) } static void emit_noise4( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; @@ -2443,7 +2426,7 @@ static void emit_noise4( struct brw_wm_compile *c, } static void emit_wpos_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -2479,7 +2462,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; @@ -2517,7 +2500,7 @@ static void emit_txb(struct brw_wm_compile *c, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ @@ -2528,7 +2511,7 @@ static void emit_txb(struct brw_wm_compile *c, static void emit_tex(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; @@ -2581,7 +2564,7 @@ static void emit_tex(struct brw_wm_compile *c, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ @@ -2618,22 +2601,22 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { - struct prog_instruction *inst = &c->prog_instructions[i]; + const struct prog_instruction *inst = &c->prog_instructions[i]; - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - - /* +#if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); - */ +#endif /* fetch any constants that this instruction needs */ if (c->use_const_buffer) fetch_constants(c, inst); + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); @@ -2819,6 +2802,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: + /* XXX may need to invalidate the current_constant regs */ loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 2debd0678a..92142764f5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -322,7 +322,7 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, newref->value->lastuse = newref; } - if (src.NegateBase & (1<<i)) + if (src.Negate & (1 << i)) newref->hw_reg.negate ^= 1; if (src.Abs) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 58fa6aaf8f..67b41173fb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_SURFACES), + BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index e7d55d5dbd..71840d1e4e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -38,7 +38,7 @@ #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" #include "intel_tex.h" - +#include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" @@ -176,7 +176,11 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, } } -struct brw_wm_surface_key { + +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { GLenum target, depthmode; dri_bo *bo; GLint format, internal_format; @@ -187,6 +191,7 @@ struct brw_wm_surface_key { GLuint offset; }; + static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -208,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) static dri_bo * brw_create_texture_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { struct brw_surface_state surf; dri_bo *bo; @@ -287,8 +292,8 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_wm_surface_key key; - const GLuint j = MAX_DRAW_BUFFERS + unit; + struct brw_surface_key key; + const GLuint surf = SURF_INDEX_TEXTURE(unit); memset(&key, 0, sizeof(key)); @@ -315,25 +320,25 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); - if (brw->wm.surf_bo[j] == NULL) { - brw->wm.surf_bo[j] = brw_create_texture_surface(brw, &key); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } } /** - * Create the constant buffer surface. Fragment shader constanst will be + * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ static dri_bo * brw_create_constant_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { const GLint w = key->width - 1; struct brw_surface_state surf; @@ -345,8 +350,6 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - /* This is ok for all textures with channel width 8bit or less: - */ assert(key->bo); if (key->bo) surf.ss1.base_addr = key->bo->offset; /* reloc */ @@ -356,8 +359,8 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf.ss3.pitch = (key->pitch * key->cpp) - 1; - brw_set_surface_tiling(&surf, key->tiling); + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, key, sizeof(*key), @@ -379,26 +382,100 @@ brw_create_constant_surface( struct brw_context *brw, /** - * Update the constant buffer surface. + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. */ -static void -brw_update_constant_surface( GLcontext *ctx, - const struct brw_fragment_program *fp ) +static dri_bo * +brw_update_wm_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) { struct brw_context *brw = brw_context(ctx); - struct brw_wm_surface_key key; - const GLuint j = BRW_WM_MAX_SURF - 1; - const GLuint numParams = fp->program.Base.Parameters->NumParameters; + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); + } memset(&key, 0, sizeof(key)); key.format = MESA_FORMAT_RGBA_FLOAT32; key.internal_format = GL_RGBA; - key.bo = fp->const_buffer; + key.bo = const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static dri_bo * +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + assert(surf == 0); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = const_buffer; key.depthmode = GL_NONE; - key.pitch = numParams; - key.width = numParams; + key.pitch = params->NumParameters; + key.width = params->NumParameters; key.height = 1; key.depth = 1; key.cpp = 16; @@ -409,14 +486,16 @@ brw_update_constant_surface( GLcontext *ctx, key.width, key.height, key.depth, key.cpp, key.pitch); */ - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[j] == NULL) { - brw->wm.surf_bo[j] = brw_create_constant_surface(brw, &key); + dri_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); } + + return const_buffer; } @@ -426,15 +505,18 @@ brw_update_constant_surface( GLcontext *ctx, * usable for further buffers when doing ARB_draw_buffer support. */ static void -brw_update_region_surface(struct brw_context *brw, struct intel_region *region, - unsigned int unit, GLboolean cached) +brw_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned int unit, GLboolean cached) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? irb->region : NULL; struct { unsigned int surface_type; unsigned int surface_format; - unsigned int width, height, cpp; + unsigned int width, height, pitch, cpp; GLubyte color_mask[4]; GLboolean color_blend; uint32_t tiling; @@ -446,13 +528,27 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - if (region->cpp == 4) + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else + break; + case MESA_FORMAT_RGB565: key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + case MESA_FORMAT_ARGB1555: + key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + break; + case MESA_FORMAT_ARGB4444: + key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } key.tiling = region->tiling; - key.width = region->pitch; /* XXX: not really! */ + key.width = region->width; key.height = region->height; + key.pitch = region->pitch; key.cpp = region->cpp; } else { key.surface_type = BRW_SURFACE_NULL; @@ -488,7 +584,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss2.width = key.width - 1; surf.ss2.height = key.height - 1; brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.width * key.cpp) - 1; + surf.ss3.pitch = (key.pitch * key.cpp) - 1; /* _NEW_COLOR */ surf.ss0.color_blend = key.color_blend; @@ -499,7 +595,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, /* Key size will never match key size for textures, so we're safe. */ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), + &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), NULL, NULL); @@ -528,6 +624,8 @@ brw_wm_get_binding_table(struct brw_context *brw) { dri_bo *bind_bo; + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, @@ -574,69 +672,159 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; + /* _NEW_BUFFERS */ /* Update surfaces for drawing buffers */ - if (brw->state.nr_color_regions > 1) { - for (i = 0; i < brw->state.nr_color_regions; i++) { - brw_update_region_surface(brw, brw->state.color_regions[i], i, - GL_FALSE); + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i, + GL_FALSE); } } else { - brw_update_region_surface(brw, brw->state.color_regions[0], 0, GL_TRUE); + brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + /* Update surface / buffer for fragment shader constant buffer */ + { + const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + fp->const_buffer = + brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, + fp->program.Base.Parameters); + + brw->wm.nr_surfaces = surf + 1; + } + /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - const GLuint j = MAX_DRAW_BUFFERS + i; + const GLuint surf = SURF_INDEX_TEXTURE(i); /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ if (texUnit->_ReallyEnabled) { if (texUnit->_Current == intel->frame_buffer_texobj) { /* render to texture */ - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[j]); - brw->wm.nr_surfaces = j + 1; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[surf]); + brw->wm.nr_surfaces = surf + 1; } else { /* regular texture */ brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = j + 1; + brw->wm.nr_surfaces = surf + 1; } } else { - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = NULL; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; } } - /* Update surface for fragment shader constant buffer */ - { - const GLuint j = BRW_WM_MAX_SURF - 1; - const struct brw_fragment_program *fp = - brw_fragment_program_const(brw->fragment_program); + dri_bo_unreference(brw->wm.bind_bo); + brw->wm.bind_bo = brw_wm_get_binding_table(brw); + + if (brw->wm.nr_surfaces != old_nr_surfaces) + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); + + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < brw->vs.nr_surfaces; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + data, data_size, + NULL, NULL); - brw_update_constant_surface(ctx, fp); - brw->wm.nr_surfaces = j + 1; + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + dri_bo_emit_reloc(bind_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->vs.surf_bo[i]); + } + } + + free(data); } + return bind_bo; +} - dri_bo_unreference(brw->wm.bind_bo); - brw->wm.bind_bo = brw_wm_get_binding_table(brw); - if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_SURFACES; +/** + * Vertex shader surfaces. Just constant buffer for now. Could add vertex + * shader textures in the future. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + + /* Update surface / buffer for vertex shader constant buffer */ + { + const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + vp->const_buffer = + brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, + vp->program.Base.Parameters); + + brw->vs.nr_surfaces = 1; + } + + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + + if (1) + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; +} + + +static void +prepare_surfaces(struct brw_context *brw) +{ + prepare_wm_surfaces(brw); + prepare_vs_surfaces(brw); } const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, + .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .prepare = prepare_wm_surfaces, + .prepare = prepare_surfaces, }; |