diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 17 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_curbe.c | 31 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 133 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 122 |
5 files changed, 288 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a020b621d6..01e07c967f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -167,6 +167,9 @@ struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ + + /** Program constant buffer/surface */ + dri_bo *const_buffer; }; @@ -238,8 +241,16 @@ struct brw_vs_ouput_sizes { }; +/** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS + +/** + * Size of our surface binding table. + * This contains pointers to the drawing surfaces and current texture + * objects and shader constant buffer (+1). + */ +#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) + enum brw_cache_id { BRW_CC_VP, @@ -513,8 +524,8 @@ struct brw_context /* BRW_NEW_CURBE_OFFSETS: */ struct { - GLuint wm_start; - GLuint wm_size; + GLuint wm_start; /**< pos of first wm const in CURBE buffer */ + GLuint wm_size; /**< number of float[4] consts, multiple of 16 */ GLuint clip_start; GLuint clip_size; GLuint vs_start; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 545dedd34b..39a8610952 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -38,6 +38,7 @@ #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" +#include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" @@ -251,6 +252,7 @@ static void prepare_constant_buffer(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; buf[offset + i * 4 + 0] = value[0]; @@ -330,11 +332,40 @@ static void prepare_constant_buffer(struct brw_context *brw) } +/** + * Vertex/fragment shader constants are stored in a pseudo 1D texture. + * This function updates the constants in that buffer. + */ +static void +update_texture_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + assert(fp->const_buffer); + assert(fp->const_buffer->size >= size); + + /* copy constants into the buffer */ + if (size > 0) { + GLubyte *map; + dri_bo_map(fp->const_buffer, GL_TRUE); + map = fp->const_buffer->virtual; + memcpy(map, params->ParameterValues, size); + dri_bo_unmap(fp->const_buffer); + } +} + + static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; + update_texture_constant_buffer(brw); + BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index d90bd82038..457bc2fc7f 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -111,6 +111,8 @@ static void brwProgramStringNotify( GLcontext *ctx, struct gl_program *prog ) { struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; struct brw_fragment_program *newFP = brw_fragment_program(fprog); @@ -126,6 +128,24 @@ static void brwProgramStringNotify( GLcontext *ctx, brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; newFP->isGLSL = brw_wm_is_glsl(fprog); + + /* alloc constant buffer/surface */ + { + const struct gl_program_parameter_list *params = prog->Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* free old const buffer if too small */ + if (newFP->const_buffer && newFP->const_buffer->size < size) { + dri_bo_unreference(newFP->const_buffer); + newFP->const_buffer = NULL; + } + + if (!newFP->const_buffer) { + newFP->const_buffer = drm_intel_bo_alloc(intel->bufmgr, + "fp_const_buffer", + size, 64); + } + } } else if (target == GL_VERTEX_PROGRAM_ARB) { struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 67ba59c325..28ef84e4aa 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -192,28 +192,41 @@ static void prealloc_reg(struct brw_wm_compile *c) /* constants */ { const int nr_params = c->fp->program.Base.Parameters->NumParameters; - const struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - - /* number of float constants */ - c->prog_data.nr_params = 4 * nr_params; - - /* loop over program constants (float[4]) */ - for (i = 0; i < nr_params; i++) { - /* loop over XYZW channels */ - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); - /* Save pointer to parameter/constant value. - * Constants will be copied in prepare_constant_buffer() - */ - c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - /* number of constant regs used (each reg is float[8]) */ - c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + + if (1 /* XXX threshold: nr_params <= 8 */) { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + c->reg_index += c->nr_creg; + } + else { + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + + /* When there's a lot of FP constanst we'll store them in a + * texture-like buffer instead of using the CURBE buffer. + * This means we won't use GRF registers for constants and we'll + * have to fetch constants with a dataport read. + */ + } } /* fragment shader inputs */ @@ -892,6 +905,19 @@ static void emit_add(struct brw_wm_compile *c, brw_set_saturate(p, 0); } +static void emit_arl(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, &inst->SrcReg[0], 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); + brw_set_saturate(p, 0); +} + static void emit_sub(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -2331,9 +2357,10 @@ static void emit_txb(struct brw_wm_compile *c, struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) @@ -2372,13 +2399,13 @@ static void emit_txb(struct brw_wm_compile *c, 0); /* eot */ } + static void emit_tex(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint msg_len; GLuint i, nr; GLuint emit; @@ -2419,7 +2446,7 @@ static void emit_tex(struct brw_wm_compile *c, } if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bais */ + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } @@ -2439,6 +2466,49 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, dst[3], brw_imm_f(1.0)); } + +static void emit_get_constant(struct brw_context *brw, + struct brw_wm_compile *c, + struct prog_instruction *inst, + GLuint constIndex) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4]; + GLuint i; + const int mark = mark_tmps( c ); + struct brw_reg writeback_reg[4]; + + /* XXX only need 1 temp reg??? */ + for (i = 0; i < 4; i++) { + writeback_reg[i] = alloc_tmp(c); + } + + for (i = 0; i < 4; i++) { + dst[i] = get_dst_reg(c, inst, i); + } + + /* Get float[4] vector from constant buffer */ + brw_dp_READ_4(p, + writeback_reg[0], /* first writeback dest */ + 1, /* msg_reg */ + GL_FALSE, /* rel addr? */ + 16 * constIndex, /* byte offset */ + BRW_WM_MAX_SURF - 1 /* surface, binding table index */ + ); + + /* Extract the four channel values, smear across dest registers */ + for (i = 0; i < 4; i++) { + /* extract 1 float from the writeback reg */ + struct brw_reg new_src = stride(writeback_reg[0], 0, 1, 0); + new_src.subnr = i * 4; + /* and smear it into the dest register */ + brw_MOV(p, dst[i], new_src); + } + + release_tmps( c, mark ); +} + + /** * Resolve subroutine calls after code emit is done. */ @@ -2504,6 +2574,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_ADD: emit_add(c, inst); break; + case OPCODE_ARL: + emit_arl(c, inst); + break; case OPCODE_SUB: emit_sub(c, inst); break; @@ -2520,7 +2593,17 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: +#if 0 + /* test hook for new constant buffer code */ + if (inst->SrcReg[0].File == PROGRAM_UNIFORM) { + emit_get_constant(brw, c, inst, inst->SrcReg[0].Index); + } + else { + emit_mov(c, inst); + } +#else emit_mov(c, inst); +#endif break; case OPCODE_DP3: emit_dp3(c, inst); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 1fc537ca20..e7d55d5dbd 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -33,6 +33,7 @@ #include "main/mtypes.h" #include "main/texformat.h" #include "main/texstore.h" +#include "shader/prog_parameter.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" @@ -287,6 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; struct brw_wm_surface_key key; + const GLuint j = MAX_DRAW_BUFFERS + unit; memset(&key, 0, sizeof(key)); @@ -313,16 +315,111 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); + dri_bo_unreference(brw->wm.surf_bo[j]); + brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[j] == NULL) { + brw->wm.surf_bo[j] = brw_create_texture_surface(brw, &key); } } + + +/** + * Create the constant buffer surface. Fragment shader constanst will be + * read from this buffer with Data Port Read instructions/messages. + */ +static dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_wm_surface_key *key ) +{ + const GLint w = key->width - 1; + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + /* This is ok for all textures with channel width 8bit or less: + */ + assert(key->bo); + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; + brw_set_surface_tiling(&surf, key->tiling); + + bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + + return bo; +} + + +/** + * Update the constant buffer surface. + */ +static void +brw_update_constant_surface( GLcontext *ctx, + const struct brw_fragment_program *fp ) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_wm_surface_key key; + const GLuint j = BRW_WM_MAX_SURF - 1; + const GLuint numParams = fp->program.Base.Parameters->NumParameters; + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = fp->const_buffer; + + key.depthmode = GL_NONE; + key.pitch = numParams; + key.width = numParams; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[j]); + brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[j] == NULL) { + brw->wm.surf_bo[j] = brw_create_constant_surface(brw, &key); + } +} + + /** * Sets up a surface state structure to point at the given region. * While it is only used for the front/back buffer currently, it should be @@ -514,6 +611,17 @@ static void prepare_wm_surfaces(struct brw_context *brw ) } } + /* Update surface for fragment shader constant buffer */ + { + const GLuint j = BRW_WM_MAX_SURF - 1; + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + + brw_update_constant_surface(ctx, fp); + brw->wm.nr_surfaces = j + 1; + } + + dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); |