diff options
Diffstat (limited to 'src/gallium/drivers')
242 files changed, 5626 insertions, 23253 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 312621fd53..19e3ab0844 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -231,7 +231,7 @@ static boolean is_register_src(struct codegen *gen, int channel, const struct tgsi_full_src_register *src) { - int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { @@ -271,23 +271,14 @@ get_src_reg(struct codegen *gen, const struct tgsi_full_src_register *src) { int reg = -1; - int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); boolean reg_is_itemp = FALSE; uint sign_op; assert(swizzle >= TGSI_SWIZZLE_X); - assert(swizzle <= TGSI_EXTSWIZZLE_ONE); + assert(swizzle <= TGSI_SWIZZLE_W); - if (swizzle == TGSI_EXTSWIZZLE_ONE) { - /* Load const one float and early out */ - reg = get_const_one_reg(gen); - } - else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { - /* Load const zero float and early out */ - reg = get_itemp(gen); - spe_xor(gen->f, reg, reg, reg); - } - else { + { int index = src->SrcRegister.Index; assert(swizzle < 4); @@ -674,7 +665,7 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) * Emit linear interpolate. See emit_ADD for comments. */ static boolean -emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; @@ -1758,7 +1749,6 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_ARL: return emit_ARL(gen, inst); case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: return emit_MOV(gen, inst); case TGSI_OPCODE_ADD: case TGSI_OPCODE_SUB: @@ -1766,7 +1756,7 @@ emit_instruction(struct codegen *gen, return emit_binop(gen, inst); case TGSI_OPCODE_MAD: return emit_MAD(gen, inst); - case TGSI_OPCODE_LERP: + case TGSI_OPCODE_LRP: return emit_LRP(gen, inst); case TGSI_OPCODE_DP3: return emit_DP3(gen, inst); diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 6a63a0e6ce..ae4c61efb3 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -389,22 +389,14 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) const uint texWidth = pt->width[level]; const uint texHeight = pt->height[level]; const uint stride = ct->stride[level]; - unsigned flags = 0x0; unsigned size; assert(transfer->texture); - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - if (!ct->mapped) { /* map now */ - ct->mapped = pipe_buffer_map(screen, ct->buffer, flags); + ct->mapped = pipe_buffer_map(screen, ct->buffer, + pipe_transfer_buffer_flags(transfer)); } /* @@ -417,8 +409,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) if (!ctrans->map) return NULL; /* out of memory */ - if (transfer->usage == PIPE_TRANSFER_READ || - transfer->usage == PIPE_TRANSFER_READ_WRITE) { + if (transfer->usage & PIPE_TRANSFER_READ) { /* need to untwiddle the texture to make a linear version */ const uint bpp = pf_get_size(ct->base.format); if (bpp == 4) { @@ -459,8 +450,7 @@ cell_transfer_unmap(struct pipe_screen *screen, PIPE_BUFFER_USAGE_CPU_READ); } - if (transfer->usage == PIPE_TRANSFER_WRITE || - transfer->usage == PIPE_TRANSFER_READ_WRITE) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { /* The user wrote new texture data into the mapped buffer. * We need to convert the new linear data into the twiddled/tiled format. */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 0eaae2e451..4c32b2d06d 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -346,10 +346,10 @@ fetch_src_file_channel( union spu_exec_channel *chan ) { switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch( file ) { case TGSI_FILE_CONSTANT: { unsigned i; @@ -413,14 +413,6 @@ fetch_src_file_channel( } break; - case TGSI_EXTSWIZZLE_ZERO: - *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; - break; - - case TGSI_EXTSWIZZLE_ONE: - *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; - break; - default: ASSERT( 0 ); } @@ -500,7 +492,7 @@ fetch_source( } } - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); fetch_src_file_channel( mach, reg->SrcRegister.File, @@ -610,10 +602,8 @@ exec_kil(struct spu_exec_machine *mach, uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ union spu_exec_channel r[1]; - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + /* This mask stores component bits that were already tested. */ + uniquemask = 0; for (chan_index = 0; chan_index < 4; chan_index++) { @@ -621,7 +611,7 @@ exec_kil(struct spu_exec_machine *mach, uint i; /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle ( + swizzle = tgsi_util_get_full_src_register_swizzle ( &inst->FullSrcRegisters[0], chan_index); @@ -909,7 +899,6 @@ exec_instruction( break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); STORE( &r[0], 0, chan_index ); @@ -1807,22 +1796,6 @@ exec_instruction( /* no-op */ break; - case TGSI_OPCODE_NOISE1: - ASSERT( 0 ); - break; - - case TGSI_OPCODE_NOISE2: - ASSERT( 0 ); - break; - - case TGSI_OPCODE_NOISE3: - ASSERT( 0 ); - break; - - case TGSI_OPCODE_NOISE4: - ASSERT( 0 ); - break; - case TGSI_OPCODE_NOP: break; diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index af25dd3718..c2c32b22d5 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -26,104 +26,17 @@ tgsi_util_get_src_register_swizzle( return 0; } -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->ExtSwizzleX; - case 1: - return reg->ExtSwizzleY; - case 2: - return reg->ExtSwizzleZ; - case 3: - return reg->ExtSwizzleW; - default: - ASSERT( 0 ); - } - return 0; -} unsigned -tgsi_util_get_full_src_register_extswizzle( +tgsi_util_get_full_src_register_swizzle( const struct tgsi_full_src_register *reg, unsigned component ) { - unsigned swizzle; - - /* - * First, calculate the extended swizzle for a given channel. This will give - * us either a channel index into the simple swizzle or a constant 1 or 0. - */ - swizzle = tgsi_util_get_src_register_extswizzle( - ®->SrcRegisterExtSwz, + return tgsi_util_get_src_register_swizzle( + reg->SrcRegister, component ); - - ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); - - /* - * Second, calculate the simple swizzle for the unswizzled channel index. - * Leave the constants intact, they are not affected by the simple swizzle. - */ - if( swizzle <= TGSI_SWIZZLE_W ) { - swizzle = tgsi_util_get_src_register_swizzle( - ®->SrcRegister, - component ); - } - - return swizzle; } -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->NegateX; - case 1: - return reg->NegateY; - case 2: - return reg->NegateZ; - case 3: - return reg->NegateW; - default: - ASSERT( 0 ); - } - return 0; -} - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ) -{ - switch( component ) { - case 0: - reg->NegateX = negate; - break; - case 1: - reg->NegateY = negate; - break; - case 2: - reg->NegateZ = negate; - break; - case 3: - reg->NegateW = negate; - break; - default: - ASSERT( 0 ); - } -} unsigned tgsi_util_get_full_src_register_sign_mode( @@ -148,9 +61,6 @@ tgsi_util_get_full_src_register_sign_mode( unsigned negate; negate = reg->SrcRegister.Negate; - if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { - negate = !negate; - } if( reg->SrcRegisterExtMod.Negate ) { negate = !negate; } diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915/Makefile index fb533c1796..e33c74d02f 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915/Makefile @@ -1,7 +1,7 @@ TOP = ../../../.. include $(TOP)/configs/current -LIBNAME = i915simple +LIBNAME = i915 C_SOURCES = \ i915_blit.c \ diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915/SConscript index 778c4ed0fd..5a1c47c88d 100644 --- a/src/gallium/drivers/i915simple/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -2,8 +2,8 @@ Import('*') env = env.Clone() -i915simple = env.ConvenienceLibrary( - target = 'i915simple', +i915 = env.ConvenienceLibrary( + target = 'i915', source = [ 'i915_blit.c', 'i915_buffer.c', @@ -27,4 +27,4 @@ i915simple = env.ConvenienceLibrary( 'i915_texture.c', ]) -Export('i915simple') +Export('i915') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h index b813784723..b813784723 100644 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915/i915_batch.h diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index 83dfc33528..83dfc33528 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915/i915_blit.h index 8ce3220cfd..8ce3220cfd 100644 --- a/src/gallium/drivers/i915simple/i915_blit.h +++ b/src/gallium/drivers/i915/i915_blit.h diff --git a/src/gallium/drivers/i915simple/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c index effeba1297..effeba1297 100644 --- a/src/gallium/drivers/i915simple/i915_buffer.c +++ b/src/gallium/drivers/i915/i915_buffer.c diff --git a/src/gallium/drivers/i915simple/i915_buffer.h b/src/gallium/drivers/i915/i915_buffer.h index 80fda7c62f..80fda7c62f 100644 --- a/src/gallium/drivers/i915simple/i915_buffer.h +++ b/src/gallium/drivers/i915/i915_buffer.h diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c index 90530f2826..90530f2826 100644 --- a/src/gallium/drivers/i915simple/i915_clear.c +++ b/src/gallium/drivers/i915/i915_clear.c diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915/i915_context.c index e745f3342d..e745f3342d 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 234b441ce6..234b441ce6 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index ce92d1af9a..e6640e587b 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -880,7 +880,7 @@ i915_dump_batchbuffer( struct intel_batchbuffer *batch ) return; } - debug_printf( "\n\nBATCH: (%d)\n", bytes / 4); + debug_printf( "\n\nBATCH: (%d)\n", (int)bytes / 4); while (!done && stream.offset < bytes) diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index dd9b86e17b..dd9b86e17b 100644 --- a/src/gallium/drivers/i915simple/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c index 9c5b117b6d..9c5b117b6d 100644 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ b/src/gallium/drivers/i915/i915_debug_fp.c diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 1582168eba..1582168eba 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index 2f0f99d046..2f0f99d046 100644 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c index b054ce41d3..b054ce41d3 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ b/src/gallium/drivers/i915/i915_fpc_emit.c diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 89504ced27..379d47e79a 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -127,7 +127,7 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) va_start( args, msg ); util_vsnprintf( buffer, sizeof(buffer), msg, args ); va_end( args ); - debug_printf(buffer); + debug_printf("%s", buffer); debug_printf("\n"); p->error = 1; @@ -214,37 +214,19 @@ src_vector(struct i915_fp_compile *p, return 0; } - if (source->SrcRegister.Extended) { - src = swizzle(src, - source->SrcRegisterExtSwz.ExtSwizzleX, - source->SrcRegisterExtSwz.ExtSwizzleY, - source->SrcRegisterExtSwz.ExtSwizzleZ, - source->SrcRegisterExtSwz.ExtSwizzleW); - } - else { - src = swizzle(src, - source->SrcRegister.SwizzleX, - source->SrcRegister.SwizzleY, - source->SrcRegister.SwizzleZ, - source->SrcRegister.SwizzleW); - } + src = swizzle(src, + source->SrcRegister.SwizzleX, + source->SrcRegister.SwizzleY, + source->SrcRegister.SwizzleZ, + source->SrcRegister.SwizzleW); /* There's both negate-all-components and per-component negation. * Try to handle both here. */ { - int nx = source->SrcRegisterExtSwz.NegateX; - int ny = source->SrcRegisterExtSwz.NegateY; - int nz = source->SrcRegisterExtSwz.NegateZ; - int nw = source->SrcRegisterExtSwz.NegateW; - if (source->SrcRegister.Negate) { - nx = !nx; - ny = !ny; - nz = !nz; - nw = !nw; - } - src = negate(src, nx, ny, nz, nw); + int n = source->SrcRegister.Negate; + src = negate(src, n, n, n, n); } /* no abs() or post-abs negation */ @@ -681,7 +663,6 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: emit_simple_arith(p, inst, A0_MOV, 1); break; diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c index d9a5c40ab9..d9a5c40ab9 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915/i915_prim_emit.c diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index b3a7774fd6..6b832140a8 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -52,6 +52,9 @@ #include "i915_state.h" +#undef VBUF_USE_FIFO +#undef VBUF_MAP_BUFFER + /** * Primitive renderer for i915. */ @@ -74,16 +77,25 @@ struct i915_vbuf_render { /* Stuff for the vbo */ struct intel_buffer *vbo; - size_t vbo_size; + size_t vbo_size; /**< current size of allocated buffer */ + size_t vbo_alloc_size; /**< minimum buffer size to allocate */ size_t vbo_offset; void *vbo_ptr; size_t vbo_max_used; - /* stuff for the pool */ +#ifndef VBUF_MAP_BUFFER + size_t map_used_start; + size_t map_used_end; + size_t map_size; +#endif + +#ifdef VBUF_USE_FIFO + /* Stuff for the pool */ struct util_fifo *pool_fifo; unsigned pool_used; unsigned pool_buffer_size; boolean pool_not_used; +#endif }; @@ -132,18 +144,31 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) struct intel_winsys *iws = i915->iws; if (i915_render->vbo) { +#ifdef VBUF_USE_FIFO if (i915_render->pool_not_used) iws->buffer_destroy(iws, i915_render->vbo); else u_fifo_add(i915_render->pool_fifo, i915_render->vbo); i915_render->vbo = NULL; +#else + iws->buffer_destroy(iws, i915_render->vbo); +#endif } i915->vbo_flushed = 0; - i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size); + i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); i915_render->vbo_offset = 0; +#ifndef VBUF_MAP_BUFFER + if (i915_render->vbo_size > i915_render->map_size) { + i915_render->map_size = i915_render->vbo_size; + FREE(i915_render->vbo_ptr); + i915_render->vbo_ptr = MALLOC(i915_render->map_size); + } +#endif + +#ifdef VBUF_USE_FIFO if (i915_render->vbo_size != i915_render->pool_buffer_size) { i915_render->pool_not_used = TRUE; i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, @@ -158,6 +183,10 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) } u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); } +#else + i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, + 64, INTEL_NEW_VERTEX); +#endif } static boolean @@ -173,10 +202,11 @@ i915_vbuf_render_allocate_vertices(struct vbuf_render *render, assert(!i915->vbo); if (!i915_vbuf_render_reserve(i915_render, size)) { - +#ifdef VBUF_USE_FIFO + /* incase we flushed reset the number of pool buffers used */ if (i915->vbo_flushed) i915_render->pool_used = 0; - +#endif i915_vbuf_render_new_buf(i915_render, size); } @@ -198,11 +228,15 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) struct intel_winsys *iws = i915->iws; if (i915->vbo_flushed) - debug_printf("%s bad vbo flush occured stalling on hw\n"); + debug_printf("%s bad vbo flush occured stalling on hw\n", __FUNCTION__); +#ifdef VBUF_MAP_BUFFER i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); - - return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; + return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_offset; +#else + (void)iws; + return (unsigned char *)i915_render->vbo_ptr; +#endif } static void @@ -215,7 +249,17 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, struct intel_winsys *iws = i915->iws; i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); +#ifdef VBUF_MAP_BUFFER iws->buffer_unmap(iws, i915_render->vbo); +#else + i915_render->map_used_start = i915_render->vertex_size * min_index; + i915_render->map_used_end = i915_render->vertex_size * (max_index + 1); + iws->buffer_write(iws, i915_render->vbo, + i915_render->map_used_start + i915_render->vbo_offset, + i915_render->map_used_end - i915_render->map_used_start, + (unsigned char *)i915_render->vbo_ptr + i915_render->map_used_start); + +#endif } static boolean @@ -389,14 +433,43 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, uint nr) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; if (i915_render->fallback) { draw_arrays_fallback(render, start, nr); return; } - /* JB: TODO submit direct cmds */ - draw_arrays_fallback(render, start, nr); + if (i915->dirty) + i915_update_derived(i915); + + if (i915->hardware_dirty) + i915_emit_hardware_state(i915); + + if (!BEGIN_BATCH(2, 0)) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived(i915); + i915_emit_hardware_state(i915); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH(2, 0)) { + assert(0); + goto out; + } + } + + OUT_BATCH(_3DPRIMITIVE | + PRIM_INDIRECT | + PRIM_INDIRECT_SEQUENTIAL | + i915_render->hwprim | + nr); + OUT_BATCH(start); /* Beginning vertex index */ + +out: + return; } /** @@ -552,9 +625,9 @@ i915_vbuf_render_create(struct i915_context *i915) int i; i915_render->i915 = i915; - - i915_render->base.max_vertex_buffer_bytes = 128*1024; - + + i915_render->base.max_vertex_buffer_bytes = 16*4096; + /* NOTE: it must be such that state and vertices indices fit in a single * batch buffer. */ @@ -570,23 +643,29 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; +#ifndef VBUF_MAP_BUFFER + i915_render->map_size = 0; + i915_render->map_used_start = 0; + i915_render->map_used_end = 0; +#endif i915_render->vbo = NULL; + i915_render->vbo_ptr = NULL; i915_render->vbo_size = 0; i915_render->vbo_offset = 0; + i915_render->vbo_alloc_size = i915_render->base.max_vertex_buffer_bytes * 4; +#ifdef VBUF_USE_POOL i915_render->pool_used = FALSE; - i915_render->pool_buffer_size = 128 * 4096; + i915_render->pool_buffer_size = i915_render->vbo_alloc_size; i915_render->pool_fifo = u_fifo_create(6); for (i = 0; i < 6; i++) u_fifo_add(i915_render->pool_fifo, iws->buffer_create(iws, i915_render->pool_buffer_size, 64, INTEL_NEW_VERTEX)); - -#if 0 - /* TODO JB: is this realy needed? */ - i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); - iws->buffer_unmap(iws, i915_render->vbo); +#else + (void)i; + (void)iws; #endif return &i915_render->base; diff --git a/src/gallium/drivers/i915simple/i915_reg.h b/src/gallium/drivers/i915/i915_reg.h index 04620fec68..04620fec68 100644 --- a/src/gallium/drivers/i915simple/i915_reg.h +++ b/src/gallium/drivers/i915/i915_reg.h diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index c66558c320..c66558c320 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h index 5126485caa..5126485caa 100644 --- a/src/gallium/drivers/i915simple/i915_screen.h +++ b/src/gallium/drivers/i915/i915_screen.h diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 7d48e6e84d..7d48e6e84d 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c diff --git a/src/gallium/drivers/i915simple/i915_state.h b/src/gallium/drivers/i915/i915_state.h index 86c6b0027d..86c6b0027d 100644 --- a/src/gallium/drivers/i915simple/i915_state.h +++ b/src/gallium/drivers/i915/i915_state.h diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 178d4e8781..178d4e8781 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index 86126a5a15..86126a5a15 100644 --- a/src/gallium/drivers/i915simple/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index a3d4e3b04e..a3d4e3b04e 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index 8c16bb4e27..8c16bb4e27 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915/i915_state_inlines.h index 378de8f9c4..378de8f9c4 100644 --- a/src/gallium/drivers/i915simple/i915_state_inlines.h +++ b/src/gallium/drivers/i915/i915_state_inlines.h diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index c5e9084d12..c5e9084d12 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c index ab8331f3e6..ab8331f3e6 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915/i915_surface.c diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index 6a6c654271..286c9ace8e 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -165,7 +165,7 @@ i915_scanout_layout(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; if (pt->last_level > 0 || pt->block.size != 4) - return 0; + return FALSE; i915_miptree_set_level_info(tex, 0, 1, tex->base.width[0], @@ -191,6 +191,38 @@ i915_scanout_layout(struct i915_texture *tex) return TRUE; } +/** + * Special case to deal with shared textures. + */ +static boolean +i915_display_target_layout(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + + if (pt->last_level > 0 || pt->block.size != 4) + return FALSE; + + /* fallback to normal textures for small textures */ + if (tex->base.width[0] < 240) + return FALSE; + + i915_miptree_set_level_info(tex, 0, 1, + tex->base.width[0], + tex->base.height[0], + 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + tex->hw_tiled = INTEL_TILE_X; + + debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + tex->base.width[0], tex->base.height[0], pt->block.size, + tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); + + return TRUE; +} + static void i915_miptree_layout_2d(struct i915_texture *tex) { @@ -201,6 +233,16 @@ i915_miptree_layout_2d(struct i915_texture *tex) unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; + /* used for scanouts that need special layouts */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (i915_scanout_layout(tex)) + return; + + /* for shared buffers we use some very like scanout */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + if (i915_display_target_layout(tex)) + return; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); tex->total_nblocksy = 0; @@ -351,6 +393,11 @@ i945_miptree_layout_2d(struct i915_texture *tex) if (i915_scanout_layout(tex)) return; + /* for shared buffers we use some very like scanout */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + if (i915_display_target_layout(tex)) + return; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); /* May need to adjust pitch to accomodate the placement of @@ -812,7 +859,7 @@ i915_transfer_map(struct pipe_screen *screen, char *map; boolean write = FALSE; - if (transfer->usage != PIPE_TRANSFER_READ) + if (transfer->usage & PIPE_TRANSFER_WRITE) write = TRUE; map = iws->buffer_map(iws, tex->buffer, write); diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915/i915_texture.h index 51a1dd984c..51a1dd984c 100644 --- a/src/gallium/drivers/i915simple/i915_texture.h +++ b/src/gallium/drivers/i915/i915_texture.h diff --git a/src/gallium/drivers/i915simple/intel_batchbuffer.h b/src/gallium/drivers/i915/intel_batchbuffer.h index db12dfd2ac..db12dfd2ac 100644 --- a/src/gallium/drivers/i915simple/intel_batchbuffer.h +++ b/src/gallium/drivers/i915/intel_batchbuffer.h diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h index 42c5e7470e..2c8dc63f3f 100644 --- a/src/gallium/drivers/i915simple/intel_winsys.h +++ b/src/gallium/drivers/i915/intel_winsys.h @@ -153,13 +153,13 @@ struct intel_winsys { /** * Write to a buffer. * - * Arguments follows pwrite(2) + * Arguments follows pipe_buffer_write. */ int (*buffer_write)(struct intel_winsys *iws, struct intel_buffer *dst, - const void *src, + size_t offset, size_t size, - size_t offset); + const void *data); void (*buffer_destroy)(struct intel_winsys *iws, struct intel_buffer *buffer); diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile deleted file mode 100644 index 19182afa75..0000000000 --- a/src/gallium/drivers/i965simple/Makefile +++ /dev/null @@ -1,52 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = i965simple - -C_SOURCES = \ - brw_blit.c \ - brw_flush.c \ - brw_screen.c \ - brw_surface.c \ - brw_cc.c \ - brw_clip.c \ - brw_clip_line.c \ - brw_clip_point.c \ - brw_clip_state.c \ - brw_clip_tri.c \ - brw_clip_util.c \ - brw_context.c \ - brw_curbe.c \ - brw_draw.c \ - brw_draw_upload.c \ - brw_eu.c \ - brw_eu_debug.c \ - brw_eu_emit.c \ - brw_eu_util.c \ - brw_gs.c \ - brw_gs_emit.c \ - brw_gs_state.c \ - brw_misc_state.c \ - brw_sf.c \ - brw_sf_emit.c \ - brw_sf_state.c \ - brw_state.c \ - brw_state_batch.c \ - brw_state_cache.c \ - brw_state_pool.c \ - brw_state_upload.c \ - brw_tex_layout.c \ - brw_urb.c \ - brw_util.c \ - brw_vs.c \ - brw_vs_emit.c \ - brw_vs_state.c \ - brw_wm.c \ - brw_wm_iz.c \ - brw_wm_decl.c \ - brw_wm_glsl.c \ - brw_wm_sampler_state.c \ - brw_wm_state.c \ - brw_wm_surface_state.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript deleted file mode 100644 index 43fc2a4005..0000000000 --- a/src/gallium/drivers/i965simple/SConscript +++ /dev/null @@ -1,54 +0,0 @@ -Import('*') - -env = env.Clone() - -i965simple = env.ConvenienceLibrary( - target = 'i965simple', - source = [ - 'brw_blit.c', - 'brw_cc.c', - 'brw_clip.c', - 'brw_clip_line.c', - 'brw_clip_point.c', - 'brw_clip_state.c', - 'brw_clip_tri.c', - 'brw_clip_util.c', - 'brw_context.c', - 'brw_curbe.c', - 'brw_draw.c', - 'brw_draw_upload.c', - 'brw_eu.c', - 'brw_eu_debug.c', - 'brw_eu_emit.c', - 'brw_eu_util.c', - 'brw_flush.c', - 'brw_gs.c', - 'brw_gs_emit.c', - 'brw_gs_state.c', - 'brw_misc_state.c', - 'brw_screen.c', - 'brw_sf.c', - 'brw_sf_emit.c', - 'brw_sf_state.c', - 'brw_state.c', - 'brw_state_batch.c', - 'brw_state_cache.c', - 'brw_state_pool.c', - 'brw_state_upload.c', - 'brw_surface.c', - 'brw_tex_layout.c', - 'brw_urb.c', - 'brw_util.c', - 'brw_vs.c', - 'brw_vs_emit.c', - 'brw_vs_state.c', - 'brw_wm.c', - 'brw_wm_decl.c', - 'brw_wm_glsl.c', - 'brw_wm_iz.c', - 'brw_wm_sampler_state.c', - 'brw_wm_state.c', - 'brw_wm_surface_state.c', - ]) - -Export('i965simple') diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c deleted file mode 100644 index 4d11f8d2ab..0000000000 --- a/src/gallium/drivers/i965simple/brw_blit.c +++ /dev/null @@ -1,218 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <errno.h> - -#include "brw_batch.h" -#include "brw_blit.h" -#include "brw_context.h" -#include "brw_reg.h" - -#include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" - -#define FILE_DEBUG_FLAG DEBUG_BLIT - -void brw_fill_blit(struct brw_context *brw, - unsigned cpp, - short dst_pitch, - struct pipe_buffer *dst_buffer, - unsigned dst_offset, - boolean dst_tiled, - short x, short y, - short w, short h, - unsigned color) -{ - unsigned BR13, CMD; - BATCH_LOCALS; - - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = (0xF0 << 16) | (1<<24); - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | (1<<24) | (1<<25); - CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - return; - } - - if (dst_tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (y << 16) | x ); - OUT_BATCH( ((y+h) << 16) | (x+w) ); - OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset ); - OUT_BATCH( color ); - ADVANCE_BATCH(); -} - -static unsigned translate_raster_op(unsigned logicop) -{ - switch(logicop) { - case PIPE_LOGICOP_CLEAR: return 0x00; - case PIPE_LOGICOP_AND: return 0x88; - case PIPE_LOGICOP_AND_REVERSE: return 0x44; - case PIPE_LOGICOP_COPY: return 0xCC; - case PIPE_LOGICOP_AND_INVERTED: return 0x22; - case PIPE_LOGICOP_NOOP: return 0xAA; - case PIPE_LOGICOP_XOR: return 0x66; - case PIPE_LOGICOP_OR: return 0xEE; - case PIPE_LOGICOP_NOR: return 0x11; - case PIPE_LOGICOP_EQUIV: return 0x99; - case PIPE_LOGICOP_INVERT: return 0x55; - case PIPE_LOGICOP_OR_REVERSE: return 0xDD; - case PIPE_LOGICOP_COPY_INVERTED: return 0x33; - case PIPE_LOGICOP_OR_INVERTED: return 0xBB; - case PIPE_LOGICOP_NAND: return 0x77; - case PIPE_LOGICOP_SET: return 0xFF; - default: return 0; - } -} - - -/* Copy BitBlt - */ -void brw_copy_blit(struct brw_context *brw, - unsigned do_flip, - unsigned cpp, - short src_pitch, - struct pipe_buffer *src_buffer, - unsigned src_offset, - boolean src_tiled, - short dst_pitch, - struct pipe_buffer *dst_buffer, - unsigned dst_offset, - boolean dst_tiled, - short src_x, short src_y, - short dst_x, short dst_y, - short w, short h, - unsigned logic_op) -{ - unsigned CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - BATCH_LOCALS; - - - DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", - __FUNCTION__, - src_buffer, src_pitch, src_x, src_y, - dst_buffer, dst_pitch, dst_x, dst_y, - w,h,logic_op); - - assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 ); - assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 ); - - src_pitch *= cpp; - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | - (1<<25); - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - return; - } - - if (src_tiled) { - CMD |= XY_SRC_TILED; - src_pitch /= 4; - } - - if (dst_tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - if (dst_y2 < dst_y || - dst_x2 < dst_x) { - return; - } - - dst_pitch &= 0xffff; - src_pitch &= 0xffff; - - /* Initial y values don't seem to work with negative pitches. If - * we adjust the offsets manually (below), it seems to work fine. - * - * On the other hand, if we always adjust, the hardware doesn't - * know which blit directions to use, so overlapping copypixels get - * the wrong result. - */ - if (dst_pitch > 0 && src_pitch > 0) { - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (dst_y << 16) | dst_x ); - OUT_BATCH( (dst_y2 << 16) | dst_x2 ); - OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, - dst_offset ); - OUT_BATCH( (src_y << 16) | src_x ); - OUT_BATCH( src_pitch ); - OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, - src_offset ); - ADVANCE_BATCH(); - } - else { - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); - OUT_BATCH( (0 << 16) | dst_x ); - OUT_BATCH( (h << 16) | dst_x2 ); - OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, - dst_offset + dst_y * dst_pitch ); - OUT_BATCH( (src_pitch & 0xffff) ); - OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, - src_offset + src_y * src_pitch ); - ADVANCE_BATCH(); - } -} - - - diff --git a/src/gallium/drivers/i965simple/brw_blit.h b/src/gallium/drivers/i965simple/brw_blit.h deleted file mode 100644 index 111c5d91d3..0000000000 --- a/src/gallium/drivers/i965simple/brw_blit.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef BRW_BLIT_H -#define BRW_BLIT_H - -#include "pipe/p_compiler.h" - -struct pipe_buffer; -struct brw_context; - -void brw_fill_blit(struct brw_context *intel, - unsigned cpp, - short dst_pitch, - struct pipe_buffer *dst_buffer, - unsigned dst_offset, - boolean dst_tiled, - short x, short y, - short w, short h, - unsigned color); -void brw_copy_blit(struct brw_context *intel, - unsigned do_flip, - unsigned cpp, - short src_pitch, - struct pipe_buffer *src_buffer, - unsigned src_offset, - boolean src_tiled, - short dst_pitch, - struct pipe_buffer *dst_buffer, - unsigned dst_offset, - boolean dst_tiled, - short src_x, short src_y, - short dst_x, short dst_y, - short w, short h, - unsigned logic_op); -#endif diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c deleted file mode 100644 index 3668123e2e..0000000000 --- a/src/gallium/drivers/i965simple/brw_cc.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_util.h" - - -static int brw_translate_compare_func(int func) -{ - switch(func) { - case PIPE_FUNC_NEVER: - return BRW_COMPAREFUNCTION_NEVER; - case PIPE_FUNC_LESS: - return BRW_COMPAREFUNCTION_LESS; - case PIPE_FUNC_LEQUAL: - return BRW_COMPAREFUNCTION_LEQUAL; - case PIPE_FUNC_GREATER: - return BRW_COMPAREFUNCTION_GREATER; - case PIPE_FUNC_GEQUAL: - return BRW_COMPAREFUNCTION_GEQUAL; - case PIPE_FUNC_NOTEQUAL: - return BRW_COMPAREFUNCTION_NOTEQUAL; - case PIPE_FUNC_EQUAL: - return BRW_COMPAREFUNCTION_EQUAL; - case PIPE_FUNC_ALWAYS: - return BRW_COMPAREFUNCTION_ALWAYS; - } - - debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); - return BRW_COMPAREFUNCTION_ALWAYS; -} - -static int brw_translate_stencil_op(int op) -{ - switch(op) { - case PIPE_STENCIL_OP_KEEP: - return BRW_STENCILOP_KEEP; - case PIPE_STENCIL_OP_ZERO: - return BRW_STENCILOP_ZERO; - case PIPE_STENCIL_OP_REPLACE: - return BRW_STENCILOP_REPLACE; - case PIPE_STENCIL_OP_INCR: - return BRW_STENCILOP_INCRSAT; - case PIPE_STENCIL_OP_DECR: - return BRW_STENCILOP_DECRSAT; - case PIPE_STENCIL_OP_INCR_WRAP: - return BRW_STENCILOP_INCR; - case PIPE_STENCIL_OP_DECR_WRAP: - return BRW_STENCILOP_DECR; - case PIPE_STENCIL_OP_INVERT: - return BRW_STENCILOP_INVERT; - default: - return BRW_STENCILOP_ZERO; - } -} - - -static int brw_translate_logic_op(int opcode) -{ - switch(opcode) { - case PIPE_LOGICOP_CLEAR: - return BRW_LOGICOPFUNCTION_CLEAR; - case PIPE_LOGICOP_AND: - return BRW_LOGICOPFUNCTION_AND; - case PIPE_LOGICOP_AND_REVERSE: - return BRW_LOGICOPFUNCTION_AND_REVERSE; - case PIPE_LOGICOP_COPY: - return BRW_LOGICOPFUNCTION_COPY; - case PIPE_LOGICOP_COPY_INVERTED: - return BRW_LOGICOPFUNCTION_COPY_INVERTED; - case PIPE_LOGICOP_AND_INVERTED: - return BRW_LOGICOPFUNCTION_AND_INVERTED; - case PIPE_LOGICOP_NOOP: - return BRW_LOGICOPFUNCTION_NOOP; - case PIPE_LOGICOP_XOR: - return BRW_LOGICOPFUNCTION_XOR; - case PIPE_LOGICOP_OR: - return BRW_LOGICOPFUNCTION_OR; - case PIPE_LOGICOP_OR_INVERTED: - return BRW_LOGICOPFUNCTION_OR_INVERTED; - case PIPE_LOGICOP_NOR: - return BRW_LOGICOPFUNCTION_NOR; - case PIPE_LOGICOP_EQUIV: - return BRW_LOGICOPFUNCTION_EQUIV; - case PIPE_LOGICOP_INVERT: - return BRW_LOGICOPFUNCTION_INVERT; - case PIPE_LOGICOP_OR_REVERSE: - return BRW_LOGICOPFUNCTION_OR_REVERSE; - case PIPE_LOGICOP_NAND: - return BRW_LOGICOPFUNCTION_NAND; - case PIPE_LOGICOP_SET: - return BRW_LOGICOPFUNCTION_SET; - default: - return BRW_LOGICOPFUNCTION_SET; - } -} - - -static void upload_cc_vp( struct brw_context *brw ) -{ - struct brw_cc_viewport ccv; - - memset(&ccv, 0, sizeof(ccv)); - - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; - - brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); -} - -const struct brw_tracked_state brw_cc_vp = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_cc_vp -}; - - -static void upload_cc_unit( struct brw_context *brw ) -{ - struct brw_cc_unit_state cc; - - memset(&cc, 0, sizeof(cc)); - - /* BRW_NEW_DEPTH_STENCIL */ - if (brw->attribs.DepthStencil->stencil[0].enabled) { - cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil[0].enabled; - cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil[0].func); - cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil[0].fail_op); - cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[0].zfail_op); - cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[0].zpass_op); - cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value; - cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].writemask; - cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].valuemask; - - if (brw->attribs.DepthStencil->stencil[1].enabled) { - cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled; - cc.cc0.bf_stencil_func = brw_translate_compare_func( - brw->attribs.DepthStencil->stencil[1].func); - cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[1].fail_op); - cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[1].zfail_op); - cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op( - brw->attribs.DepthStencil->stencil[1].zpass_op); - cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value; - cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].writemask; - cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].valuemask; - } - - /* Not really sure about this: - */ - if (brw->attribs.DepthStencil->stencil[0].writemask || - brw->attribs.DepthStencil->stencil[1].writemask) - cc.cc0.stencil_write_enable = 1; - } - - /* BRW_NEW_BLEND */ - if (brw->attribs.Blend->logicop_enable) { - cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func ); - } - else if (brw->attribs.Blend->blend_enable) { - int eqRGB = brw->attribs.Blend->rgb_func; - int eqA = brw->attribs.Blend->alpha_func; - int srcRGB = brw->attribs.Blend->rgb_src_factor; - int dstRGB = brw->attribs.Blend->rgb_dst_factor; - int srcA = brw->attribs.Blend->alpha_src_factor; - int dstA = brw->attribs.Blend->alpha_dst_factor; - - if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) { - srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE; - } - - if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) { - srcA = dstA = PIPE_BLENDFACTOR_ONE; - } - - cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); - - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); - - cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - /* BRW_NEW_ALPHATEST - */ - if (brw->attribs.DepthStencil->alpha.enabled) { - cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = - brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); - - cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref_value); - - cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - } - - if (brw->attribs.Blend->dither) { - cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; - } - - if (brw->attribs.DepthStencil->depth.enabled) { - cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled; - cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func); - cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask; - } - - /* CACHE_NEW_CC_VP */ - cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; - - if (BRW_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; - - brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); -} - -const struct brw_tracked_state brw_cc_unit = { - .dirty = { - .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST, - .cache = CACHE_NEW_CC_VP - }, - .update = upload_cc_unit -}; - diff --git a/src/gallium/drivers/i965simple/brw_clip.c b/src/gallium/drivers/i965simple/brw_clip.c deleted file mode 100644 index 268124cc53..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_state.h" -#include "brw_clip.h" - -#define FRONT_UNFILLED_BIT 0x1 -#define BACK_UNFILLED_BIT 0x2 - - -static void compile_clip_prog( struct brw_context *brw, - struct brw_clip_prog_key *key ) -{ - struct brw_clip_compile c; - const unsigned *program; - unsigned program_size; - unsigned delta; - unsigned i; - - memset(&c, 0, sizeof(c)); - - /* Begin the compilation: - */ - brw_init_compile(&c.func); - - c.func.single_program_flow = 1; - - c.key = *key; - - - /* Need to locate the two positions present in vertex + header. - * These are currently hardcoded: - */ - c.header_position_offset = ATTR_SIZE; - - for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++) - if (c.key.attrs & (1<<i)) { - c.offset[i] = delta; - delta += ATTR_SIZE; - } - - c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ - c.nr_bytes = c.nr_regs * REG_SIZE; - - c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ - - /* For some reason the thread is spawned with only 4 channels - * unmasked. - */ - brw_set_mask_control(&c.func, BRW_MASK_DISABLE); - - - /* Would ideally have the option of producing a program which could - * do all three: - */ - switch (key->primitive) { - case PIPE_PRIM_TRIANGLES: -#if 0 - if (key->do_unfilled) - brw_emit_unfilled_clip( &c ); - else -#endif - brw_emit_tri_clip( &c ); - break; - case PIPE_PRIM_LINES: - brw_emit_line_clip( &c ); - break; - case PIPE_PRIM_POINTS: - brw_emit_point_clip( &c ); - break; - default: - assert(0); - return; - } - - - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - /* Upload - */ - brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->clip.prog_data ); -} - - -static boolean search_cache( struct brw_context *brw, - struct brw_clip_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_CLIP_PROG], - key, sizeof(*key), - &brw->clip.prog_data, - &brw->clip.prog_gs_offset); -} - - - - -/* Calculate interpolants for triangle and line rasterization. - */ -static void upload_clip_prog(struct brw_context *brw) -{ - struct brw_clip_prog_key key; - - memset(&key, 0, sizeof(key)); - - /* Populate the key: - */ - /* BRW_NEW_REDUCED_PRIMITIVE */ - key.primitive = brw->reduced_primitive; - /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->outputs_written; - /* BRW_NEW_RASTER */ - key.do_flat_shading = (brw->attribs.Raster->flatshade); - /* BRW_NEW_CLIP */ - key.nr_userclip = brw->attribs.Clip.nr; /* XXX */ - -#if 0 - key.clip_mode = BRW_CLIPMODE_NORMAL; - - if (key.primitive == PIPE_PRIM_TRIANGLES) { - if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH) - key.clip_mode = BRW_CLIPMODE_REJECT_ALL; - else { - if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || - brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL) - key.do_unfilled = 1; - - /* Most cases the fixed function units will handle. Cases where - * one or more polygon faces are unfilled will require help: - */ - if (key.do_unfilled) { - key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; - - if (brw->attribs.Raster->offset_cw || - brw->attribs.Raster->offset_ccw) { - key.offset_units = brw->attribs.Raster->offset_units; - key.offset_factor = brw->attribs.Raster->offset_scale; - } - key.fill_ccw = brw->attribs.Raster->fill_ccw; - key.fill_cw = brw->attribs.Raster->fill_cw; - key.offset_ccw = brw->attribs.Raster->offset_ccw; - key.offset_cw = brw->attribs.Raster->offset_cw; - if (brw->attribs.Raster->light_twoside && - key.fill_cw != CLIP_CULL) - key.copy_bfc_cw = 1; - } - } - } -#else - key.clip_mode = BRW_CLIPMODE_ACCEPT_ALL; -#endif - - if (!search_cache(brw, &key)) - compile_clip_prog( brw, &key ); -} - -const struct brw_tracked_state brw_clip_prog = { - .dirty = { - .brw = (BRW_NEW_RASTERIZER | - BRW_NEW_CLIP | - BRW_NEW_REDUCED_PRIMITIVE), - .cache = CACHE_NEW_VS_PROG - }, - .update = upload_clip_prog -}; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h deleted file mode 100644 index d70fc094ff..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef BRW_CLIP_H -#define BRW_CLIP_H - - -#include "brw_context.h" -#include "brw_eu.h" - -#define MAX_VERTS (3+6+6) - -/* Note that if unfilled primitives are being emitted, we have to fix - * up polygon offset and flatshading at this point: - */ -struct brw_clip_prog_key { - unsigned attrs:32; - unsigned primitive:4; - unsigned nr_userclip:3; - unsigned do_flat_shading:1; - unsigned do_unfilled:1; - unsigned fill_cw:2; /* includes cull information */ - unsigned fill_ccw:2; /* includes cull information */ - unsigned offset_cw:1; - unsigned offset_ccw:1; - unsigned pad0:17; - - unsigned copy_bfc_cw:1; - unsigned copy_bfc_ccw:1; - unsigned clip_mode:3; - unsigned pad1:27; - - float offset_factor; - float offset_units; -}; - - -#define CLIP_LINE 0 -#define CLIP_POINT 1 -#define CLIP_FILL 2 -#define CLIP_CULL 3 - - -#define PRIM_MASK (0x1f) - -struct brw_clip_compile { - struct brw_compile func; - struct brw_clip_prog_key key; - struct brw_clip_prog_data prog_data; - - struct { - struct brw_reg R0; - struct brw_reg vertex[MAX_VERTS]; - - struct brw_reg t; - struct brw_reg t0, t1; - struct brw_reg dp0, dp1; - - struct brw_reg dpPrev; - struct brw_reg dp; - struct brw_reg loopcount; - struct brw_reg nr_verts; - struct brw_reg planemask; - - struct brw_reg inlist; - struct brw_reg outlist; - struct brw_reg freelist; - - struct brw_reg dir; - struct brw_reg tmp0, tmp1; - struct brw_reg offset; - - struct brw_reg fixed_planes; - struct brw_reg plane_equation; - } reg; - - /* 3 different ways of expressing vertex size: - */ - unsigned nr_attrs; - unsigned nr_regs; - unsigned nr_bytes; - - unsigned first_tmp; - unsigned last_tmp; - - boolean need_direction; - - unsigned last_mrf; - - unsigned header_position_offset; - unsigned offset[PIPE_MAX_ATTRIBS]; -}; - -#define ATTR_SIZE (4*4) - -/* Points are only culled, so no need for a clip routine, however it - * works out easier to have a dummy one. - */ -void brw_emit_unfilled_clip( struct brw_clip_compile *c ); -void brw_emit_tri_clip( struct brw_clip_compile *c ); -void brw_emit_line_clip( struct brw_clip_compile *c ); -void brw_emit_point_clip( struct brw_clip_compile *c ); - -/* brw_clip_tri.c, for use by the unfilled clip routine: - */ -void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); -void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); -void brw_clip_tri( struct brw_clip_compile *c ); -void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); -void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, - unsigned nr_verts ); - - -/* Utils: - */ - -void brw_clip_interp_vertex( struct brw_clip_compile *c, - struct brw_indirect dest_ptr, - struct brw_indirect v0_ptr, /* from */ - struct brw_indirect v1_ptr, /* to */ - struct brw_reg t0, - boolean force_edgeflag ); - -void brw_clip_init_planes( struct brw_clip_compile *c ); - -void brw_clip_emit_vue(struct brw_clip_compile *c, - struct brw_indirect vert, - boolean allocate, - boolean eot, - unsigned header); - -void brw_clip_kill_thread(struct brw_clip_compile *c); - -struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); -struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); - -void brw_clip_copy_colors( struct brw_clip_compile *c, - unsigned to, unsigned from ); - -void brw_clip_init_clipmask( struct brw_clip_compile *c ); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_clip_line.c b/src/gallium/drivers/i965simple/brw_clip_line.c deleted file mode 100644 index 75d9e5fcda..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_line.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - - - -static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) -{ - unsigned i = 0,j; - - /* Register usage is static, precompute here: - */ - c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; - - if (c->key.nr_userclip) { - c->reg.fixed_planes = brw_vec4_grf(i, 0); - i += (6 + c->key.nr_userclip + 1) / 2; - - c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; - } - else - c->prog_data.curb_read_length = 0; - - - /* Payload vertices plus space for more generated vertices: - */ - for (j = 0; j < 4; j++) { - c->reg.vertex[j] = brw_vec4_grf(i, 0); - i += c->nr_regs; - } - - c->reg.t = brw_vec1_grf(i, 0); - c->reg.t0 = brw_vec1_grf(i, 1); - c->reg.t1 = brw_vec1_grf(i, 2); - c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); - c->reg.plane_equation = brw_vec4_grf(i, 4); - i++; - - c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ - c->reg.dp1 = brw_vec1_grf(i, 4); - i++; - - if (!c->key.nr_userclip) { - c->reg.fixed_planes = brw_vec8_grf(i, 0); - i++; - } - - - c->first_tmp = i; - c->last_tmp = i; - - c->prog_data.urb_read_length = c->nr_regs; /* ? */ - c->prog_data.total_grf = i; -} - - - -/* Line clipping, more or less following the following algorithm: - * - * for (p=0;p<MAX_PLANES;p++) { - * if (clipmask & (1 << p)) { - * float dp0 = DOTPROD( vtx0, plane[p] ); - * float dp1 = DOTPROD( vtx1, plane[p] ); - * - * if (IS_NEGATIVE(dp1)) { - * float t = dp1 / (dp1 - dp0); - * if (t > t1) t1 = t; - * } else { - * float t = dp0 / (dp0 - dp1); - * if (t > t0) t0 = t; - * } - * - * if (t0 + t1 >= 1.0) - * return; - * } - * } - * - * interp( ctx, newvtx0, vtx0, vtx1, t0 ); - * interp( ctx, newvtx1, vtx1, vtx0, t1 ); - * - */ -static void clip_and_emit_line( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_indirect vtx0 = brw_indirect(0, 0); - struct brw_indirect vtx1 = brw_indirect(1, 0); - struct brw_indirect newvtx0 = brw_indirect(2, 0); - struct brw_indirect newvtx1 = brw_indirect(3, 0); - struct brw_indirect plane_ptr = brw_indirect(4, 0); - struct brw_instruction *plane_loop; - struct brw_instruction *plane_active; - struct brw_instruction *is_negative; - struct brw_instruction *is_neg2; - struct brw_instruction *not_culled; - struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); - - brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); - brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); - brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); - brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); - brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); - - /* Note: init t0, t1 together: - */ - brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); - - brw_clip_init_planes(c); - brw_clip_init_clipmask(c); - - /* -ve rhw workaround */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), - brw_imm_ud(1<<20)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - plane_loop = brw_DO(p, BRW_EXECUTE_1); - { - /* if (planemask & 1) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); - - plane_active = brw_IF(p, BRW_EXECUTE_1); - { - if (c->key.nr_userclip) - brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); - else - brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); - -#if 0 - /* dp = DP4(vtx->position, plane) - */ - brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); - - /* if (IS_NEGATIVE(dp1)) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); -#else - #warning "disabled" -#endif - is_negative = brw_IF(p, BRW_EXECUTE_1); - { - brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); - brw_MOV(p, c->reg.t1, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - is_negative = brw_ELSE(p, is_negative); - { - /* Coming back in. We know that both cannot be negative - * because the line would have been culled in that case. - */ - - /* If both are positive, do nothing */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); - is_neg2 = brw_IF(p, BRW_EXECUTE_1); - { - brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); - brw_MOV(p, c->reg.t0, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_ENDIF(p, is_neg2); - } - brw_ENDIF(p, is_negative); - } - brw_ENDIF(p, plane_active); - - /* plane_ptr++; - */ - brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); - - /* while (planemask>>=1) != 0 - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); - } - brw_WHILE(p, plane_loop); - - brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); - not_culled = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); - brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); - - brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); - brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); - } - brw_ENDIF(p, not_culled); - brw_clip_kill_thread(c); -} - - - -void brw_emit_line_clip( struct brw_clip_compile *c ) -{ - brw_clip_line_alloc_regs(c); - - if (c->key.do_flat_shading) - brw_clip_copy_colors(c, 0, 1); - - clip_and_emit_line(c); -} diff --git a/src/gallium/drivers/i965simple/brw_clip_point.c b/src/gallium/drivers/i965simple/brw_clip_point.c deleted file mode 100644 index 6fce7210d1..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_point.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - - -/* Point clipping, nothing to do? - */ -void brw_emit_point_clip( struct brw_clip_compile *c ) -{ - /* Send an empty message to kill the thread: - */ - brw_clip_tri_alloc_regs(c, 0); - brw_clip_kill_thread(c); -} diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c deleted file mode 100644 index 8e78dd51be..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_state.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - -static void upload_clip_unit( struct brw_context *brw ) -{ - struct brw_clip_unit_state clip; - - memset(&clip, 0, sizeof(clip)); - - /* CACHE_NEW_CLIP_PROG */ - clip.thread0.grf_reg_count = - align(brw->clip.prog_data->total_grf, 16) / 16 - 1; - clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; - clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; - clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; - clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; - - /* BRW_NEW_CURBE_OFFSETS */ - clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; - - /* BRW_NEW_URB_FENCE */ - clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; - clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - clip.thread4.max_threads = 1; /* 2 threads */ - - if (BRW_DEBUG & DEBUG_STATS) - clip.thread4.stats_enable = 1; - - /* CONSTANT */ - clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - clip.thread1.single_program_flow = 1; - clip.thread3.dispatch_grf_start_reg = 1; - clip.thread3.urb_entry_read_offset = 0; - clip.clip5.userclip_enable_flags = 0x7f; - clip.clip5.userclip_must_clip = 1; - clip.clip5.guard_band_enable = 0; - clip.clip5.viewport_z_clip_enable = 1; - clip.clip5.viewport_xy_clip_enable = 1; - clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; - clip.clip5.api_mode = BRW_CLIP_API_OGL; - clip.clip6.clipper_viewport_state_ptr = 0; - clip.viewport_xmin = -1; - clip.viewport_xmax = 1; - clip.viewport_ymin = -1; - clip.viewport_ymax = 1; - - brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); -} - - -const struct brw_tracked_state brw_clip_unit = { - .dirty = { - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_URB_FENCE), - .cache = CACHE_NEW_CLIP_PROG - }, - .update = upload_clip_unit -}; diff --git a/src/gallium/drivers/i965simple/brw_clip_tri.c b/src/gallium/drivers/i965simple/brw_clip_tri.c deleted file mode 100644 index c5da7b825e..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_tri.c +++ /dev/null @@ -1,566 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - -static struct brw_reg get_tmp( struct brw_clip_compile *c ) -{ - struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmps( struct brw_clip_compile *c ) -{ - c->last_tmp = c->first_tmp; -} - - -void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, - unsigned nr_verts ) -{ - unsigned i = 0,j; - - /* Register usage is static, precompute here: - */ - c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; - - if (c->key.nr_userclip) { - c->reg.fixed_planes = brw_vec4_grf(i, 0); - i += (6 + c->key.nr_userclip + 1) / 2; - - c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; - } - else - c->prog_data.curb_read_length = 0; - - - /* Payload vertices plus space for more generated vertices: - */ - for (j = 0; j < nr_verts; j++) { - c->reg.vertex[j] = brw_vec4_grf(i, 0); - i += c->nr_regs; - } - - if (c->nr_attrs & 1) { - for (j = 0; j < 3; j++) { - unsigned delta = c->nr_attrs*16 + 32; - brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); - } - } - - c->reg.t = brw_vec1_grf(i, 0); - c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); - c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); - c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); - c->reg.plane_equation = brw_vec4_grf(i, 4); - i++; - - c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ - c->reg.dp = brw_vec1_grf(i, 4); - i++; - - c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); - i++; - - c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); - i++; - - c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); - i++; - - if (!c->key.nr_userclip) { - c->reg.fixed_planes = brw_vec8_grf(i, 0); - i++; - } - - if (c->key.do_unfilled) { - c->reg.dir = brw_vec4_grf(i, 0); - c->reg.offset = brw_vec4_grf(i, 4); - i++; - c->reg.tmp0 = brw_vec4_grf(i, 0); - c->reg.tmp1 = brw_vec4_grf(i, 4); - i++; - } - - c->first_tmp = i; - c->last_tmp = i; - - c->prog_data.urb_read_length = c->nr_regs; /* ? */ - c->prog_data.total_grf = i; -} - - - -void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ - struct brw_instruction *is_rev; - - /* Initial list of indices for incoming vertexes: - */ - brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_EQ, - tmp0, - brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); - - /* XXX: Is there an easier way to do this? Need to reverse every - * second tristrip element: Can ignore sometimes? - */ - is_rev = brw_IF(p, BRW_EXECUTE_1); - { - brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); - brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); - if (c->need_direction) - brw_MOV(p, c->reg.dir, brw_imm_f(-1)); - } - is_rev = brw_ELSE(p, is_rev); - { - brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); - brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); - if (c->need_direction) - brw_MOV(p, c->reg.dir, brw_imm_f(1)); - } - brw_ENDIF(p, is_rev); - - brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); - brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); - brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); -} - - - -void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *is_poly; - struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ - - brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_EQ, - tmp0, - brw_imm_ud(_3DPRIM_POLYGON)); - - is_poly = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_copy_colors(c, 1, 0); - brw_clip_copy_colors(c, 2, 0); - } - is_poly = brw_ELSE(p, is_poly); - { - brw_clip_copy_colors(c, 0, 2); - brw_clip_copy_colors(c, 1, 2); - } - brw_ENDIF(p, is_poly); -} - - - -/* Use mesa's clipping algorithms, translated to GEN4 assembly. - */ -void brw_clip_tri( struct brw_clip_compile *c ) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_indirect vtx = brw_indirect(0, 0); - struct brw_indirect vtxPrev = brw_indirect(1, 0); - struct brw_indirect vtxOut = brw_indirect(2, 0); - struct brw_indirect plane_ptr = brw_indirect(3, 0); - struct brw_indirect inlist_ptr = brw_indirect(4, 0); - struct brw_indirect outlist_ptr = brw_indirect(5, 0); - struct brw_indirect freelist_ptr = brw_indirect(6, 0); - struct brw_instruction *plane_loop; - struct brw_instruction *plane_active; - struct brw_instruction *vertex_loop; - struct brw_instruction *next_test; - struct brw_instruction *prev_test; - - brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); - brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); - brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); - brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); - - brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); - - plane_loop = brw_DO(p, BRW_EXECUTE_1); - { - /* if (planemask & 1) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); - - plane_active = brw_IF(p, BRW_EXECUTE_1); - { - /* vtxOut = freelist_ptr++ - */ - brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); - brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); - - if (c->key.nr_userclip) - brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); - else - brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); - - brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); - brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); - - vertex_loop = brw_DO(p, BRW_EXECUTE_1); - { - /* vtx = *input_ptr; - */ - brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); - - /* IS_NEGATIVE(prev) */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); - prev_test = brw_IF(p, BRW_EXECUTE_1); - { - /* IS_POSITIVE(next) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); - next_test = brw_IF(p, BRW_EXECUTE_1); - { - - /* Coming back in. - */ - brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); - - /* If (vtxOut == 0) vtxOut = vtxPrev - */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); - brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE); - - /* *outlist_ptr++ = vtxOut; - * nr_verts++; - * vtxOut = 0; - */ - brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); - brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); - brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); - brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); - } - brw_ENDIF(p, next_test); - - } - prev_test = brw_ELSE(p, prev_test); - { - /* *outlist_ptr++ = vtxPrev; - * nr_verts++; - */ - brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); - brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); - brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); - - /* IS_NEGATIVE(next) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); - next_test = brw_IF(p, BRW_EXECUTE_1); - { - /* Going out of bounds. Avoid division by zero as we - * know dp != dpPrev from DIFFERENT_SIGNS, above. - */ - brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); - - /* If (vtxOut == 0) vtxOut = vtx - */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); - brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE); - - /* *outlist_ptr++ = vtxOut; - * nr_verts++; - * vtxOut = 0; - */ - brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); - brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); - brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); - brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); - } - brw_ENDIF(p, next_test); - } - brw_ENDIF(p, prev_test); - - /* vtxPrev = vtx; - * inlist_ptr++; - */ - brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); - brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); - - /* while (--loopcount != 0) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, vertex_loop); - - /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] - * inlist = outlist - * inlist_ptr = &inlist[0] - * outlist_ptr = &outlist[0] - */ - brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); - brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); - brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); - brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); - brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); - } - brw_ENDIF(p, plane_active); - - /* plane_ptr++; - */ - brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); - - /* nr_verts >= 3 - */ - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_GE, - c->reg.nr_verts, - brw_imm_ud(3)); - - /* && (planemask>>=1) != 0 - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); - } - brw_WHILE(p, plane_loop); -#else - #warning "disabled" -#endif -} - - - -void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *loop, *if_insn; - - /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_G); - brw_ADD(p, - c->reg.loopcount, - c->reg.nr_verts, - brw_imm_d(-2)); - - if_insn = brw_IF(p, BRW_EXECUTE_1); - { - struct brw_indirect v0 = brw_indirect(0, 0); - struct brw_indirect vptr = brw_indirect(1, 0); - - brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); - brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - - brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); - - brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); - brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - - loop = brw_DO(p, BRW_EXECUTE_1); - { - brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); - - brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); - brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, loop); - - brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); - } - brw_ENDIF(p, if_insn); -} - -static void do_clip_tri( struct brw_clip_compile *c ) -{ - brw_clip_init_planes(c); - - brw_clip_tri(c); -} - - -static void maybe_do_clip_tri( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *do_clip; - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); - do_clip = brw_IF(p, BRW_EXECUTE_1); - { - do_clip_tri(c); - } - brw_ENDIF(p, do_clip); -} - -static void brw_clip_test( struct brw_clip_compile *c ) -{ -#if 0 - struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - - struct brw_reg v0 = get_tmp(c); - struct brw_reg v1 = get_tmp(c); - struct brw_reg v2 = get_tmp(c); - - struct brw_indirect vt0 = brw_indirect(0, 0); - struct brw_indirect vt1 = brw_indirect(1, 0); - struct brw_indirect vt2 = brw_indirect(2, 0); - - struct brw_compile *p = &c->func; - - brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); - brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); - brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); - brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); - - /* test nearz, xmin, ymin plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_XOR(p, t, t1, t2); - brw_XOR(p, t1, t2, t3); - brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - /* test farz, xmax, ymax plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_XOR(p, t, t1, t2); - brw_XOR(p, t1, t2, t3); - brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - release_tmps(c); -#else - #warning "disabled" -#endif -} - - -void brw_emit_tri_clip( struct brw_clip_compile *c ) -{ - struct brw_instruction *neg_rhw; - struct brw_compile *p = &c->func; - brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); - brw_clip_tri_init_vertices(c); - brw_clip_init_clipmask(c); - - /* if -ve rhw workaround bit is set, - do cliptest */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), - brw_imm_ud(1<<20)); - neg_rhw = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_test(c); - } - brw_ENDIF(p, neg_rhw); - - /* Can't push into do_clip_tri because with polygon (or quad) - * flatshading, need to apply the flatshade here because we don't - * respect the PV when converting to trifan for emit: - */ - if (c->key.do_flat_shading) - brw_clip_tri_flat_shade(c); - - if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) - do_clip_tri(c); - else - maybe_do_clip_tri(c); - - brw_clip_tri_emit_polygon(c); - - /* Send an empty message to kill the thread: - */ - brw_clip_kill_thread(c); -} diff --git a/src/gallium/drivers/i965simple/brw_clip_unfilled.c b/src/gallium/drivers/i965simple/brw_clip_unfilled.c deleted file mode 100644 index b774a76dd6..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_unfilled.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - - - -/* This is performed against the original triangles, so no indirection - * required: -BZZZT! - */ -static void compute_tri_direction( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg e = c->reg.tmp0; - struct brw_reg f = c->reg.tmp1; - struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); - - - /* Calculate the vectors of two edges of the triangle: - */ - brw_ADD(p, e, v0, negate(v2)); - brw_ADD(p, f, v1, negate(v2)); - - /* Take their crossproduct: - */ - brw_set_access_mode(p, BRW_ALIGN_16); - brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); - brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); - brw_set_access_mode(p, BRW_ALIGN_1); - - brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); -} - - -static void cull_direction( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *ccw; - unsigned conditional; - - assert (!(c->key.fill_ccw == CLIP_CULL && - c->key.fill_cw == CLIP_CULL)); - - if (c->key.fill_ccw == CLIP_CULL) - conditional = BRW_CONDITIONAL_GE; - else - conditional = BRW_CONDITIONAL_L; - - brw_CMP(p, - vec1(brw_null_reg()), - conditional, - get_element(c->reg.dir, 2), - brw_imm_f(0)); - - ccw = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_kill_thread(c); - } - brw_ENDIF(p, ccw); -} - - - -static void copy_bfc( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *ccw; - unsigned conditional; - - /* Do we have any colors to copy? - */ - if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && - !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) - return; - - /* In some wierd degnerate cases we can end up testing the - * direction twice, once for culling and once for bfc copying. Oh - * well, that's what you get for setting wierd GL state. - */ - if (c->key.copy_bfc_ccw) - conditional = BRW_CONDITIONAL_GE; - else - conditional = BRW_CONDITIONAL_L; - - brw_CMP(p, - vec1(brw_null_reg()), - conditional, - get_element(c->reg.dir, 2), - brw_imm_f(0)); - - ccw = brw_IF(p, BRW_EXECUTE_1); - { - unsigned i; - - for (i = 0; i < 3; i++) { - if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) - brw_MOV(p, - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); - - if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) - brw_MOV(p, - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); - } - } - brw_ENDIF(p, ccw); -} - - - - -/* - float iz = 1.0 / dir.z; - float ac = dir.x * iz; - float bc = dir.y * iz; - offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; - offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; - offset *= MRD; -*/ -static void compute_offset( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg off = c->reg.offset; - struct brw_reg dir = c->reg.dir; - - brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); - brw_MUL(p, vec2(off), dir, get_element(off, 2)); - - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_GE, - brw_abs(get_element(off, 0)), - brw_abs(get_element(off, 1))); - - brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); - brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); -} - - -static void merge_edgeflags( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *is_poly; - struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); - - brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_EQ, - tmp0, - brw_imm_ud(_3DPRIM_POLYGON)); - - /* Get away with using reg.vertex because we know that this is not - * a _3DPRIM_TRISTRIP_REVERSE: - */ - is_poly = brw_IF(p, BRW_EXECUTE_1); - { - brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); - brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); - brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); - brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); - brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_ENDIF(p, is_poly); -} - - - -static void apply_one_offset( struct brw_clip_compile *c, - struct brw_indirect vert ) -{ - struct brw_compile *p = &c->func; - struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); - struct brw_reg z = get_element(pos, 2); - - brw_ADD(p, z, z, vec1(c->reg.offset)); -} - - - -/*********************************************************************** - * Output clipped polygon as an unfilled primitive: - */ -static void emit_lines(struct brw_clip_compile *c, - boolean do_offset) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *loop; - struct brw_instruction *draw_edge; - struct brw_indirect v0 = brw_indirect(0, 0); - struct brw_indirect v1 = brw_indirect(1, 0); - struct brw_indirect v0ptr = brw_indirect(2, 0); - struct brw_indirect v1ptr = brw_indirect(3, 0); - - /* Need a seperate loop for offset: - */ - if (do_offset) { - brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); - brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); - - loop = brw_DO(p, BRW_EXECUTE_1); - { - brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); - brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); - - apply_one_offset(c, v0); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_G); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, loop); - } - - /* v1ptr = &inlist[nr_verts] - * *v1ptr = v0 - */ - brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); - brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); - brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); - brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); - brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); - - loop = brw_DO(p, BRW_EXECUTE_1); - { - brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); - brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); - brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); - - /* draw edge if edgeflag != 0 */ - brw_CMP(p, - vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, - deref_1f(v0, c->offset[VERT_RESULT_EDGE]), - brw_imm_f(0)); - draw_edge = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); - brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); - } - brw_ENDIF(p, draw_edge); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, loop); -} - - - -static void emit_points(struct brw_clip_compile *c, - boolean do_offset ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *loop; - struct brw_instruction *draw_point; - - struct brw_indirect v0 = brw_indirect(0, 0); - struct brw_indirect v0ptr = brw_indirect(2, 0); - - brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); - brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); - - loop = brw_DO(p, BRW_EXECUTE_1); - { - brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); - brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); - - /* draw if edgeflag != 0 - */ - brw_CMP(p, - vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, - deref_1f(v0, c->offset[VERT_RESULT_EDGE]), - brw_imm_f(0)); - draw_point = brw_IF(p, BRW_EXECUTE_1); - { - if (do_offset) - apply_one_offset(c, v0); - - brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); - } - brw_ENDIF(p, draw_point); - - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); - } - brw_WHILE(p, loop); -} - - - - - - - -static void emit_primitives( struct brw_clip_compile *c, - unsigned mode, - boolean do_offset ) -{ - switch (mode) { - case CLIP_FILL: - brw_clip_tri_emit_polygon(c); - break; - - case CLIP_LINE: - emit_lines(c, do_offset); - break; - - case CLIP_POINT: - emit_points(c, do_offset); - break; - - case CLIP_CULL: - assert(0); - break; - } -} - - - -static void emit_unfilled_primitives( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *ccw; - - /* Direction culling has already been done. - */ - if (c->key.fill_ccw != c->key.fill_cw && - c->key.fill_ccw != CLIP_CULL && - c->key.fill_cw != CLIP_CULL) - { - brw_CMP(p, - vec1(brw_null_reg()), - BRW_CONDITIONAL_GE, - get_element(c->reg.dir, 2), - brw_imm_f(0)); - - ccw = brw_IF(p, BRW_EXECUTE_1); - { - emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); - } - ccw = brw_ELSE(p, ccw); - { - emit_primitives(c, c->key.fill_cw, c->key.offset_cw); - } - brw_ENDIF(p, ccw); - } - else if (c->key.fill_cw != CLIP_CULL) { - emit_primitives(c, c->key.fill_cw, c->key.offset_cw); - } - else if (c->key.fill_ccw != CLIP_CULL) { - emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); - } -} - - - - -static void check_nr_verts( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *if_insn; - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); - if_insn = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_kill_thread(c); - } - brw_ENDIF(p, if_insn); -} - - -void brw_emit_unfilled_clip( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *do_clip; - - - c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || - (c->key.fill_ccw != c->key.fill_cw) || - c->key.fill_ccw == CLIP_CULL || - c->key.fill_cw == CLIP_CULL || - c->key.copy_bfc_cw || - c->key.copy_bfc_ccw); - - brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); - brw_clip_tri_init_vertices(c); - - assert(c->offset[VERT_RESULT_EDGE]); - - if (c->key.fill_ccw == CLIP_CULL && - c->key.fill_cw == CLIP_CULL) { - brw_clip_kill_thread(c); - return; - } - - merge_edgeflags(c); - - /* Need to use the inlist indirection here: - */ - if (c->need_direction) - compute_tri_direction(c); - - if (c->key.fill_ccw == CLIP_CULL || - c->key.fill_cw == CLIP_CULL) - cull_direction(c); - - if (c->key.offset_ccw || - c->key.offset_cw) - compute_offset(c); - - if (c->key.copy_bfc_ccw || - c->key.copy_bfc_cw) - copy_bfc(c); - - /* Need to do this whether we clip or not: - */ - if (c->key.do_flat_shading) - brw_clip_tri_flat_shade(c); - - brw_clip_init_clipmask(c); - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); - do_clip = brw_IF(p, BRW_EXECUTE_1); - { - brw_clip_init_planes(c); - brw_clip_tri(c); - check_nr_verts(c); - } - brw_ENDIF(p, do_clip); - - emit_unfilled_primitives(c); - brw_clip_kill_thread(c); -} - - - diff --git a/src/gallium/drivers/i965simple/brw_clip_util.c b/src/gallium/drivers/i965simple/brw_clip_util.c deleted file mode 100644 index 6d58ceafff..0000000000 --- a/src/gallium/drivers/i965simple/brw_clip_util.c +++ /dev/null @@ -1,351 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_clip.h" - - - - - -static struct brw_reg get_tmp( struct brw_clip_compile *c ) -{ - struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - - -static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w) -{ - return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); -} - - -void brw_clip_init_planes( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - - if (!c->key.nr_userclip) { - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); - brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); - } -} - - - -#define W 3 - -/* Project 'pos' to screen space (or back again), overwrite with results: - */ -static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) -{ - struct brw_compile *p = &c->func; - - /* calc rhw - */ - brw_math_invert(p, get_element(pos, W), get_element(pos, W)); - - /* value.xyz *= value.rhw - */ - brw_set_access_mode(p, BRW_ALIGN_16); - brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); - brw_set_access_mode(p, BRW_ALIGN_1); -} - - -static void brw_clip_project_vertex( struct brw_clip_compile *c, - struct brw_indirect vert_addr ) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_reg tmp = get_tmp(c); - - /* Fixup position. Extract from the original vertex and re-project - * to screen space: - */ - brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); - brw_clip_project_position(c, tmp); - brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); - - release_tmp(c, tmp); -#else - #warning "disabled" -#endif -} - - - - -/* Interpolate between two vertices and put the result into a0.0. - * Increment a0.0 accordingly. - */ -void brw_clip_interp_vertex( struct brw_clip_compile *c, - struct brw_indirect dest_ptr, - struct brw_indirect v0_ptr, /* from */ - struct brw_indirect v1_ptr, /* to */ - struct brw_reg t0, - boolean force_edgeflag) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_reg tmp = get_tmp(c); - unsigned i; - - /* Just copy the vertex header: - */ - brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); - - /* Iterate over each attribute (could be done in pairs?) - */ - for (i = 0; i < c->nr_attrs; i++) { - unsigned delta = i*16 + 32; - - if (delta == c->offset[VERT_RESULT_EDGE]) { - if (force_edgeflag) - brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); - else - brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); - } - else { - /* Interpolate: - * - * New = attr0 + t*attr1 - t*attr0 - */ - brw_MUL(p, - vec4(brw_null_reg()), - deref_4f(v1_ptr, delta), - t0); - - brw_MAC(p, - tmp, - negate(deref_4f(v0_ptr, delta)), - t0); - - brw_ADD(p, - deref_4f(dest_ptr, delta), - deref_4f(v0_ptr, delta), - tmp); - } - } - - if (i & 1) { - unsigned delta = i*16 + 32; - brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); - } - - release_tmp(c, tmp); - - /* Recreate the projected (NDC) coordinate in the new vertex - * header: - */ - brw_clip_project_vertex(c, dest_ptr ); -#else - #warning "disabled" -#endif -} - - - - -#define MAX_MRF 16 - -void brw_clip_emit_vue(struct brw_clip_compile *c, - struct brw_indirect vert, - boolean allocate, - boolean eot, - unsigned header) -{ - struct brw_compile *p = &c->func; - unsigned start = c->last_mrf; - - assert(!(allocate && eot)); - - /* Cycle through mrf regs - probably futile as we have to wait for - * the allocation response anyway. Also, the order this function - * is invoked doesn't correspond to the order the instructions will - * be executed, so it won't have any effect in many cases. - */ -#if 0 - if (start + c->nr_regs + 1 >= MAX_MRF) - start = 0; - - c->last_mrf = start + c->nr_regs + 1; -#endif - - /* Copy the vertex from vertn into m1..mN+1: - */ - brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); - - /* Overwrite PrimType and PrimStart in the message header, for - * each vertex in turn: - */ - brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); - - - /* Send each vertex as a seperate write to the urb. This - * is different to the concept in brw_sf_emit.c, where - * subsequent writes are used to build up a single urb - * entry. Each of these writes instantiates a seperate - * urb entry - (I think... what about 'allocate'?) - */ - brw_urb_WRITE(p, - allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - start, - c->reg.R0, - allocate, - 1, /* used */ - c->nr_regs + 1, /* msg length */ - allocate ? 1 : 0, /* response_length */ - eot, /* eot */ - 1, /* writes_complete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); -} - - - -void brw_clip_kill_thread(struct brw_clip_compile *c) -{ - struct brw_compile *p = &c->func; - - /* Send an empty message to kill the thread and release any - * allocated urb entry: - */ - brw_urb_WRITE(p, - retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - 0, - c->reg.R0, - 0, /* allocate */ - 0, /* used */ - 0, /* msg len */ - 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ - 0, - BRW_URB_SWIZZLE_NONE); -} - - - - -struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) -{ - return brw_address(c->reg.fixed_planes); -} - - -struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) -{ - if (c->key.nr_userclip) { - return brw_imm_uw(16); - } - else { - return brw_imm_uw(4); - } -} - - -/* If flatshading, distribute color from provoking vertex prior to - * clipping. - */ -void brw_clip_copy_colors( struct brw_clip_compile *c, - unsigned to, unsigned from ) -{ -#if 0 - struct brw_compile *p = &c->func; - - if (c->offset[VERT_RESULT_COL0]) - brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); - - if (c->offset[VERT_RESULT_COL1]) - brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); - - if (c->offset[VERT_RESULT_BFC0]) - brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); - - if (c->offset[VERT_RESULT_BFC1]) - brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); -#else - #warning "disabled" -#endif -} - - - -void brw_clip_init_clipmask( struct brw_clip_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg incoming = get_element_ud(c->reg.R0, 2); - - /* Shift so that lowest outcode bit is rightmost: - */ - brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); - - if (c->key.nr_userclip) { - struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); - - /* Rearrange userclip outcodes so that they come directly after - * the fixed plane bits. - */ - brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); - brw_SHR(p, tmp, tmp, brw_imm_ud(8)); - brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); - - release_tmp(c, tmp); - } -} - diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c deleted file mode 100644 index 9b33285bc7..0000000000 --- a/src/gallium/drivers/i965simple/brw_context.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_draw.h" -#include "brw_vs.h" -#include "brw_tex_layout.h" -#include "brw_winsys.h" - -#include "pipe/internal/p_winsys_screen.h" -#include "pipe/p_context.h" -#include "util/u_memory.h" -#include "pipe/p_screen.h" - - -#ifndef BRW_DEBUG -int BRW_DEBUG = (0); -#endif - - -static void brw_destroy(struct pipe_context *pipe) -{ - struct brw_context *brw = brw_context(pipe); - - if(brw->winsys->destroy) - brw->winsys->destroy(brw->winsys); - - FREE(brw); -} - - -static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, - unsigned clearValue) -{ - int x, y, w, h; - /* FIXME: corny... */ - - x = 0; - y = 0; - w = ps->width; - h = ps->height; - - pipe->surface_fill(pipe, ps, x, y, w, h, clearValue); -} - -static unsigned int -brw_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -brw_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -struct pipe_context *brw_create(struct pipe_screen *screen, - struct brw_winsys *brw_winsys, - unsigned pci_id) -{ - struct brw_context *brw; - - debug_printf("%s: creating brw_context with pci id 0x%x\n", - __FUNCTION__, pci_id); - - brw = CALLOC_STRUCT(brw_context); - if (brw == NULL) - return NULL; - - brw->winsys = brw_winsys; - brw->pipe.winsys = screen->winsys; - brw->pipe.screen = screen; - - brw->pipe.destroy = brw_destroy; - brw->pipe.clear = brw_clear; - - brw->pipe.is_texture_referenced = brw_is_texture_referenced; - brw->pipe.is_buffer_referenced = brw_is_buffer_referenced; - - brw_init_surface_functions(brw); - brw_init_texture_functions(brw); - brw_init_state_functions(brw); - brw_init_flush_functions(brw); - brw_init_draw_functions( brw ); - - - brw_init_state( brw ); - - brw->pci_id = pci_id; - brw->dirty = ~0; - brw->hardware_dirty = ~0; - - memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); - - return &brw->pipe; -} - diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h deleted file mode 100644 index 3079485180..0000000000 --- a/src/gallium/drivers/i965simple/brw_context.h +++ /dev/null @@ -1,684 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRWCONTEXT_INC -#define BRWCONTEXT_INC - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "tgsi/tgsi_scan.h" - -#include "brw_structs.h" -#include "brw_winsys.h" - - -/* Glossary: - * - * URB - uniform resource buffer. A mid-sized buffer which is - * partitioned between the fixed function units and used for passing - * values (vertices, primitives, constants) between them. - * - * CURBE - constant URB entry. An urb region (entry) used to hold - * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. - * - * VUE - vertex URB entry. An urb entry holding a vertex and usually - * a vertex header. The header contains control information and - * things like primitive type, Begin/end flags and clip codes. - * - * PUE - primitive URB entry. An urb entry produced by the setup (SF) - * unit holding rasterization and interpolation parameters. - * - * GRF - general register file. One of several register files - * addressable by programmed threads. The inputs (r0, payload, curbe, - * urb) of the thread are preloaded to this area before the thread is - * spawned. The registers are individually 8 dwords wide and suitable - * for general usage. Registers holding thread input values are not - * special and may be overwritten. - * - * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous - * MRF registers. All program output is via these messages. URB - * entries are populated by sending a message to the shared URB - * function containing the new data, together with a control word, - * often an unmodified copy of R0. - * - * R0 - GRF register 0. Typically holds control information used when - * sending messages to other threads. - * - * EU or GEN4 EU: The name of the programmable subsystem of the - * i965 hardware. Threads are executed by the EU, the registers - * described above are part of the EU architecture. - * - * Fixed function units: - * - * CS - Command streamer. Notional first unit, little software - * interaction. Holds the URB entries used for constant data, ie the - * CURBEs. - * - * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of - * this unit is responsible for pulling vertices out of vertex buffers - * in vram and injecting them into the processing pipe as VUEs. If - * enabled, it first passes them to a VS thread which is a good place - * for the driver to implement any active vertex shader. - * - * GS - Geometry Shader. This corresponds to a new DX10 concept. If - * enabled, incoming strips etc are passed to GS threads in individual - * line/triangle/point units. The GS thread may perform arbitary - * computation and emit whatever primtives with whatever vertices it - * chooses. This makes GS an excellent place to implement GL's - * unfilled polygon modes, though of course it is capable of much - * more. Additionally, GS is used to translate away primitives not - * handled by latter units, including Quads and Lineloops. - * - * CS - Clipper. Mesa's clipping algorithms are imported to run on - * this unit. The fixed function part performs cliptesting against - * the 6 fixed clipplanes and makes descisions on whether or not the - * incoming primitive needs to be passed to a thread for clipping. - * User clip planes are handled via cooperation with the VS thread. - * - * SF - Strips Fans or Setup: Triangles are prepared for - * rasterization. Interpolation coefficients are calculated. - * Flatshading and two-side lighting usually performed here. - * - * WM - Windower. Interpolation of vertex attributes performed here. - * Fragment shader implemented here. SIMD aspects of EU taken full - * advantage of, as pixels are processed in blocks of 16. - * - * CC - Color Calculator. No EU threads associated with this unit. - * Handles blending and (presumably) depth and stencil testing. - */ - -#define BRW_MAX_CURBE (32*16) - -struct brw_context; -struct brw_winsys; - - -/* Raised when we receive new state across the pipe interface: - */ -#define BRW_NEW_VIEWPORT 0x1 -#define BRW_NEW_RASTERIZER 0x2 -#define BRW_NEW_FS 0x4 -#define BRW_NEW_BLEND 0x8 -#define BRW_NEW_CLIP 0x10 -#define BRW_NEW_SCISSOR 0x20 -#define BRW_NEW_STIPPLE 0x40 -#define BRW_NEW_FRAMEBUFFER 0x80 -#define BRW_NEW_ALPHA_TEST 0x100 -#define BRW_NEW_DEPTH_STENCIL 0x200 -#define BRW_NEW_SAMPLER 0x400 -#define BRW_NEW_TEXTURE 0x800 -#define BRW_NEW_CONSTANTS 0x1000 -#define BRW_NEW_VBO 0x2000 -#define BRW_NEW_VS 0x4000 - -/* Raised for other internal events: - */ -#define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_PSP 0x20000 -#define BRW_NEW_CURBE_OFFSETS 0x40000 -#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 -#define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_SCENE 0x200000 -#define BRW_NEW_SF_LINKAGE 0x400000 - -extern int BRW_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 -#define DEBUG_BATCH 0x80000 -#define DEBUG_BUFMGR 0x100000 -#define DEBUG_BLIT 0x200000 -#define DEBUG_REGION 0x400000 -#define DEBUG_MIPTREE 0x800000 - -#define DBG(...) do { \ - if (BRW_DEBUG & FILE_DEBUG_FLAG) \ - debug_printf(__VA_ARGS__); \ -} while(0) - -#define PRINT(...) do { \ - debug_printf(__VA_ARGS__); \ -} while(0) - -struct brw_state_flags { - unsigned cache; - unsigned brw; -}; - - -struct brw_vertex_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - int id; -}; - - -struct brw_fragment_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - - boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ - int id; -}; - - -struct pipe_setup_linkage { - struct { - unsigned vp_output:5; - unsigned interp_mode:4; - unsigned bf_vp_output:5; - } fp_input[PIPE_MAX_SHADER_INPUTS]; - - unsigned fp_input_count:5; - unsigned max_vp_output:5; -}; - - - -struct brw_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned stride; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_nblocksy; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ - -struct brw_wm_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - - unsigned first_curbe_grf; - unsigned total_grf; - unsigned total_scratch; - - /* Internally generated constants for the CURBE. These are loaded - * ahead of the data from the constant buffer. - */ - const float internal_const[8]; - unsigned nr_internal_consts; - unsigned max_const; - - boolean error; -}; - -struct brw_sf_prog_data { - unsigned urb_read_length; - unsigned total_grf; - - /* Each vertex may have upto 12 attributes, 4 components each, - * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 - * rows. - * - * Actually we use 4 for each, so call it 12 rows. - */ - unsigned urb_entry_size; -}; - -struct brw_clip_prog_data { - unsigned curb_read_length; /* user planes? */ - unsigned clip_mode; - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_gs_prog_data { - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_vs_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - unsigned total_grf; - unsigned outputs_written; - - unsigned inputs_read; - - unsigned max_const; - - float imm_buf[PIPE_MAX_CONSTANT][4]; - unsigned num_imm; - unsigned num_consts; - - /* Used for calculating urb partitions: - */ - unsigned urb_entry_size; -}; - - -#define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 - -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - unsigned surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - struct pipe_buffer *buffer; - - unsigned size; - unsigned offset; /* offset of first free byte */ - - struct brw_context *brw; -}; - -struct brw_cache_item { - unsigned hash; - unsigned key_size; /* for variable-sized keys */ - const void *key; - - unsigned offset; /* offset within pool's buffer */ - unsigned data_size; - - struct brw_cache_item *next; -}; - - - -struct brw_cache { - unsigned id; - - const char *name; - - struct brw_context *brw; - struct brw_mem_pool *pool; - - struct brw_cache_item **items; - unsigned size, n_items; - - unsigned key_size; /* for fixed-size keys */ - unsigned aux_size; - - unsigned last_addr; /* offset of active item */ -}; - - - - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -/* Flags for brw->state.cache. - */ -#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) -#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) -#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) -#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) -#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) -#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) -#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) -#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) -#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) -#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) -#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) -#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) -#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) -#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) -#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) -#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) -#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) -#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) - - - - -enum brw_mempool_id { - BRW_GS_POOL, - BRW_SS_POOL, - BRW_MAX_POOL -}; - - -struct brw_cached_batch_item { - struct header *header; - unsigned sz; - struct brw_cached_batch_item *next; -}; - - - -/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life - * be easier if C allowed arrays of packed elements? - */ -#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) - - - - -struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ -}; - - - - - -struct brw_context -{ - struct pipe_context pipe; - struct brw_winsys *winsys; - - unsigned primitive; - unsigned reduced_primitive; - - boolean emit_state_always; - - struct { - struct brw_state_flags dirty; - } state; - - - struct { - const struct pipe_blend_state *Blend; - const struct pipe_depth_stencil_alpha_state *DepthStencil; - const struct pipe_poly_stipple *PolygonStipple; - const struct pipe_rasterizer_state *Raster; - const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; - const struct brw_vertex_program *VertexProgram; - const struct brw_fragment_program *FragmentProgram; - - struct pipe_clip_state Clip; - struct pipe_blend_color BlendColor; - struct pipe_scissor_state Scissor; - struct pipe_viewport_state Viewport; - struct pipe_framebuffer_state FrameBuffer; - - const struct pipe_constant_buffer *Constants[2]; - const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; - } attribs; - - unsigned num_samplers; - unsigned num_textures; - - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; - struct brw_cached_batch_item *cached_batch_items; - - struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; - - struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: - */ - struct brw_vertex_info info; - } vb; - - - unsigned hardware_dirty; - unsigned dirty; - unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: - */ - struct { - unsigned vsize; /* vertex size plus header in urb registers */ - unsigned csize; /* constant buffer size in urb registers */ - unsigned sfsize; /* setup data size in urb registers */ - - boolean constrained; - - unsigned nr_vs_entries; - unsigned nr_gs_entries; - unsigned nr_clip_entries; - unsigned nr_sf_entries; - unsigned nr_cs_entries; - -/* unsigned vs_size; */ -/* unsigned gs_size; */ -/* unsigned clip_size; */ -/* unsigned sf_size; */ -/* unsigned cs_size; */ - - unsigned vs_start; - unsigned gs_start; - unsigned clip_start; - unsigned sf_start; - unsigned cs_start; - } urb; - - - /* BRW_NEW_CURBE_OFFSETS: - */ - struct { - unsigned wm_start; - unsigned wm_size; - unsigned clip_start; - unsigned clip_size; - unsigned vs_start; - unsigned vs_size; - unsigned total_size; - - unsigned gs_offset; - - float *last_buf; - unsigned last_bufsz; - } curbe; - - struct { - struct brw_vs_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } vs; - - struct { - struct brw_gs_prog_data *prog_data; - - boolean prog_active; - unsigned prog_gs_offset; - unsigned state_gs_offset; - } gs; - - struct { - struct brw_clip_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } clip; - - - struct { - struct brw_sf_prog_data *prog_data; - - struct pipe_setup_linkage linkage; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } sf; - - struct { - struct brw_wm_prog_data *prog_data; - -// struct brw_wm_compiler *compile_data; - - - /** - * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER - * cache - */ - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - unsigned render_surf; - unsigned nr_surfaces; - - unsigned max_threads; - struct pipe_buffer *scratch_buffer; - unsigned scratch_buffer_size; - - unsigned sampler_count; - unsigned sampler_gs_offset; - - struct brw_surface_binding_table bind; - unsigned bind_ss_offset; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } wm; - - - struct { - unsigned vp_gs_offset; - unsigned state_gs_offset; - } cc; - - - /* Used to give every program string a unique id - */ - unsigned program_id; -}; - - -#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - - -/*====================================================================== - * brw_vtbl.c - */ -void brw_do_flush( struct brw_context *brw, - unsigned flags ); - - -/*====================================================================== - * brw_state.c - */ -void brw_validate_state(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); - - -/*====================================================================== - * brw_tex.c - */ -void brwUpdateTextureState( struct brw_context *brw ); - - -/* brw_urb.c - */ -void brw_upload_urb_fence(struct brw_context *brw); - -void brw_upload_constant_buffer_state(struct brw_context *brw); - -void brw_init_surface_functions(struct brw_context *brw); -void brw_init_state_functions(struct brw_context *brw); -void brw_init_flush_functions(struct brw_context *brw); -void brw_init_string_functions(struct brw_context *brw); - -/*====================================================================== - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static inline struct brw_context * -brw_context( struct pipe_context *ctx ) -{ - return (struct brw_context *)ctx; -} - -#endif - diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c deleted file mode 100644 index 904cde8e30..0000000000 --- a/src/gallium/drivers/i965simple/brw_curbe.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_batch.h" -#include "brw_util.h" -#include "brw_wm.h" -#include "pipe/p_state.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -#define FILE_DEBUG_FLAG DEBUG_FALLBACKS - -/* Partition the CURBE between the various users of constant values: - */ -static void calculate_curbe_offsets( struct brw_context *brw ) -{ - /* CACHE_NEW_WM_PROG */ - unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); - - /* BRW_NEW_VERTEX_PROGRAM */ - unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); - unsigned nr_clip_regs = 0; - unsigned total_regs; - -#if 0 - /* BRW_NEW_CLIP ? */ - if (brw->attribs.Transform->ClipPlanesEnabled) { - unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); - nr_clip_regs = align(nr_planes * 4, 16); - } -#endif - - - total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; - - /* This can happen - what to do? Probably rather than falling - * back, the best thing to do is emit programs which code the - * constants as immediate values. Could do this either as a static - * cap on WM and VS, or adaptively. - * - * Unfortunately, this is currently dependent on the results of the - * program generation process (in the case of wm), so this would - * introduce the need to re-generate programs in the event of a - * curbe allocation failure. - */ - /* Max size is 32 - just large enough to - * hold the 128 parameters allowed by - * the fragment and vertex program - * api's. It's not clear what happens - * when both VP and FP want to use 128 - * parameters, though. - */ - assert(total_regs <= 32); - - /* Lazy resize: - */ - if (nr_fp_regs > brw->curbe.wm_size || - nr_vp_regs > brw->curbe.vs_size || - nr_clip_regs != brw->curbe.clip_size || - (total_regs < brw->curbe.total_size / 4 && - brw->curbe.total_size > 16)) { - - unsigned reg = 0; - - /* Calculate a new layout: - */ - reg = 0; - brw->curbe.wm_start = reg; - brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; - brw->curbe.clip_start = reg; - brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; - brw->curbe.vs_start = reg; - brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; - brw->curbe.total_size = reg; - -#if 0 - if (0) - DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n", - brw->curbe.wm_start, - brw->curbe.wm_size, - brw->curbe.clip_start, - brw->curbe.clip_size, - brw->curbe.vs_start, - brw->curbe.vs_size ); -#endif - - brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; - } -} - - -const struct brw_tracked_state brw_curbe_offsets = { - .dirty = { - .brw = (BRW_NEW_CLIP | - BRW_NEW_VS), - .cache = CACHE_NEW_WM_PROG - }, - .update = calculate_curbe_offsets -}; - - - -/* Define the number of curbes within CS's urb allocation. Multiple - * urb entries -> multiple curbes. These will be used by - * fixed-function hardware in a double-buffering scheme to avoid a - * pipeline stall each time the contents of the curbe is changed. - */ -void brw_upload_constant_buffer_state(struct brw_context *brw) -{ - struct brw_constant_buffer_state cbs; - memset(&cbs, 0, sizeof(cbs)); - - /* It appears that this is the state packet for the CS unit, ie. the - * urb entries detailed here are housed in the CS range from the - * URB_FENCE command. - */ - cbs.header.opcode = CMD_CONST_BUFFER_STATE; - cbs.header.length = sizeof(cbs)/4 - 2; - - /* BRW_NEW_URB_FENCE */ - cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; - cbs.bits0.urb_entry_size = brw->urb.csize - 1; - - assert(brw->urb.nr_cs_entries); - BRW_CACHED_BATCH_STRUCT(brw, &cbs); -} - - -static float fixed_plane[6][4] = { - { 0, 0, -1, 1 }, - { 0, 0, 1, 1 }, - { 0, -1, 0, 1 }, - { 0, 1, 0, 1 }, - {-1, 0, 0, 1 }, - { 1, 0, 0, 1 } -}; - -/* Upload a new set of constants. Too much variability to go into the - * cache mechanism, but maybe would benefit from a comparison against - * the current uploaded set of constants. - */ -static void upload_constant_buffer(struct brw_context *brw) -{ - struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; - unsigned sz = brw->curbe.total_size; - unsigned bufsz = sz * sizeof(float); - float *buf; - unsigned i; - - - if (sz == 0) { - struct brw_constant_buffer cb; - cb.header.opcode = CMD_CONST_BUFFER; - cb.header.length = sizeof(cb)/4 - 2; - cb.header.valid = 0; - cb.bits0.buffer_length = 0; - cb.bits0.buffer_address = 0; - BRW_BATCH_STRUCT(brw, &cb); - - if (brw->curbe.last_buf) { - free(brw->curbe.last_buf); - brw->curbe.last_buf = NULL; - brw->curbe.last_bufsz = 0; - } - - return; - } - - buf = (float *)malloc(bufsz); - - memset(buf, 0, bufsz); - - if (brw->curbe.wm_size) { - unsigned offset = brw->curbe.wm_start * 16; - - /* First the constant buffer constants: - */ - - /* Then any internally generated constants: - */ - for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) - buf[offset + i] = brw->wm.prog_data->internal_const[i]; - - assert(brw->wm.prog_data->max_const == - brw->wm.prog_data->nr_internal_consts); - } - - - /* The clipplanes are actually delivered to both CLIP and VS units. - * VS uses them to calculate the outcode bitmasks. - */ - if (brw->curbe.clip_size) { - unsigned offset = brw->curbe.clip_start * 16; - unsigned j; - - /* If any planes are going this way, send them all this way: - */ - for (i = 0; i < 6; i++) { - buf[offset + i * 4 + 0] = fixed_plane[i][0]; - buf[offset + i * 4 + 1] = fixed_plane[i][1]; - buf[offset + i * 4 + 2] = fixed_plane[i][2]; - buf[offset + i * 4 + 3] = fixed_plane[i][3]; - } - - /* Clip planes: BRW_NEW_CLIP: - */ - for (j = 0; j < brw->attribs.Clip.nr; j++) { - buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; - buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; - buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; - buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; - i++; - } - } - - - if (brw->curbe.vs_size) { - unsigned offset = brw->curbe.vs_start * 16; - /*unsigned nr = vp->max_const;*/ - const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; - struct pipe_winsys *ws = brw->pipe.winsys; - /* FIXME: buffer size is num_consts + num_immediates */ - if (brw->vs.prog_data->num_consts) { - /* map the vertex constant buffer and copy to curbe: */ - void *data = ws->buffer_map(ws, cbuffer->buffer, 0); - /* FIXME: this is wrong. the cbuffer->buffer->size currently - * represents size of consts + immediates. so if we'll - * have both we'll copy over the end of the buffer - * with the subsequent memcpy */ - memcpy(&buf[offset], data, cbuffer->buffer->size); - ws->buffer_unmap(ws, cbuffer->buffer); - offset += cbuffer->buffer->size; - } - /*immediates*/ - if (brw->vs.prog_data->num_imm) { - memcpy(&buf[offset], brw->vs.prog_data->imm_buf, - brw->vs.prog_data->num_imm * 4 * sizeof(float)); - } - } - - if (1) { - for (i = 0; i < sz; i+=4) - debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, - buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - - debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", - brw->curbe.last_buf, buf, - bufsz, brw->curbe.last_bufsz, - brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); - } - - if (brw->curbe.last_buf && - bufsz == brw->curbe.last_bufsz && - memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { - free(buf); -/* return; */ - } - else { - if (brw->curbe.last_buf) - free(brw->curbe.last_buf); - brw->curbe.last_buf = buf; - brw->curbe.last_bufsz = bufsz; - - - if (!brw_pool_alloc(pool, - bufsz, - 1 << 6, - &brw->curbe.gs_offset)) { - debug_printf("out of GS memory for curbe\n"); - assert(0); - return; - } - - - /* Copy data to the buffer: - */ - brw->winsys->buffer_subdata_typed(brw->winsys, - pool->buffer, - brw->curbe.gs_offset, - bufsz, - buf, - BRW_CONSTANT_BUFFER ); - } - - /* TODO: only emit the constant_buffer packet when necessary, ie: - - contents have changed - - offset has changed - - hw requirements due to other packets emitted. - */ - { - struct brw_constant_buffer cb; - - memset(&cb, 0, sizeof(cb)); - - cb.header.opcode = CMD_CONST_BUFFER; - cb.header.length = sizeof(cb)/4 - 2; - cb.header.valid = 1; - cb.bits0.buffer_length = sz - 1; - cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; - - /* Because this provokes an action (ie copy the constants into the - * URB), it shouldn't be shortcircuited if identical to the - * previous time - because eg. the urb destination may have - * changed, or the urb contents different to last time. - * - * Note that the data referred to is actually copied internally, - * not just used in place according to passed pointer. - * - * It appears that the CS unit takes care of using each available - * URB entry (Const URB Entry == CURBE) in turn, and issuing - * flushes as necessary when doublebuffering of CURBEs isn't - * possible. - */ - BRW_BATCH_STRUCT(brw, &cb); - } -} - -/* This tracked state is unique in that the state it monitors varies - * dynamically depending on the parameters tracked by the fragment and - * vertex programs. This is the template used as a starting point, - * each context will maintain a copy of this internally and update as - * required. - */ -const struct brw_tracked_state brw_constant_buffer = { - .dirty = { - .brw = (BRW_NEW_CLIP | - BRW_NEW_CONSTANTS | - BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ - BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ - BRW_NEW_CURBE_OFFSETS), - .cache = (CACHE_NEW_WM_PROG) - }, - .update = upload_constant_buffer -}; - diff --git a/src/gallium/drivers/i965simple/brw_defines.h b/src/gallium/drivers/i965simple/brw_defines.h deleted file mode 100644 index 715d2d2d01..0000000000 --- a/src/gallium/drivers/i965simple/brw_defines.h +++ /dev/null @@ -1,870 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_DEFINES_H -#define BRW_DEFINES_H - -/* - */ -#define MI_NOOP 0x00 -#define MI_USER_INTERRUPT 0x02 -#define MI_WAIT_FOR_EVENT 0x03 -#define MI_FLUSH 0x04 -#define MI_REPORT_HEAD 0x07 -#define MI_ARB_ON_OFF 0x08 -#define MI_BATCH_BUFFER_END 0x0A -#define MI_OVERLAY_FLIP 0x11 -#define MI_LOAD_SCAN_LINES_INCL 0x12 -#define MI_LOAD_SCAN_LINES_EXCL 0x13 -#define MI_DISPLAY_BUFFER_INFO 0x14 -#define MI_SET_CONTEXT 0x18 -#define MI_STORE_DATA_IMM 0x20 -#define MI_STORE_DATA_INDEX 0x21 -#define MI_LOAD_REGISTER_IMM 0x22 -#define MI_STORE_REGISTER_MEM 0x24 -#define MI_BATCH_BUFFER_START 0x31 - -#define MI_SYNCHRONOUS_FLIP 0x0 -#define MI_ASYNCHRONOUS_FLIP 0x1 - -#define MI_BUFFER_SECURE 0x0 -#define MI_BUFFER_NONSECURE 0x1 - -#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 -#define MI_ARBITRATE_BETWEEN_INSTS 0x1 -#define MI_NO_ARBITRATION 0x3 - -#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 -#define MI_CONDITION_CODE_WAIT_0 0x1 -#define MI_CONDITION_CODE_WAIT_1 0x2 -#define MI_CONDITION_CODE_WAIT_2 0x3 -#define MI_CONDITION_CODE_WAIT_3 0x4 -#define MI_CONDITION_CODE_WAIT_4 0x5 - -#define MI_DISPLAY_PIPE_A 0x0 -#define MI_DISPLAY_PIPE_B 0x1 - -#define MI_DISPLAY_PLANE_A 0x0 -#define MI_DISPLAY_PLANE_B 0x1 -#define MI_DISPLAY_PLANE_C 0x2 - -#define MI_STANDARD_FLIP 0x0 -#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 -#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 -#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 - -#define MI_PHYSICAL_ADDRESS 0x0 -#define MI_VIRTUAL_ADDRESS 0x1 - -#define MI_BUFFER_MEMORY_MAIN 0x0 -#define MI_BUFFER_MEMORY_GTT 0x2 -#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 - -#define MI_FLIP_CONTINUE 0x0 -#define MI_FLIP_ON 0x1 -#define MI_FLIP_OFF 0x2 - -#define MI_UNTRUSTED_REGISTER_SPACE 0x0 -#define MI_TRUSTED_REGISTER_SPACE 0x1 - -/* 3D state: - */ -#define _3DOP_3DSTATE_PIPELINED 0x0 -#define _3DOP_3DSTATE_NONPIPELINED 0x1 -#define _3DOP_3DCONTROL 0x2 -#define _3DOP_3DPRIMITIVE 0x3 - -#define _3DSTATE_PIPELINED_POINTERS 0x00 -#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 -#define _3DSTATE_VERTEX_BUFFERS 0x08 -#define _3DSTATE_VERTEX_ELEMENTS 0x09 -#define _3DSTATE_INDEX_BUFFER 0x0A -#define _3DSTATE_VF_STATISTICS 0x0B -#define _3DSTATE_DRAWING_RECTANGLE 0x00 -#define _3DSTATE_CONSTANT_COLOR 0x01 -#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 -#define _3DSTATE_CHROMA_KEY 0x04 -#define _3DSTATE_DEPTH_BUFFER 0x05 -#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 -#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 -#define _3DSTATE_LINE_STIPPLE 0x08 -#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 -#define _3DCONTROL 0x00 -#define _3DPRIMITIVE 0x00 - -#define PIPE_CONTROL_NOWRITE 0x00 -#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 -#define PIPE_CONTROL_WRITEDEPTH 0x02 -#define PIPE_CONTROL_WRITETIMESTAMP 0x03 - -#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 -#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 - -#define _3DPRIM_POINTLIST 0x01 -#define _3DPRIM_LINELIST 0x02 -#define _3DPRIM_LINESTRIP 0x03 -#define _3DPRIM_TRILIST 0x04 -#define _3DPRIM_TRISTRIP 0x05 -#define _3DPRIM_TRIFAN 0x06 -#define _3DPRIM_QUADLIST 0x07 -#define _3DPRIM_QUADSTRIP 0x08 -#define _3DPRIM_LINELIST_ADJ 0x09 -#define _3DPRIM_LINESTRIP_ADJ 0x0A -#define _3DPRIM_TRILIST_ADJ 0x0B -#define _3DPRIM_TRISTRIP_ADJ 0x0C -#define _3DPRIM_TRISTRIP_REVERSE 0x0D -#define _3DPRIM_POLYGON 0x0E -#define _3DPRIM_RECTLIST 0x0F -#define _3DPRIM_LINELOOP 0x10 -#define _3DPRIM_POINTLIST_BF 0x11 -#define _3DPRIM_LINESTRIP_CONT 0x12 -#define _3DPRIM_LINESTRIP_BF 0x13 -#define _3DPRIM_LINESTRIP_CONT_BF 0x14 -#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 - -#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 -#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 - -#define BRW_ANISORATIO_2 0 -#define BRW_ANISORATIO_4 1 -#define BRW_ANISORATIO_6 2 -#define BRW_ANISORATIO_8 3 -#define BRW_ANISORATIO_10 4 -#define BRW_ANISORATIO_12 5 -#define BRW_ANISORATIO_14 6 -#define BRW_ANISORATIO_16 7 - -#define BRW_BLENDFACTOR_ONE 0x1 -#define BRW_BLENDFACTOR_SRC_COLOR 0x2 -#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 -#define BRW_BLENDFACTOR_DST_ALPHA 0x4 -#define BRW_BLENDFACTOR_DST_COLOR 0x5 -#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 -#define BRW_BLENDFACTOR_CONST_COLOR 0x7 -#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 -#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 -#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A -#define BRW_BLENDFACTOR_ZERO 0x11 -#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 -#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 -#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 -#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 -#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 -#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 -#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 -#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A - -#define BRW_BLENDFUNCTION_ADD 0 -#define BRW_BLENDFUNCTION_SUBTRACT 1 -#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 -#define BRW_BLENDFUNCTION_MIN 3 -#define BRW_BLENDFUNCTION_MAX 4 - -#define BRW_ALPHATEST_FORMAT_UNORM8 0 -#define BRW_ALPHATEST_FORMAT_FLOAT32 1 - -#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 -#define BRW_CHROMAKEY_REPLACE_BLACK 1 - -#define BRW_CLIP_API_OGL 0 -#define BRW_CLIP_API_DX 1 - -#define BRW_CLIPMODE_NORMAL 0 -#define BRW_CLIPMODE_CLIP_ALL 1 -#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 -#define BRW_CLIPMODE_REJECT_ALL 3 -#define BRW_CLIPMODE_ACCEPT_ALL 4 - -#define BRW_CLIP_NDCSPACE 0 -#define BRW_CLIP_SCREENSPACE 1 - -#define BRW_COMPAREFUNCTION_ALWAYS 0 -#define BRW_COMPAREFUNCTION_NEVER 1 -#define BRW_COMPAREFUNCTION_LESS 2 -#define BRW_COMPAREFUNCTION_EQUAL 3 -#define BRW_COMPAREFUNCTION_LEQUAL 4 -#define BRW_COMPAREFUNCTION_GREATER 5 -#define BRW_COMPAREFUNCTION_NOTEQUAL 6 -#define BRW_COMPAREFUNCTION_GEQUAL 7 - -#define BRW_COVERAGE_PIXELS_HALF 0 -#define BRW_COVERAGE_PIXELS_1 1 -#define BRW_COVERAGE_PIXELS_2 2 -#define BRW_COVERAGE_PIXELS_4 3 - -#define BRW_CULLMODE_BOTH 0 -#define BRW_CULLMODE_NONE 1 -#define BRW_CULLMODE_FRONT 2 -#define BRW_CULLMODE_BACK 3 - -#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 -#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 - -#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 -#define BRW_DEPTHFORMAT_D32_FLOAT 1 -#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 -#define BRW_DEPTHFORMAT_D16_UNORM 5 - -#define BRW_FLOATING_POINT_IEEE_754 0 -#define BRW_FLOATING_POINT_NON_IEEE_754 1 - -#define BRW_FRONTWINDING_CW 0 -#define BRW_FRONTWINDING_CCW 1 - -#define BRW_SPRITE_POINT_ENABLE 16 - -#define BRW_INDEX_BYTE 0 -#define BRW_INDEX_WORD 1 -#define BRW_INDEX_DWORD 2 - -#define BRW_LOGICOPFUNCTION_CLEAR 0 -#define BRW_LOGICOPFUNCTION_NOR 1 -#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 -#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 -#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 -#define BRW_LOGICOPFUNCTION_INVERT 5 -#define BRW_LOGICOPFUNCTION_XOR 6 -#define BRW_LOGICOPFUNCTION_NAND 7 -#define BRW_LOGICOPFUNCTION_AND 8 -#define BRW_LOGICOPFUNCTION_EQUIV 9 -#define BRW_LOGICOPFUNCTION_NOOP 10 -#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 -#define BRW_LOGICOPFUNCTION_COPY 12 -#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 -#define BRW_LOGICOPFUNCTION_OR 14 -#define BRW_LOGICOPFUNCTION_SET 15 - -#define BRW_MAPFILTER_NEAREST 0x0 -#define BRW_MAPFILTER_LINEAR 0x1 -#define BRW_MAPFILTER_ANISOTROPIC 0x2 - -#define BRW_MIPFILTER_NONE 0 -#define BRW_MIPFILTER_NEAREST 1 -#define BRW_MIPFILTER_LINEAR 3 - -#define BRW_POLYGON_FRONT_FACING 0 -#define BRW_POLYGON_BACK_FACING 1 - -#define BRW_PREFILTER_ALWAYS 0x0 -#define BRW_PREFILTER_NEVER 0x1 -#define BRW_PREFILTER_LESS 0x2 -#define BRW_PREFILTER_EQUAL 0x3 -#define BRW_PREFILTER_LEQUAL 0x4 -#define BRW_PREFILTER_GREATER 0x5 -#define BRW_PREFILTER_NOTEQUAL 0x6 -#define BRW_PREFILTER_GEQUAL 0x7 - -#define BRW_PROVOKING_VERTEX_0 0 -#define BRW_PROVOKING_VERTEX_1 1 -#define BRW_PROVOKING_VERTEX_2 2 - -#define BRW_RASTRULE_UPPER_LEFT 0 -#define BRW_RASTRULE_UPPER_RIGHT 1 -/* These are listed as "Reserved, but not seen as useful" - * in Intel documentation (page 212, "Point Rasterization Rule", - * section 7.4 "SF Pipeline State Summary", of document - * "Intel® 965 Express Chipset Family and Intel® G35 Express - * Chipset Graphics Controller Programmer's Reference Manual, - * Volume 2: 3D/Media", Revision 1.0b as of January 2008, - * available at - * http://intellinuxgraphics.org/documentation.html - * at the time of this writing). - * - * These appear to be supported on at least some - * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT - * is useful when using OpenGL to render to a FBO - * (which has the pixel coordinate Y orientation inverted - * with respect to the normal OpenGL pixel coordinate system). - */ -#define BRW_RASTRULE_LOWER_LEFT 2 -#define BRW_RASTRULE_LOWER_RIGHT 3 - -#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 -#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 -#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 - -#define BRW_STENCILOP_KEEP 0 -#define BRW_STENCILOP_ZERO 1 -#define BRW_STENCILOP_REPLACE 2 -#define BRW_STENCILOP_INCRSAT 3 -#define BRW_STENCILOP_DECRSAT 4 -#define BRW_STENCILOP_INCR 5 -#define BRW_STENCILOP_DECR 6 -#define BRW_STENCILOP_INVERT 7 - -#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 -#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 - -#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 -#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 -#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 -#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 -#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 -#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 -#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 -#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 -#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 -#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 -#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 -#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 -#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 -#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 -#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 -#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 -#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 -#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 -#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 -#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 -#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 -#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 -#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 -#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 -#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 -#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 -#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A -#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B -#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C -#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D -#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E -#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F -#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 -#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 -#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 -#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 -#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 -#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 -#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 -#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 -#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 -#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 -#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 -#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 -#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 -#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 -#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 -#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 -#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA -#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB -#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC -#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD -#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE -#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF -#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 -#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 -#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 -#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 -#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 -#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 -#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 -#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 -#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA -#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF -#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 -#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 -#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 -#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 -#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 -#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 -#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 -#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA -#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB -#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC -#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED -#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE -#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 -#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 -#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 -#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 -#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 -#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 -#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 -#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 -#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 -#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 -#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 -#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 -#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 -#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 -#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 -#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 -#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 -#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 -#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 -#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 -#define BRW_SURFACEFORMAT_R16_UNORM 0x10A -#define BRW_SURFACEFORMAT_R16_SNORM 0x10B -#define BRW_SURFACEFORMAT_R16_SINT 0x10C -#define BRW_SURFACEFORMAT_R16_UINT 0x10D -#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E -#define BRW_SURFACEFORMAT_I16_UNORM 0x111 -#define BRW_SURFACEFORMAT_L16_UNORM 0x112 -#define BRW_SURFACEFORMAT_A16_UNORM 0x113 -#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 -#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 -#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 -#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 -#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B -#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C -#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D -#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E -#define BRW_SURFACEFORMAT_R16_USCALED 0x11F -#define BRW_SURFACEFORMAT_R8_UNORM 0x140 -#define BRW_SURFACEFORMAT_R8_SNORM 0x141 -#define BRW_SURFACEFORMAT_R8_SINT 0x142 -#define BRW_SURFACEFORMAT_R8_UINT 0x143 -#define BRW_SURFACEFORMAT_A8_UNORM 0x144 -#define BRW_SURFACEFORMAT_I8_UNORM 0x145 -#define BRW_SURFACEFORMAT_L8_UNORM 0x146 -#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 -#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 -#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 -#define BRW_SURFACEFORMAT_R8_USCALED 0x14A -#define BRW_SURFACEFORMAT_R1_UINT 0x181 -#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 -#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 -#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 -#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 -#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 -#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 -#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A -#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B -#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C -#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D -#define BRW_SURFACEFORMAT_MONO8 0x18E -#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F -#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 -#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 -#define BRW_SURFACEFORMAT_FXT1 0x192 -#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 -#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 -#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 -#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 -#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 -#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 -#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 -#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A -#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C -#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D -#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E -#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F - -#define BRW_SURFACERETURNFORMAT_FLOAT32 0 -#define BRW_SURFACERETURNFORMAT_S1 1 - -#define BRW_SURFACE_1D 0 -#define BRW_SURFACE_2D 1 -#define BRW_SURFACE_3D 2 -#define BRW_SURFACE_CUBE 3 -#define BRW_SURFACE_BUFFER 4 -#define BRW_SURFACE_NULL 7 - -#define BRW_TEXCOORDMODE_WRAP 0 -#define BRW_TEXCOORDMODE_MIRROR 1 -#define BRW_TEXCOORDMODE_CLAMP 2 -#define BRW_TEXCOORDMODE_CUBE 3 -#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 -#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 - -#define BRW_THREAD_PRIORITY_NORMAL 0 -#define BRW_THREAD_PRIORITY_HIGH 1 - -#define BRW_TILEWALK_XMAJOR 0 -#define BRW_TILEWALK_YMAJOR 1 - -#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 -#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 - -#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 -#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 - -#define BRW_VFCOMPONENT_NOSTORE 0 -#define BRW_VFCOMPONENT_STORE_SRC 1 -#define BRW_VFCOMPONENT_STORE_0 2 -#define BRW_VFCOMPONENT_STORE_1_FLT 3 -#define BRW_VFCOMPONENT_STORE_1_INT 4 -#define BRW_VFCOMPONENT_STORE_VID 5 -#define BRW_VFCOMPONENT_STORE_IID 6 -#define BRW_VFCOMPONENT_STORE_PID 7 - - - -/* Execution Unit (EU) defines - */ - -#define BRW_ALIGN_1 0 -#define BRW_ALIGN_16 1 - -#define BRW_ADDRESS_DIRECT 0 -#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 - -#define BRW_CHANNEL_X 0 -#define BRW_CHANNEL_Y 1 -#define BRW_CHANNEL_Z 2 -#define BRW_CHANNEL_W 3 - -#define BRW_COMPRESSION_NONE 0 -#define BRW_COMPRESSION_2NDHALF 1 -#define BRW_COMPRESSION_COMPRESSED 2 - -#define BRW_CONDITIONAL_NONE 0 -#define BRW_CONDITIONAL_Z 1 -#define BRW_CONDITIONAL_NZ 2 -#define BRW_CONDITIONAL_EQ 1 /* Z */ -#define BRW_CONDITIONAL_NEQ 2 /* NZ */ -#define BRW_CONDITIONAL_G 3 -#define BRW_CONDITIONAL_GE 4 -#define BRW_CONDITIONAL_L 5 -#define BRW_CONDITIONAL_LE 6 -#define BRW_CONDITIONAL_C 7 -#define BRW_CONDITIONAL_O 8 - -#define BRW_DEBUG_NONE 0 -#define BRW_DEBUG_BREAKPOINT 1 - -#define BRW_DEPENDENCY_NORMAL 0 -#define BRW_DEPENDENCY_NOTCLEARED 1 -#define BRW_DEPENDENCY_NOTCHECKED 2 -#define BRW_DEPENDENCY_DISABLE 3 - -#define BRW_EXECUTE_1 0 -#define BRW_EXECUTE_2 1 -#define BRW_EXECUTE_4 2 -#define BRW_EXECUTE_8 3 -#define BRW_EXECUTE_16 4 -#define BRW_EXECUTE_32 5 - -#define BRW_HORIZONTAL_STRIDE_0 0 -#define BRW_HORIZONTAL_STRIDE_1 1 -#define BRW_HORIZONTAL_STRIDE_2 2 -#define BRW_HORIZONTAL_STRIDE_4 3 - -#define BRW_INSTRUCTION_NORMAL 0 -#define BRW_INSTRUCTION_SATURATE 1 - -#define BRW_MASK_ENABLE 0 -#define BRW_MASK_DISABLE 1 - -#define BRW_OPCODE_MOV 1 -#define BRW_OPCODE_SEL 2 -#define BRW_OPCODE_NOT 4 -#define BRW_OPCODE_AND 5 -#define BRW_OPCODE_OR 6 -#define BRW_OPCODE_XOR 7 -#define BRW_OPCODE_SHR 8 -#define BRW_OPCODE_SHL 9 -#define BRW_OPCODE_RSR 10 -#define BRW_OPCODE_RSL 11 -#define BRW_OPCODE_ASR 12 -#define BRW_OPCODE_CMP 16 -#define BRW_OPCODE_JMPI 32 -#define BRW_OPCODE_IF 34 -#define BRW_OPCODE_IFF 35 -#define BRW_OPCODE_ELSE 36 -#define BRW_OPCODE_ENDIF 37 -#define BRW_OPCODE_DO 38 -#define BRW_OPCODE_WHILE 39 -#define BRW_OPCODE_BREAK 40 -#define BRW_OPCODE_CONTINUE 41 -#define BRW_OPCODE_HALT 42 -#define BRW_OPCODE_MSAVE 44 -#define BRW_OPCODE_MRESTORE 45 -#define BRW_OPCODE_PUSH 46 -#define BRW_OPCODE_POP 47 -#define BRW_OPCODE_WAIT 48 -#define BRW_OPCODE_SEND 49 -#define BRW_OPCODE_ADD 64 -#define BRW_OPCODE_MUL 65 -#define BRW_OPCODE_AVG 66 -#define BRW_OPCODE_FRC 67 -#define BRW_OPCODE_RNDU 68 -#define BRW_OPCODE_RNDD 69 -#define BRW_OPCODE_RNDE 70 -#define BRW_OPCODE_RNDZ 71 -#define BRW_OPCODE_MAC 72 -#define BRW_OPCODE_MACH 73 -#define BRW_OPCODE_LZD 74 -#define BRW_OPCODE_SAD2 80 -#define BRW_OPCODE_SADA2 81 -#define BRW_OPCODE_DP4 84 -#define BRW_OPCODE_DPH 85 -#define BRW_OPCODE_DP3 86 -#define BRW_OPCODE_DP2 87 -#define BRW_OPCODE_DPA2 88 -#define BRW_OPCODE_LINE 89 -#define BRW_OPCODE_NOP 126 - -#define BRW_PREDICATE_NONE 0 -#define BRW_PREDICATE_NORMAL 1 -#define BRW_PREDICATE_ALIGN1_ANYV 2 -#define BRW_PREDICATE_ALIGN1_ALLV 3 -#define BRW_PREDICATE_ALIGN1_ANY2H 4 -#define BRW_PREDICATE_ALIGN1_ALL2H 5 -#define BRW_PREDICATE_ALIGN1_ANY4H 6 -#define BRW_PREDICATE_ALIGN1_ALL4H 7 -#define BRW_PREDICATE_ALIGN1_ANY8H 8 -#define BRW_PREDICATE_ALIGN1_ALL8H 9 -#define BRW_PREDICATE_ALIGN1_ANY16H 10 -#define BRW_PREDICATE_ALIGN1_ALL16H 11 -#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 -#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 -#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 -#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 -#define BRW_PREDICATE_ALIGN16_ANY4H 6 -#define BRW_PREDICATE_ALIGN16_ALL4H 7 - -#define BRW_ARCHITECTURE_REGISTER_FILE 0 -#define BRW_GENERAL_REGISTER_FILE 1 -#define BRW_MESSAGE_REGISTER_FILE 2 -#define BRW_IMMEDIATE_VALUE 3 - -#define BRW_REGISTER_TYPE_UD 0 -#define BRW_REGISTER_TYPE_D 1 -#define BRW_REGISTER_TYPE_UW 2 -#define BRW_REGISTER_TYPE_W 3 -#define BRW_REGISTER_TYPE_UB 4 -#define BRW_REGISTER_TYPE_B 5 -#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ -#define BRW_REGISTER_TYPE_HF 6 -#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ -#define BRW_REGISTER_TYPE_F 7 - -#define BRW_ARF_NULL 0x00 -#define BRW_ARF_ADDRESS 0x10 -#define BRW_ARF_ACCUMULATOR 0x20 -#define BRW_ARF_FLAG 0x30 -#define BRW_ARF_MASK 0x40 -#define BRW_ARF_MASK_STACK 0x50 -#define BRW_ARF_MASK_STACK_DEPTH 0x60 -#define BRW_ARF_STATE 0x70 -#define BRW_ARF_CONTROL 0x80 -#define BRW_ARF_NOTIFICATION_COUNT 0x90 -#define BRW_ARF_IP 0xA0 - -#define BRW_AMASK 0 -#define BRW_IMASK 1 -#define BRW_LMASK 2 -#define BRW_CMASK 3 - - - -#define BRW_THREAD_NORMAL 0 -#define BRW_THREAD_ATOMIC 1 -#define BRW_THREAD_SWITCH 2 - -#define BRW_VERTICAL_STRIDE_0 0 -#define BRW_VERTICAL_STRIDE_1 1 -#define BRW_VERTICAL_STRIDE_2 2 -#define BRW_VERTICAL_STRIDE_4 3 -#define BRW_VERTICAL_STRIDE_8 4 -#define BRW_VERTICAL_STRIDE_16 5 -#define BRW_VERTICAL_STRIDE_32 6 -#define BRW_VERTICAL_STRIDE_64 7 -#define BRW_VERTICAL_STRIDE_128 8 -#define BRW_VERTICAL_STRIDE_256 9 -#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF - -#define BRW_WIDTH_1 0 -#define BRW_WIDTH_2 1 -#define BRW_WIDTH_4 2 -#define BRW_WIDTH_8 3 -#define BRW_WIDTH_16 4 - -#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 -#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 -#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 -#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 -#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 -#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 -#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 -#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 -#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 -#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 -#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 -#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 - -#define BRW_POLYGON_FACING_FRONT 0 -#define BRW_POLYGON_FACING_BACK 1 - -#define BRW_MESSAGE_TARGET_NULL 0 -#define BRW_MESSAGE_TARGET_MATH 1 -#define BRW_MESSAGE_TARGET_SAMPLER 2 -#define BRW_MESSAGE_TARGET_GATEWAY 3 -#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 -#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 -#define BRW_MESSAGE_TARGET_URB 6 -#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 - -#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 -#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 -#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 - -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 - -#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 -#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 -#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 -#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 -#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 - -#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 -#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 - -#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 -#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 - -#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 -#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 -#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 -#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 - -#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 -#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 -#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 - -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 -#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 - -#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 -#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 -#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 -#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 -#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 -#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 -#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 - -#define BRW_MATH_FUNCTION_INV 1 -#define BRW_MATH_FUNCTION_LOG 2 -#define BRW_MATH_FUNCTION_EXP 3 -#define BRW_MATH_FUNCTION_SQRT 4 -#define BRW_MATH_FUNCTION_RSQ 5 -#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ -#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ -#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 -#define BRW_MATH_FUNCTION_POW 10 -#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 -#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 -#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 - -#define BRW_MATH_INTEGER_UNSIGNED 0 -#define BRW_MATH_INTEGER_SIGNED 1 - -#define BRW_MATH_PRECISION_FULL 0 -#define BRW_MATH_PRECISION_PARTIAL 1 - -#define BRW_MATH_SATURATE_NONE 0 -#define BRW_MATH_SATURATE_SATURATE 1 - -#define BRW_MATH_DATA_VECTOR 0 -#define BRW_MATH_DATA_SCALAR 1 - -#define BRW_URB_OPCODE_WRITE 0 - -#define BRW_URB_SWIZZLE_NONE 0 -#define BRW_URB_SWIZZLE_INTERLEAVE 1 -#define BRW_URB_SWIZZLE_TRANSPOSE 2 - -#define BRW_SCRATCH_SPACE_SIZE_1K 0 -#define BRW_SCRATCH_SPACE_SIZE_2K 1 -#define BRW_SCRATCH_SPACE_SIZE_4K 2 -#define BRW_SCRATCH_SPACE_SIZE_8K 3 -#define BRW_SCRATCH_SPACE_SIZE_16K 4 -#define BRW_SCRATCH_SPACE_SIZE_32K 5 -#define BRW_SCRATCH_SPACE_SIZE_64K 6 -#define BRW_SCRATCH_SPACE_SIZE_128K 7 -#define BRW_SCRATCH_SPACE_SIZE_256K 8 -#define BRW_SCRATCH_SPACE_SIZE_512K 9 -#define BRW_SCRATCH_SPACE_SIZE_1M 10 -#define BRW_SCRATCH_SPACE_SIZE_2M 11 - - - - -#define CMD_URB_FENCE 0x6000 -#define CMD_CONST_BUFFER_STATE 0x6001 -#define CMD_CONST_BUFFER 0x6002 - -#define CMD_STATE_BASE_ADDRESS 0x6101 -#define CMD_STATE_INSN_POINTER 0x6102 -#define CMD_PIPELINE_SELECT 0x6104 - -#define CMD_PIPELINED_STATE_POINTERS 0x7800 -#define CMD_BINDING_TABLE_PTRS 0x7801 -#define CMD_VERTEX_BUFFER 0x7808 -#define CMD_VERTEX_ELEMENT 0x7809 -#define CMD_INDEX_BUFFER 0x780a -#define CMD_VF_STATISTICS 0x780b - -#define CMD_DRAW_RECT 0x7900 -#define CMD_BLEND_CONSTANT_COLOR 0x7901 -#define CMD_CHROMA_KEY 0x7904 -#define CMD_DEPTH_BUFFER 0x7905 -#define CMD_POLY_STIPPLE_OFFSET 0x7906 -#define CMD_POLY_STIPPLE_PATTERN 0x7907 -#define CMD_LINE_STIPPLE_PATTERN 0x7908 -#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 - -#define CMD_PIPE_CONTROL 0x7a00 - -#define CMD_3D_PRIM 0x7b00 - -#define CMD_MI_FLUSH 0x0200 - - -/* Various values from the R0 vertex header: - */ -#define R02_PRIM_END 0x1 -#define R02_PRIM_START 0x2 - - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c deleted file mode 100644 index 49d80cb41c..0000000000 --- a/src/gallium/drivers/i965simple/brw_draw.c +++ /dev/null @@ -1,226 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include <stdlib.h> - -#include "brw_batch.h" -#include "brw_draw.h" -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_state.h" - -#include "pipe/p_context.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_prim.h" - -static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { - _3DPRIM_POINTLIST, - _3DPRIM_LINELIST, - _3DPRIM_LINELOOP, - _3DPRIM_LINESTRIP, - _3DPRIM_TRILIST, - _3DPRIM_TRISTRIP, - _3DPRIM_TRIFAN, - _3DPRIM_QUADLIST, - _3DPRIM_QUADSTRIP, - _3DPRIM_POLYGON -}; - - -/* When the primitive changes, set a state bit and re-validate. Not - * the nicest and would rather deal with this by having all the - * programs be immune to the active primitive (ie. cope with all - * possibilities). That may not be realistic however. - */ -static void brw_set_prim(struct brw_context *brw, int prim) -{ - PRINT("PRIM: %d\n", prim); - - /* Slight optimization to avoid the GS program when not needed: - */ - if (prim == PIPE_PRIM_QUAD_STRIP && - brw->attribs.Raster->flatshade && - brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL && - brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL) - prim = PIPE_PRIM_TRIANGLE_STRIP; - - if (prim != brw->primitive) { - brw->primitive = prim; - brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; - - if (u_reduced_prim(prim) != brw->reduced_primitive) { - brw->reduced_primitive = u_reduced_prim(prim); - brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; - } - - brw_validate_state(brw); - } - -} - - -static unsigned trim(int prim, unsigned length) -{ - if (prim == PIPE_PRIM_QUAD_STRIP) - return length > 3 ? (length - length % 2) : 0; - else if (prim == PIPE_PRIM_QUADS) - return length - length % 4; - else - return length; -} - - - -static boolean brw_emit_prim( struct brw_context *brw, - boolean indexed, - unsigned start, - unsigned count ) - -{ - struct brw_3d_primitive prim_packet; - - if (BRW_DEBUG & DEBUG_PRIMS) - PRINT("PRIM: %d %d %d\n", brw->primitive, start, count); - - prim_packet.header.opcode = CMD_3D_PRIM; - prim_packet.header.length = sizeof(prim_packet)/4 - 2; - prim_packet.header.pad = 0; - prim_packet.header.topology = hw_prim[brw->primitive]; - prim_packet.header.indexed = indexed; - - prim_packet.verts_per_instance = trim(brw->primitive, count); - prim_packet.start_vert_location = start; - prim_packet.instance_count = 1; - prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = 0; - - if (prim_packet.verts_per_instance == 0) - return TRUE; - - return brw_batchbuffer_data( brw->winsys, - &prim_packet, - sizeof(prim_packet) ); -} - - -/* May fail if out of video memory for texture or vbo upload, or on - * fallback conditions. - */ -static boolean brw_try_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *index_buffer, - unsigned index_size, - unsigned mode, - unsigned start, - unsigned count ) -{ - struct brw_context *brw = brw_context(pipe); - - /* Set the first primitive ahead of validate_state: - */ - brw_set_prim(brw, mode); - - /* Upload index, vertex data: - */ - if (index_buffer && - !brw_upload_indices( brw, index_buffer, index_size, start, count )) - return FALSE; - - if (!brw_upload_vertex_buffers(brw)) - return FALSE; - - if (!brw_upload_vertex_elements( brw )) - return FALSE; - - /* XXX: Need to separate validate and upload of state. - */ - if (brw->state.dirty.brw) - brw_validate_state( brw ); - - if (!brw_emit_prim(brw, index_buffer != NULL, - start, count)) - return FALSE; - - return TRUE; -} - - - -static boolean brw_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, - unsigned start, - unsigned count ) -{ - if (!brw_try_draw_elements( pipe, - indexBuffer, - indexSize, - mode, start, count )) - { - /* flush ? */ - - if (!brw_try_draw_elements( pipe, - indexBuffer, - indexSize, - mode, start, - count )) { - assert(0); - return FALSE; - } - } - - return TRUE; -} - - - -static boolean brw_draw_arrays( struct pipe_context *pipe, - unsigned mode, - unsigned start, - unsigned count ) -{ - if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { - /* flush ? */ - - if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { - assert(0); - return FALSE; - } - } - - return TRUE; -} - - - -void brw_init_draw_functions( struct brw_context *brw ) -{ - brw->pipe.draw_arrays = brw_draw_arrays; - brw->pipe.draw_elements = brw_draw_elements; -} - - diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c deleted file mode 100644 index 2d9ca3f2ea..0000000000 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ /dev/null @@ -1,300 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include <stdlib.h> - -#include "brw_batch.h" -#include "brw_draw.h" -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_state.h" - - -struct brw_array_state { - union header_union header; - - struct { - union { - struct { - unsigned pitch:11; - unsigned pad:15; - unsigned access_type:1; - unsigned vb_index:5; - } bits; - unsigned dword; - } vb0; - - struct pipe_buffer *buffer; - unsigned offset; - - unsigned max_index; - unsigned instance_data_step_rate; - - } vb[BRW_VBP_MAX]; -}; - - - -unsigned brw_translate_surface_format( unsigned id ) -{ - switch (id) { - case PIPE_FORMAT_R64_FLOAT: - return BRW_SURFACEFORMAT_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return BRW_SURFACEFORMAT_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return BRW_SURFACEFORMAT_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return BRW_SURFACEFORMAT_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return BRW_SURFACEFORMAT_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return BRW_SURFACEFORMAT_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return BRW_SURFACEFORMAT_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return BRW_SURFACEFORMAT_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return BRW_SURFACEFORMAT_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return BRW_SURFACEFORMAT_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return BRW_SURFACEFORMAT_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return BRW_SURFACEFORMAT_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return BRW_SURFACEFORMAT_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return BRW_SURFACEFORMAT_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return BRW_SURFACEFORMAT_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return BRW_SURFACEFORMAT_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return BRW_SURFACEFORMAT_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return BRW_SURFACEFORMAT_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return BRW_SURFACEFORMAT_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return BRW_SURFACEFORMAT_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return BRW_SURFACEFORMAT_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return BRW_SURFACEFORMAT_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return BRW_SURFACEFORMAT_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return BRW_SURFACEFORMAT_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return BRW_SURFACEFORMAT_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return BRW_SURFACEFORMAT_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return BRW_SURFACEFORMAT_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return BRW_SURFACEFORMAT_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return BRW_SURFACEFORMAT_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return BRW_SURFACEFORMAT_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return BRW_SURFACEFORMAT_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return BRW_SURFACEFORMAT_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return BRW_SURFACEFORMAT_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return BRW_SURFACEFORMAT_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return BRW_SURFACEFORMAT_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return BRW_SURFACEFORMAT_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return BRW_SURFACEFORMAT_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return BRW_SURFACEFORMAT_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return BRW_SURFACEFORMAT_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return BRW_SURFACEFORMAT_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return BRW_SURFACEFORMAT_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; - - default: - assert(0); - return 0; - } -} - -static unsigned get_index_type(int type) -{ - switch (type) { - case 1: return BRW_INDEX_BYTE; - case 2: return BRW_INDEX_WORD; - case 4: return BRW_INDEX_DWORD; - default: assert(0); return 0; - } -} - - -boolean brw_upload_vertex_buffers( struct brw_context *brw ) -{ - struct brw_array_state vbp; - unsigned nr_enabled = 0; - unsigned i; - - memset(&vbp, 0, sizeof(vbp)); - - /* This is a hardware limit: - */ - - for (i = 0; i < BRW_VEP_MAX; i++) - { - if (brw->vb.vbo_array[i] == NULL) { - nr_enabled = i; - break; - } - - vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->stride; - vbp.vb[i].vb0.bits.pad = 0; - vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; - vbp.vb[i].vb0.bits.vb_index = i; - vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset; - vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer; - vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index; - } - - - vbp.header.bits.length = (1 + nr_enabled * 4) - 2; - vbp.header.bits.opcode = CMD_VERTEX_BUFFER; - - BEGIN_BATCH(vbp.header.bits.length+2, 0); - OUT_BATCH( vbp.header.dword ); - - for (i = 0; i < nr_enabled; i++) { - OUT_BATCH( vbp.vb[i].vb0.dword ); - OUT_RELOC( vbp.vb[i].buffer, PIPE_BUFFER_USAGE_GPU_READ, - vbp.vb[i].offset); - OUT_BATCH( vbp.vb[i].max_index ); - OUT_BATCH( vbp.vb[i].instance_data_step_rate ); - } - ADVANCE_BATCH(); - return TRUE; -} - - - -boolean brw_upload_vertex_elements( struct brw_context *brw ) -{ - struct brw_vertex_element_packet vep; - - unsigned i; - unsigned nr_enabled = brw->attribs.VertexProgram->info.num_inputs; - - memset(&vep, 0, sizeof(vep)); - - for (i = 0; i < nr_enabled; i++) - vep.ve[i] = brw->vb.inputs[i]; - - - vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; - vep.header.opcode = CMD_VERTEX_ELEMENT; - brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); - - return TRUE; -} - -boolean brw_upload_indices( struct brw_context *brw, - const struct pipe_buffer *index_buffer, - int ib_size, int start, int count) -{ - /* Emit the indexbuffer packet: - */ - { - struct brw_indexbuffer ib; - - memset(&ib, 0, sizeof(ib)); - - ib.header.bits.opcode = CMD_INDEX_BUFFER; - ib.header.bits.length = sizeof(ib)/4 - 2; - ib.header.bits.index_format = get_index_type(ib_size); - ib.header.bits.cut_index_enable = 0; - - - BEGIN_BATCH(4, 0); - OUT_BATCH( ib.header.dword ); - OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start); - OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start + count); - OUT_BATCH( 0 ); - ADVANCE_BATCH(); - } - return TRUE; -} diff --git a/src/gallium/drivers/i965simple/brw_eu.c b/src/gallium/drivers/i965simple/brw_eu.c deleted file mode 100644 index e2002d1821..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_eu.h" - - - -/* How does predicate control work when execution_size != 8? Do I - * need to test/set for 0xffff when execution_size is 16? - */ -void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) -{ - p->current->header.predicate_control = BRW_PREDICATE_NONE; - - if (value != 0xff) { - if (value != p->flag_value) { - brw_push_insn_state(p); - brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); - p->flag_value = value; - brw_pop_insn_state(p); - } - - p->current->header.predicate_control = BRW_PREDICATE_NORMAL; - } -} - -void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) -{ - p->current->header.predicate_control = pc; -} - -void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) -{ - p->current->header.destreg__conditonalmod = conditional; -} - -void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ) -{ - p->current->header.access_mode = access_mode; -} - -void brw_set_compression_control( struct brw_compile *p, boolean compression_control ) -{ - p->current->header.compression_control = compression_control; -} - -void brw_set_mask_control( struct brw_compile *p, unsigned value ) -{ - p->current->header.mask_control = value; -} - -void brw_set_saturate( struct brw_compile *p, unsigned value ) -{ - p->current->header.saturate = value; -} - -void brw_push_insn_state( struct brw_compile *p ) -{ - assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); - memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); - p->current++; -} - -void brw_pop_insn_state( struct brw_compile *p ) -{ - assert(p->current != p->stack); - p->current--; -} - - -/*********************************************************************** - */ -void brw_init_compile( struct brw_compile *p ) -{ - p->nr_insn = 0; - p->current = p->stack; - memset(p->current, 0, sizeof(p->current[0])); - - /* Some defaults? - */ - brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ - brw_set_saturate(p, 0); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_predicate_control_flag_value(p, 0xff); -} - - -const unsigned *brw_get_program( struct brw_compile *p, - unsigned *sz ) -{ - unsigned i; - - for (i = 0; i < 8; i++) - brw_NOP(p); - - *sz = p->nr_insn * sizeof(struct brw_instruction); - return (const unsigned *)p->store; -} - diff --git a/src/gallium/drivers/i965simple/brw_eu.h b/src/gallium/drivers/i965simple/brw_eu.h deleted file mode 100644 index 23151ae9ed..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu.h +++ /dev/null @@ -1,888 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_EU_H -#define BRW_EU_H - -#include "brw_structs.h" -#include "brw_defines.h" - -#include "pipe/p_compiler.h" -#include "pipe/p_shader_tokens.h" - -#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) -#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) - -#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) -#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) -#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) -#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) - - -#define REG_SIZE (8*4) - - -/* These aren't hardware structs, just something useful for us to pass around: - * - * Align1 operation has a lot of control over input ranges. Used in - * WM programs to implement shaders decomposed into "channel serial" - * or "structure of array" form: - */ -struct brw_reg -{ - unsigned type:4; - unsigned file:2; - unsigned nr:8; - unsigned subnr:5; /* :1 in align16 */ - unsigned negate:1; /* source only */ - unsigned abs:1; /* source only */ - unsigned vstride:4; /* source only */ - unsigned width:3; /* src only, align1 only */ - unsigned hstride:2; /* src only, align1 only */ - unsigned address_mode:1; /* relative addressing, hopefully! */ - unsigned pad0:1; - - union { - struct { - unsigned swizzle:8; /* src only, align16 only */ - unsigned writemask:4; /* dest only, align16 only */ - int indirect_offset:10; /* relative addressing offset */ - unsigned pad1:10; /* two dwords total */ - } bits; - - float f; - int d; - unsigned ud; - } dw1; -}; - - -struct brw_indirect { - unsigned addr_subnr:4; - int addr_offset:10; - unsigned pad:18; -}; - - -#define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1200 - -struct brw_compile { - struct brw_instruction store[BRW_EU_MAX_INSN]; - unsigned nr_insn; - - /* Allow clients to push/pop instruction state: - */ - struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; - struct brw_instruction *current; - - unsigned flag_value; - boolean single_program_flow; -}; - - - -static __inline int type_sz( unsigned type ) -{ - switch( type ) { - case BRW_REGISTER_TYPE_UD: - case BRW_REGISTER_TYPE_D: - case BRW_REGISTER_TYPE_F: - return 4; - case BRW_REGISTER_TYPE_HF: - case BRW_REGISTER_TYPE_UW: - case BRW_REGISTER_TYPE_W: - return 2; - case BRW_REGISTER_TYPE_UB: - case BRW_REGISTER_TYPE_B: - return 1; - default: - return 0; - } -} - -static __inline struct brw_reg brw_reg( unsigned file, - unsigned nr, - unsigned subnr, - unsigned type, - unsigned vstride, - unsigned width, - unsigned hstride, - unsigned swizzle, - unsigned writemask) -{ - - struct brw_reg reg; - reg.type = type; - reg.file = file; - reg.nr = nr; - reg.subnr = subnr * type_sz(type); - reg.negate = 0; - reg.abs = 0; - reg.vstride = vstride; - reg.width = width; - reg.hstride = hstride; - reg.address_mode = BRW_ADDRESS_DIRECT; - reg.pad0 = 0; - - /* Could do better: If the reg is r5.3<0;1,0>, we probably want to - * set swizzle and writemask to W, as the lower bits of subnr will - * be lost when converted to align16. This is probably too much to - * keep track of as you'd want it adjusted by suboffset(), etc. - * Perhaps fix up when converting to align16? - */ - reg.dw1.bits.swizzle = swizzle; - reg.dw1.bits.writemask = writemask; - reg.dw1.bits.indirect_offset = 0; - reg.dw1.bits.pad1 = 0; - return reg; -} - -static __inline struct brw_reg brw_vec16_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_16, - BRW_WIDTH_16, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - TGSI_WRITEMASK_XYZW); -} - -static __inline struct brw_reg brw_vec8_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_8, - BRW_WIDTH_8, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - TGSI_WRITEMASK_XYZW); -} - - -static __inline struct brw_reg brw_vec4_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - TGSI_WRITEMASK_XYZW); -} - - -static __inline struct brw_reg brw_vec2_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_2, - BRW_WIDTH_2, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYXY, - TGSI_WRITEMASK_XY); -} - -static __inline struct brw_reg brw_vec1_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return brw_reg(file, - nr, - subnr, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XXXX, - TGSI_WRITEMASK_X); -} - - -static __inline struct brw_reg retype( struct brw_reg reg, - unsigned type ) -{ - reg.type = type; - return reg; -} - -static __inline struct brw_reg suboffset( struct brw_reg reg, - unsigned delta ) -{ - reg.subnr += delta * type_sz(reg.type); - return reg; -} - - -static __inline struct brw_reg offset( struct brw_reg reg, - unsigned delta ) -{ - reg.nr += delta; - return reg; -} - - -static __inline struct brw_reg byte_offset( struct brw_reg reg, - unsigned bytes ) -{ - unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; - reg.nr = newoffset / REG_SIZE; - reg.subnr = newoffset % REG_SIZE; - return reg; -} - - -static __inline struct brw_reg brw_uw16_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); -} - -static __inline struct brw_reg brw_uw8_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); -} - -static __inline struct brw_reg brw_uw1_reg( unsigned file, - unsigned nr, - unsigned subnr ) -{ - return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); -} - -static __inline struct brw_reg brw_imm_reg( unsigned type ) -{ - return brw_reg( BRW_IMMEDIATE_VALUE, - 0, - 0, - type, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - 0, - 0); -} - -static __inline struct brw_reg brw_imm_f( float f ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); - imm.dw1.f = f; - return imm; -} - -static __inline struct brw_reg brw_imm_d( int d ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); - imm.dw1.d = d; - return imm; -} - -static __inline struct brw_reg brw_imm_ud( unsigned ud ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); - imm.dw1.ud = ud; - return imm; -} - -static __inline struct brw_reg brw_imm_uw( ushort uw ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); - imm.dw1.ud = uw; - return imm; -} - -static __inline struct brw_reg brw_imm_w( short w ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); - imm.dw1.d = w; - return imm; -} - -/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type - * numbers alias with _V and _VF below: - */ - -/* Vector of eight signed half-byte values: - */ -static __inline struct brw_reg brw_imm_v( unsigned v ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_8; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; - imm.dw1.ud = v; - return imm; -} - -/* Vector of four 8-bit float values: - */ -static __inline struct brw_reg brw_imm_vf( unsigned v ) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_4; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; - imm.dw1.ud = v; - return imm; -} - -#define VF_ZERO 0x0 -#define VF_ONE 0x30 -#define VF_NEG (1<<7) - -static __inline struct brw_reg brw_imm_vf4( unsigned v0, - unsigned v1, - unsigned v2, - unsigned v3) -{ - struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); - imm.vstride = BRW_VERTICAL_STRIDE_0; - imm.width = BRW_WIDTH_4; - imm.hstride = BRW_HORIZONTAL_STRIDE_1; - imm.dw1.ud = ((v0 << 0) | - (v1 << 8) | - (v2 << 16) | - (v3 << 24)); - return imm; -} - - -static __inline struct brw_reg brw_address( struct brw_reg reg ) -{ - return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); -} - - -static __inline struct brw_reg brw_vec1_grf( unsigned nr, - unsigned subnr ) -{ - return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - -static __inline struct brw_reg brw_vec8_grf( unsigned nr, - unsigned subnr ) -{ - return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - -static __inline struct brw_reg brw_vec4_grf( unsigned nr, - unsigned subnr ) -{ - return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - - -static __inline struct brw_reg brw_vec2_grf( unsigned nr, - unsigned subnr ) -{ - return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - -static __inline struct brw_reg brw_uw8_grf( unsigned nr, - unsigned subnr ) -{ - return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); -} - -static __inline struct brw_reg brw_null_reg( void ) -{ - return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_NULL, - 0); -} - -static __inline struct brw_reg brw_address_reg( unsigned subnr ) -{ - return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ADDRESS, - subnr); -} - -/* If/else instructions break in align16 mode if writemask & swizzle - * aren't xyzw. This goes against the convention for other scalar - * regs: - */ -static __inline struct brw_reg brw_ip_reg( void ) -{ - return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_IP, - 0, - BRW_REGISTER_TYPE_UD, - BRW_VERTICAL_STRIDE_4, /* ? */ - BRW_WIDTH_1, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, /* NOTE! */ - TGSI_WRITEMASK_XYZW); /* NOTE! */ -} - -static __inline struct brw_reg brw_acc_reg( void ) -{ - return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ACCUMULATOR, - 0); -} - - -static __inline struct brw_reg brw_flag_reg( void ) -{ - return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_FLAG, - 0); -} - - -static __inline struct brw_reg brw_mask_reg( unsigned subnr ) -{ - return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_MASK, - subnr); -} - -static __inline struct brw_reg brw_message_reg( unsigned nr ) -{ - return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, - nr, - 0); -} - - - - -/* This is almost always called with a numeric constant argument, so - * make things easy to evaluate at compile time: - */ -static __inline unsigned cvt( unsigned val ) -{ - switch (val) { - case 0: return 0; - case 1: return 1; - case 2: return 2; - case 4: return 3; - case 8: return 4; - case 16: return 5; - case 32: return 6; - } - return 0; -} - -static __inline struct brw_reg stride( struct brw_reg reg, - unsigned vstride, - unsigned width, - unsigned hstride ) -{ - - reg.vstride = cvt(vstride); - reg.width = cvt(width) - 1; - reg.hstride = cvt(hstride); - return reg; -} - -static __inline struct brw_reg vec16( struct brw_reg reg ) -{ - return stride(reg, 16,16,1); -} - -static __inline struct brw_reg vec8( struct brw_reg reg ) -{ - return stride(reg, 8,8,1); -} - -static __inline struct brw_reg vec4( struct brw_reg reg ) -{ - return stride(reg, 4,4,1); -} - -static __inline struct brw_reg vec2( struct brw_reg reg ) -{ - return stride(reg, 2,2,1); -} - -static __inline struct brw_reg vec1( struct brw_reg reg ) -{ - return stride(reg, 0,1,0); -} - -static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt ) -{ - return vec1(suboffset(reg, elt)); -} - -static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt ) -{ - return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); -} - - -static __inline struct brw_reg brw_swizzle( struct brw_reg reg, - unsigned x, - unsigned y, - unsigned z, - unsigned w) -{ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), - BRW_GET_SWZ(reg.dw1.bits.swizzle, y), - BRW_GET_SWZ(reg.dw1.bits.swizzle, z), - BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); - return reg; -} - - -static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, - unsigned x ) -{ - return brw_swizzle(reg, x, x, x, x); -} - -static __inline struct brw_reg brw_writemask( struct brw_reg reg, - unsigned mask ) -{ - reg.dw1.bits.writemask &= mask; - return reg; -} - -static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, - unsigned mask ) -{ - reg.dw1.bits.writemask = mask; - return reg; -} - -static __inline struct brw_reg negate( struct brw_reg reg ) -{ - reg.negate ^= 1; - return reg; -} - -static __inline struct brw_reg brw_abs( struct brw_reg reg ) -{ - reg.abs = 1; - return reg; -} - -/*********************************************************************** - */ -static __inline struct brw_reg brw_vec4_indirect( unsigned subnr, - int offset ) -{ - struct brw_reg reg = brw_vec4_grf(0, 0); - reg.subnr = subnr; - reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; - reg.dw1.bits.indirect_offset = offset; - return reg; -} - -static __inline struct brw_reg brw_vec1_indirect( unsigned subnr, - int offset ) -{ - struct brw_reg reg = brw_vec1_grf(0, 0); - reg.subnr = subnr; - reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; - reg.dw1.bits.indirect_offset = offset; - return reg; -} - -static __inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) -{ - return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); -} - -static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) -{ - return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); -} - -static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) -{ - return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); -} - -static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) -{ - return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); -} - -static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) -{ - return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); -} - -static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) -{ - return brw_address_reg(ptr.addr_subnr); -} - -static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset ) -{ - ptr.addr_offset += offset; - return ptr; -} - -static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset ) -{ - struct brw_indirect ptr; - ptr.addr_subnr = addr_subnr; - ptr.addr_offset = offset; - ptr.pad = 0; - return ptr; -} - -static __inline struct brw_instruction *current_insn( struct brw_compile *p) -{ - return &p->store[p->nr_insn]; -} - -void brw_pop_insn_state( struct brw_compile *p ); -void brw_push_insn_state( struct brw_compile *p ); -void brw_set_mask_control( struct brw_compile *p, unsigned value ); -void brw_set_saturate( struct brw_compile *p, unsigned value ); -void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ); -void brw_set_compression_control( struct brw_compile *p, boolean control ); -void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); -void brw_set_predicate_control( struct brw_compile *p, unsigned pc ); -void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ); - -void brw_init_compile( struct brw_compile *p ); -const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); - - -struct brw_instruction *brw_alu1( struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src ); - -struct brw_instruction *brw_alu2(struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1 ); - -/* Helpers for regular instructions: - */ -#define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0); - -#define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1); - -ALU1(MOV) -ALU2(SEL) -ALU1(NOT) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(SHR) -ALU2(SHL) -ALU2(RSR) -ALU2(RSL) -ALU2(ASR) -ALU2(JMPI) -ALU2(ADD) -ALU2(MUL) -ALU1(FRC) -ALU1(RNDD) -ALU2(MAC) -ALU2(MACH) -ALU1(LZD) -ALU2(DP4) -ALU2(DPH) -ALU2(DP3) -ALU2(DP2) -ALU2(LINE) - -#undef ALU1 -#undef ALU2 - - - -/* Helpers for SEND instruction: - */ -void brw_urb_WRITE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - boolean allocate, - boolean used, - unsigned msg_length, - unsigned response_length, - boolean eot, - boolean writes_complete, - unsigned offset, - unsigned swizzle); - -void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned msg_length, - unsigned response_length, - boolean eot); - -void brw_SAMPLE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned sampler, - unsigned writemask, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - boolean eot); - -void brw_math_16( struct brw_compile *p, - struct brw_reg dest, - unsigned function, - unsigned saturate, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned precision ); - -void brw_math( struct brw_compile *p, - struct brw_reg dest, - unsigned function, - unsigned saturate, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned data_type, - unsigned precision ); - -void brw_dp_READ_16( struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - unsigned scratch_offset ); - -void brw_dp_WRITE_16( struct brw_compile *p, - struct brw_reg src, - unsigned msg_reg_nr, - unsigned scratch_offset ); - -/* If/else/endif. Works by manipulating the execution flags on each - * channel. - */ -struct brw_instruction *brw_IF(struct brw_compile *p, - unsigned execute_size); - -struct brw_instruction *brw_ELSE(struct brw_compile *p, - struct brw_instruction *if_insn); - -void brw_ENDIF(struct brw_compile *p, - struct brw_instruction *if_or_else_insn); - - -/* DO/WHILE loops: - */ -struct brw_instruction *brw_DO(struct brw_compile *p, - unsigned execute_size); - -struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *patch_insn); - -struct brw_instruction *brw_BREAK(struct brw_compile *p); -struct brw_instruction *brw_CONT(struct brw_compile *p); -/* Forward jumps: - */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn); - - - -void brw_NOP(struct brw_compile *p); - -/* Special case: there is never a destination, execution size will be - * taken from src0: - */ -void brw_CMP(struct brw_compile *p, - struct brw_reg dest, - unsigned conditional, - struct brw_reg src0, - struct brw_reg src1); - -void brw_print_reg( struct brw_reg reg ); - - -/*********************************************************************** - * brw_eu_util.c: - */ - -void brw_copy_indirect_to_indirect(struct brw_compile *p, - struct brw_indirect dst_ptr, - struct brw_indirect src_ptr, - unsigned count); - -void brw_copy_from_indirect(struct brw_compile *p, - struct brw_reg dst, - struct brw_indirect ptr, - unsigned count); - -void brw_copy4(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - unsigned count); - -void brw_copy8(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - unsigned count); - -void brw_math_invert( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src); - -void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ); -#endif diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c deleted file mode 100644 index 4adfb0c02f..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu_debug.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "util/u_debug.h" - -#include "brw_eu.h" - -void brw_print_reg( struct brw_reg hwreg ) -{ - static const char *file[] = { - "arf", - "grf", - "msg", - "imm" - }; - - static const char *type[] = { - "ud", - "d", - "uw", - "w", - "ub", - "vf", - "hf", - "f" - }; - - debug_printf("%s%s", - hwreg.abs ? "abs/" : "", - hwreg.negate ? "-" : ""); - - if (hwreg.file == BRW_GENERAL_REGISTER_FILE && - hwreg.nr % 2 == 0 && - hwreg.subnr == 0 && - hwreg.vstride == BRW_VERTICAL_STRIDE_8 && - hwreg.width == BRW_WIDTH_8 && - hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && - hwreg.type == BRW_REGISTER_TYPE_F) { - debug_printf("vec%d", hwreg.nr); - } - else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && - hwreg.vstride == BRW_VERTICAL_STRIDE_0 && - hwreg.width == BRW_WIDTH_1 && - hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && - hwreg.type == BRW_REGISTER_TYPE_F) { - debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); - } - else { - debug_printf("%s%d.%d<%d;%d,%d>:%s", - file[hwreg.file], - hwreg.nr, - hwreg.subnr / type_sz(hwreg.type), - hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, - 1<<hwreg.width, - hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, - type[hwreg.type]); - } -} - - - diff --git a/src/gallium/drivers/i965simple/brw_eu_emit.c b/src/gallium/drivers/i965simple/brw_eu_emit.c deleted file mode 100644 index 400a80b6fb..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu_emit.c +++ /dev/null @@ -1,1080 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_eu.h" - - - - -/*********************************************************************** - * Internal helper for constructing instructions - */ - -static void guess_execution_size( struct brw_instruction *insn, - struct brw_reg reg ) -{ - if (reg.width == BRW_WIDTH_8 && - insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) - insn->header.execution_size = BRW_EXECUTE_16; - else - insn->header.execution_size = reg.width; /* note - definitions are compatible */ -} - - -static void brw_set_dest( struct brw_instruction *insn, - struct brw_reg dest ) -{ - insn->bits1.da1.dest_reg_file = dest.file; - insn->bits1.da1.dest_reg_type = dest.type; - insn->bits1.da1.dest_address_mode = dest.address_mode; - - if (dest.address_mode == BRW_ADDRESS_DIRECT) { - insn->bits1.da1.dest_reg_nr = dest.nr; - - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits1.da1.dest_subreg_nr = dest.subnr; - insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; - } - else { - insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; - insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; - } - } - else { - insn->bits1.ia1.dest_subreg_nr = dest.subnr; - - /* These are different sizes in align1 vs align16: - */ - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; - insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; - } - else { - insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; - } - } - - /* NEW: Set the execution size based on dest.width and - * insn->compression_control: - */ - guess_execution_size(insn, dest); -} - -static void brw_set_src0( struct brw_instruction *insn, - struct brw_reg reg ) -{ - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - - insn->bits1.da1.src0_reg_file = reg.file; - insn->bits1.da1.src0_reg_type = reg.type; - insn->bits2.da1.src0_abs = reg.abs; - insn->bits2.da1.src0_negate = reg.negate; - insn->bits2.da1.src0_address_mode = reg.address_mode; - - if (reg.file == BRW_IMMEDIATE_VALUE) { - insn->bits3.ud = reg.dw1.ud; - - /* Required to set some fields in src1 as well: - */ - insn->bits1.da1.src1_reg_file = 0; /* arf */ - insn->bits1.da1.src1_reg_type = reg.type; - } - else - { - if (reg.address_mode == BRW_ADDRESS_DIRECT) { - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits2.da1.src0_subreg_nr = reg.subnr; - insn->bits2.da1.src0_reg_nr = reg.nr; - } - else { - insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; - insn->bits2.da16.src0_reg_nr = reg.nr; - } - } - else { - insn->bits2.ia1.src0_subreg_nr = reg.subnr; - - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; - } - else { - insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; - } - } - - if (insn->header.access_mode == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - insn->header.execution_size == BRW_EXECUTE_1) { - insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; - insn->bits2.da1.src0_width = BRW_WIDTH_1; - insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; - } - else { - insn->bits2.da1.src0_horiz_stride = reg.hstride; - insn->bits2.da1.src0_width = reg.width; - insn->bits2.da1.src0_vert_stride = reg.vstride; - } - } - else { - insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); - insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); - insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); - insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); - - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - if (reg.vstride == BRW_VERTICAL_STRIDE_8) - insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; - else - insn->bits2.da16.src0_vert_stride = reg.vstride; - } - } -} - - -void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ) -{ - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - - insn->bits1.da1.src1_reg_file = reg.file; - insn->bits1.da1.src1_reg_type = reg.type; - insn->bits3.da1.src1_abs = reg.abs; - insn->bits3.da1.src1_negate = reg.negate; - - /* Only src1 can be immediate in two-argument instructions. - */ - assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - insn->bits3.ud = reg.dw1.ud; - } - else { - /* This is a hardware restriction, which may or may not be lifted - * in the future: - */ - assert (reg.address_mode == BRW_ADDRESS_DIRECT); - //assert (reg.file == BRW_GENERAL_REGISTER_FILE); - - if (insn->header.access_mode == BRW_ALIGN_1) { - insn->bits3.da1.src1_subreg_nr = reg.subnr; - insn->bits3.da1.src1_reg_nr = reg.nr; - } - else { - insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; - insn->bits3.da16.src1_reg_nr = reg.nr; - } - - if (insn->header.access_mode == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - insn->header.execution_size == BRW_EXECUTE_1) { - insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; - insn->bits3.da1.src1_width = BRW_WIDTH_1; - insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; - } - else { - insn->bits3.da1.src1_horiz_stride = reg.hstride; - insn->bits3.da1.src1_width = reg.width; - insn->bits3.da1.src1_vert_stride = reg.vstride; - } - } - else { - insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); - insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); - insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); - insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); - - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - if (reg.vstride == BRW_VERTICAL_STRIDE_8) - insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; - else - insn->bits3.da16.src1_vert_stride = reg.vstride; - } - } -} - - - -static void brw_set_math_message( struct brw_instruction *insn, - unsigned msg_length, - unsigned response_length, - unsigned function, - unsigned integer_type, - boolean low_precision, - boolean saturate, - unsigned dataType ) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.math.function = function; - insn->bits3.math.int_type = integer_type; - insn->bits3.math.precision = low_precision; - insn->bits3.math.saturate = saturate; - insn->bits3.math.data_type = dataType; - insn->bits3.math.response_length = response_length; - insn->bits3.math.msg_length = msg_length; - insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; - insn->bits3.math.end_of_thread = 0; -} - -static void brw_set_urb_message( struct brw_instruction *insn, - boolean allocate, - boolean used, - unsigned msg_length, - unsigned response_length, - boolean end_of_thread, - boolean complete, - unsigned offset, - unsigned swizzle_control ) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.urb.opcode = 0; /* ? */ - insn->bits3.urb.offset = offset; - insn->bits3.urb.swizzle_control = swizzle_control; - insn->bits3.urb.allocate = allocate; - insn->bits3.urb.used = used; /* ? */ - insn->bits3.urb.complete = complete; - insn->bits3.urb.response_length = response_length; - insn->bits3.urb.msg_length = msg_length; - insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; - insn->bits3.urb.end_of_thread = end_of_thread; -} - -static void brw_set_dp_write_message( struct brw_instruction *insn, - unsigned binding_table_index, - unsigned msg_control, - unsigned msg_type, - unsigned msg_length, - unsigned pixel_scoreboard_clear, - unsigned response_length, - unsigned end_of_thread ) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.dp_write.binding_table_index = binding_table_index; - insn->bits3.dp_write.msg_control = msg_control; - insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_write.msg_type = msg_type; - insn->bits3.dp_write.send_commit_msg = 0; - insn->bits3.dp_write.response_length = response_length; - insn->bits3.dp_write.msg_length = msg_length; - insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits3.urb.end_of_thread = end_of_thread; -} - -static void brw_set_dp_read_message( struct brw_instruction *insn, - unsigned binding_table_index, - unsigned msg_control, - unsigned msg_type, - unsigned target_cache, - unsigned msg_length, - unsigned response_length, - unsigned end_of_thread ) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.dp_read.binding_table_index = binding_table_index; - insn->bits3.dp_read.msg_control = msg_control; - insn->bits3.dp_read.msg_type = msg_type; - insn->bits3.dp_read.target_cache = target_cache; - insn->bits3.dp_read.response_length = response_length; - insn->bits3.dp_read.msg_length = msg_length; - insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits3.dp_read.end_of_thread = end_of_thread; -} - -static void brw_set_sampler_message( struct brw_instruction *insn, - unsigned binding_table_index, - unsigned sampler, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - boolean eot) -{ - brw_set_src1(insn, brw_imm_d(0)); - - insn->bits3.sampler.binding_table_index = binding_table_index; - insn->bits3.sampler.sampler = sampler; - insn->bits3.sampler.msg_type = msg_type; - insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; - insn->bits3.sampler.response_length = response_length; - insn->bits3.sampler.msg_length = msg_length; - insn->bits3.sampler.end_of_thread = eot; - insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; -} - - - -static struct brw_instruction *next_insn( struct brw_compile *p, - unsigned opcode ) -{ - struct brw_instruction *insn; - - assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); - - insn = &p->store[p->nr_insn++]; - memcpy(insn, p->current, sizeof(*insn)); - - /* Reset this one-shot flag: - */ - - if (p->current->header.destreg__conditonalmod) { - p->current->header.destreg__conditonalmod = 0; - p->current->header.predicate_control = BRW_PREDICATE_NORMAL; - } - - insn->header.opcode = opcode; - return insn; -} - - -struct brw_instruction *brw_alu1( struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src ) -{ - struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - return insn; -} - -struct brw_instruction *brw_alu2(struct brw_compile *p, - unsigned opcode, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1 ) -{ - struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, src1); - return insn; -} - - -/*********************************************************************** - * Convenience routines. - */ -#define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0) \ -{ \ - return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ -} - -#define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ - struct brw_reg dest, \ - struct brw_reg src0, \ - struct brw_reg src1) \ -{ \ - return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ -} - - -ALU1(MOV) -ALU2(SEL) -ALU1(NOT) -ALU2(AND) -ALU2(OR) -ALU2(XOR) -ALU2(SHR) -ALU2(SHL) -ALU2(RSR) -ALU2(RSL) -ALU2(ASR) -ALU2(ADD) -ALU2(MUL) -ALU1(FRC) -ALU1(RNDD) -ALU2(MAC) -ALU2(MACH) -ALU1(LZD) -ALU2(DP4) -ALU2(DPH) -ALU2(DP3) -ALU2(DP2) -ALU2(LINE) - - - - -void brw_NOP(struct brw_compile *p) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_ud(0x0)); -} - - - - - -/*********************************************************************** - * Comparisons, if/else/endif - */ - -struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) -{ - struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); - - p->current->header.predicate_control = BRW_PREDICATE_NONE; - - return insn; -} - -/* EU takes the value from the flag register and pushes it onto some - * sort of a stack (presumably merging with any flag value already on - * the stack). Within an if block, the flags at the top of the stack - * control execution on each channel of the unit, eg. on each of the - * 16 pixel values in our wm programs. - * - * When the matching 'else' instruction is reached (presumably by - * countdown of the instruction count patched in by our ELSE/ENDIF - * functions), the relevent flags are inverted. - * - * When the matching 'endif' instruction is reached, the flags are - * popped off. If the stack is now empty, normal execution resumes. - * - * No attempt is made to deal with stack overflow (14 elements?). - */ -struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size) -{ - struct brw_instruction *insn; - - if (p->single_program_flow) { - assert(execute_size == BRW_EXECUTE_1); - - insn = next_insn(p, BRW_OPCODE_ADD); - insn->header.predicate_inverse = 1; - } else { - insn = next_insn(p, BRW_OPCODE_IF); - } - - /* Override the defaults for this instruction: - */ - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - - insn->header.execution_size = execute_size; - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.predicate_control = BRW_PREDICATE_NORMAL; - insn->header.mask_control = BRW_MASK_ENABLE; - - p->current->header.predicate_control = BRW_PREDICATE_NONE; - - return insn; -} - - -struct brw_instruction *brw_ELSE(struct brw_compile *p, - struct brw_instruction *if_insn) -{ - struct brw_instruction *insn; - - if (p->single_program_flow) { - insn = next_insn(p, BRW_OPCODE_ADD); - } else { - insn = next_insn(p, BRW_OPCODE_ELSE); - } - - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = if_insn->header.execution_size; - insn->header.mask_control = BRW_MASK_ENABLE; - - /* Patch the if instruction to point at this instruction. - */ - if (p->single_program_flow) { - assert(if_insn->header.opcode == BRW_OPCODE_ADD); - - if_insn->bits3.ud = (insn - if_insn + 1) * 16; - } else { - assert(if_insn->header.opcode == BRW_OPCODE_IF); - - if_insn->bits3.if_else.jump_count = insn - if_insn; - if_insn->bits3.if_else.pop_count = 1; - if_insn->bits3.if_else.pad0 = 0; - } - - return insn; -} - -void brw_ENDIF(struct brw_compile *p, - struct brw_instruction *patch_insn) -{ - if (p->single_program_flow) { - /* In single program flow mode, there's no need to execute an ENDIF, - * since we don't need to do any stack operations, and if we're executing - * currently, we want to just continue executing. - */ - struct brw_instruction *next = &p->store[p->nr_insn]; - - assert(patch_insn->header.opcode == BRW_OPCODE_ADD); - - patch_insn->bits3.ud = (next - patch_insn) * 16; - } else { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); - - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_d(0x0)); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = patch_insn->header.execution_size; - insn->header.mask_control = BRW_MASK_ENABLE; - - assert(patch_insn->bits3.if_else.jump_count == 0); - - /* Patch the if or else instructions to point at this or the next - * instruction respectively. - */ - if (patch_insn->header.opcode == BRW_OPCODE_IF) { - /* Automagically turn it into an IFF: - */ - patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; - patch_insn->bits3.if_else.pop_count = 0; - patch_insn->bits3.if_else.pad0 = 0; - } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; - patch_insn->bits3.if_else.pop_count = 1; - patch_insn->bits3.if_else.pad0 = 0; - } else { - assert(0); - } - - /* Also pop item off the stack in the endif instruction: - */ - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; - } -} - -struct brw_instruction *brw_BREAK(struct brw_compile *p) -{ - struct brw_instruction *insn; - insn = next_insn(p, BRW_OPCODE_BREAK); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = BRW_EXECUTE_8; - insn->header.mask_control = BRW_MASK_DISABLE; - insn->bits3.if_else.pad0 = 0; - return insn; -} - -struct brw_instruction *brw_CONT(struct brw_compile *p) -{ - struct brw_instruction *insn; - insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = BRW_EXECUTE_8; - insn->header.mask_control = BRW_MASK_DISABLE; - insn->bits3.if_else.pad0 = 0; - return insn; -} - -/* DO/WHILE loop: - */ -struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) -{ - if (p->single_program_flow) { - return &p->store[p->nr_insn]; - } else { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); - - /* Override the defaults for this instruction: - */ - brw_set_dest(insn, brw_null_reg()); - brw_set_src0(insn, brw_null_reg()); - brw_set_src1(insn, brw_null_reg()); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = execute_size; - insn->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_ENABLE; */ - insn->header.mask_control = BRW_MASK_DISABLE; - - return insn; - } -} - - - -struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) -{ - struct brw_instruction *insn; - - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else - insn = next_insn(p, BRW_OPCODE_WHILE); - - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - - insn->header.compression_control = BRW_COMPRESSION_NONE; - - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; - - insn->bits3.d = (do_insn - insn) * 16; - } else { - insn->header.execution_size = do_insn->header.execution_size; - - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn; - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } - -/* insn->header.mask_control = BRW_MASK_ENABLE; */ - - insn->header.mask_control = BRW_MASK_DISABLE; - p->current->header.predicate_control = BRW_PREDICATE_NONE; - return insn; -} - - -/* FORWARD JUMPS: - */ -void brw_land_fwd_jump(struct brw_compile *p, - struct brw_instruction *jmp_insn) -{ - struct brw_instruction *landing = &p->store[p->nr_insn]; - - assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); - - jmp_insn->bits3.ud = (landing - jmp_insn) - 1; -} - - - -/* To integrate with the above, it makes sense that the comparison - * instruction should populate the flag register. It might be simpler - * just to use the flag reg for most WM tasks? - */ -void brw_CMP(struct brw_compile *p, - struct brw_reg dest, - unsigned conditional, - struct brw_reg src0, - struct brw_reg src1) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); - - insn->header.destreg__conditonalmod = conditional; - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, src1); - -/* guess_execution_size(insn, src0); */ - - - /* Make it so that future instructions will use the computed flag - * value until brw_set_predicate_control_flag_value() is called - * again. - */ - if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && - dest.nr == 0) { - p->current->header.predicate_control = BRW_PREDICATE_NORMAL; - p->flag_value = 0xff; - } -} - - - -/*********************************************************************** - * Helpers for the various SEND message types: - */ - -/* Invert 8 values - */ -void brw_math( struct brw_compile *p, - struct brw_reg dest, - unsigned function, - unsigned saturate, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned data_type, - unsigned precision ) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; - unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; - - /* Example code doesn't set predicate_control for send - * instructions. - */ - insn->header.predicate_control = 0; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - data_type); -} - -/* Use 2 send instructions to invert 16 elements - */ -void brw_math_16( struct brw_compile *p, - struct brw_reg dest, - unsigned function, - unsigned saturate, - unsigned msg_reg_nr, - struct brw_reg src, - unsigned precision ) -{ - struct brw_instruction *insn; - unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; - unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; - - /* First instruction: - */ - brw_push_insn_state(p); - brw_set_predicate_control_flag_value(p, 0xff); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - BRW_MATH_DATA_VECTOR); - - /* Second instruction: - */ - insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.compression_control = BRW_COMPRESSION_2NDHALF; - insn->header.destreg__conditonalmod = msg_reg_nr+1; - - brw_set_dest(insn, offset(dest,1)); - brw_set_src0(insn, src); - brw_set_math_message(insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - BRW_MATH_DATA_VECTOR); - - brw_pop_insn_state(p); -} - - - - -void brw_dp_WRITE_16( struct brw_compile *p, - struct brw_reg src, - unsigned msg_reg_nr, - unsigned scratch_offset ) -{ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - brw_MOV(p, - retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), - brw_imm_d(scratch_offset)); - - brw_pop_insn_state(p); - } - - { - unsigned msg_length = 3; - struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - - brw_set_dp_write_message(insn, - 255, /* bti */ - BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ - BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ - msg_length, - 0, /* pixel scoreboard */ - 0, /* response_length */ - 0); /* eot */ - } - -} - - -void brw_dp_READ_16( struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - unsigned scratch_offset ) -{ - { - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_mask_control(p, BRW_MASK_DISABLE); - - brw_MOV(p, - retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), - brw_imm_d(scratch_offset)); - - brw_pop_insn_state(p); - } - - { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); /* UW? */ - brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - - brw_set_dp_read_message(insn, - 255, /* bti */ - 3, /* msg_control */ - BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 1, /* target cache */ - 1, /* msg_length */ - 2, /* response_length */ - 0); /* eot */ - } -} - - -void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned msg_length, - unsigned response_length, - boolean eot) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_dp_write_message(insn, - binding_table_index, - BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ - BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ - msg_length, - 1, /* pixel scoreboard */ - response_length, - eot); -} - - - -void brw_SAMPLE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - unsigned binding_table_index, - unsigned sampler, - unsigned writemask, - unsigned msg_type, - unsigned response_length, - unsigned msg_length, - boolean eot) -{ - boolean need_stall = 0; - - if(writemask == 0) { -/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */ - return; - } - - /* Hardware doesn't do destination dependency checking on send - * instructions properly. Add a workaround which generates the - * dependency by other means. In practice it seems like this bug - * only crops up for texture samples, and only where registers are - * written by the send and then written again later without being - * read in between. Luckily for us, we already track that - * information and use it to modify the writemask for the - * instruction, so that is a guide for whether a workaround is - * needed. - */ - if (writemask != TGSI_WRITEMASK_XYZW) { - unsigned dst_offset = 0; - unsigned i, newmask = 0, len = 0; - - for (i = 0; i < 4; i++) { - if (writemask & (1<<i)) - break; - dst_offset += 2; - } - for (; i < 4; i++) { - if (!(writemask & (1<<i))) - break; - newmask |= 1<<i; - len++; - } - - if (newmask != writemask) { - need_stall = 1; -/* debug_printf("need stall %x %x\n", newmask , writemask); */ - } - else { - struct brw_reg m1 = brw_message_reg(msg_reg_nr); - - newmask = ~newmask & TGSI_WRITEMASK_XYZW; - - brw_push_insn_state(p); - - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_mask_control(p, BRW_MASK_DISABLE); - - brw_MOV(p, m1, brw_vec8_grf(0,0)); - brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); - - brw_pop_insn_state(p); - - src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); - dest = offset(dest, dst_offset); - response_length = len * 2; - } - } - - { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_sampler_message(insn, - binding_table_index, - sampler, - msg_type, - response_length, - msg_length, - eot); - } - - if (need_stall) - { - struct brw_reg reg = vec8(offset(dest, response_length-1)); - - /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } - */ - brw_push_insn_state(p); - brw_set_compression_control(p, FALSE); - brw_MOV(p, reg, reg); - brw_pop_insn_state(p); - } - -} - -/* All these variables are pretty confusing - we might be better off - * using bitmasks and macros for this, in the old style. Or perhaps - * just having the caller instantiate the fields in dword3 itself. - */ -void brw_urb_WRITE(struct brw_compile *p, - struct brw_reg dest, - unsigned msg_reg_nr, - struct brw_reg src0, - boolean allocate, - boolean used, - unsigned msg_length, - unsigned response_length, - boolean eot, - boolean writes_complete, - unsigned offset, - unsigned swizzle) -{ - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - assert(msg_length < 16); - - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, brw_imm_d(0)); - - insn->header.destreg__conditonalmod = msg_reg_nr; - - brw_set_urb_message(insn, - allocate, - used, - msg_length, - response_length, - eot, - writes_complete, - offset, - swizzle); -} - diff --git a/src/gallium/drivers/i965simple/brw_eu_util.c b/src/gallium/drivers/i965simple/brw_eu_util.c deleted file mode 100644 index 3a65b141f0..0000000000 --- a/src/gallium/drivers/i965simple/brw_eu_util.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_eu.h" - - -void brw_math_invert( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src) -{ - brw_math( p, - dst, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 0, - src, - BRW_MATH_PRECISION_FULL, - BRW_MATH_DATA_VECTOR ); -} - - - -void brw_copy4(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - unsigned count) -{ - unsigned i; - - dst = vec4(dst); - src = vec4(src); - - for (i = 0; i < count; i++) - { - unsigned delta = i*32; - brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); - brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); - } -} - - -void brw_copy8(struct brw_compile *p, - struct brw_reg dst, - struct brw_reg src, - unsigned count) -{ - unsigned i; - - dst = vec8(dst); - src = vec8(src); - - for (i = 0; i < count; i++) - { - unsigned delta = i*32; - brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); - } -} - - -void brw_copy_indirect_to_indirect(struct brw_compile *p, - struct brw_indirect dst_ptr, - struct brw_indirect src_ptr, - unsigned count) -{ - unsigned i; - - for (i = 0; i < count; i++) - { - unsigned delta = i*32; - brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); - brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); - } -} - - -void brw_copy_from_indirect(struct brw_compile *p, - struct brw_reg dst, - struct brw_indirect ptr, - unsigned count) -{ - unsigned i; - - dst = vec4(dst); - - for (i = 0; i < count; i++) - { - unsigned delta = i*32; - brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); - brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); - } -} - - - - diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c deleted file mode 100644 index e6001c30d9..0000000000 --- a/src/gallium/drivers/i965simple/brw_flush.c +++ /dev/null @@ -1,73 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "pipe/p_defines.h" -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_batch.h" - - -static void brw_flush( struct pipe_context *pipe, - unsigned flags, - struct pipe_fence_handle **fence ) -{ - struct brw_context *brw = brw_context(pipe); - - /* Do we need to emit an MI_FLUSH command to flush the hardware - * caches? - */ - if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { - struct brw_mi_flush flush; - - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - - if (!(flags & PIPE_FLUSH_RENDER_CACHE)) - flush.flags |= BRW_INHIBIT_FLUSH_RENDER_CACHE; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) - flush.flags |= BRW_FLUSH_READ_CACHE; - - BRW_BATCH_STRUCT(brw, &flush); - } - - /* If there are no flags, just flush pending commands to hardware: - */ - FLUSH_BATCH( fence ); -} - - - -void brw_init_flush_functions( struct brw_context *brw ) -{ - brw->pipe.flush = brw_flush; -} diff --git a/src/gallium/drivers/i965simple/brw_gs.c b/src/gallium/drivers/i965simple/brw_gs.c deleted file mode 100644 index de60868ccc..0000000000 --- a/src/gallium/drivers/i965simple/brw_gs.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_state.h" -#include "brw_gs.h" - - - -static void compile_gs_prog( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - struct brw_gs_compile c; - const unsigned *program; - unsigned program_size; - - memset(&c, 0, sizeof(c)); - - c.key = *key; - - /* Need to locate the two positions present in vertex + header. - * These are currently hardcoded: - */ - c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ - c.nr_bytes = c.nr_regs * REG_SIZE; - - - /* Begin the compilation: - */ - brw_init_compile(&c.func); - - c.func.single_program_flow = 1; - - /* For some reason the thread is spawned with only 4 channels - * unmasked. - */ - brw_set_mask_control(&c.func, BRW_MASK_DISABLE); - - - /* Note that primitives which don't require a GS program have - * already been weeded out by this stage: - */ - switch (key->primitive) { - case PIPE_PRIM_QUADS: - brw_gs_quads( &c ); - break; - case PIPE_PRIM_QUAD_STRIP: - brw_gs_quad_strip( &c ); - break; - case PIPE_PRIM_LINE_LOOP: - brw_gs_lines( &c ); - break; - case PIPE_PRIM_LINES: - if (key->hint_gs_always) - brw_gs_lines( &c ); - else { - return; - } - break; - case PIPE_PRIM_TRIANGLES: - if (key->hint_gs_always) - brw_gs_tris( &c ); - else { - return; - } - break; - case PIPE_PRIM_POINTS: - if (key->hint_gs_always) - brw_gs_points( &c ); - else { - return; - } - break; - default: - return; - } - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - /* Upload - */ - brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->gs.prog_data ); -} - - -static boolean search_cache( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_GS_PROG], - key, sizeof(*key), - &brw->gs.prog_data, - &brw->gs.prog_gs_offset); -} - - -static const int gs_prim[PIPE_PRIM_POLYGON+1] = { - PIPE_PRIM_POINTS, - PIPE_PRIM_LINES, - PIPE_PRIM_LINE_LOOP, - PIPE_PRIM_LINES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_QUADS, - PIPE_PRIM_QUAD_STRIP, - PIPE_PRIM_TRIANGLES -}; - -static void populate_key( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_VS_PROG */ - key->attrs = brw->vs.prog_data->outputs_written; - - /* BRW_NEW_PRIMITIVE */ - key->primitive = gs_prim[brw->primitive]; - - key->hint_gs_always = 0; /* debug code? */ - - key->need_gs_prog = (key->hint_gs_always || - brw->primitive == PIPE_PRIM_QUADS || - brw->primitive == PIPE_PRIM_QUAD_STRIP || - brw->primitive == PIPE_PRIM_LINE_LOOP); -} - -/* Calculate interpolants for triangle and line rasterization. - */ -static void upload_gs_prog( struct brw_context *brw ) -{ - struct brw_gs_prog_key key; - - /* Populate the key: - */ - populate_key(brw, &key); - - if (brw->gs.prog_active != key.need_gs_prog) { - brw->state.dirty.cache |= CACHE_NEW_GS_PROG; - brw->gs.prog_active = key.need_gs_prog; - } - - if (brw->gs.prog_active) { - if (!search_cache(brw, &key)) - compile_gs_prog( brw, &key ); - } -} - - -const struct brw_tracked_state brw_gs_prog = { - .dirty = { - .brw = BRW_NEW_PRIMITIVE, - .cache = CACHE_NEW_VS_PROG - }, - .update = upload_gs_prog -}; diff --git a/src/gallium/drivers/i965simple/brw_gs.h b/src/gallium/drivers/i965simple/brw_gs.h deleted file mode 100644 index f09141c6aa..0000000000 --- a/src/gallium/drivers/i965simple/brw_gs.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_GS_H -#define BRW_GS_H - - -#include "brw_context.h" -#include "brw_eu.h" - -#define MAX_GS_VERTS (4) - -struct brw_gs_prog_key { - unsigned attrs:32; - unsigned primitive:4; - unsigned hint_gs_always:1; - unsigned need_gs_prog:1; - unsigned pad:26; -}; - -struct brw_gs_compile { - struct brw_compile func; - struct brw_gs_prog_key key; - struct brw_gs_prog_data prog_data; - - struct { - struct brw_reg R0; - struct brw_reg vertex[MAX_GS_VERTS]; - } reg; - - /* 3 different ways of expressing vertex size: - */ - unsigned nr_attrs; - unsigned nr_regs; - unsigned nr_bytes; -}; - -#define ATTR_SIZE (4*4) - -void brw_gs_quads( struct brw_gs_compile *c ); -void brw_gs_quad_strip( struct brw_gs_compile *c ); -void brw_gs_tris( struct brw_gs_compile *c ); -void brw_gs_lines( struct brw_gs_compile *c ); -void brw_gs_points( struct brw_gs_compile *c ); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_gs_emit.c b/src/gallium/drivers/i965simple/brw_gs_emit.c deleted file mode 100644 index c3cc90b10f..0000000000 --- a/src/gallium/drivers/i965simple/brw_gs_emit.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_gs.h" - -static void brw_gs_alloc_regs( struct brw_gs_compile *c, - unsigned nr_verts ) -{ - unsigned i = 0,j; - - /* Register usage is static, precompute here: - */ - c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; - - /* Payload vertices plus space for more generated vertices: - */ - for (j = 0; j < nr_verts; j++) { - c->reg.vertex[j] = brw_vec4_grf(i, 0); - i += c->nr_regs; - } - - c->prog_data.urb_read_length = c->nr_regs; - c->prog_data.total_grf = i; -} - - -static void brw_gs_emit_vue(struct brw_gs_compile *c, - struct brw_reg vert, - boolean last, - unsigned header) -{ - struct brw_compile *p = &c->func; - boolean allocate = !last; - - /* Overwrite PrimType and PrimStart in the message header, for - * each vertex in turn: - */ - brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); - - /* Copy the vertex from vertn into m1..mN+1: - */ - brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); - - /* Send each vertex as a seperate write to the urb. This is - * different to the concept in brw_sf_emit.c, where subsequent - * writes are used to build up a single urb entry. Each of these - * writes instantiates a seperate urb entry, and a new one must be - * allocated each time. - */ - brw_urb_WRITE(p, - allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - 0, - c->reg.R0, - allocate, - 1, /* used */ - c->nr_regs + 1, /* msg length */ - allocate ? 1 : 0, /* response length */ - allocate ? 0 : 1, /* eot */ - 1, /* writes_complete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); -} - - - -void brw_gs_quads( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 4); - - /* Use polygons for correct edgeflag behaviour. Note that vertex 3 - * is the PV for quads, but vertex 0 for polygons: - */ - brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); -} - -void brw_gs_quad_strip( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 4); - - brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); -} - -void brw_gs_tris( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 3); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); -} - -void brw_gs_lines( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 2); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); -} - -void brw_gs_points( struct brw_gs_compile *c ) -{ - brw_gs_alloc_regs(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); -} - - - - - - - - diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c deleted file mode 100644 index 5b8016b2e9..0000000000 --- a/src/gallium/drivers/i965simple/brw_gs_state.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -static void upload_gs_unit( struct brw_context *brw ) -{ - struct brw_gs_unit_state gs; - - memset(&gs, 0, sizeof(gs)); - - /* CACHE_NEW_GS_PROG */ - if (brw->gs.prog_active) { - gs.thread0.grf_reg_count = - align(brw->gs.prog_data->total_grf, 16) / 16 - 1; - gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; - gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; - } - else { - gs.thread0.grf_reg_count = 0; - gs.thread0.kernel_start_pointer = 0; - gs.thread3.urb_entry_read_length = 1; - } - - /* BRW_NEW_URB_FENCE */ - gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; - gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - - gs.thread4.max_threads = 0; /* Hardware requirement */ - - if (BRW_DEBUG & DEBUG_STATS) - gs.thread4.stats_enable = 1; - - /* CONSTANT */ - gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - gs.thread1.single_program_flow = 1; - gs.thread3.dispatch_grf_start_reg = 1; - gs.thread3.const_urb_entry_read_offset = 0; - gs.thread3.const_urb_entry_read_length = 0; - gs.thread3.urb_entry_read_offset = 0; - - - brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); -} - - -const struct brw_tracked_state brw_gs_unit = { - .dirty = { - .brw = (BRW_NEW_CURBE_OFFSETS | - BRW_NEW_URB_FENCE), - .cache = CACHE_NEW_GS_PROG - }, - .update = upload_gs_unit -}; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c deleted file mode 100644 index 99ff4403a5..0000000000 --- a/src/gallium/drivers/i965simple/brw_misc_state.c +++ /dev/null @@ -1,488 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_batch.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - - - - - -/*********************************************************************** - * Blend color - */ - -static void upload_blend_constant_color(struct brw_context *brw) -{ - struct brw_blend_constant_color bcc; - - memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0]; - bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1]; - bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2]; - bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3]; - - BRW_CACHED_BATCH_STRUCT(brw, &bcc); -} - - -const struct brw_tracked_state brw_blend_constant_color = { - .dirty = { - .brw = BRW_NEW_BLEND, - .cache = 0 - }, - .update = upload_blend_constant_color -}; - - -/*********************************************************************** - * Drawing rectangle - */ -static void upload_drawing_rect(struct brw_context *brw) -{ - struct brw_drawrect bdr; - - memset(&bdr, 0, sizeof(bdr)); - bdr.header.opcode = CMD_DRAW_RECT; - bdr.header.length = sizeof(bdr)/4 - 2; - bdr.xmin = 0; - bdr.ymin = 0; - bdr.xmax = brw->attribs.FrameBuffer.cbufs[0]->width; - bdr.ymax = brw->attribs.FrameBuffer.cbufs[0]->height; - bdr.xorg = 0; - bdr.yorg = 0; - - /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted - * uncached in brw_draw.c: - */ - BRW_BATCH_STRUCT(brw, &bdr); -} - -const struct brw_tracked_state brw_drawing_rect = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_drawing_rect -}; - -/** - * Upload the binding table pointers, which point each stage's array of surface - * state pointers. - * - * The binding table pointers are relative to the surface state base address, - * which is the BRW_SS_POOL cache buffer. - */ -static void upload_binding_table_pointers(struct brw_context *brw) -{ - struct brw_binding_table_pointers btp; - memset(&btp, 0, sizeof(btp)); - - btp.header.opcode = CMD_BINDING_TABLE_PTRS; - btp.header.length = sizeof(btp)/4 - 2; - btp.vs = 0; - btp.gs = 0; - btp.clp = 0; - btp.sf = 0; - btp.wm = brw->wm.bind_ss_offset; - - BRW_CACHED_BATCH_STRUCT(brw, &btp); -} - -const struct brw_tracked_state brw_binding_table_pointers = { - .dirty = { - .brw = 0, - .cache = CACHE_NEW_SURF_BIND - }, - .update = upload_binding_table_pointers, -}; - - -/** - * Upload pointers to the per-stage state. - * - * The state pointers in this packet are all relative to the general state - * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. - */ -static void upload_pipelined_state_pointers(struct brw_context *brw ) -{ - struct brw_pipelined_state_pointers psp; - memset(&psp, 0, sizeof(psp)); - - psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; - psp.header.length = sizeof(psp)/4 - 2; - - psp.vs.offset = brw->vs.state_gs_offset >> 5; - psp.sf.offset = brw->sf.state_gs_offset >> 5; - psp.wm.offset = brw->wm.state_gs_offset >> 5; - psp.cc.offset = brw->cc.state_gs_offset >> 5; - - /* GS gets turned on and off regularly. Need to re-emit URB fence - * after this occurs. - */ - if (brw->gs.prog_active) { - psp.gs.offset = brw->gs.state_gs_offset >> 5; - psp.gs.enable = 1; - } - - if (0) { - psp.clp.offset = brw->clip.state_gs_offset >> 5; - psp.clp.enable = 1; - } - - - if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) - brw->state.dirty.brw |= BRW_NEW_PSP; -} - -const struct brw_tracked_state brw_pipelined_state_pointers = { - .dirty = { - .brw = 0, - .cache = (CACHE_NEW_VS_UNIT | - CACHE_NEW_GS_UNIT | - CACHE_NEW_GS_PROG | - CACHE_NEW_CLIP_UNIT | - CACHE_NEW_SF_UNIT | - CACHE_NEW_WM_UNIT | - CACHE_NEW_CC_UNIT) - }, - .update = upload_pipelined_state_pointers -}; - -static void upload_psp_urb_cbs(struct brw_context *brw ) -{ - upload_pipelined_state_pointers(brw); - brw_upload_urb_fence(brw); - brw_upload_constant_buffer_state(brw); -} - - -const struct brw_tracked_state brw_psp_urb_cbs = { - .dirty = { - .brw = BRW_NEW_URB_FENCE, - .cache = (CACHE_NEW_VS_UNIT | - CACHE_NEW_GS_UNIT | - CACHE_NEW_GS_PROG | - CACHE_NEW_CLIP_UNIT | - CACHE_NEW_SF_UNIT | - CACHE_NEW_WM_UNIT | - CACHE_NEW_CC_UNIT) - }, - .update = upload_psp_urb_cbs -}; - -/** - * Upload the depthbuffer offset and format. - * - * We have to do this per state validation as we need to emit the relocation - * in the batch buffer. - */ -static void upload_depthbuffer(struct brw_context *brw) -{ - struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zsbuf; - - BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); - if (depth_surface == NULL) { - OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | - (BRW_SURFACE_NULL << 29)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - } else { - unsigned int format; - struct brw_texture *tex = (struct brw_texture *)depth_surface->texture; - assert(depth_surface->block.width == 1); - assert(depth_surface->block.height == 1); - switch (depth_surface->block.size) { - case 2: - format = BRW_DEPTHFORMAT_D16_UNORM; - break; - case 4: - if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT) - format = BRW_DEPTHFORMAT_D32_FLOAT; - else - format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; - break; - default: - assert(0); - return; - } - - OUT_BATCH((depth_surface->stride - 1) | - (format << 18) | - (BRW_TILEWALK_YMAJOR << 26) | -// (depth_surface->region->tiled << 27) | - (BRW_SURFACE_2D << 29)); - OUT_RELOC(tex->buffer, - PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); - OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | - ((depth_surface->stride/depth_surface->block.size - 1) << 6) | - ((depth_surface->height - 1) << 19)); - OUT_BATCH(0); - } - ADVANCE_BATCH(); -} - -const struct brw_tracked_state brw_depthbuffer = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_depthbuffer, -}; - - - - -/*********************************************************************** - * Polygon stipple packet - */ - -static void upload_polygon_stipple(struct brw_context *brw) -{ - struct brw_polygon_stipple bps; - unsigned i; - - memset(&bps, 0, sizeof(bps)); - bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; - bps.header.length = sizeof(bps)/4-2; - - /* XXX: state tracker should send *all* state down initially! - */ - if (brw->attribs.PolygonStipple) - for (i = 0; i < 32; i++) - bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */ - - BRW_CACHED_BATCH_STRUCT(brw, &bps); -} - -const struct brw_tracked_state brw_polygon_stipple = { - .dirty = { - .brw = BRW_NEW_STIPPLE, - .cache = 0 - }, - .update = upload_polygon_stipple -}; - - -/*********************************************************************** - * Line stipple packet - */ - -static void upload_line_stipple(struct brw_context *brw) -{ - struct brw_line_stipple bls; - float tmp; - int tmpi; - - memset(&bls, 0, sizeof(bls)); - bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; - bls.header.length = sizeof(bls)/4 - 2; - - bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern; - bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor; - - tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor; - tmpi = tmp * (1<<13); - - - bls.bits1.inverse_repeat_count = tmpi; - - BRW_CACHED_BATCH_STRUCT(brw, &bls); -} - -const struct brw_tracked_state brw_line_stipple = { - .dirty = { - .brw = BRW_NEW_STIPPLE, - .cache = 0 - }, - .update = upload_line_stipple -}; - - -/*********************************************************************** - * Misc constant state packets - */ - -static void upload_pipe_control(struct brw_context *brw) -{ - struct brw_pipe_control pc; - - return; - - memset(&pc, 0, sizeof(pc)); - - pc.header.opcode = CMD_PIPE_CONTROL; - pc.header.length = sizeof(pc)/4 - 2; - pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; - - pc.header.instruction_state_cache_flush_enable = 1; - - pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; - - BRW_BATCH_STRUCT(brw, &pc); -} - -const struct brw_tracked_state brw_pipe_control = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_pipe_control -}; - - -/*********************************************************************** - * Misc invarient state packets - */ - -static void upload_invarient_state( struct brw_context *brw ) -{ - { - struct brw_mi_flush flush; - - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - flush.flags = BRW_FLUSH_STATE_CACHE | BRW_FLUSH_READ_CACHE; - BRW_BATCH_STRUCT(brw, &flush); - } - - { - /* 0x61040000 Pipeline Select */ - /* PipelineSelect : 0 */ - struct brw_pipeline_select ps; - - memset(&ps, 0, sizeof(ps)); - ps.header.opcode = CMD_PIPELINE_SELECT; - ps.header.pipeline_select = 0; - BRW_BATCH_STRUCT(brw, &ps); - } - - { - struct brw_global_depth_offset_clamp gdo; - memset(&gdo, 0, sizeof(gdo)); - - /* Disable depth offset clamping. - */ - gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; - gdo.header.length = sizeof(gdo)/4 - 2; - gdo.depth_offset_clamp = 0.0; - - BRW_BATCH_STRUCT(brw, &gdo); - } - - - /* 0x61020000 State Instruction Pointer */ - { - struct brw_system_instruction_pointer sip; - memset(&sip, 0, sizeof(sip)); - - sip.header.opcode = CMD_STATE_INSN_POINTER; - sip.header.length = 0; - sip.bits0.pad = 0; - sip.bits0.system_instruction_pointer = 0; - BRW_BATCH_STRUCT(brw, &sip); - } - - - { - struct brw_vf_statistics vfs; - memset(&vfs, 0, sizeof(vfs)); - - vfs.opcode = CMD_VF_STATISTICS; - if (BRW_DEBUG & DEBUG_STATS) - vfs.statistics_enable = 1; - - BRW_BATCH_STRUCT(brw, &vfs); - } - - - { - struct brw_polygon_stipple_offset bpso; - - memset(&bpso, 0, sizeof(bpso)); - bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; - bpso.header.length = sizeof(bpso)/4-2; - bpso.bits0.x_offset = 0; - bpso.bits0.y_offset = 0; - - BRW_BATCH_STRUCT(brw, &bpso); - } -} - -const struct brw_tracked_state brw_invarient_state = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_invarient_state -}; - -/** - * Define the base addresses which some state is referenced from. - * - * This allows us to avoid having to emit relocations in many places for - * cached state, and instead emit pointers inside of large, mostly-static - * state pools. This comes at the expense of memory, and more expensive cache - * misses. - */ -static void upload_state_base_address( struct brw_context *brw ) -{ - /* Output the structure (brw_state_base_address) directly to the - * batchbuffer, so we can emit relocations inline. - */ - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); - OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, - PIPE_BUFFER_USAGE_GPU_READ, - 1); /* General state base address */ - OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, - PIPE_BUFFER_USAGE_GPU_READ, - 1); /* Surface state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* General state upper bound */ - OUT_BATCH(1); /* Indirect object upper bound */ - ADVANCE_BATCH(); -} - - -const struct brw_tracked_state brw_state_base_address = { - .dirty = { - .brw = BRW_NEW_SCENE, - .cache = 0 - }, - .update = upload_state_base_address -}; diff --git a/src/gallium/drivers/i965simple/brw_reg.h b/src/gallium/drivers/i965simple/brw_reg.h deleted file mode 100644 index 9e885c3b3b..0000000000 --- a/src/gallium/drivers/i965simple/brw_reg.h +++ /dev/null @@ -1,76 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#define CMD_MI (0x0 << 29) -#define CMD_2D (0x2 << 29) -#define CMD_3D (0x3 << 29) - -#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) - -/* Stalls command execution waiting for the given events to have occurred. */ -#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) -#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) - -/* Primitive dispatch on 830-945 */ -#define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) -#define PRIM_INDIRECT (1<<23) -#define PRIM_INLINE (0<<23) -#define PRIM_INDIRECT_SEQUENTIAL (0<<17) -#define PRIM_INDIRECT_ELTS (1<<17) - -#define PRIM3D_TRILIST (0x0<<18) -#define PRIM3D_TRISTRIP (0x1<<18) -#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) -#define PRIM3D_TRIFAN (0x3<<18) -#define PRIM3D_POLY (0x4<<18) -#define PRIM3D_LINELIST (0x5<<18) -#define PRIM3D_LINESTRIP (0x6<<18) -#define PRIM3D_RECTLIST (0x7<<18) -#define PRIM3D_POINTLIST (0x8<<18) -#define PRIM3D_DIB (0x9<<18) -#define PRIM3D_MASK (0x1f<<18) - -#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) - -#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) - -#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) - -/* BR00 */ -#define XY_BLT_WRITE_ALPHA (1 << 21) -#define XY_BLT_WRITE_RGB (1 << 20) -#define XY_SRC_TILED (1 << 15) -#define XY_DST_TILED (1 << 11) - -/* BR13 */ -#define BR13_565 (0x1 << 24) -#define BR13_8888 (0x3 << 24) - -#define FENCE_LINEAR 0 -#define FENCE_XMAJOR 1 -#define FENCE_YMAJOR 2 diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c deleted file mode 100644 index 4a84c4db23..0000000000 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ /dev/null @@ -1,244 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_string.h" -#include "util/u_simple_screen.h" - -#include "brw_context.h" -#include "brw_screen.h" -#include "brw_tex_layout.h" - - -static const char * -brw_get_vendor( struct pipe_screen *screen ) -{ - return "VMware, Inc."; -} - - -static const char * -brw_get_name( struct pipe_screen *screen ) -{ - static char buffer[128]; - const char *chipset; - - switch (brw_screen(screen)->pci_id) { - case PCI_CHIP_I965_Q: - chipset = "Intel(R) 965Q"; - break; - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_G_1: - chipset = "Intel(R) 965G"; - break; - case PCI_CHIP_I965_GM: - chipset = "Intel(R) 965GM"; - break; - case PCI_CHIP_I965_GME: - chipset = "Intel(R) 965GME/GLE"; - break; - default: - chipset = "unknown"; - break; - } - - util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); - return buffer; -} - - -static int -brw_get_param(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 0; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ - default: - return 0; - } -} - - -static float -brw_get_paramf(struct pipe_screen *screen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 7.5; - - case PIPE_CAP_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 255.0; - - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 4.0; - - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - - default: - return 0; - } -} - - -static boolean -brw_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, - unsigned geom_flags ) -{ -#if 0 - /* XXX: This is broken -- rewrite if still needed. */ - static const unsigned tex_supported[] = { - PIPE_FORMAT_R8G8B8A8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_L8_UNORM, - PIPE_FORMAT_A8_UNORM, - PIPE_FORMAT_I8_UNORM, - PIPE_FORMAT_L8A8_UNORM, - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, - PIPE_FORMAT_S8_Z24, - }; - - - /* Actually a lot more than this - add later: - */ - static const unsigned render_supported[] = { - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - }; - - /* - */ - static const unsigned z_stencil_supported[] = { - PIPE_FORMAT_Z16_UNORM, - PIPE_FORMAT_Z32_UNORM, - PIPE_FORMAT_S8Z24_UNORM, - }; - - switch (type) { - case PIPE_RENDER_FORMAT: - *numFormats = Elements(render_supported); - return render_supported; - - case PIPE_TEX_FORMAT: - *numFormats = Elements(tex_supported); - return render_supported; - - case PIPE_Z_STENCIL_FORMAT: - *numFormats = Elements(render_supported); - return render_supported; - - default: - *numFormats = 0; - return NULL; - } -#else - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - return TRUE; - default: - return FALSE; - }; - return FALSE; -#endif -} - - -static void -brw_destroy_screen( struct pipe_screen *screen ) -{ - struct pipe_winsys *winsys = screen->winsys; - - if(winsys->destroy) - winsys->destroy(winsys); - - FREE(screen); -} - - -/** - * Create a new brw_screen object - */ -struct pipe_screen * -brw_create_screen(struct pipe_winsys *winsys, uint pci_id) -{ - struct brw_screen *brwscreen = CALLOC_STRUCT(brw_screen); - - if (!brwscreen) - return NULL; - - brwscreen->pci_id = pci_id; - - brwscreen->screen.winsys = winsys; - - brwscreen->screen.destroy = brw_destroy_screen; - - brwscreen->screen.get_name = brw_get_name; - brwscreen->screen.get_vendor = brw_get_vendor; - brwscreen->screen.get_param = brw_get_param; - brwscreen->screen.get_paramf = brw_get_paramf; - brwscreen->screen.is_format_supported = brw_is_format_supported; - - brw_init_screen_texture_funcs(&brwscreen->screen); - u_simple_screen_init(&brwscreen->screen); - - return &brwscreen->screen; -} diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c deleted file mode 100644 index b82a2e143b..0000000000 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ /dev/null @@ -1,351 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_sf.h" -#include "brw_state.h" -#include "tgsi/tgsi_parse.h" - - -static void compile_sf_prog( struct brw_context *brw, - struct brw_sf_prog_key *key ) -{ - struct brw_sf_compile c; - const unsigned *program; - unsigned program_size; - - memset(&c, 0, sizeof(c)); - - /* Begin the compilation: - */ - brw_init_compile(&c.func); - - c.key = *key; - - - c.nr_attrs = c.key.vp_output_count; - c.nr_attr_regs = (c.nr_attrs+1)/2; - - c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ - c.nr_setup_regs = (c.nr_setup_attrs+1)/2; - - c.prog_data.urb_read_length = c.nr_attr_regs; - c.prog_data.urb_entry_size = c.nr_setup_regs * 2; - - - /* Which primitive? Or all three? - */ - switch (key->primitive) { - case SF_TRIANGLES: - c.nr_verts = 3; - brw_emit_tri_setup( &c ); - break; - case SF_LINES: - c.nr_verts = 2; - brw_emit_line_setup( &c ); - break; - case SF_POINTS: - c.nr_verts = 1; - brw_emit_point_setup( &c ); - break; - - case SF_UNFILLED_TRIS: - default: - assert(0); - return; - } - - - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - /* Upload - */ - brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->sf.prog_data ); -} - - -static boolean search_cache( struct brw_context *brw, - struct brw_sf_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_SF_PROG], - key, sizeof(*key), - &brw->sf.prog_data, - &brw->sf.prog_gs_offset); -} - - -/* Calculate interpolants for triangle and line rasterization. - */ -static void upload_sf_prog( struct brw_context *brw ) -{ - const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; - struct brw_sf_prog_key key; - struct tgsi_parse_context parse; - int i, done = 0; - - - memset(&key, 0, sizeof(key)); - - /* Populate the key, noting state dependencies: - */ - /* CACHE_NEW_VS_PROG */ - key.vp_output_count = brw->vs.prog_data->outputs_written; - - /* BRW_NEW_FS */ - key.fp_input_count = brw->attribs.FragmentProgram->info.file_max[TGSI_FILE_INPUT] + 1; - - - /* BRW_NEW_REDUCED_PRIMITIVE */ - switch (brw->reduced_primitive) { - case PIPE_PRIM_TRIANGLES: -// if (key.attrs & (1<<VERT_RESULT_EDGE)) -// key.primitive = SF_UNFILLED_TRIS; -// else - key.primitive = SF_TRIANGLES; - break; - case PIPE_PRIM_LINES: - key.primitive = SF_LINES; - break; - case PIPE_PRIM_POINTS: - key.primitive = SF_POINTS; - break; - } - - - - /* Scan fp inputs to figure out what interpolation modes are - * required for each incoming vp output. There is an assumption - * that the state tracker makes sure there is a 1:1 linkage between - * these sets of attributes (XXX: position??) - */ - tgsi_parse_init( &parse, fs->program.tokens ); - while( !done && - !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) - { - int first = parse.FullToken.FullDeclaration.DeclarationRange.First; - int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; - int interp_mode = parse.FullToken.FullDeclaration.Declaration.Interpolate; - //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; - //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - - debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode); - - switch (interp_mode) { - case TGSI_INTERPOLATE_CONSTANT: - for (i = first; i <= last; i++) - key.const_mask |= (1 << i); - break; - case TGSI_INTERPOLATE_LINEAR: - for (i = first; i <= last; i++) - key.linear_mask |= (1 << i); - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - for (i = first; i <= last; i++) - key.persp_mask |= (1 << i); - break; - default: - break; - } - - /* Also need stuff for flat shading, twosided color. - */ - - } - break; - default: - done = 1; - break; - } - } - - /* Hack: Adjust for position. Optimize away when not required (ie - * for perspective interpolation). - */ - key.persp_mask <<= 1; - key.linear_mask <<= 1; - key.linear_mask |= 1; - key.const_mask <<= 1; - - debug_printf("key.persp_mask: %x\n", key.persp_mask); - debug_printf("key.linear_mask: %x\n", key.linear_mask); - debug_printf("key.const_mask: %x\n", key.const_mask); - - -// key.do_point_sprite = brw->attribs.Point->PointSprite; -// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; - -// key.do_flat_shading = (brw->attribs.Raster->flatshade); -// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); - -// if (key.do_twoside_color) -// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); - - - if (!search_cache(brw, &key)) - compile_sf_prog( brw, &key ); -} - - -const struct brw_tracked_state brw_sf_prog = { - .dirty = { - .brw = (BRW_NEW_RASTERIZER | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_VS | - BRW_NEW_FS), - .cache = 0, - }, - .update = upload_sf_prog -}; - - - -#if 0 -/* Build a struct like the one we'd like the state tracker to pass to - * us. - */ -static void update_sf_linkage( struct brw_context *brw ) -{ - const struct brw_vertex_program *vs = brw->attribs.VertexProgram; - const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; - struct pipe_setup_linkage state; - struct tgsi_parse_context parse; - - int i, j; - int nr_vp_outputs = 0; - int done = 0; - - struct { - unsigned semantic:8; - unsigned semantic_index:16; - } fp_semantic[32], vp_semantic[32]; - - memset(&state, 0, sizeof(state)); - - state.fp_input_count = 0; - - - - - - - assert(state.fp_input_count == fs->program.num_inputs); - - - /* Then scan vp outputs - */ - done = 0; - tgsi_parse_init( &parse, vs->program.tokens ); - while( !done && - !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) - { - int first = parse.FullToken.FullDeclaration.DeclarationRange.First; - int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; - - for (i = first; i < last; i++) { - vp_semantic[i].semantic = - parse.FullToken.FullDeclaration.Semantic.SemanticName; - vp_semantic[i].semantic_index = - parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - } - - assert(last > nr_vp_outputs); - nr_vp_outputs = last; - } - break; - default: - done = 1; - break; - } - } - - - /* Now match based on semantic information. - */ - for (i = 0; i< state.fp_input_count; i++) { - for (j = 0; j < nr_vp_outputs; j++) { - if (fp_semantic[i].semantic == vp_semantic[j].semantic && - fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { - state.fp_input[i].vp_output = j; - } - } - if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) { - for (j = 0; j < nr_vp_outputs; j++) { - if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic && - fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { - state.fp_input[i].bf_vp_output = j; - } - } - } - } - - if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) { - brw->sf.linkage = state; - brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE; - } -} - - -const struct brw_tracked_state brw_sf_linkage = { - .dirty = { - .brw = (BRW_NEW_VS | - BRW_NEW_FS), - .cache = 0, - }, - .update = update_sf_linkage -}; - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_sf.h b/src/gallium/drivers/i965simple/brw_sf.h deleted file mode 100644 index b7ada47560..0000000000 --- a/src/gallium/drivers/i965simple/brw_sf.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_SF_H -#define BRW_SF_H - -#include "brw_context.h" -#include "brw_eu.h" - - -#define SF_POINTS 0 -#define SF_LINES 1 -#define SF_TRIANGLES 2 -#define SF_UNFILLED_TRIS 3 - - - -struct brw_sf_prog_key { - unsigned vp_output_count:5; - unsigned fp_input_count:5; - - unsigned primitive:2; - unsigned do_twoside_color:1; - unsigned do_flat_shading:1; - unsigned frontface_ccw:1; - unsigned do_point_sprite:1; - - /* Interpolation masks; - */ - unsigned linear_mask; - unsigned persp_mask; - unsigned const_mask; - - -// int SpriteOrigin; -}; - -struct brw_sf_point_tex { - boolean CoordReplace; -}; - -struct brw_sf_compile { - struct brw_compile func; - struct brw_sf_prog_key key; - struct brw_sf_prog_data prog_data; - - struct brw_reg pv; - struct brw_reg det; - struct brw_reg dx0; - struct brw_reg dx2; - struct brw_reg dy0; - struct brw_reg dy2; - - /* z and 1/w passed in seperately: - */ - struct brw_reg z[3]; - struct brw_reg inv_w[3]; - - /* The vertices: - */ - struct brw_reg vert[3]; - - /* Temporaries, allocated after last vertex reg. - */ - struct brw_reg inv_det; - struct brw_reg a1_sub_a0; - struct brw_reg a2_sub_a0; - struct brw_reg tmp; - - struct brw_reg m1Cx; - struct brw_reg m2Cy; - struct brw_reg m3C0; - - unsigned nr_verts; - unsigned nr_attrs; - unsigned nr_attr_regs; - unsigned nr_setup_attrs; - unsigned nr_setup_regs; -#if 0 - ubyte attr_to_idx[VERT_RESULT_MAX]; - ubyte idx_to_attr[VERT_RESULT_MAX]; - struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; -#endif -}; - - -void brw_emit_tri_setup( struct brw_sf_compile *c ); -void brw_emit_line_setup( struct brw_sf_compile *c ); -void brw_emit_point_setup( struct brw_sf_compile *c ); -void brw_emit_point_sprite_setup( struct brw_sf_compile *c ); -void brw_emit_anyprim_setup( struct brw_sf_compile *c ); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_sf_emit.c b/src/gallium/drivers/i965simple/brw_sf_emit.c deleted file mode 100644 index 78d6fa5e9e..0000000000 --- a/src/gallium/drivers/i965simple/brw_sf_emit.c +++ /dev/null @@ -1,382 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_defines.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_util.h" -#include "brw_sf.h" - - - -/*********************************************************************** - * Triangle setup. - */ - - -static void alloc_regs( struct brw_sf_compile *c ) -{ - unsigned reg, i; - - /* Values computed by fixed function unit: - */ - c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); - c->det = brw_vec1_grf(1, 2); - c->dx0 = brw_vec1_grf(1, 3); - c->dx2 = brw_vec1_grf(1, 4); - c->dy0 = brw_vec1_grf(1, 5); - c->dy2 = brw_vec1_grf(1, 6); - - /* z and 1/w passed in seperately: - */ - c->z[0] = brw_vec1_grf(2, 0); - c->inv_w[0] = brw_vec1_grf(2, 1); - c->z[1] = brw_vec1_grf(2, 2); - c->inv_w[1] = brw_vec1_grf(2, 3); - c->z[2] = brw_vec1_grf(2, 4); - c->inv_w[2] = brw_vec1_grf(2, 5); - - /* The vertices: - */ - reg = 3; - for (i = 0; i < c->nr_verts; i++) { - c->vert[i] = brw_vec8_grf(reg, 0); - reg += c->nr_attr_regs; - } - - /* Temporaries, allocated after last vertex reg. - */ - c->inv_det = brw_vec1_grf(reg, 0); reg++; - c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; - c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; - c->tmp = brw_vec8_grf(reg, 0); reg++; - - /* Note grf allocation: - */ - c->prog_data.total_grf = reg; - - - /* Outputs of this program - interpolation coefficients for - * rasterization: - */ - c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); - c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); - c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); -} - - -static void copy_z_inv_w( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - brw_push_insn_state(p); - - /* Copy both scalars with a single MOV: - */ - for (i = 0; i < c->nr_verts; i++) - brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); - - brw_pop_insn_state(p); -} - - -static void invert_det( struct brw_sf_compile *c) -{ - brw_math(&c->func, - c->inv_det, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 0, - c->det, - BRW_MATH_DATA_SCALAR, - BRW_MATH_PRECISION_FULL); - -} - -#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ - FRAG_BIT_COL0 | \ - FRAG_BIT_COL1) - -static boolean calculate_masks( struct brw_sf_compile *c, - unsigned reg, - ushort *pc, - ushort *pc_persp, - ushort *pc_linear) -{ - boolean is_last_attr = (reg == c->nr_setup_regs - 1); - unsigned persp_mask = c->key.persp_mask; - unsigned linear_mask = c->key.linear_mask; - - debug_printf("persp_mask: %x\n", persp_mask); - debug_printf("linear_mask: %x\n", linear_mask); - - *pc_persp = 0; - *pc_linear = 0; - *pc = 0xf; - - if (persp_mask & (1 << (reg*2))) - *pc_persp = 0xf; - - if (linear_mask & (1 << (reg*2))) - *pc_linear = 0xf; - - /* Maybe only processs one attribute on the final round: - */ - if (reg*2+1 < c->nr_setup_attrs) { - *pc |= 0xf0; - - if (persp_mask & (1 << (reg*2+1))) - *pc_persp |= 0xf0; - - if (linear_mask & (1 << (reg*2+1))) - *pc_linear |= 0xf0; - } - - debug_printf("pc: %x\n", *pc); - debug_printf("pc_persp: %x\n", *pc_persp); - debug_printf("pc_linear: %x\n", *pc_linear); - - - return is_last_attr; -} - - - -void brw_emit_tri_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - debug_printf("%s START ==============\n", __FUNCTION__); - - c->nr_verts = 3; - alloc_regs(c); - invert_det(c); - copy_z_inv_w(c); - - - for (i = 0; i < c->nr_setup_regs; i++) - { - /* Pair of incoming attributes: - */ - struct brw_reg a0 = offset(c->vert[0], i); - struct brw_reg a1 = offset(c->vert[1], i); - struct brw_reg a2 = offset(c->vert[2], i); - ushort pc = 0, pc_persp = 0, pc_linear = 0; - boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - brw_MUL(p, a1, a1, c->inv_w[1]); - brw_MUL(p, a2, a2, c->inv_w[2]); - } - - - /* Calculate coefficients for interpolated values: - */ - if (pc_linear) - { - brw_set_predicate_control_flag_value(p, pc_linear); - - brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); - brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); - - /* calculate dA/dx - */ - brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); - brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); - brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); - - /* calculate dA/dy - */ - brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); - brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); - } - - { - brw_set_predicate_control_flag_value(p, pc); - /* start point for interpolation - */ - brw_MOV(p, c->m3C0, a0); - - /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in - * the send instruction: - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* offset */ - BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ - } - } - - debug_printf("%s DONE ==============\n", __FUNCTION__); - -} - - - -void brw_emit_line_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - - c->nr_verts = 2; - alloc_regs(c); - invert_det(c); - copy_z_inv_w(c); - - for (i = 0; i < c->nr_setup_regs; i++) - { - /* Pair of incoming attributes: - */ - struct brw_reg a0 = offset(c->vert[0], i); - struct brw_reg a1 = offset(c->vert[1], i); - ushort pc, pc_persp, pc_linear; - boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - brw_MUL(p, a1, a1, c->inv_w[1]); - } - - /* Calculate coefficients for position, color: - */ - if (pc_linear) { - brw_set_predicate_control_flag_value(p, pc_linear); - - brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); - - brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); - brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); - - brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); - } - - { - brw_set_predicate_control_flag_value(p, pc); - - /* start point for interpolation - */ - brw_MOV(p, c->m3C0, a0); - - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - } -} - - -/* Points setup - several simplifications as all attributes are - * constant across the face of the point (point sprites excluded!) - */ -void brw_emit_point_setup( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - unsigned i; - - c->nr_verts = 1; - alloc_regs(c); - copy_z_inv_w(c); - - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ - - for (i = 0; i < c->nr_setup_regs; i++) - { - struct brw_reg a0 = offset(c->vert[0], i); - ushort pc, pc_persp, pc_linear; - boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - /* This seems odd as the values are all constant, but the - * fragment shader will be expecting it: - */ - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } - - - /* The delta values are always zero, just send the starting - * coordinate. Again, this is to fit in with the interpolation - * code in the fragment shader. - */ - { - brw_set_predicate_control_flag_value(p, pc); - - brw_MOV(p, c->m3C0, a0); /* constant value */ - - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); - } - } -} diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c deleted file mode 100644 index 2a5de61c21..0000000000 --- a/src/gallium/drivers/i965simple/brw_sf_state.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - -static void upload_sf_vp(struct brw_context *brw) -{ - struct brw_sf_viewport sfv; - - memset(&sfv, 0, sizeof(sfv)); - - - /* BRW_NEW_VIEWPORT */ - { - const float *scale = brw->attribs.Viewport.scale; - const float *trans = brw->attribs.Viewport.translate; - - sfv.viewport.m00 = scale[0]; - sfv.viewport.m11 = scale[1]; - sfv.viewport.m22 = scale[2]; - sfv.viewport.m30 = trans[0]; - sfv.viewport.m31 = trans[1]; - sfv.viewport.m32 = trans[2]; - } - - /* _NEW_SCISSOR */ - sfv.scissor.xmin = brw->attribs.Scissor.minx; - sfv.scissor.xmax = brw->attribs.Scissor.maxx - 1; - sfv.scissor.ymin = brw->attribs.Scissor.miny; - sfv.scissor.ymax = brw->attribs.Scissor.maxy - 1; - - brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); -} - -const struct brw_tracked_state brw_sf_vp = { - .dirty = { - .brw = (BRW_NEW_SCISSOR | - BRW_NEW_VIEWPORT), - .cache = 0 - }, - .update = upload_sf_vp -}; - -static void upload_sf_unit( struct brw_context *brw ) -{ - struct brw_sf_unit_state sf; - memset(&sf, 0, sizeof(sf)); - - /* CACHE_NEW_SF_PROG */ - sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; - sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; - sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; - - sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - sf.thread3.dispatch_grf_start_reg = 3; - sf.thread3.urb_entry_read_offset = 1; - - /* BRW_NEW_URB_FENCE */ - sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; - sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; - sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; - - if (BRW_DEBUG & DEBUG_SINGLE_THREAD) - sf.thread4.max_threads = 0; - - if (BRW_DEBUG & DEBUG_STATS) - sf.thread4.stats_enable = 1; - - /* CACHE_NEW_SF_VP */ - sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; - sf.sf5.viewport_transform = 1; - - /* BRW_NEW_RASTER */ - if (brw->attribs.Raster->scissor) - sf.sf6.scissor = 1; - -#if 0 - if (brw->attribs.Polygon->FrontFace == GL_CCW) - sf.sf5.front_winding = BRW_FRONTWINDING_CCW; - else - sf.sf5.front_winding = BRW_FRONTWINDING_CW; - - - if (brw->attribs.Polygon->CullFlag) { - switch (brw->attribs.Polygon->CullFaceMode) { - case GL_FRONT: - sf.sf6.cull_mode = BRW_CULLMODE_FRONT; - break; - case GL_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BOTH; - break; - default: - assert(0); - break; - } - } - else - sf.sf6.cull_mode = BRW_CULLMODE_NONE; -#else - sf.sf5.front_winding = BRW_FRONTWINDING_CCW; - sf.sf6.cull_mode = BRW_CULLMODE_NONE; -#endif - - sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); - - sf.sf6.line_endcap_aa_region_width = 1; - if (brw->attribs.Raster->line_smooth) - sf.sf6.aa_enable = 1; - else if (sf.sf6.line_width <= 0x2) - sf.sf6.line_width = 0; - - sf.sf6.point_rast_rule = 1; /* opengl conventions */ - - sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; - sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); - sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex; - - /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: - */ - sf.sf7.trifan_pv = 2; - sf.sf7.linestrip_pv = 1; - sf.sf7.tristrip_pv = 2; - sf.sf7.line_last_pixel_enable = 0; - - /* Set bias for OpenGL rasterization rules: - */ - sf.sf6.dest_org_vbias = 0x8; - sf.sf6.dest_org_hbias = 0x8; - - brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); -} - - -const struct brw_tracked_state brw_sf_unit = { - .dirty = { - .brw = (BRW_NEW_RASTERIZER | - BRW_NEW_URB_FENCE), - .cache = (CACHE_NEW_SF_VP | - CACHE_NEW_SF_PROG) - }, - .update = upload_sf_unit -}; - - diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c deleted file mode 100644 index 86d877d7ef..0000000000 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ /dev/null @@ -1,48 +0,0 @@ - -#include "brw_context.h" -#include "brw_state.h" -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" - - -/** - * XXX this obsolete new and no longer compiled. - */ -void brw_shader_info(const struct tgsi_token *tokens, - struct brw_shader_info *info ) -{ - struct tgsi_parse_context parse; - int done = 0; - - tgsi_parse_init( &parse, tokens ); - - while( !done && - !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - unsigned last = decl->DeclarationRange.Last; - - // Broken by crazy wpos init: - //assert( info->nr_regs[decl->Declaration.File] <= last); - - info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], - last+1); - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: - case TGSI_TOKEN_TYPE_INSTRUCTION: - default: - done = 1; - break; - } - } - - tgsi_parse_free (&parse); - -} diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c deleted file mode 100644 index b47f5373f3..0000000000 --- a/src/gallium/drivers/i965simple/brw_state.c +++ /dev/null @@ -1,469 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Zack Rusin <zack@tungstengraphics.com> - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_memory.h" -#include "pipe/p_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_draw.h" - - -#define DUP( TYPE, VAL ) \ -do { \ - struct TYPE *x = malloc(sizeof(*x)); \ - memcpy(x, VAL, sizeof(*x) ); \ - return x; \ -} while (0) - -/************************************************************************ - * Blend - */ -static void * -brw_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - DUP( pipe_blend_state, blend ); -} - -static void brw_bind_blend_state(struct pipe_context *pipe, - void *blend) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Blend = (struct pipe_blend_state*)blend; - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - - -static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - free(blend); -} - -static void brw_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.BlendColor = *blend_color; - - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - -/************************************************************************ - * Sampler - */ - -static void * -brw_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - DUP( pipe_sampler_state, sampler ); -} - -static void brw_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) -{ - struct brw_context *brw = brw_context(pipe); - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == brw->num_samplers && - !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) - return; - - memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); - memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * - sizeof(void *)); - - brw->num_samplers = num; - - brw->state.dirty.brw |= BRW_NEW_SAMPLER; -} - -static void brw_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - free(sampler); -} - - -/************************************************************************ - * Depth stencil - */ - -static void * -brw_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - DUP( pipe_depth_stencil_alpha_state, depth_stencil ); -} - -static void brw_bind_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; - - brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; -} - -static void brw_delete_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - free(depth_stencil); -} - -/************************************************************************ - * Scissor - */ -static void brw_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct brw_context *brw = brw_context(pipe); - - memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); - brw->state.dirty.brw |= BRW_NEW_SCISSOR; -} - - -/************************************************************************ - * Stipple - */ - -static void brw_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ -} - - -/************************************************************************ - * Fragment shader - */ - -static void * brw_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - - brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); - brw_fp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_fp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_fp->info2); -#endif - - tgsi_dump(shader->tokens, 0); - - - return (void *)brw_fp; -} - -static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; - brw->state.dirty.brw |= BRW_NEW_FS; -} - -static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; - - FREE((void *) brw_fp->program.tokens); - FREE(brw_fp); -} - - -/************************************************************************ - * Vertex shader and other TNL state - */ - -static void *brw_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - - brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); - brw_vp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_vp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_vp->info2); -#endif - tgsi_dump(shader->tokens, 0); - - return (void *)brw_vp; -} - -static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; - brw->state.dirty.brw |= BRW_NEW_VS; - - debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); -} - -static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; - - FREE((void *) brw_vp->program.tokens); - FREE(brw_vp); -} - - -static void brw_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Clip = *clip; -} - - -static void brw_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Viewport = *viewport; /* struct copy */ - brw->state.dirty.brw |= BRW_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - //draw_set_viewport_state(brw->draw, viewport); -} - - -static void brw_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct brw_context *brw = brw_context(pipe); - memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); -} - -static void brw_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements) -{ - /* flush ? */ - struct brw_context *brw = brw_context(pipe); - uint i; - - assert(count <= PIPE_MAX_ATTRIBS); - - for (i = 0; i < count; i++) { - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); - - el.ve0.src_offset = elements[i].src_offset; - el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; - - el.ve1.dst_offset = i * 4; - - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - - switch (elements[i].nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } - - brw->vb.inputs[i] = el; - } -} - - - -/************************************************************************ - * Constant buffers - */ - -static void brw_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct brw_context *brw = brw_context(pipe); - - assert(buf == 0 || index == 0); - - brw->attribs.Constants[shader] = buf; - brw->state.dirty.brw |= BRW_NEW_CONSTANTS; -} - - -/************************************************************************ - * Texture surfaces - */ - - -static void brw_set_sampler_textures(struct pipe_context *pipe, - unsigned num, - struct pipe_texture **texture) -{ - struct brw_context *brw = brw_context(pipe); - uint i; - - assert(num <= PIPE_MAX_SAMPLERS); - - /* Check for no-op */ - if (num == brw->num_textures && - !memcmp(brw->attribs.Texture, texture, num * - sizeof(struct pipe_texture *))) - return; - - for (i = 0; i < num; i++) - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], - texture[i]); - - for (i = num; i < brw->num_textures; i++) - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], - NULL); - - brw->num_textures = num; - - brw->state.dirty.brw |= BRW_NEW_TEXTURE; -} - - -/************************************************************************ - * Render targets, etc - */ - -static void brw_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FrameBuffer = *fb; /* struct copy */ - - brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; -} - - - -/************************************************************************ - * Rasterizer state - */ - -static void * -brw_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - DUP(pipe_rasterizer_state, rasterizer); -} - -static void brw_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; - - /* Also pass-through to draw module: - */ - //draw_set_rasterizer_state(brw->draw, setup); - - brw->state.dirty.brw |= BRW_NEW_RASTERIZER; -} - -static void brw_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) -{ - free(setup); -} - - - -void -brw_init_state_functions( struct brw_context *brw ) -{ - brw->pipe.create_blend_state = brw_create_blend_state; - brw->pipe.bind_blend_state = brw_bind_blend_state; - brw->pipe.delete_blend_state = brw_delete_blend_state; - - brw->pipe.create_sampler_state = brw_create_sampler_state; - brw->pipe.bind_sampler_states = brw_bind_sampler_states; - brw->pipe.delete_sampler_state = brw_delete_sampler_state; - - brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; - brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; - brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; - - brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; - brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; - brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; - brw->pipe.create_fs_state = brw_create_fs_state; - brw->pipe.bind_fs_state = brw_bind_fs_state; - brw->pipe.delete_fs_state = brw_delete_fs_state; - brw->pipe.create_vs_state = brw_create_vs_state; - brw->pipe.bind_vs_state = brw_bind_vs_state; - brw->pipe.delete_vs_state = brw_delete_vs_state; - - brw->pipe.set_blend_color = brw_set_blend_color; - brw->pipe.set_clip_state = brw_set_clip_state; - brw->pipe.set_constant_buffer = brw_set_constant_buffer; - brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; - -// brw->pipe.set_feedback_state = brw_set_feedback_state; -// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; - - brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; - brw->pipe.set_scissor_state = brw_set_scissor_state; - brw->pipe.set_sampler_textures = brw_set_sampler_textures; - brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; - brw->pipe.set_vertex_elements = brw_set_vertex_elements; -} diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h deleted file mode 100644 index de0a6371b8..0000000000 --- a/src/gallium/drivers/i965simple/brw_state.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_STATE_H -#define BRW_STATE_H - -#include "brw_context.h" -#include "brw_winsys.h" - - -const struct brw_tracked_state brw_blend_constant_color; -const struct brw_tracked_state brw_cc_unit; -const struct brw_tracked_state brw_cc_vp; -const struct brw_tracked_state brw_clip_prog; -const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_constant_buffer_state; -const struct brw_tracked_state brw_constant_buffer; -const struct brw_tracked_state brw_curbe_offsets; -const struct brw_tracked_state brw_invarient_state; -const struct brw_tracked_state brw_gs_prog; -const struct brw_tracked_state brw_gs_unit; -const struct brw_tracked_state brw_drawing_rect; -const struct brw_tracked_state brw_line_stipple; -const struct brw_tracked_state brw_pipelined_state_pointers; -const struct brw_tracked_state brw_binding_table_pointers; -const struct brw_tracked_state brw_depthbuffer; -const struct brw_tracked_state brw_polygon_stipple_offset; -const struct brw_tracked_state brw_polygon_stipple; -const struct brw_tracked_state brw_program_parameters; -const struct brw_tracked_state brw_recalculate_urb_fence; -const struct brw_tracked_state brw_sf_prog; -const struct brw_tracked_state brw_sf_unit; -const struct brw_tracked_state brw_sf_vp; -const struct brw_tracked_state brw_state_base_address; -const struct brw_tracked_state brw_urb_fence; -const struct brw_tracked_state brw_vertex_state; -const struct brw_tracked_state brw_vs_prog; -const struct brw_tracked_state brw_vs_unit; -const struct brw_tracked_state brw_wm_prog; -const struct brw_tracked_state brw_wm_samplers; -const struct brw_tracked_state brw_wm_surfaces; -const struct brw_tracked_state brw_wm_unit; - -const struct brw_tracked_state brw_psp_urb_cbs; - -const struct brw_tracked_state brw_active_vertprog; -const struct brw_tracked_state brw_tnl_vertprog; -const struct brw_tracked_state brw_pipe_control; - -const struct brw_tracked_state brw_clear_surface_cache; -const struct brw_tracked_state brw_clear_batch_cache; - -/*********************************************************************** - * brw_state_cache.c - */ -unsigned brw_cache_data(struct brw_cache *cache, - const void *data ); - -unsigned brw_cache_data_sz(struct brw_cache *cache, - const void *data, - unsigned data_sz); - -unsigned brw_upload_cache( struct brw_cache *cache, - const void *key, - unsigned key_sz, - const void *data, - unsigned data_sz, - const void *aux, - void *aux_return ); - -boolean brw_search_cache( struct brw_cache *cache, - const void *key, - unsigned key_size, - void *aux_return, - unsigned *offset_return); - -void brw_init_caches( struct brw_context *brw ); -void brw_destroy_caches( struct brw_context *brw ); - -static inline struct pipe_buffer *brw_cache_buffer(struct brw_context *brw, - enum brw_cache_id id) -{ - return brw->cache[id].pool->buffer; -} - -/*********************************************************************** - * brw_state_batch.c - */ -#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) - -boolean brw_cached_batch_struct( struct brw_context *brw, - const void *data, - unsigned sz ); - -void brw_destroy_batch_cache( struct brw_context *brw ); - - -/*********************************************************************** - * brw_state_pool.c - */ -void brw_init_pools( struct brw_context *brw ); -void brw_destroy_pools( struct brw_context *brw ); - -boolean brw_pool_alloc( struct brw_mem_pool *pool, - unsigned size, - unsigned alignment, - unsigned *offset_return); - -void brw_pool_fence( struct brw_context *brw, - struct brw_mem_pool *pool, - unsigned fence ); - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ); - -void brw_clear_all_caches( struct brw_context *brw ); -void brw_invalidate_pools( struct brw_context *brw ); -void brw_clear_batch_cache_flush( struct brw_context *brw ); - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c deleted file mode 100644 index 43a1c89fc4..0000000000 --- a/src/gallium/drivers/i965simple/brw_state_batch.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_state.h" -#include "brw_winsys.h" - -#include "util/u_memory.h" - -/* A facility similar to the data caching code above, which aims to - * prevent identical commands being issued repeatedly. - */ -boolean brw_cached_batch_struct( struct brw_context *brw, - const void *data, - unsigned sz ) -{ - struct brw_cached_batch_item *item = brw->cached_batch_items; - struct header *newheader = (struct header *)data; - - if (brw->emit_state_always) { - brw_batchbuffer_data(brw->winsys, data, sz); - return TRUE; - } - - while (item) { - if (item->header->opcode == newheader->opcode) { - if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) - return FALSE; - if (item->sz != sz) { - FREE(item->header); - item->header = MALLOC(sz); - item->sz = sz; - } - goto emit; - } - item = item->next; - } - - assert(!item); - item = CALLOC_STRUCT(brw_cached_batch_item); - item->header = MALLOC(sz); - item->sz = sz; - item->next = brw->cached_batch_items; - brw->cached_batch_items = item; - -emit: - memcpy(item->header, newheader, sz); - brw_batchbuffer_data(brw->winsys, data, sz); - return TRUE; -} - -static void clear_batch_cache( struct brw_context *brw ) -{ - struct brw_cached_batch_item *item = brw->cached_batch_items; - - while (item) { - struct brw_cached_batch_item *next = item->next; - free((void *)item->header); - free(item); - item = next; - } - - brw->cached_batch_items = NULL; - - - brw_clear_all_caches(brw); - - brw_invalidate_pools(brw); -} - -void brw_clear_batch_cache_flush( struct brw_context *brw ) -{ - clear_batch_cache(brw); - -/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ - - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - - - -void brw_destroy_batch_cache( struct brw_context *brw ) -{ - clear_batch_cache(brw); -} diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c deleted file mode 100644 index 094248fa69..0000000000 --- a/src/gallium/drivers/i965simple/brw_state_cache.c +++ /dev/null @@ -1,443 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_state.h" - -#include "brw_wm.h" -#include "brw_vs.h" -#include "brw_clip.h" -#include "brw_sf.h" -#include "brw_gs.h" - -#include "util/u_memory.h" - - - -/*********************************************************************** - * Check cache for uploaded version of struct, else upload new one. - * Fail when memory is exhausted. - * - * XXX: FIXME: Currently search is so slow it would be quicker to - * regenerate the data every time... - */ - -static unsigned hash_key( const void *key, unsigned key_size ) -{ - unsigned *ikey = (unsigned *)key; - unsigned hash = 0, i; - - assert(key_size % 4 == 0); - - /* I'm sure this can be improved on: - */ - for (i = 0; i < key_size/4; i++) - hash ^= ikey[i]; - - return hash; -} - -static struct brw_cache_item *search_cache( struct brw_cache *cache, - unsigned hash, - const void *key, - unsigned key_size) -{ - struct brw_cache_item *c; - - for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->hash == hash && - c->key_size == key_size && - memcmp(c->key, key, key_size) == 0) - return c; - } - - return NULL; -} - - -static void rehash( struct brw_cache *cache ) -{ - struct brw_cache_item **items; - struct brw_cache_item *c, *next; - unsigned size, i; - - size = cache->size * 3; - items = (struct brw_cache_item**) MALLOC(size * sizeof(*items)); - memset(items, 0, size * sizeof(*items)); - - for (i = 0; i < cache->size; i++) - for (c = cache->items[i]; c; c = next) { - next = c->next; - c->next = items[c->hash % size]; - items[c->hash % size] = c; - } - - FREE(cache->items); - cache->items = items; - cache->size = size; -} - - -boolean brw_search_cache( struct brw_cache *cache, - const void *key, - unsigned key_size, - void *aux_return, - unsigned *offset_return) -{ - struct brw_cache_item *item; - unsigned addr = 0; - unsigned hash = hash_key(key, key_size); - - item = search_cache(cache, hash, key, key_size); - - if (item) { - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - - *offset_return = addr = item->offset; - } - - if (item == NULL || addr != cache->last_addr) { - cache->brw->state.dirty.cache |= 1<<cache->id; - cache->last_addr = addr; - } - - return item != NULL; -} - -unsigned brw_upload_cache( struct brw_cache *cache, - const void *key, - unsigned key_size, - const void *data, - unsigned data_size, - const void *aux, - void *aux_return ) -{ - unsigned offset; - struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - unsigned hash = hash_key(key, key_size); - void *tmp = MALLOC(key_size + cache->aux_size); - - if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { - /* Should not be possible: - */ - debug_printf("brw_pool_alloc failed\n"); - exit(1); - } - - memcpy(tmp, key, key_size); - - if (cache->aux_size) - memcpy(tmp+key_size, aux, cache->aux_size); - - item->key = tmp; - item->hash = hash; - item->key_size = key_size; - item->offset = offset; - item->data_size = data_size; - - if (++cache->n_items > cache->size * 1.5) - rehash(cache); - - hash %= cache->size; - item->next = cache->items[hash]; - cache->items[hash] = item; - - if (aux_return) { - assert(cache->aux_size); - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - } - - if (BRW_DEBUG & DEBUG_STATE) - debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n", - cache->name, - data_size, - (void*)cache->pool->buffer, - offset); - - /* Copy data to the buffer: - */ - cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys, - cache->pool->buffer, - offset, - data_size, - data, - cache->id); - - cache->brw->state.dirty.cache |= 1<<cache->id; - cache->last_addr = offset; - - return offset; -} - -/* This doesn't really work with aux data. Use search/upload instead - */ -unsigned brw_cache_data_sz(struct brw_cache *cache, - const void *data, - unsigned data_size) -{ - unsigned addr; - - if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { - addr = brw_upload_cache(cache, - data, data_size, - data, data_size, - NULL, NULL); - } - - return addr; -} - -unsigned brw_cache_data(struct brw_cache *cache, - const void *data) -{ - return brw_cache_data_sz(cache, data, cache->key_size); -} - -enum pool_type { - DW_SURFACE_STATE, - DW_GENERAL_STATE -}; - -static void brw_init_cache( struct brw_context *brw, - const char *name, - unsigned id, - unsigned key_size, - unsigned aux_size, - enum pool_type pool_type) -{ - struct brw_cache *cache = &brw->cache[id]; - cache->brw = brw; - cache->id = id; - cache->name = name; - cache->items = NULL; - - cache->size = 7; - cache->n_items = 0; - cache->items = (struct brw_cache_item **) - CALLOC(cache->size, sizeof(struct brw_cache_item)); - - - cache->key_size = key_size; - cache->aux_size = aux_size; - switch (pool_type) { - case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; - case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; - default: assert(0); break; - } -} - -void brw_init_caches( struct brw_context *brw ) -{ - - brw_init_cache(brw, - "CC_VP", - BRW_CC_VP, - sizeof(struct brw_cc_viewport), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CC_UNIT", - BRW_CC_UNIT, - sizeof(struct brw_cc_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_key), - sizeof(struct brw_wm_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - sizeof(struct brw_sampler_default_color), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SAMPLER", - BRW_SAMPLER, - 0, /* variable key/data size */ - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "WM_UNIT", - BRW_WM_UNIT, - sizeof(struct brw_wm_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_key), - sizeof(struct brw_sf_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_VP", - BRW_SF_VP, - sizeof(struct brw_sf_viewport), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_UNIT", - BRW_SF_UNIT, - sizeof(struct brw_sf_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "VS_UNIT", - BRW_VS_UNIT, - sizeof(struct brw_vs_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_key), - sizeof(struct brw_vs_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CLIP_UNIT", - BRW_CLIP_UNIT, - sizeof(struct brw_clip_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_key), - sizeof(struct brw_clip_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "GS_UNIT", - BRW_GS_UNIT, - sizeof(struct brw_gs_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_key), - sizeof(struct brw_gs_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SS_SURFACE", - BRW_SS_SURFACE, - sizeof(struct brw_surface_state), - 0, - DW_SURFACE_STATE); - - brw_init_cache(brw, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - sizeof(struct brw_surface_binding_table), - 0, - DW_SURFACE_STATE); -} - - -/* When we lose hardware context, need to invalidate the surface cache - * as these structs must be explicitly re-uploaded. They are subject - * to fixup by the memory manager as they contain absolute agp - * offsets, so we need to ensure there is a fresh version of the - * struct available to receive the fixup. - * - * XXX: Need to ensure that there aren't two versions of a surface or - * bufferobj with different backing data active in the same buffer at - * once? Otherwise the cache could confuse them. Maybe better not to - * cache at all? - * - * --> Isn't this the same as saying need to ensure batch is flushed - * before new data is uploaded to an existing buffer? We - * already try to make sure of that. - */ -static void clear_cache( struct brw_cache *cache ) -{ - struct brw_cache_item *c, *next; - unsigned i; - - for (i = 0; i < cache->size; i++) { - for (c = cache->items[i]; c; c = next) { - next = c->next; - free((void *)c->key); - free(c); - } - cache->items[i] = NULL; - } - - cache->n_items = 0; -} - -void brw_clear_all_caches( struct brw_context *brw ) -{ - int i; - - if (BRW_DEBUG & DEBUG_STATE) - debug_printf("%s\n", __FUNCTION__); - - for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); - - if (brw->curbe.last_buf) { - FREE(brw->curbe.last_buf); - brw->curbe.last_buf = NULL; - } - - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - - - - - -void brw_destroy_caches( struct brw_context *brw ) -{ - unsigned i; - - for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); -} diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c deleted file mode 100644 index e91263cb1f..0000000000 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -/** @file brw_state_pool.c - * Implements the state pool allocator. - * - * For the 965, we create two state pools for state cache entries. Objects - * will be allocated into the pools depending on which state base address - * their pointer is relative to in other 965 state. - * - * The state pools are relatively simple: As objects are allocated, increment - * the offset to allocate space. When the pool is "full" (rather, close to - * full), we reset the pool and reset the state cache entries that point into - * the pool. - */ - -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "pipe/p_inlines.h" -#include "brw_context.h" -#include "brw_state.h" - -boolean brw_pool_alloc( struct brw_mem_pool *pool, - unsigned size, - unsigned alignment, - unsigned *offset_return) -{ - unsigned fixup = align(pool->offset, alignment) - pool->offset; - - size = align(size, 4); - - if (pool->offset + fixup + size >= pool->size) { - debug_printf("%s failed\n", __FUNCTION__); - assert(0); - exit(0); - } - - pool->offset += fixup; - *offset_return = pool->offset; - pool->offset += size; - - return TRUE; -} - -static -void brw_invalidate_pool( struct brw_mem_pool *pool ) -{ - if (BRW_DEBUG & DEBUG_STATE) - debug_printf("\n\n\n %s \n\n\n", __FUNCTION__); - - pool->offset = 0; - - brw_clear_all_caches(pool->brw); -} - - -static void brw_init_pool( struct brw_context *brw, - unsigned pool_id, - unsigned size ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - pool->size = size; - pool->brw = brw; - - pool->buffer = pipe_buffer_create(brw->pipe.screen, - 4096, - 0 /* DRM_BO_FLAG_MEM_TT */, - size); -} - -static void brw_destroy_pool( struct brw_context *brw, - unsigned pool_id ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - pipe_buffer_reference( pool->brw->pipe.screen, - &pool->buffer, - NULL ); -} - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ) -{ - if (pool->offset > (pool->size * 3) / 4) { - brw->state.dirty.brw |= BRW_NEW_SCENE; - } - -} - -void brw_init_pools( struct brw_context *brw ) -{ - brw_init_pool(brw, BRW_GS_POOL, 0x80000); - brw_init_pool(brw, BRW_SS_POOL, 0x80000); -} - -void brw_destroy_pools( struct brw_context *brw ) -{ - brw_destroy_pool(brw, BRW_GS_POOL); - brw_destroy_pool(brw, BRW_SS_POOL); -} - - -void brw_invalidate_pools( struct brw_context *brw ) -{ - brw_invalidate_pool(&brw->pool[BRW_GS_POOL]); - brw_invalidate_pool(&brw->pool[BRW_SS_POOL]); -} diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c deleted file mode 100644 index bac9161b5f..0000000000 --- a/src/gallium/drivers/i965simple/brw_state_upload.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_state.h" - -#include "util/u_memory.h" - -/* This is used to initialize brw->state.atoms[]. We could use this - * list directly except for a single atom, brw_constant_buffer, which - * has a .dirty value which changes according to the parameters of the - * current fragment and vertex programs, and so cannot be a static - * value. - */ -const struct brw_tracked_state *atoms[] = -{ - &brw_vs_prog, - &brw_gs_prog, - &brw_clip_prog, - &brw_sf_prog, - &brw_wm_prog, - - /* Once all the programs are done, we know how large urb entry - * sizes need to be and can decide if we need to change the urb - * layout. - */ - &brw_curbe_offsets, - &brw_recalculate_urb_fence, - - - &brw_cc_vp, - &brw_cc_unit, - - &brw_wm_surfaces, /* must do before samplers */ - &brw_wm_samplers, - - &brw_wm_unit, - &brw_sf_vp, - &brw_sf_unit, - &brw_vs_unit, /* always required, enabled or not */ - &brw_clip_unit, - &brw_gs_unit, - - /* Command packets: - */ - &brw_invarient_state, - &brw_state_base_address, - &brw_pipe_control, - - &brw_binding_table_pointers, - &brw_blend_constant_color, - - &brw_drawing_rect, - &brw_depthbuffer, - - &brw_polygon_stipple, - &brw_line_stipple, - - &brw_psp_urb_cbs, - - &brw_constant_buffer -}; - - -void brw_init_state( struct brw_context *brw ) -{ - brw_init_pools(brw); - brw_init_caches(brw); - - brw->state.dirty.brw = ~0; - brw->emit_state_always = 0; -} - - -void brw_destroy_state( struct brw_context *brw ) -{ - brw_destroy_caches(brw); - brw_destroy_batch_cache(brw); - brw_destroy_pools(brw); -} - -/*********************************************************************** - */ - -static boolean check_state( const struct brw_state_flags *a, - const struct brw_state_flags *b ) -{ - return ((a->brw & b->brw) || - (a->cache & b->cache)); -} - -static void accumulate_state( struct brw_state_flags *a, - const struct brw_state_flags *b ) -{ - a->brw |= b->brw; - a->cache |= b->cache; -} - - -static void xor_states( struct brw_state_flags *result, - const struct brw_state_flags *a, - const struct brw_state_flags *b ) -{ - result->brw = a->brw ^ b->brw; - result->cache = a->cache ^ b->cache; -} - - -/*********************************************************************** - * Emit all state: - */ -void brw_validate_state( struct brw_context *brw ) -{ - struct brw_state_flags *state = &brw->state.dirty; - unsigned i; - - if (brw->emit_state_always) - state->brw |= ~0; - - if (state->cache == 0 && - state->brw == 0) - return; - - if (brw->state.dirty.brw & BRW_NEW_SCENE) - brw_clear_batch_cache_flush(brw); - - if (BRW_DEBUG) { - /* Debug version which enforces various sanity checks on the - * state flags which are generated and checked to help ensure - * state atoms are ordered correctly in the list. - */ - struct brw_state_flags examined, prev; - memset(&examined, 0, sizeof(examined)); - prev = *state; - - for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = atoms[i]; - struct brw_state_flags generated; - - assert(atom->dirty.brw || - atom->dirty.cache); - assert(atom->update); - - if (check_state(state, &atom->dirty)) { - atom->update( brw ); - } - - accumulate_state(&examined, &atom->dirty); - - /* generated = (prev ^ state) - * if (examined & generated) - * fail; - */ - xor_states(&generated, &prev, state); - assert(!check_state(&examined, &generated)); - prev = *state; - } - } - else { - for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = atoms[i]; - - assert(atom->dirty.brw || - atom->dirty.cache); - assert(atom->update); - - if (check_state(state, &atom->dirty)) - atom->update( brw ); - } - } - - memset(state, 0, sizeof(*state)); -} diff --git a/src/gallium/drivers/i965simple/brw_structs.h b/src/gallium/drivers/i965simple/brw_structs.h deleted file mode 100644 index bbb087e95d..0000000000 --- a/src/gallium/drivers/i965simple/brw_structs.h +++ /dev/null @@ -1,1348 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_STRUCTS_H -#define BRW_STRUCTS_H - -#include "pipe/p_compiler.h" - -/* Command packets: - */ -struct header -{ - unsigned length:16; - unsigned opcode:16; -}; - - -union header_union -{ - struct header bits; - unsigned dword; -}; - -struct brw_3d_control -{ - struct - { - unsigned length:8; - unsigned notify_enable:1; - unsigned pad:3; - unsigned wc_flush_enable:1; - unsigned depth_stall_enable:1; - unsigned operation:2; - unsigned opcode:16; - } header; - - struct - { - unsigned pad:2; - unsigned dest_addr_type:1; - unsigned dest_addr:29; - } dest; - - unsigned dword2; - unsigned dword3; -}; - - -struct brw_3d_primitive -{ - struct - { - unsigned length:8; - unsigned pad:2; - unsigned topology:5; - unsigned indexed:1; - unsigned opcode:16; - } header; - - unsigned verts_per_instance; - unsigned start_vert_location; - unsigned instance_count; - unsigned start_instance_location; - unsigned base_vert_location; -}; - -/* These seem to be passed around as function args, so it works out - * better to keep them as #defines: - */ -#define BRW_FLUSH_READ_CACHE 0x1 -#define BRW_FLUSH_STATE_CACHE 0x2 -#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 -#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 - -struct brw_mi_flush -{ - unsigned flags:4; - unsigned pad:12; - unsigned opcode:16; -}; - -struct brw_vf_statistics -{ - unsigned statistics_enable:1; - unsigned pad:15; - unsigned opcode:16; -}; - - - -struct brw_binding_table_pointers -{ - struct header header; - unsigned vs; - unsigned gs; - unsigned clp; - unsigned sf; - unsigned wm; -}; - - -struct brw_blend_constant_color -{ - struct header header; - float blend_constant_color[4]; -}; - - -struct brw_depthbuffer -{ - union header_union header; - - union { - struct { - unsigned pitch:18; - unsigned format:3; - unsigned pad:4; - unsigned depth_offset_disable:1; - unsigned tile_walk:1; - unsigned tiled_surface:1; - unsigned pad2:1; - unsigned surface_type:3; - } bits; - unsigned dword; - } dword1; - - unsigned dword2_base_addr; - - union { - struct { - unsigned pad:1; - unsigned mipmap_layout:1; - unsigned lod:4; - unsigned width:13; - unsigned height:13; - } bits; - unsigned dword; - } dword3; - - union { - struct { - unsigned pad:12; - unsigned min_array_element:9; - unsigned depth:11; - } bits; - unsigned dword; - } dword4; -}; - -struct brw_drawrect -{ - struct header header; - unsigned xmin:16; - unsigned ymin:16; - unsigned xmax:16; - unsigned ymax:16; - unsigned xorg:16; - unsigned yorg:16; -}; - - - - -struct brw_global_depth_offset_clamp -{ - struct header header; - float depth_offset_clamp; -}; - -struct brw_indexbuffer -{ - union { - struct - { - unsigned length:8; - unsigned index_format:2; - unsigned cut_index_enable:1; - unsigned pad:5; - unsigned opcode:16; - } bits; - unsigned dword; - - } header; - - unsigned buffer_start; - unsigned buffer_end; -}; - - -struct brw_line_stipple -{ - struct header header; - - struct - { - unsigned pattern:16; - unsigned pad:16; - } bits0; - - struct - { - unsigned repeat_count:9; - unsigned pad:7; - unsigned inverse_repeat_count:16; - } bits1; -}; - - -struct brw_pipelined_state_pointers -{ - struct header header; - - struct { - unsigned pad:5; - unsigned offset:27; - } vs; - - struct - { - unsigned enable:1; - unsigned pad:4; - unsigned offset:27; - } gs; - - struct - { - unsigned enable:1; - unsigned pad:4; - unsigned offset:27; - } clp; - - struct - { - unsigned pad:5; - unsigned offset:27; - } sf; - - struct - { - unsigned pad:5; - unsigned offset:27; - } wm; - - struct - { - unsigned pad:5; - unsigned offset:27; /* KW: check me! */ - } cc; -}; - - -struct brw_polygon_stipple_offset -{ - struct header header; - - struct { - unsigned y_offset:5; - unsigned pad:3; - unsigned x_offset:5; - unsigned pad0:19; - } bits0; -}; - - - -struct brw_polygon_stipple -{ - struct header header; - unsigned stipple[32]; -}; - - - -struct brw_pipeline_select -{ - struct - { - unsigned pipeline_select:1; - unsigned pad:15; - unsigned opcode:16; - } header; -}; - - -struct brw_pipe_control -{ - struct - { - unsigned length:8; - unsigned notify_enable:1; - unsigned pad:2; - unsigned instruction_state_cache_flush_enable:1; - unsigned write_cache_flush_enable:1; - unsigned depth_stall_enable:1; - unsigned post_sync_operation:2; - - unsigned opcode:16; - } header; - - struct - { - unsigned pad:2; - unsigned dest_addr_type:1; - unsigned dest_addr:29; - } bits1; - - unsigned data0; - unsigned data1; -}; - - -struct brw_urb_fence -{ - struct - { - unsigned length:8; - unsigned vs_realloc:1; - unsigned gs_realloc:1; - unsigned clp_realloc:1; - unsigned sf_realloc:1; - unsigned vfe_realloc:1; - unsigned cs_realloc:1; - unsigned pad:2; - unsigned opcode:16; - } header; - - struct - { - unsigned vs_fence:10; - unsigned gs_fence:10; - unsigned clp_fence:10; - unsigned pad:2; - } bits0; - - struct - { - unsigned sf_fence:10; - unsigned vf_fence:10; - unsigned cs_fence:10; - unsigned pad:2; - } bits1; -}; - -struct brw_constant_buffer_state /* previously brw_command_streamer */ -{ - struct header header; - - struct - { - unsigned nr_urb_entries:3; - unsigned pad:1; - unsigned urb_entry_size:5; - unsigned pad0:23; - } bits0; -}; - -struct brw_constant_buffer -{ - struct - { - unsigned length:8; - unsigned valid:1; - unsigned pad:7; - unsigned opcode:16; - } header; - - struct - { - unsigned buffer_length:6; - unsigned buffer_address:26; - } bits0; -}; - -struct brw_state_base_address -{ - struct header header; - - struct - { - unsigned modify_enable:1; - unsigned pad:4; - unsigned general_state_address:27; - } bits0; - - struct - { - unsigned modify_enable:1; - unsigned pad:4; - unsigned surface_state_address:27; - } bits1; - - struct - { - unsigned modify_enable:1; - unsigned pad:4; - unsigned indirect_object_state_address:27; - } bits2; - - struct - { - unsigned modify_enable:1; - unsigned pad:11; - unsigned general_state_upper_bound:20; - } bits3; - - struct - { - unsigned modify_enable:1; - unsigned pad:11; - unsigned indirect_object_state_upper_bound:20; - } bits4; -}; - -struct brw_state_prefetch -{ - struct header header; - - struct - { - unsigned prefetch_count:3; - unsigned pad:3; - unsigned prefetch_pointer:26; - } bits0; -}; - -struct brw_system_instruction_pointer -{ - struct header header; - - struct - { - unsigned pad:4; - unsigned system_instruction_pointer:28; - } bits0; -}; - - - - -/* State structs for the various fixed function units: - */ - - -struct thread0 -{ - unsigned pad0:1; - unsigned grf_reg_count:3; - unsigned pad1:2; - unsigned kernel_start_pointer:26; -}; - -struct thread1 -{ - unsigned ext_halt_exception_enable:1; - unsigned sw_exception_enable:1; - unsigned mask_stack_exception_enable:1; - unsigned timeout_exception_enable:1; - unsigned illegal_op_exception_enable:1; - unsigned pad0:3; - unsigned depth_coef_urb_read_offset:6; /* WM only */ - unsigned pad1:2; - unsigned floating_point_mode:1; - unsigned thread_priority:1; - unsigned binding_table_entry_count:8; - unsigned pad3:5; - unsigned single_program_flow:1; -}; - -struct thread2 -{ - unsigned per_thread_scratch_space:4; - unsigned pad0:6; - unsigned scratch_space_base_pointer:22; -}; - - -struct thread3 -{ - unsigned dispatch_grf_start_reg:4; - unsigned urb_entry_read_offset:6; - unsigned pad0:1; - unsigned urb_entry_read_length:6; - unsigned pad1:1; - unsigned const_urb_entry_read_offset:6; - unsigned pad2:1; - unsigned const_urb_entry_read_length:6; - unsigned pad3:1; -}; - - - -struct brw_clip_unit_state -{ - struct thread0 thread0; - struct - { - unsigned pad0:7; - unsigned sw_exception_enable:1; - unsigned pad1:3; - unsigned mask_stack_exception_enable:1; - unsigned pad2:1; - unsigned illegal_op_exception_enable:1; - unsigned pad3:2; - unsigned floating_point_mode:1; - unsigned thread_priority:1; - unsigned binding_table_entry_count:8; - unsigned pad4:5; - unsigned single_program_flow:1; - } thread1; - - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:9; - unsigned gs_output_stats:1; /* not always */ - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:1; /* may be less */ - unsigned pad3:6; - } thread4; - - struct - { - unsigned pad0:13; - unsigned clip_mode:3; - unsigned userclip_enable_flags:8; - unsigned userclip_must_clip:1; - unsigned pad1:1; - unsigned guard_band_enable:1; - unsigned viewport_z_clip_enable:1; - unsigned viewport_xy_clip_enable:1; - unsigned vertex_position_space:1; - unsigned api_mode:1; - unsigned pad2:1; - } clip5; - - struct - { - unsigned pad0:5; - unsigned clipper_viewport_state_ptr:27; - } clip6; - - - float viewport_xmin; - float viewport_xmax; - float viewport_ymin; - float viewport_ymax; -}; - - - -struct brw_cc_unit_state -{ - struct - { - unsigned pad0:3; - unsigned bf_stencil_pass_depth_pass_op:3; - unsigned bf_stencil_pass_depth_fail_op:3; - unsigned bf_stencil_fail_op:3; - unsigned bf_stencil_func:3; - unsigned bf_stencil_enable:1; - unsigned pad1:2; - unsigned stencil_write_enable:1; - unsigned stencil_pass_depth_pass_op:3; - unsigned stencil_pass_depth_fail_op:3; - unsigned stencil_fail_op:3; - unsigned stencil_func:3; - unsigned stencil_enable:1; - } cc0; - - - struct - { - unsigned bf_stencil_ref:8; - unsigned stencil_write_mask:8; - unsigned stencil_test_mask:8; - unsigned stencil_ref:8; - } cc1; - - - struct - { - unsigned logicop_enable:1; - unsigned pad0:10; - unsigned depth_write_enable:1; - unsigned depth_test_function:3; - unsigned depth_test:1; - unsigned bf_stencil_write_mask:8; - unsigned bf_stencil_test_mask:8; - } cc2; - - - struct - { - unsigned pad0:8; - unsigned alpha_test_func:3; - unsigned alpha_test:1; - unsigned blend_enable:1; - unsigned ia_blend_enable:1; - unsigned pad1:1; - unsigned alpha_test_format:1; - unsigned pad2:16; - } cc3; - - struct - { - unsigned pad0:5; - unsigned cc_viewport_state_offset:27; - } cc4; - - struct - { - unsigned pad0:2; - unsigned ia_dest_blend_factor:5; - unsigned ia_src_blend_factor:5; - unsigned ia_blend_function:3; - unsigned statistics_enable:1; - unsigned logicop_func:4; - unsigned pad1:11; - unsigned dither_enable:1; - } cc5; - - struct - { - unsigned clamp_post_alpha_blend:1; - unsigned clamp_pre_alpha_blend:1; - unsigned clamp_range:2; - unsigned pad0:11; - unsigned y_dither_offset:2; - unsigned x_dither_offset:2; - unsigned dest_blend_factor:5; - unsigned src_blend_factor:5; - unsigned blend_function:3; - } cc6; - - struct { - union { - float f; - ubyte ub[4]; - } alpha_ref; - } cc7; -}; - - - -struct brw_sf_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:10; - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:6; - unsigned pad3:1; - } thread4; - - struct - { - unsigned front_winding:1; - unsigned viewport_transform:1; - unsigned pad0:3; - unsigned sf_viewport_state_offset:27; - } sf5; - - struct - { - unsigned pad0:9; - unsigned dest_org_vbias:4; - unsigned dest_org_hbias:4; - unsigned scissor:1; - unsigned disable_2x2_trifilter:1; - unsigned disable_zero_pix_trifilter:1; - unsigned point_rast_rule:2; - unsigned line_endcap_aa_region_width:2; - unsigned line_width:4; - unsigned fast_scissor_disable:1; - unsigned cull_mode:2; - unsigned aa_enable:1; - } sf6; - - struct - { - unsigned point_size:11; - unsigned use_point_size_state:1; - unsigned subpixel_precision:1; - unsigned sprite_point:1; - unsigned pad0:11; - unsigned trifan_pv:2; - unsigned linestrip_pv:2; - unsigned tristrip_pv:2; - unsigned line_last_pixel_enable:1; - } sf7; - -}; - - -struct brw_gs_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:10; - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:1; - unsigned pad3:6; - } thread4; - - struct - { - unsigned sampler_count:3; - unsigned pad0:2; - unsigned sampler_state_pointer:27; - } gs5; - - - struct - { - unsigned max_vp_index:4; - unsigned pad0:26; - unsigned reorder_enable:1; - unsigned pad1:1; - } gs6; -}; - - -struct brw_vs_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:10; - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:4; - unsigned pad3:3; - } thread4; - - struct - { - unsigned sampler_count:3; - unsigned pad0:2; - unsigned sampler_state_pointer:27; - } vs5; - - struct - { - unsigned vs_enable:1; - unsigned vert_cache_disable:1; - unsigned pad0:30; - } vs6; -}; - - -struct brw_wm_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct { - unsigned stats_enable:1; - unsigned pad0:1; - unsigned sampler_count:3; - unsigned sampler_state_pointer:27; - } wm4; - - struct - { - unsigned enable_8_pix:1; - unsigned enable_16_pix:1; - unsigned enable_32_pix:1; - unsigned pad0:7; - unsigned legacy_global_depth_bias:1; - unsigned line_stipple:1; - unsigned depth_offset:1; - unsigned polygon_stipple:1; - unsigned line_aa_region_width:2; - unsigned line_endcap_aa_region_width:2; - unsigned early_depth_test:1; - unsigned thread_dispatch_enable:1; - unsigned program_uses_depth:1; - unsigned program_computes_depth:1; - unsigned program_uses_killpixel:1; - unsigned legacy_line_rast: 1; - unsigned pad1:1; - unsigned max_threads:6; - unsigned pad2:1; - } wm5; - - float global_depth_offset_constant; - float global_depth_offset_scale; -}; - -struct brw_sampler_default_color { - float color[4]; -}; - -struct brw_sampler_state -{ - - struct - { - unsigned shadow_function:3; - unsigned lod_bias:11; - unsigned min_filter:3; - unsigned mag_filter:3; - unsigned mip_filter:2; - unsigned base_level:5; - unsigned pad:1; - unsigned lod_preclamp:1; - unsigned default_color_mode:1; - unsigned pad0:1; - unsigned disable:1; - } ss0; - - struct - { - unsigned r_wrap_mode:3; - unsigned t_wrap_mode:3; - unsigned s_wrap_mode:3; - unsigned pad:3; - unsigned max_lod:10; - unsigned min_lod:10; - } ss1; - - - struct - { - unsigned pad:5; - unsigned default_color_pointer:27; - } ss2; - - struct - { - unsigned pad:19; - unsigned max_aniso:3; - unsigned chroma_key_mode:1; - unsigned chroma_key_index:2; - unsigned chroma_key_enable:1; - unsigned monochrome_filter_width:3; - unsigned monochrome_filter_height:3; - } ss3; -}; - - -struct brw_clipper_viewport -{ - float xmin; - float xmax; - float ymin; - float ymax; -}; - -struct brw_cc_viewport -{ - float min_depth; - float max_depth; -}; - -struct brw_sf_viewport -{ - struct { - float m00; - float m11; - float m22; - float m30; - float m31; - float m32; - } viewport; - - struct { - short xmin; - short ymin; - short xmax; - short ymax; - } scissor; -}; - -/* Documented in the subsystem/shared-functions/sampler chapter... - */ -struct brw_surface_state -{ - struct { - unsigned cube_pos_z:1; - unsigned cube_neg_z:1; - unsigned cube_pos_y:1; - unsigned cube_neg_y:1; - unsigned cube_pos_x:1; - unsigned cube_neg_x:1; - unsigned pad:4; - unsigned mipmap_layout_mode:1; - unsigned vert_line_stride_ofs:1; - unsigned vert_line_stride:1; - unsigned color_blend:1; - unsigned writedisable_blue:1; - unsigned writedisable_green:1; - unsigned writedisable_red:1; - unsigned writedisable_alpha:1; - unsigned surface_format:9; - unsigned data_return_format:1; - unsigned pad0:1; - unsigned surface_type:3; - } ss0; - - struct { - unsigned base_addr; - } ss1; - - struct { - unsigned pad:2; - unsigned mip_count:4; - unsigned width:13; - unsigned height:13; - } ss2; - - struct { - unsigned tile_walk:1; - unsigned tiled_surface:1; - unsigned pad:1; - unsigned pitch:18; - unsigned depth:11; - } ss3; - - struct { - unsigned pad:19; - unsigned min_array_elt:9; - unsigned min_lod:4; - } ss4; -}; - - - -struct brw_vertex_buffer_state -{ - struct { - unsigned pitch:11; - unsigned pad:15; - unsigned access_type:1; - unsigned vb_index:5; - } vb0; - - unsigned start_addr; - unsigned max_index; -#if 1 - unsigned instance_data_step_rate; /* not included for sequential/random vertices? */ -#endif -}; - -#define BRW_VBP_MAX 17 - -struct brw_vb_array_state { - struct header header; - struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; -}; - - -struct brw_vertex_element_state -{ - struct - { - unsigned src_offset:11; - unsigned pad:5; - unsigned src_format:9; - unsigned pad0:1; - unsigned valid:1; - unsigned vertex_buffer_index:5; - } ve0; - - struct - { - unsigned dst_offset:8; - unsigned pad:8; - unsigned vfcomponent3:4; - unsigned vfcomponent2:4; - unsigned vfcomponent1:4; - unsigned vfcomponent0:4; - } ve1; -}; - -#define BRW_VEP_MAX 18 - -struct brw_vertex_element_packet { - struct header header; - struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ -}; - - -struct brw_urb_immediate { - unsigned opcode:4; - unsigned offset:6; - unsigned swizzle_control:2; - unsigned pad:1; - unsigned allocate:1; - unsigned used:1; - unsigned complete:1; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; -}; - -/* Instruction format for the execution units: - */ - -struct brw_instruction -{ - struct - { - unsigned opcode:7; - unsigned pad:1; - unsigned access_mode:1; - unsigned mask_control:1; - unsigned dependency_control:2; - unsigned compression_control:2; - unsigned thread_control:2; - unsigned predicate_control:4; - unsigned predicate_inverse:1; - unsigned execution_size:3; - unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ - unsigned pad0:2; - unsigned debug_control:1; - unsigned saturate:1; - } header; - - union { - struct - { - unsigned dest_reg_file:2; - unsigned dest_reg_type:3; - unsigned src0_reg_file:2; - unsigned src0_reg_type:3; - unsigned src1_reg_file:2; - unsigned src1_reg_type:3; - unsigned pad:1; - unsigned dest_subreg_nr:5; - unsigned dest_reg_nr:8; - unsigned dest_horiz_stride:2; - unsigned dest_address_mode:1; - } da1; - - struct - { - unsigned dest_reg_file:2; - unsigned dest_reg_type:3; - unsigned src0_reg_file:2; - unsigned src0_reg_type:3; - unsigned pad:6; - int dest_indirect_offset:10; /* offset against the deref'd address reg */ - unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ - unsigned dest_horiz_stride:2; - unsigned dest_address_mode:1; - } ia1; - - struct - { - unsigned dest_reg_file:2; - unsigned dest_reg_type:3; - unsigned src0_reg_file:2; - unsigned src0_reg_type:3; - unsigned src1_reg_file:2; - unsigned src1_reg_type:3; - unsigned pad0:1; - unsigned dest_writemask:4; - unsigned dest_subreg_nr:1; - unsigned dest_reg_nr:8; - unsigned pad1:2; - unsigned dest_address_mode:1; - } da16; - - struct - { - unsigned dest_reg_file:2; - unsigned dest_reg_type:3; - unsigned src0_reg_file:2; - unsigned src0_reg_type:3; - unsigned pad0:6; - unsigned dest_writemask:4; - int dest_indirect_offset:6; - unsigned dest_subreg_nr:3; - unsigned pad1:2; - unsigned dest_address_mode:1; - } ia16; - } bits1; - - - union { - struct - { - unsigned src0_subreg_nr:5; - unsigned src0_reg_nr:8; - unsigned src0_abs:1; - unsigned src0_negate:1; - unsigned src0_address_mode:1; - unsigned src0_horiz_stride:2; - unsigned src0_width:3; - unsigned src0_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad:6; - } da1; - - struct - { - int src0_indirect_offset:10; - unsigned src0_subreg_nr:3; - unsigned src0_abs:1; - unsigned src0_negate:1; - unsigned src0_address_mode:1; - unsigned src0_horiz_stride:2; - unsigned src0_width:3; - unsigned src0_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad:6; - } ia1; - - struct - { - unsigned src0_swz_x:2; - unsigned src0_swz_y:2; - unsigned src0_subreg_nr:1; - unsigned src0_reg_nr:8; - unsigned src0_abs:1; - unsigned src0_negate:1; - unsigned src0_address_mode:1; - unsigned src0_swz_z:2; - unsigned src0_swz_w:2; - unsigned pad0:1; - unsigned src0_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad1:6; - } da16; - - struct - { - unsigned src0_swz_x:2; - unsigned src0_swz_y:2; - int src0_indirect_offset:6; - unsigned src0_subreg_nr:3; - unsigned src0_abs:1; - unsigned src0_negate:1; - unsigned src0_address_mode:1; - unsigned src0_swz_z:2; - unsigned src0_swz_w:2; - unsigned pad0:1; - unsigned src0_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad1:6; - } ia16; - - } bits2; - - union - { - struct - { - unsigned src1_subreg_nr:5; - unsigned src1_reg_nr:8; - unsigned src1_abs:1; - unsigned src1_negate:1; - unsigned pad:1; - unsigned src1_horiz_stride:2; - unsigned src1_width:3; - unsigned src1_vert_stride:4; - unsigned pad0:7; - } da1; - - struct - { - unsigned src1_swz_x:2; - unsigned src1_swz_y:2; - unsigned src1_subreg_nr:1; - unsigned src1_reg_nr:8; - unsigned src1_abs:1; - unsigned src1_negate:1; - unsigned pad0:1; - unsigned src1_swz_z:2; - unsigned src1_swz_w:2; - unsigned pad1:1; - unsigned src1_vert_stride:4; - unsigned pad2:7; - } da16; - - struct - { - int src1_indirect_offset:10; - unsigned src1_subreg_nr:3; - unsigned src1_abs:1; - unsigned src1_negate:1; - unsigned pad0:1; - unsigned src1_horiz_stride:2; - unsigned src1_width:3; - unsigned src1_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad1:6; - } ia1; - - struct - { - unsigned src1_swz_x:2; - unsigned src1_swz_y:2; - int src1_indirect_offset:6; - unsigned src1_subreg_nr:3; - unsigned src1_abs:1; - unsigned src1_negate:1; - unsigned pad0:1; - unsigned src1_swz_z:2; - unsigned src1_swz_w:2; - unsigned pad1:1; - unsigned src1_vert_stride:4; - unsigned flag_reg_nr:1; - unsigned pad2:6; - } ia16; - - - struct - { - int jump_count:16; /* note: signed */ - unsigned pop_count:4; - unsigned pad0:12; - } if_else; - - struct { - unsigned function:4; - unsigned int_type:1; - unsigned precision:1; - unsigned saturate:1; - unsigned data_type:1; - unsigned pad0:8; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } math; - - struct { - unsigned binding_table_index:8; - unsigned sampler:4; - unsigned return_format:2; - unsigned msg_type:2; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } sampler; - - struct brw_urb_immediate urb; - - struct { - unsigned binding_table_index:8; - unsigned msg_control:4; - unsigned msg_type:2; - unsigned target_cache:2; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } dp_read; - - struct { - unsigned binding_table_index:8; - unsigned msg_control:3; - unsigned pixel_scoreboard_clear:1; - unsigned msg_type:3; - unsigned send_commit_msg:1; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } dp_write; - - struct { - unsigned pad:16; - unsigned response_length:4; - unsigned msg_length:4; - unsigned msg_target:4; - unsigned pad1:3; - unsigned end_of_thread:1; - } generic; - - int d; - unsigned ud; - } bits3; -}; - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c deleted file mode 100644 index 724a69b2ee..0000000000 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ /dev/null @@ -1,126 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "brw_blit.h" -#include "brw_context.h" -#include "brw_state.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_tile.h" -#include "util/u_rect.h" - - - -/* Assumes all values are within bounds -- no checking at this level - - * do it higher up if required. - */ -static void -brw_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - assert( dst != src ); - assert( dst->block.size == src->block.size ); - assert( dst->block.width == src->block.height ); - assert( dst->block.height == src->block.height ); - - if (0) { - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - const void *src_map = pipe->screen->surface_map( pipe->screen, - src, - PIPE_BUFFER_USAGE_CPU_READ ); - - util_copy_rect(dst_map, - &dst->block, - dst->stride, - dstx, dsty, - width, height, - src_map, - src->stride, - srcx, srcy); - - pipe->screen->surface_unmap(pipe->screen, src); - pipe->screen->surface_unmap(pipe->screen, dst); - } - else { - struct brw_texture *dst_tex = (struct brw_texture *)dst->texture; - struct brw_texture *src_tex = (struct brw_texture *)src->texture; - assert(dst->block.width == 1); - assert(dst->block.height == 1); - brw_copy_blit(brw_context(pipe), - FALSE, - dst->block.size, - (short) src->stride/src->block.size, src_tex->buffer, src->offset, FALSE, - (short) dst->stride/dst->block.size, dst_tex->buffer, dst->offset, FALSE, - (short) srcx, (short) srcy, (short) dstx, (short) dsty, - (short) width, (short) height, PIPE_LOGICOP_COPY); - } -} - - -static void -brw_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - if (0) { - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - util_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); - - pipe->screen->surface_unmap(pipe->screen, dst); - } - else { - struct brw_texture *tex = (struct brw_texture *)dst->texture; - assert(dst->block.width == 1); - assert(dst->block.height == 1); - brw_fill_blit(brw_context(pipe), - dst->block.size, - (short) dst->stride/dst->block.size, - tex->buffer, dst->offset, FALSE, - (short) dstx, (short) dsty, - (short) width, (short) height, - value); - } -} - - -void -brw_init_surface_functions(struct brw_context *brw) -{ - brw->pipe.surface_copy = brw_surface_copy; - brw->pipe.surface_fill = brw_surface_fill; -} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c deleted file mode 100644 index 998ffaeac4..0000000000 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -/* Code to layout images in a mipmap tree for i965. - */ - -#include "pipe/p_state.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "brw_context.h" -#include "brw_tex_layout.h" - - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - -#if 0 -unsigned intel_compressed_alignment(unsigned internalFormat) -{ - unsigned alignment = 4; - - switch (internalFormat) { - case GL_COMPRESSED_RGB_FXT1_3DFX: - case GL_COMPRESSED_RGBA_FXT1_3DFX: - alignment = 8; - break; - - default: - break; - } - - return alignment; -} -#endif - - -static void intel_miptree_set_image_offset(struct brw_texture *tex, - unsigned level, - unsigned img, - unsigned x, unsigned y) -{ - struct pipe_texture *pt = &tex->base; - if (img == 0 && level == 0) - assert(x == 0 && y == 0); - assert(img < tex->nr_images[level]); - - tex->image_offset[level][img] = y * tex->stride + x * pt->block.size; -} - -static void intel_miptree_set_level_info(struct brw_texture *tex, - unsigned level, - unsigned nr_images, - unsigned x, unsigned y, - unsigned w, unsigned h, unsigned d) -{ - struct pipe_texture *pt = &tex->base; - - assert(level < PIPE_MAX_TEXTURE_LEVELS); - - pt->width[level] = w; - pt->height[level] = h; - pt->depth[level] = d; - - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); - - tex->level_offset[level] = y * tex->stride + x * tex->base.block.size; - tex->nr_images[level] = nr_images; - - /* - DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - level, w, h, d, x, y, tex->level_offset[level]); - */ - - /* Not sure when this would happen, but anyway: - */ - if (tex->image_offset[level]) { - FREE(tex->image_offset[level]); - tex->image_offset[level] = NULL; - } - - assert(nr_images); - assert(!tex->image_offset[level]); - - tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); - tex->image_offset[level][0] = 0; -} - -static void i945_miptree_layout_2d(struct brw_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - const int align_x = 2, align_y = 4; - unsigned level; - unsigned x = 0; - unsigned y = 0; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; - - tex->stride = align(pt->nblocksx[0] * pt->block.size, 4); - - /* May need to adjust pitch to accomodate the placement of - * the 2nd mipmap level. This occurs when the alignment - * constraints of mipmap placement push the right edge of the - * 2nd mipmap level out past the width of its parent. - */ - if (pt->last_level > 0) { - unsigned mip1_nblocksx - = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) - + pf_get_nblocksx(&pt->block, minify(minify(width))); - - if (mip1_nblocksx > nblocksx) - tex->stride = mip1_nblocksx * pt->block.size; - } - - /* Pitch must be a whole number of dwords - */ - tex->stride = align(tex->stride, 64); - tex->total_nblocksy = 0; - - for (level = 0; level <= pt->last_level; level++) { - intel_miptree_set_level_info(tex, level, 1, x, y, width, - height, 1); - - nblocksy = align(nblocksy, align_y); - - /* Because the images are packed better, the final offset - * might not be the maximal one: - */ - tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); - - /* Layout_below: step right after second mipmap level. - */ - if (level == 1) { - x += align(nblocksx, align_x); - } - else { - y += nblocksy; - } - - width = minify(width); - height = minify(height); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); - } -} - -static boolean brw_miptree_layout(struct brw_texture *tex) -{ - struct pipe_texture *pt = &tex->base; - /* XXX: these vary depending on image format: - */ -/* int align_w = 4; */ - - switch (pt->target) { - case PIPE_TEXTURE_CUBE: - case PIPE_TEXTURE_3D: { - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - unsigned nblocksx = pt->nblocksx[0]; - unsigned nblocksy = pt->nblocksy[0]; - unsigned pack_x_pitch, pack_x_nr; - unsigned pack_y_pitch; - unsigned level; - unsigned align_h = 2; - unsigned align_w = 4; - - tex->total_nblocksy = 0; - - tex->stride = align(pt->nblocksx[0], 4); - pack_y_pitch = align(pt->nblocksy[0], align_h); - - pack_x_pitch = tex->stride / pt->block.size; - pack_x_nr = 1; - - for (level = 0; level <= pt->last_level; level++) { - unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; - int x = 0; - int y = 0; - uint q, j; - - intel_miptree_set_level_info(tex, level, nr_images, - 0, tex->total_nblocksy, - width, height, depth); - - for (q = 0; q < nr_images;) { - for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { - intel_miptree_set_image_offset(tex, level, q, x, y); - x += pack_x_pitch; - } - - x = 0; - y += pack_y_pitch; - } - - - tex->total_nblocksy += y; - width = minify(width); - height = minify(height); - depth = minify(depth); - nblocksx = pf_get_nblocksx(&pt->block, width); - nblocksy = pf_get_nblocksy(&pt->block, height); - - if (pf_is_compressed(pt->format)) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > align(width, align_w)) { - pack_x_pitch = align(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = align(pack_y_pitch, align_h); - } - } - - } - break; - } - - default: - i945_miptree_layout_2d(tex); - break; - } -#if 0 - PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - pt->pitch, - pt->total_nblocksy, - pt->block.size, - pt->stride * pt->total_nblocksy ); -#endif - - return TRUE; -} - - -static struct pipe_texture * -brw_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) -{ - struct brw_texture *tex = CALLOC_STRUCT(brw_texture); - - if (tex) { - tex->base = *templat; - pipe_reference_init(&tex->base.reference, 1); - - tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); - tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); - - if (brw_miptree_layout(tex)) - tex->buffer = screen->buffer_create(screen, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->stride * - tex->total_nblocksy); - - if (!tex->buffer) { - FREE(tex); - return NULL; - } - } - - return &tex->base; -} - - -static void -brw_texture_destroy_screen(struct pipe_texture *pt) -{ - struct brw_texture *tex = (struct brw_texture *)pt; - uint i; - - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); - */ - - pipe_buffer_reference(&tex->buffer, NULL); - - for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) - if (tex->image_offset[i]) - free(tex->image_offset[i]); - - free(tex); -} - - -static struct pipe_surface * -brw_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct brw_texture *tex = (struct brw_texture *)pt; - struct pipe_surface *ps; - unsigned offset; /* in bytes */ - - offset = tex->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice]; - } - else { - assert(face == 0); - assert(zslice == 0); - } - - ps = CALLOC_STRUCT(pipe_surface); - if (ps) { - pipe_reference_init(&ps->reference, 1); - pipe_texture_reference(&ps->texture, pt); - ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->block = pt->block; - ps->nblocksx = pt->nblocksx[level]; - ps->nblocksy = pt->nblocksy[level]; - ps->stride = tex->stride; - ps->offset = offset; - } - return ps; -} - - -void -brw_init_texture_functions(struct brw_context *brw) -{ -// brw->pipe.texture_update = brw_texture_update; -} - - -void -brw_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->texture_create = brw_texture_create_screen; - screen->texture_destroy = brw_texture_destroy_screen; - screen->get_tex_surface = brw_get_tex_surface_screen; -} - diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h deleted file mode 100644 index a6b6ba8146..0000000000 --- a/src/gallium/drivers/i965simple/brw_tex_layout.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - - -#ifndef BRW_TEX_LAYOUT_H -#define BRW_TEX_LAYOUT_H - - -struct brw_context; -struct pipe_screen; - - -extern void -brw_init_texture_functions(struct brw_context *brw); - -extern void -brw_init_screen_texture_funcs(struct pipe_screen *screen); - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_urb.c b/src/gallium/drivers/i965simple/brw_urb.c deleted file mode 100644 index 101a4367b9..0000000000 --- a/src/gallium/drivers/i965simple/brw_urb.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -//#include "brw_state.h" -#include "brw_batch.h" -#include "brw_defines.h" - -#define VS 0 -#define GS 1 -#define CLP 2 -#define SF 3 -#define CS 4 - -/* XXX: Are the min_entry_size numbers useful? - * XXX: Verify min_nr_entries, esp for VS. - * XXX: Verify SF min_entry_size. - */ -static const struct { - unsigned min_nr_entries; - unsigned preferred_nr_entries; - unsigned min_entry_size; - unsigned max_entry_size; -} limits[CS+1] = { - { 8, 32, 1, 5 }, /* vs */ - { 4, 8, 1, 5 }, /* gs */ - { 6, 8, 1, 5 }, /* clp */ - { 1, 8, 1, 12 }, /* sf */ - { 1, 4, 1, 32 } /* cs */ -}; - - -static boolean check_urb_layout( struct brw_context *brw ) -{ - brw->urb.vs_start = 0; - brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; - brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; - brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; - brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; - - return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; -} - -/* Most minimal update, forces re-emit of URB fence packet after GS - * unit turned on/off. - */ -static void recalculate_urb_fence( struct brw_context *brw ) -{ - unsigned csize = brw->curbe.total_size; - unsigned vsize = brw->vs.prog_data->urb_entry_size; - unsigned sfsize = brw->sf.prog_data->urb_entry_size; - - if (csize < limits[CS].min_entry_size) - csize = limits[CS].min_entry_size; - - if (vsize < limits[VS].min_entry_size) - vsize = limits[VS].min_entry_size; - - if (sfsize < limits[SF].min_entry_size) - sfsize = limits[SF].min_entry_size; - - if (brw->urb.vsize < vsize || - brw->urb.sfsize < sfsize || - brw->urb.csize < csize || - (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || - brw->urb.sfsize > brw->urb.sfsize || - brw->urb.csize > brw->urb.csize))) { - - - brw->urb.csize = csize; - brw->urb.sfsize = sfsize; - brw->urb.vsize = vsize; - - brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; - brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; - brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; - brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; - brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; - - if (!check_urb_layout(brw)) { - brw->urb.nr_vs_entries = limits[VS].min_nr_entries; - brw->urb.nr_gs_entries = limits[GS].min_nr_entries; - brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; - brw->urb.nr_sf_entries = limits[SF].min_nr_entries; - brw->urb.nr_cs_entries = limits[CS].min_nr_entries; - - brw->urb.constrained = 1; - - if (!check_urb_layout(brw)) { - /* This is impossible, given the maximal sizes of urb - * entries and the values for minimum nr of entries - * provided above. - */ - debug_printf("couldn't calculate URB layout!\n"); - exit(1); - } - - if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) - debug_printf("URB CONSTRAINED\n"); - } - else - brw->urb.constrained = 0; - - if (BRW_DEBUG & DEBUG_URB) - debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", - brw->urb.vs_start, - brw->urb.gs_start, - brw->urb.clip_start, - brw->urb.sf_start, - brw->urb.cs_start, - 256); - - brw->state.dirty.brw |= BRW_NEW_URB_FENCE; - } -} - - -const struct brw_tracked_state brw_recalculate_urb_fence = { - .dirty = { - .brw = BRW_NEW_CURBE_OFFSETS, - .cache = (CACHE_NEW_VS_PROG | - CACHE_NEW_SF_PROG) - }, - .update = recalculate_urb_fence -}; - - - - - -void brw_upload_urb_fence(struct brw_context *brw) -{ - struct brw_urb_fence uf; - memset(&uf, 0, sizeof(uf)); - - uf.header.opcode = CMD_URB_FENCE; - uf.header.length = sizeof(uf)/4-2; - uf.header.vs_realloc = 1; - uf.header.gs_realloc = 1; - uf.header.clp_realloc = 1; - uf.header.sf_realloc = 1; - uf.header.vfe_realloc = 1; - uf.header.cs_realloc = 1; - - /* The ordering below is correct, not the layout in the - * instruction. - * - * There are 256 urb reg pairs in total. - */ - uf.bits0.vs_fence = brw->urb.gs_start; - uf.bits0.gs_fence = brw->urb.clip_start; - uf.bits0.clp_fence = brw->urb.sf_start; - uf.bits1.sf_fence = brw->urb.cs_start; - uf.bits1.cs_fence = 256; - - BRW_BATCH_STRUCT(brw, &uf); -} diff --git a/src/gallium/drivers/i965simple/brw_util.c b/src/gallium/drivers/i965simple/brw_util.c deleted file mode 100644 index 42391d7c8c..0000000000 --- a/src/gallium/drivers/i965simple/brw_util.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_util.h" -#include "brw_defines.h" - -#include "pipe/p_defines.h" - -unsigned brw_count_bits( unsigned val ) -{ - unsigned i; - for (i = 0; val ; val >>= 1) - if (val & 1) - i++; - return i; -} - - -unsigned brw_translate_blend_equation( int mode ) -{ - switch (mode) { - case PIPE_BLEND_ADD: - return BRW_BLENDFUNCTION_ADD; - case PIPE_BLEND_MIN: - return BRW_BLENDFUNCTION_MIN; - case PIPE_BLEND_MAX: - return BRW_BLENDFUNCTION_MAX; - case PIPE_BLEND_SUBTRACT: - return BRW_BLENDFUNCTION_SUBTRACT; - case PIPE_BLEND_REVERSE_SUBTRACT: - return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; - default: - assert(0); - return BRW_BLENDFUNCTION_ADD; - } -} - -unsigned brw_translate_blend_factor( int factor ) -{ - switch(factor) { - case PIPE_BLENDFACTOR_ZERO: - return BRW_BLENDFACTOR_ZERO; - case PIPE_BLENDFACTOR_SRC_ALPHA: - return BRW_BLENDFACTOR_SRC_ALPHA; - case PIPE_BLENDFACTOR_ONE: - return BRW_BLENDFACTOR_ONE; - case PIPE_BLENDFACTOR_SRC_COLOR: - return BRW_BLENDFACTOR_SRC_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - return BRW_BLENDFACTOR_INV_SRC_COLOR; - case PIPE_BLENDFACTOR_DST_COLOR: - return BRW_BLENDFACTOR_DST_COLOR; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - return BRW_BLENDFACTOR_INV_DST_COLOR; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - return BRW_BLENDFACTOR_INV_SRC_ALPHA; - case PIPE_BLENDFACTOR_DST_ALPHA: - return BRW_BLENDFACTOR_DST_ALPHA; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - return BRW_BLENDFACTOR_INV_DST_ALPHA; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; - case PIPE_BLENDFACTOR_CONST_COLOR: - return BRW_BLENDFACTOR_CONST_COLOR; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - return BRW_BLENDFACTOR_INV_CONST_COLOR; - case PIPE_BLENDFACTOR_CONST_ALPHA: - return BRW_BLENDFACTOR_CONST_ALPHA; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - return BRW_BLENDFACTOR_INV_CONST_ALPHA; - default: - assert(0); - return BRW_BLENDFACTOR_ZERO; - } -} diff --git a/src/gallium/drivers/i965simple/brw_util.h b/src/gallium/drivers/i965simple/brw_util.h deleted file mode 100644 index d60e5934db..0000000000 --- a/src/gallium/drivers/i965simple/brw_util.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_UTIL_H -#define BRW_UTIL_H - -#include "pipe/p_state.h" - -extern unsigned brw_count_bits( unsigned val ); -extern unsigned brw_translate_blend_factor( int factor ); -extern unsigned brw_translate_blend_equation( int mode ); - - -#endif diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c deleted file mode 100644 index 92327e896d..0000000000 --- a/src/gallium/drivers/i965simple/brw_vs.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_vs.h" -#include "brw_util.h" -#include "brw_state.h" - - -static void do_vs_prog( struct brw_context *brw, - const struct brw_vertex_program *vp, - struct brw_vs_prog_key *key ) -{ - unsigned program_size; - const unsigned *program; - struct brw_vs_compile c; - - memset(&c, 0, sizeof(c)); - memcpy(&c.key, key, sizeof(*key)); - - brw_init_compile(&c.func); - c.vp = vp; - - c.prog_data.outputs_written = vp->info.num_outputs; - c.prog_data.inputs_read = vp->info.num_inputs; - -#if 0 - if (c.key.copy_edgeflag) { - c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE; - c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; - } -#endif - - /* Emit GEN4 code. - */ - brw_vs_emit(&c); - - /* get the program - */ - program = brw_get_program(&c.func, &program_size); - - /* - */ - brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->vs.prog_data); -} - - -static void brw_upload_vs_prog( struct brw_context *brw ) -{ - struct brw_vs_prog_key key; - const struct brw_vertex_program *vp = brw->attribs.VertexProgram; - - assert(vp); - - memset(&key, 0, sizeof(key)); - - /* Just upload the program verbatim for now. Always send it all - * the inputs it asks for, whether they are varying or not. - */ - key.program_string_id = vp->id; - key.nr_userclip = brw->attribs.Clip.nr; - key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || - brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL); - - /* Make an early check for the key. - */ - if (brw_search_cache(&brw->cache[BRW_VS_PROG], - &key, sizeof(key), - &brw->vs.prog_data, - &brw->vs.prog_gs_offset)) - return; - - do_vs_prog(brw, vp, &key); -} - - -/* See brw_vs.c: - */ -const struct brw_tracked_state brw_vs_prog = { - .dirty = { - .brw = BRW_NEW_VS, - .cache = 0 - }, - .update = brw_upload_vs_prog -}; diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h deleted file mode 100644 index 070f9dfcae..0000000000 --- a/src/gallium/drivers/i965simple/brw_vs.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_VS_H -#define BRW_VS_H - - -#include "brw_context.h" -#include "brw_eu.h" - - -struct brw_vs_prog_key { - unsigned program_string_id; - unsigned nr_userclip:4; - unsigned copy_edgeflag:1; - unsigned know_w_is_one:1; - unsigned pad:26; -}; - - -struct brw_vs_compile { - struct brw_compile func; - struct brw_vs_prog_key key; - struct brw_vs_prog_data prog_data; - - const struct brw_vertex_program *vp; - - unsigned nr_inputs; - - unsigned first_output; - unsigned nr_outputs; - - unsigned first_tmp; - unsigned last_tmp; - - struct brw_reg r0; - struct brw_reg r1; - struct brw_reg regs[12][128]; - struct brw_reg tmp; - struct brw_reg stack; - - struct { - boolean used_in_src; - struct brw_reg reg; - } output_regs[128]; - - struct brw_reg userplane[6]; - -}; - -void brw_vs_emit( struct brw_vs_compile *c ); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c deleted file mode 100644 index 3ee82d95b3..0000000000 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ /dev/null @@ -1,1330 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_context.h" -#include "brw_vs.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" - -struct brw_prog_info { - unsigned num_temps; - unsigned num_addrs; - unsigned num_consts; - - unsigned writes_psize; - - unsigned pos_idx; - unsigned result_edge_idx; - unsigned edge_flag_idx; - unsigned psize_idx; -}; - -/* Do things as simply as possible. Allocate and populate all regs - * ahead of time. - */ -static void brw_vs_alloc_regs( struct brw_vs_compile *c, - struct brw_prog_info *info ) -{ - unsigned i, reg = 0, mrf; - unsigned nr_params; - - /* r0 -- reserved as usual - */ - c->r0 = brw_vec8_grf(reg, 0); reg++; - - /* User clip planes from curbe: - */ - if (c->key.nr_userclip) { - for (i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); - } - - /* Deal with curbe alignment: - */ - reg += ((6+c->key.nr_userclip+3)/4)*2; - } - - /* Vertex program parameters from curbe: - */ - nr_params = c->prog_data.max_const; - for (i = 0; i < nr_params; i++) { - c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params+1)/2; - c->prog_data.curb_read_length = reg - 1; - - - - /* Allocate input regs: - */ - c->nr_inputs = c->vp->info.num_inputs; - for (i = 0; i < c->nr_inputs; i++) { - c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } - - - /* Allocate outputs: TODO: could organize the non-position outputs - * to go straight into message regs. - */ - c->nr_outputs = 0; - c->first_output = reg; - mrf = 4; - for (i = 0; i < c->vp->info.num_outputs; i++) { - c->nr_outputs++; -#if 0 - if (i == VERT_RESULT_HPOS) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } - else if (i == VERT_RESULT_PSIZ) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ - } - else { - c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); - mrf++; - } -#else - /*treat pos differently for now */ - if (i == info->pos_idx) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } else { - c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); - mrf++; - } -#endif - } - - /* Allocate program temporaries: - */ - for (i = 0; i < info->num_temps; i++) { - c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); - reg++; - } - - /* Address reg(s). Don't try to use the internal address reg until - * deref time. - */ - for (i = 0; i < info->num_addrs; i++) { - c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, - reg, - 0, - BRW_REGISTER_TYPE_D, - BRW_VERTICAL_STRIDE_8, - BRW_WIDTH_8, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XXXX, - TGSI_WRITEMASK_X); - reg++; - } - - for (i = 0; i < 128; i++) { - if (c->output_regs[i].used_in_src) { - c->output_regs[i].reg = brw_vec8_grf(reg, 0); - reg++; - } - } - - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); - reg += 2; - - - /* Some opcodes need an internal temporary: - */ - c->first_tmp = reg; - c->last_tmp = reg; /* for allocation purposes */ - - /* Each input reg holds data from two vertices. The - * urb_read_length is the number of registers read from *each* - * vertex urb, so is half the amount: - */ - c->prog_data.urb_read_length = (c->nr_inputs+1)/2; - - c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; - c->prog_data.total_grf = reg; -} - - -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; -} - - -static void unalias1( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - void (*func)( struct brw_vs_compile *, - struct brw_reg, - struct brw_reg )) -{ - if (dst.file == arg0.file && dst.nr == arg0.nr) { - struct brw_compile *p = &c->func; - struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); - func(c, tmp, arg0); - brw_MOV(p, dst, tmp); - } - else { - func(c, dst, arg0); - } -} - -static void unalias2( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1, - void (*func)( struct brw_vs_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg )) -{ - if ((dst.file == arg0.file && dst.nr == arg0.nr) || - (dst.file == arg1.file && dst.nr == arg1.nr)) { - struct brw_compile *p = &c->func; - struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); - func(c, tmp, arg0, arg1); - brw_MOV(p, dst, tmp); - } - else { - func(c, dst, arg0, arg1); - } -} - -static void emit_sop( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1, - unsigned cond) -{ - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), cond, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(1.0f)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(0.0f)); - brw_pop_insn_state(p); -} - -static void emit_seq( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); -} - -static void emit_sne( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); -} -static void emit_slt( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); -} - -static void emit_sle( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); -} - -static void emit_sgt( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); -} - -static void emit_sge( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); -} - -static void emit_max( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); - brw_SEL(p, dst, arg1, arg0); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); -} - -static void emit_min( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1 ) -{ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); - brw_SEL(p, dst, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); -} - - -static void emit_math1( struct brw_vs_compile *c, - unsigned function, - struct brw_reg dst, - struct brw_reg arg0, - unsigned precision) -{ - /* There are various odd behaviours with SEND on the simulator. In - * addition there are documented issues with the fact that the GEN4 - * processor doesn't do dependency control properly on SEND - * results. So, on balance, this kludge to get around failures - * with writemasked math results looks like it might be necessary - * whether that turns out to be a simulator bug or not: - */ - struct brw_compile *p = &c->func; - struct brw_reg tmp = dst; - boolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) - tmp = get_tmp(c); - - brw_math(p, - tmp, - function, - BRW_MATH_SATURATE_NONE, - 2, - arg0, - BRW_MATH_DATA_SCALAR, - precision); - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} - -static void emit_math2( struct brw_vs_compile *c, - unsigned function, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1, - unsigned precision) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = dst; - boolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) - tmp = get_tmp(c); - - brw_MOV(p, brw_message_reg(3), arg1); - - brw_math(p, - tmp, - function, - BRW_MATH_SATURATE_NONE, - 2, - arg0, - BRW_MATH_DATA_SCALAR, - precision); - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} - - - -static void emit_exp_noalias( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0 ) -{ - struct brw_compile *p = &c->func; - - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) { - struct brw_reg tmp = get_tmp(c); - struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); - - /* tmp_d = floor(arg0.x) */ - brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); - - /* result[0] = 2.0 ^ tmp */ - - /* Adjust exponent for floating point: - * exp += 127 - */ - brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127)); - - /* Install exponent and sign. - * Excess drops off the edge: - */ - brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X), - tmp_d, brw_imm_d(23)); - - release_tmp(c, tmp); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) { - /* result[1] = arg0.x - floor(arg0.x) */ - brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0)); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { - /* As with the LOG instruction, we might be better off just - * doing a taylor expansion here, seeing as we have to do all - * the prep work. - * - * If mathbox partial precision is too low, consider also: - * result[3] = result[0] * EXP(result[1]) - */ - emit_math1(c, - BRW_MATH_FUNCTION_EXP, - brw_writemask(dst, TGSI_WRITEMASK_Z), - brw_swizzle1(arg0, 0), - BRW_MATH_PRECISION_PARTIAL); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { - /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1)); - } -} - - -static void emit_log_noalias( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0 ) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = dst; - struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); - struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD); - boolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) { - tmp = get_tmp(c); - tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); - } - - /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt - * according to spec: - * - * These almost look likey they could be joined up, but not really - * practical: - * - * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 - * result[1].i = (x.i & ((1<<23)-1) + (127<<23) - */ - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) { - brw_AND(p, - brw_writemask(tmp_ud, TGSI_WRITEMASK_X), - brw_swizzle1(arg0_ud, 0), - brw_imm_ud((1U<<31)-1)); - - brw_SHR(p, - brw_writemask(tmp_ud, TGSI_WRITEMASK_X), - tmp_ud, - brw_imm_ud(23)); - - brw_ADD(p, - brw_writemask(tmp, TGSI_WRITEMASK_X), - retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */ - brw_imm_d(-127)); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) { - brw_AND(p, - brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), - brw_swizzle1(arg0_ud, 0), - brw_imm_ud((1<<23)-1)); - - brw_OR(p, - brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), - tmp_ud, - brw_imm_ud(127<<23)); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { - /* result[2] = result[0] + LOG2(result[1]); */ - - /* Why bother? The above is just a hint how to do this with a - * taylor series. Maybe we *should* use a taylor series as by - * the time all the above has been done it's almost certainly - * quicker than calling the mathbox, even with low precision. - * - * Options are: - * - result[0] + mathbox.LOG2(result[1]) - * - mathbox.LOG2(arg0.x) - * - result[0] + inline_taylor_approx(result[1]) - */ - emit_math1(c, - BRW_MATH_FUNCTION_LOG, - brw_writemask(tmp, TGSI_WRITEMASK_Z), - brw_swizzle1(tmp, 1), - BRW_MATH_PRECISION_FULL); - - brw_ADD(p, - brw_writemask(tmp, TGSI_WRITEMASK_Z), - brw_swizzle1(tmp, 2), - brw_swizzle1(tmp, 0)); - } - - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { - /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1)); - } - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} - - - - -/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 - */ -static void emit_dst_noalias( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - struct brw_reg arg1) -{ - struct brw_compile *p = &c->func; - - /* There must be a better way to do this: - */ - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0)); - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) - brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1); - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0); - if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1); -} - -static void emit_xpd( struct brw_compile *p, - struct brw_reg dst, - struct brw_reg t, - struct brw_reg u) -{ - brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); - brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); -} - - - -static void emit_lit_noalias( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0 ) -{ - struct brw_compile *p = &c->func; - struct brw_instruction *if_insn; - struct brw_reg tmp = dst; - boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) - tmp = get_tmp(c); - - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0)); - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1)); - - /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order - * to get all channels active inside the IF. In the clipping code - * we run with NoMask, so it's not an option and we can use - * BRW_EXECUTE_1 for all comparisions. - */ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); - if_insn = brw_IF(p, BRW_EXECUTE_8); - { - brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0)); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); - brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - emit_math2(c, - BRW_MATH_FUNCTION_POW, - brw_writemask(dst, TGSI_WRITEMASK_Z), - brw_swizzle1(tmp, 2), - brw_swizzle1(arg0, 3), - BRW_MATH_PRECISION_PARTIAL); - } - - brw_ENDIF(p, if_insn); -} - - - - - -/* TODO: relative addressing! - */ -static struct brw_reg get_reg( struct brw_vs_compile *c, - unsigned file, - unsigned index ) -{ - switch (file) { - case TGSI_FILE_TEMPORARY: - case TGSI_FILE_INPUT: - case TGSI_FILE_OUTPUT: - assert(c->regs[file][index].nr != 0); - return c->regs[file][index]; - case TGSI_FILE_CONSTANT: - assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0); - return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm]; - case TGSI_FILE_IMMEDIATE: - assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); - return c->regs[TGSI_FILE_CONSTANT][index]; - case TGSI_FILE_ADDRESS: - assert(index == 0); - return c->regs[file][index]; - - case TGSI_FILE_NULL: /* undef values */ - return brw_null_reg(); - - default: - assert(0); - return brw_null_reg(); - } -} - - - -static struct brw_reg deref( struct brw_vs_compile *c, - struct brw_reg arg, - int offset) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); - unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16; - struct brw_reg indirect = brw_vec4_indirect(0,0); - - { - brw_push_insn_state(p); - brw_set_access_mode(p, BRW_ALIGN_1); - - /* This is pretty clunky - load the address register twice and - * fetch each 4-dword value in turn. There must be a way to do - * this in a single pass, but I couldn't get it to work. - */ - brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); - brw_MOV(p, tmp, indirect); - - brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); - brw_MOV(p, suboffset(tmp, 4), indirect); - - brw_pop_insn_state(p); - } - - return vec8(tmp); -} - - -static void emit_arl( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0 ) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = dst; - boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); - - if (need_tmp) - tmp = get_tmp(c); - - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); - - if (need_tmp) - release_tmp(c, tmp); -} - - -/* Will return mangled results for SWZ op. The emit_swz() function - * ignores this result and recalculates taking extended swizzles into - * account. - */ -static struct brw_reg get_arg( struct brw_vs_compile *c, - struct tgsi_src_register *src ) -{ - struct brw_reg reg; - - if (src->File == TGSI_FILE_NULL) - return brw_null_reg(); - -#if 0 - if (src->RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else -#endif - reg = get_reg(c, src->File, src->Index); - - /* Convert 3-bit swizzle to 2-bit. - */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX, - src->SwizzleY, - src->SwizzleZ, - src->SwizzleW); - - /* Note this is ok for non-swizzle instructions: - */ - reg.negate = src->Negate ? 1 : 0; - - return reg; -} - - -static struct brw_reg get_dst( struct brw_vs_compile *c, - const struct tgsi_dst_register *dst ) -{ - struct brw_reg reg = get_reg(c, dst->File, dst->Index); - - reg.dw1.bits.writemask = dst->WriteMask; - - return reg; -} - - - - -static void emit_swz( struct brw_vs_compile *c, - struct brw_reg dst, - struct tgsi_src_register src ) -{ - struct brw_compile *p = &c->func; - unsigned zeros_mask = 0; - unsigned ones_mask = 0; - unsigned src_mask = 0; - ubyte src_swz[4]; - boolean need_tmp = (src.Negate && - dst.file != BRW_GENERAL_REGISTER_FILE); - struct brw_reg tmp = dst; - unsigned i; - - if (need_tmp) - tmp = get_tmp(c); - - for (i = 0; i < 4; i++) { - if (dst.dw1.bits.writemask & (1<<i)) { - ubyte s = 0; - switch(i) { - case 0: - s = src.SwizzleX; - break; - s = src.SwizzleY; - case 1: - break; - s = src.SwizzleZ; - case 2: - break; - s = src.SwizzleW; - case 3: - break; - } - switch (s) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - src_mask |= 1<<i; - src_swz[i] = s; - break; - case TGSI_EXTSWIZZLE_ZERO: - zeros_mask |= 1<<i; - break; - case TGSI_EXTSWIZZLE_ONE: - ones_mask |= 1<<i; - break; - } - } - } - - /* Do src first, in case dst aliases src: - */ - if (src_mask) { - struct brw_reg arg0; - -#if 0 - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else -#endif - arg0 = get_reg(c, src.File, src.Index); - - arg0 = brw_swizzle(arg0, - src_swz[0], src_swz[1], - src_swz[2], src_swz[3]); - - brw_MOV(p, brw_writemask(tmp, src_mask), arg0); - } - - if (zeros_mask) - brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); - - if (ones_mask) - brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - - if (src.Negate) - brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} - - - -/* Post-vertex-program processing. Send the results to the URB. - */ -static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info) -{ - struct brw_compile *p = &c->func; - struct brw_reg m0 = brw_message_reg(0); - struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx]; - struct brw_reg ndc; - - if (c->key.copy_edgeflag) { - brw_MOV(p, - get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx), - get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx)); - } - - - /* Build ndc coords? TODO: Shortcircuit when w is known to be one. - */ - if (!c->key.know_w_is_one) { - ndc = get_tmp(c); - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc); - } - else { - ndc = pos; - } - - /* This includes the workaround for -ve rhw, so is no longer an - * optional step: - */ - if (info->writes_psize || - c->key.nr_userclip || - !c->key.know_w_is_one) - { - struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); - unsigned i; - - brw_MOV(p, header1, brw_imm_ud(0)); - - brw_set_access_mode(p, BRW_ALIGN_16); - - if (info->writes_psize) { - struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx]; - brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W), - brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); - brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, - brw_imm_ud(0x7ff<<8)); - } - - - for (i = 0; i < c->key.nr_userclip; i++) { - brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); - brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - - - /* i965 clipping workaround: - * 1) Test for -ve rhw - * 2) If set, - * set ndc = (0,0,0,0) - * set ucp[6] = 1 - * - * Later, clipping will detect ucp[6] and ensure the primitive is - * clipped against all fixed planes. - */ - if (!c->key.know_w_is_one) { - brw_CMP(p, - vec8(brw_null_reg()), - BRW_CONDITIONAL_L, - brw_swizzle1(ndc, 3), - brw_imm_f(0)); - - brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6)); - brw_MOV(p, ndc, brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - - brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ - brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); - brw_set_access_mode(p, BRW_ALIGN_16); - - release_tmp(c, header1); - } - else { - brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); - } - - - /* Emit the (interleaved) headers for the two vertices - an 8-reg - * of zeros followed by two sets of NDC coordinates: - */ - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, offset(m0, 2), ndc); - brw_MOV(p, offset(m0, 3), pos); - - - brw_urb_WRITE(p, - brw_null_reg(), /* dest */ - 0, /* starting mrf reg nr */ - c->r0, /* src */ - 0, /* allocate */ - 1, /* used */ - c->nr_outputs + 3, /* msg len */ - 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ - 0, /* urb destination offset */ - BRW_URB_SWIZZLE_INTERLEAVE); - -} - -static void -post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) -{ - struct tgsi_parse_context parse; - const struct tgsi_token *tokens = c->vp->program.tokens; - tgsi_parse_init(&parse, tokens); - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { -#if 0 - struct brw_instruction *brw_inst1, *brw_inst2; - const struct tgsi_full_instruction *inst1, *inst2; - int offset; - inst1 = &parse.FullToken.FullInstruction; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case TGSI_OPCODE_CAL: - case TGSI_OPCODE_BRA: - target_insn = inst1->BranchTarget; - inst2 = &c->vp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - case TGSI_OPCODE_END: - offset = end_inst - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } -#endif - } - } - tgsi_parse_free(&parse); -} - -static void process_declaration(const struct tgsi_full_declaration *decl, - struct brw_prog_info *info) -{ - int first = decl->DeclarationRange.First; - int last = decl->DeclarationRange.Last; - - switch(decl->Declaration.File) { - case TGSI_FILE_CONSTANT: - info->num_consts += last - first + 1; - break; - case TGSI_FILE_INPUT: { - } - break; - case TGSI_FILE_OUTPUT: { - assert(last == first); /* for now */ - if (decl->Declaration.Semantic) { - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_POSITION: { - info->pos_idx = first; - } - break; - case TGSI_SEMANTIC_COLOR: - break; - case TGSI_SEMANTIC_BCOLOR: - break; - case TGSI_SEMANTIC_FOG: - break; - case TGSI_SEMANTIC_PSIZE: { - info->writes_psize = TRUE; - info->psize_idx = first; - } - break; - case TGSI_SEMANTIC_GENERIC: - break; - } - } - } - break; - case TGSI_FILE_TEMPORARY: { - info->num_temps += (last - first) + 1; - } - break; - case TGSI_FILE_SAMPLER: { - } - break; - case TGSI_FILE_ADDRESS: { - info->num_addrs += (last - first) + 1; - } - break; - case TGSI_FILE_IMMEDIATE: { - } - break; - case TGSI_FILE_NULL: { - } - break; - } -} - -static void process_instruction(struct brw_vs_compile *c, - struct tgsi_full_instruction *inst, - struct brw_prog_info *info) -{ - struct brw_reg args[3], dst; - struct brw_compile *p = &c->func; - /*struct brw_indirect stack_index = brw_indirect(0, 0);*/ - unsigned i; - unsigned index; - unsigned file; - /*FIXME: might not be the only one*/ - const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister; - /* - struct brw_instruction *if_inst[MAX_IFSN]; - unsigned insn, if_insn = 0; - */ - - for (i = 0; i < 3; i++) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - index = src->SrcRegister.Index; - file = src->SrcRegister.File; - if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else - args[i] = get_arg(c, &src->SrcRegister); - } - - /* Get dest regs. Note that it is possible for a reg to be both - * dst and arg, given the static allocation of registers. So - * care needs to be taken emitting multi-operation instructions. - */ - index = dst_reg->Index; - file = dst_reg->File; - if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; - else - dst = get_dst(c, dst_reg); - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - brw_MOV(p, dst, brw_abs(args[0])); - break; - case TGSI_OPCODE_ADD: - brw_ADD(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DP3: - brw_DP3(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DP4: - brw_DP4(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DPH: - brw_DPH(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DST: - unalias2(c, dst, args[0], args[1], emit_dst_noalias); - break; - case TGSI_OPCODE_EXP: - unalias1(c, dst, args[0], emit_exp_noalias); - break; - case TGSI_OPCODE_EX2: - emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_ARL: - emit_arl(c, dst, args[0]); - break; - case TGSI_OPCODE_FLR: - brw_RNDD(p, dst, args[0]); - break; - case TGSI_OPCODE_FRC: - brw_FRC(p, dst, args[0]); - break; - case TGSI_OPCODE_LOG: - unalias1(c, dst, args[0], emit_log_noalias); - break; - case TGSI_OPCODE_LG2: - emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_LIT: - unalias1(c, dst, args[0], emit_lit_noalias); - break; - case TGSI_OPCODE_MAD: - brw_MOV(p, brw_acc_reg(), args[2]); - brw_MAC(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MAX: - emit_max(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MIN: - emit_min(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: -#if 0 - /* The args[0] value can't be used here as it won't have - * correctly encoded the full swizzle: - */ - emit_swz(c, dst, inst->SrcReg[0] ); -#endif - brw_MOV(p, dst, args[0]); - break; - case TGSI_OPCODE_MUL: - brw_MUL(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_POW: - emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_RCP: - emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - - case TGSI_OPCODE_SEQ: - emit_seq(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SNE: - emit_sne(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SGE: - emit_sge(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SGT: - emit_sgt(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SLT: - emit_slt(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SLE: - emit_sle(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SUB: - brw_ADD(p, dst, args[0], negate(args[1])); - break; - case TGSI_OPCODE_XPD: - emit_xpd(p, dst, args[0], args[1]); - break; -#if 0 - case TGSI_OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); - break; - case TGSI_OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); - break; - case TGSI_OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); - break; - case TGSI_OPCODE_BRA: - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_set_predicate_control_flag_value(p, 0xff); - break; - case TGSI_OPCODE_CAL: - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - inst->Data = &p->store[p->nr_insn]; - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - break; -#endif - case TGSI_OPCODE_RET: -#if 0 - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); -#else - /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/ -#endif - break; - case TGSI_OPCODE_END: - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - break; - case TGSI_OPCODE_BGNSUB: - case TGSI_OPCODE_ENDSUB: - break; - default: - debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode); - break; - } - - if (dst_reg->File == TGSI_FILE_OUTPUT - && dst_reg->Index != info->pos_idx - && c->output_regs[dst_reg->Index].used_in_src) - brw_MOV(p, get_dst(c, dst_reg), dst); - - release_tmps(c); -} - -/* Emit the fragment program instructions here. - */ -void brw_vs_emit(struct brw_vs_compile *c) -{ -#define MAX_IFSN 32 - struct brw_compile *p = &c->func; - struct brw_instruction *end_inst; - struct tgsi_parse_context parse; - struct brw_indirect stack_index = brw_indirect(0, 0); - const struct tgsi_token *tokens = c->vp->program.tokens; - struct brw_prog_info prog_info; - unsigned allocated_registers = 0; - memset(&prog_info, 0, sizeof(struct brw_prog_info)); - - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_access_mode(p, BRW_ALIGN_16); - - tgsi_parse_init(&parse, tokens); - /* Message registers can't be read, so copy the output into GRF register - if they are used in source registers */ - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - unsigned i; - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: { - const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; - for (i = 0; i < 3; ++i) { - const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister; - unsigned index = src->Index; - unsigned file = src->File; - if (file == TGSI_FILE_OUTPUT) - c->output_regs[index].used_in_src = TRUE; - } - } - break; - default: - /* nothing */ - break; - } - } - tgsi_parse_free(&parse); - - tgsi_parse_init(&parse, tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - process_declaration(decl, &prog_info); - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: { - struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.NrTokens == 4 + 1); - c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u[0].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u[1].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u[2].Float; - c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u[3].Float; - c->prog_data.num_imm++; - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: { - struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; - if (!allocated_registers) { - /* first instruction (declerations finished). - * now that we know what vars are being used allocate - * registers for them.*/ - c->prog_data.num_consts = prog_info.num_consts; - c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm; - brw_vs_alloc_regs(c, &prog_info); - - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - brw_set_access_mode(p, BRW_ALIGN_16); - allocated_registers = 1; - } - process_instruction(c, inst, &prog_info); - } - break; - } - } - - end_inst = &p->store[p->nr_insn]; - emit_vertex_write(c, &prog_info); - post_vs_emit(c, end_inst); - tgsi_parse_free(&parse); - -} diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c deleted file mode 100644 index 1eaff87892..0000000000 --- a/src/gallium/drivers/i965simple/brw_vs_state.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -#include "util/u_math.h" -#include "util/u_memory.h" - -static void upload_vs_unit( struct brw_context *brw ) -{ - struct brw_vs_unit_state vs; - - memset(&vs, 0, sizeof(vs)); - - /* CACHE_NEW_VS_PROG */ - vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; - vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; - vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; - vs.thread3.dispatch_grf_start_reg = 1; - - - /* BRW_NEW_URB_FENCE */ - vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; - vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - vs.thread4.max_threads = MIN2( - MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), - 15); - - - - if (BRW_DEBUG & DEBUG_SINGLE_THREAD) - vs.thread4.max_threads = 0; - - /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ - if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) { - /* Note that we read in the userclip planes as well, hence - * clip_start: - */ - vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; - } - else { - vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; - } - - vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - vs.thread3.urb_entry_read_offset = 0; - - /* No samplers for ARB_vp programs: - */ - vs.vs5.sampler_count = 0; - - if (BRW_DEBUG & DEBUG_STATS) - vs.thread4.stats_enable = 1; - - /* Vertex program always enabled: - */ - vs.vs6.vs_enable = 1; - - brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); -} - - -const struct brw_tracked_state brw_vs_unit = { - .dirty = { - .brw = (BRW_NEW_CLIP | - BRW_NEW_CURBE_OFFSETS | - BRW_NEW_URB_FENCE), - .cache = CACHE_NEW_VS_PROG - }, - .update = upload_vs_unit -}; diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h deleted file mode 100644 index ec1e400418..0000000000 --- a/src/gallium/drivers/i965simple/brw_winsys.h +++ /dev/null @@ -1,209 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * This is the interface that i965simple requires any window system - * hosting it to implement. This is the only include file in i965simple - * which is public. - * - */ - -#ifndef BRW_WINSYS_H -#define BRW_WINSYS_H - - -#include "pipe/p_defines.h" - - -/* Pipe drivers are (meant to be!) independent of both GL and the - * window system. The window system provides a buffer manager and a - * set of additional hooks for things like command buffer submission, - * etc. - * - * There clearly has to be some agreement between the window system - * driver and the hardware driver about the format of command buffers, - * etc. - */ - -struct pipe_buffer; -struct pipe_fence_handle; -struct pipe_winsys; -struct pipe_screen; - - -/* The pipe driver currently understands the following chipsets: - */ -#define PCI_CHIP_I965_G 0x29A2 -#define PCI_CHIP_I965_Q 0x2992 -#define PCI_CHIP_I965_G_1 0x2982 -#define PCI_CHIP_I965_GM 0x2A02 -#define PCI_CHIP_I965_GME 0x2A12 - - -/* These are the names of all the state caches managed by the driver. - * - * When data is uploaded to a buffer with buffer_subdata, we use the - * special version of that function below so that information about - * what type of data this is can be passed to the winsys backend. - * That in turn allows the correct flags to be set in the aub file - * dump to allow human-readable file dumps later on. - */ - -enum brw_cache_id { - BRW_CC_VP, - BRW_CC_UNIT, - BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, - BRW_SAMPLER, - BRW_WM_UNIT, - BRW_SF_PROG, - BRW_SF_VP, - BRW_SF_UNIT, - BRW_VS_UNIT, - BRW_VS_PROG, - BRW_GS_UNIT, - BRW_GS_PROG, - BRW_CLIP_VP, - BRW_CLIP_UNIT, - BRW_CLIP_PROG, - BRW_SS_SURFACE, - BRW_SS_SURF_BIND, - - BRW_MAX_CACHE -}; - -#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE - -/** - * Additional winsys interface for i965simple. - * - * It is an over-simple batchbuffer mechanism. Will want to improve the - * performance of this, perhaps based on the cmdstream stuff. It - * would be pretty impossible to implement swz on top of this - * interface. - * - * Will also need additions/changes to implement static/dynamic - * indirect state. - */ -struct brw_winsys { - - void (*destroy)(struct brw_winsys *); - - /** - * Reserve space on batch buffer. - * - * Returns a null pointer if there is insufficient space in the batch buffer - * to hold the requested number of dwords and relocations. - * - * The number of dwords should also include the number of relocations. - */ - unsigned *(*batch_start)(struct brw_winsys *sws, - unsigned dwords, - unsigned relocs); - - void (*batch_dword)(struct brw_winsys *sws, - unsigned dword); - - /** - * Emit a relocation to a buffer. - * - * Used not only when the buffer addresses are not pinned, but also to - * ensure refered buffers will not be destroyed until the current batch - * buffer execution is finished. - * - * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and - * I915_BUFFER_ACCESS_READ macros. - */ - void (*batch_reloc)(struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned access_flags, - unsigned delta); - - - /* Not used yet, but really want this: - */ - void (*batch_end)( struct brw_winsys *sws ); - - /** - * Flush the batch buffer. - * - * Fence argument must point to NULL or to a previous fence, and the caller - * must call fence_reference when done with the fence. - */ - void (*batch_flush)(struct brw_winsys *sws, - struct pipe_fence_handle **fence); - - - /* A version of buffer_subdata that includes information for the - * simulator: - */ - void (*buffer_subdata_typed)(struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned long offset, - unsigned long size, - const void *data, - unsigned data_type); - - - /* A cheat so we don't have to think about relocations in a couple - * of places yet: - */ - unsigned (*get_buffer_offset)( struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned flags ); - -}; - -#define BRW_BUFFER_ACCESS_WRITE 0x1 -#define BRW_BUFFER_ACCESS_READ 0x2 - -#define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) - - -struct pipe_context *brw_create(struct pipe_screen *, - struct brw_winsys *, - unsigned pci_id); - -static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys, - const void *data, - unsigned bytes) -{ - static const unsigned incr = sizeof(unsigned); - uint i; - const unsigned *udata = (const unsigned*)(data); - unsigned size = bytes/incr; - - winsys->batch_start(winsys, size, 0); - for (i = 0; i < size; ++i) { - winsys->batch_dword(winsys, udata[i]); - } - winsys->batch_end(winsys); - - return (i == size); -} -#endif diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c deleted file mode 100644 index 10161f2d2f..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_util.h" -#include "brw_wm.h" -#include "brw_eu.h" -#include "brw_state.h" -#include "util/u_memory.h" - - - -static void do_wm_prog( struct brw_context *brw, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key) -{ - struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); - const unsigned *program; - unsigned program_size; - - c->key = *key; - c->fp = fp; - - c->delta_xy[0] = brw_null_reg(); - c->delta_xy[1] = brw_null_reg(); - c->pixel_xy[0] = brw_null_reg(); - c->pixel_xy[1] = brw_null_reg(); - c->pixel_w = brw_null_reg(); - - - debug_printf("XXXXXXXX FP\n"); - - brw_wm_glsl_emit(c); - - /* get the program - */ - program = brw_get_program(&c->func, &program_size); - - /* - */ - brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], - &c->key, - sizeof(c->key), - program, - program_size, - &c->prog_data, - &brw->wm.prog_data ); - - FREE(c); -} - - - -static void brw_wm_populate_key( struct brw_context *brw, - struct brw_wm_prog_key *key ) -{ - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = - (struct brw_fragment_program *)brw->attribs.FragmentProgram; - unsigned lookup = 0; - unsigned line_aa; - - memset(key, 0, sizeof(*key)); - - /* Build the index for table lookup - */ - /* BRW_NEW_DEPTH_STENCIL */ - if (fp->info.uses_kill || - brw->attribs.DepthStencil->alpha.enabled) - lookup |= IZ_PS_KILL_ALPHATEST_BIT; - - if (fp->info.writes_z) - lookup |= IZ_PS_COMPUTES_DEPTH_BIT; - - if (brw->attribs.DepthStencil->depth.enabled) - lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - - if (brw->attribs.DepthStencil->depth.enabled && - brw->attribs.DepthStencil->depth.writemask) /* ?? */ - lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; - - if (brw->attribs.DepthStencil->stencil[0].enabled) { - lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - - if (brw->attribs.DepthStencil->stencil[0].writemask || - brw->attribs.DepthStencil->stencil[1].writemask) - lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; - } - - /* XXX: when should this be disabled? - */ - if (1) - lookup |= IZ_EARLY_DEPTH_TEST_BIT; - - - line_aa = AA_NEVER; - - /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (brw->attribs.Raster->line_smooth) { - if (brw->reduced_primitive == PIPE_PRIM_LINES) { - line_aa = AA_ALWAYS; - } - else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { - if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) { - line_aa = AA_SOMETIMES; - - if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE || - (brw->attribs.Raster->cull_mode == PIPE_WINDING_CW)) - line_aa = AA_ALWAYS; - } - else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) { - line_aa = AA_SOMETIMES; - - if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW) - line_aa = AA_ALWAYS; - } - } - } - - brw_wm_lookup_iz(line_aa, - lookup, - key); - - -#if 0 - /* BRW_NEW_SAMPLER - * - * Not doing any of this at the moment: - */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct pipe_sampler_state *unit = brw->attribs.Samplers[i]; - - if (unit) { - - if (unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - key->shadowtex_mask |= 1<<i; - } - if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) - key->yuvtex_mask |= 1<<i; - } - } -#endif - - - /* Extra info: - */ - key->program_string_id = fp->id; - -} - - -static void brw_upload_wm_prog( struct brw_context *brw ) -{ - struct brw_wm_prog_key key; - struct brw_fragment_program *fp = (struct brw_fragment_program *) - brw->attribs.FragmentProgram; - - brw_wm_populate_key(brw, &key); - - /* Make an early check for the key. - */ - if (brw_search_cache(&brw->cache[BRW_WM_PROG], - &key, sizeof(key), - &brw->wm.prog_data, - &brw->wm.prog_gs_offset)) - return; - - do_wm_prog(brw, fp, &key); -} - - -const struct brw_tracked_state brw_wm_prog = { - .dirty = { - .brw = (BRW_NEW_FS | - BRW_NEW_REDUCED_PRIMITIVE), - .cache = 0 - }, - .update = brw_upload_wm_prog -}; - diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h deleted file mode 100644 index b29c4393f0..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#ifndef BRW_WM_H -#define BRW_WM_H - - -#include "brw_context.h" -#include "brw_eu.h" - -/* A big lookup table is used to figure out which and how many - * additional regs will inserted before the main payload in the WM - * program execution. These mainly relate to depth and stencil - * processing and the early-depth-test optimization. - */ -#define IZ_PS_KILL_ALPHATEST_BIT 0x1 -#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 -#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 -#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 -#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 -#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 -#define IZ_EARLY_DEPTH_TEST_BIT 0x40 -#define IZ_BIT_MAX 0x80 - -#define AA_NEVER 0 -#define AA_SOMETIMES 1 -#define AA_ALWAYS 2 - -struct brw_wm_prog_key { - unsigned source_depth_reg:3; - unsigned aa_dest_stencil_reg:3; - unsigned dest_depth_reg:3; - unsigned nr_depth_regs:3; - unsigned shadowtex_mask:8; - unsigned computes_depth:1; /* could be derived from program string */ - unsigned source_depth_to_render_target:1; - unsigned runtime_check_aads_emit:1; - - unsigned yuvtex_mask:8; - - unsigned program_string_id; -}; - - - - - -#define PROGRAM_INTERNAL_PARAM -#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ -#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_MAX_ATTRIBS + 3) -#define BRW_WM_MAX_GRF 128 /* hardware limit */ -#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) -#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) -#define BRW_WM_MAX_PARAM 256 -#define BRW_WM_MAX_CONST 256 -#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS - -#define PAYLOAD_DEPTH (PIPE_MAX_ATTRIBS) - -#define MAX_IFSN 32 -#define MAX_LOOP_DEPTH 32 - -struct brw_wm_compile { - struct brw_compile func; - struct brw_wm_prog_key key; - struct brw_wm_prog_data prog_data; /* result */ - - struct brw_fragment_program *fp; - - unsigned grf_limit; - unsigned max_wm_grf; - - - struct brw_reg pixel_xy[2]; - struct brw_reg delta_xy[2]; - struct brw_reg pixel_w; - - - struct brw_reg wm_regs[8][32][4]; - - struct brw_reg payload_depth[4]; - struct brw_reg payload_coef[16]; - - struct brw_reg emit_mask_reg; - - struct brw_instruction *if_inst[MAX_IFSN]; - int if_insn; - - struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; - int loop_insn; - - struct brw_instruction *inst0; - struct brw_instruction *inst1; - - struct brw_reg stack; - struct brw_indirect stack_index; - - unsigned reg_index; - - unsigned tmp_start; - unsigned tmp_index; -}; - - - -void brw_wm_lookup_iz( unsigned line_aa, - unsigned lookup, - struct brw_wm_prog_key *key ); - -void brw_wm_glsl_emit(struct brw_wm_compile *c); -void brw_wm_emit_decls(struct brw_wm_compile *c); - -#endif diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c deleted file mode 100644 index d50e66f613..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ /dev/null @@ -1,392 +0,0 @@ - -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" - -static struct brw_reg alloc_tmp(struct brw_wm_compile *c) -{ - c->tmp_index++; - c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); - return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); -} - -static void release_tmps(struct brw_wm_compile *c) -{ - c->tmp_index = 0; -} - - - -static int is_null( struct brw_reg reg ) -{ - return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && - reg.nr == BRW_ARF_NULL); -} - -static void emit_pixel_xy( struct brw_wm_compile *c ) -{ - if (is_null(c->pixel_xy[0])) { - - struct brw_compile *p = &c->func; - struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - - c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); - c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); - - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - brw_ADD(p, - c->pixel_xy[0], - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - - brw_ADD(p, - c->pixel_xy[1], - stride(suboffset(r1_uw, 5), 2, 4, 0), - brw_imm_v(0x11001100)); - } -} - - - - - - -static void emit_delta_xy( struct brw_wm_compile *c ) -{ - if (is_null(c->delta_xy[0])) { - struct brw_compile *p = &c->func; - struct brw_reg r1 = brw_vec1_grf(1, 0); - - emit_pixel_xy(c); - - c->delta_xy[0] = alloc_tmp(c); - c->delta_xy[1] = alloc_tmp(c); - - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - brw_ADD(p, - c->delta_xy[0], - retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), - negate(r1)); - - brw_ADD(p, - c->delta_xy[1], - retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); - } -} - - - -#if 0 -static void emit_pixel_w( struct brw_wm_compile *c ) -{ - if (is_null(c->pixel_w)) { - struct brw_compile *p = &c->func; - - struct brw_reg interp_wpos = c->coef_wpos; - - c->pixel_w = alloc_tmp(c); - - emit_delta_xy(c); - - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); - brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); - - /* Calc w */ - brw_math_16( p, - c->pixel_w, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_PRECISION_FULL); - } -} -#endif - - -static void emit_cinterp(struct brw_wm_compile *c, - int idx, - int mask ) -{ - struct brw_compile *p = &c->func; - struct brw_reg interp[4]; - struct brw_reg coef = c->payload_coef[idx]; - int i; - - interp[0] = brw_vec1_grf(coef.nr, 0); - interp[1] = brw_vec1_grf(coef.nr, 4); - interp[2] = brw_vec1_grf(coef.nr+1, 0); - interp[3] = brw_vec1_grf(coef.nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; - brw_MOV(p, dst, suboffset(interp[i],3)); - } - } -} - -static void emit_linterp(struct brw_wm_compile *c, - int idx, - int mask ) -{ - struct brw_compile *p = &c->func; - struct brw_reg interp[4]; - struct brw_reg coef = c->payload_coef[idx]; - int i; - - emit_delta_xy(c); - - interp[0] = brw_vec1_grf(coef.nr, 0); - interp[1] = brw_vec1_grf(coef.nr, 4); - interp[2] = brw_vec1_grf(coef.nr+1, 0); - interp[3] = brw_vec1_grf(coef.nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; - brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); - brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); - } - } -} - -#if 0 -static void emit_pinterp(struct brw_wm_compile *c, - int idx, - int mask ) -{ - struct brw_compile *p = &c->func; - struct brw_reg interp[4]; - struct brw_reg coef = c->payload_coef[idx]; - int i; - - get_delta_xy(c); - get_pixel_w(c); - - interp[0] = brw_vec1_grf(coef.nr, 0); - interp[1] = brw_vec1_grf(coef.nr, 4); - interp[2] = brw_vec1_grf(coef.nr+1, 0); - interp[3] = brw_vec1_grf(coef.nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i); - brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); - brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); - brw_MUL(p, dst, dst, c->pixel_w); - } - } -} -#endif - - - -#if 0 -static void emit_wpos( ) -{ - struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); - struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - struct tgsi_full_src_register deltas = get_delta_xy(c); - struct tgsi_full_src_register arg2; - unsigned opcode; - - opcode = WM_LINTERP; - arg2 = src_undef(); - - /* Have to treat wpos.xy specially: - */ - emit_op(c, - WM_WPOSXY, - dst_mask(dst, WRITEMASK_XY), - 0, 0, 0, - get_pixel_xy(c), - src_undef(), - src_undef()); - - dst = dst_mask(dst, WRITEMASK_ZW); - - /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw - */ - emit_op(c, - WM_LINTERP, - dst, - 0, 0, 0, - interp, - deltas, - arg2); -} -#endif - - - - -/* Perform register allocation: - * - * -- r0??? - * -- passthrough depth regs (and stencil/aa??) - * -- curbe ?? - * -- inputs (coefficients) - * - * Use a totally static register allocation. This will perform poorly - * but is an easy way to get started (again). - */ -static void prealloc_reg(struct brw_wm_compile *c) -{ - int i, j; - int nr_curbe_regs = 0; - - /* R0, then some depth related regs: - */ - for (i = 0; i < c->key.nr_depth_regs; i++) { - c->payload_depth[i] = brw_vec8_grf(i*2, 0); - c->reg_index += 2; - } - - - /* Then a copy of our part of the CURBE entry: - */ - { - int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1; - int index = 0; - - /* XXX number of constants, or highest numbered constant? */ - assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]); - - c->prog_data.max_const = 4*nr_constants; - for (i = 0; i < nr_constants; i++) { - for (j = 0; j < 4; j++, index++) - c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, - index%8); - } - - nr_curbe_regs = 2*((4*nr_constants+15)/16); - c->reg_index += nr_curbe_regs; - } - - /* Adjust for parameter coefficients for position, which are - * currently always provided. - */ -// c->position_coef[i] = brw_vec8_grf(c->reg_index, 0); - c->reg_index += 2; - - /* Next we receive the plane coefficients for parameter - * interpolation: - */ - assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs); - for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) { - c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); - c->reg_index += 2; - } - - c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2; - c->prog_data.curb_read_length = nr_curbe_regs; - - /* That's the end of the payload, now we can start allocating registers. - */ - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; - - /* Now allocate room for the interpolated inputs and staging - * registers for the outputs: - */ - /* XXX do we want to loop over the _number_ of inputs/outputs or loop - * to the highest input/output index that's used? - * Probably the same, actually. - */ - assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs); - assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs); - for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) - for (j = 0; j < 4; j++) - c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); - - for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++) - for (j = 0; j < 4; j++) - c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); - - /* Beyond this we should only need registers for internal temporaries: - */ - c->tmp_start = c->reg_index; -} - - - - - -/* Need to interpolate fragment program inputs in as a preamble to the - * shader. A more sophisticated compiler would do this on demand, but - * we'll do it up front: - */ -void brw_wm_emit_decls(struct brw_wm_compile *c) -{ - struct tgsi_parse_context parse; - int done = 0; - - prealloc_reg(c); - - tgsi_parse_init( &parse, c->fp->program.tokens ); - - while( !done && - !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - unsigned first = decl->DeclarationRange.First; - unsigned last = decl->DeclarationRange.Last; - unsigned mask = decl->Declaration.UsageMask; /* ? */ - unsigned i; - - if (decl->Declaration.File != TGSI_FILE_INPUT) - break; - - for( i = first; i <= last; i++ ) { - switch (decl->Declaration.Interpolate) { - case TGSI_INTERPOLATE_CONSTANT: - emit_cinterp(c, i, mask); - break; - - case TGSI_INTERPOLATE_LINEAR: - emit_linterp(c, i, mask); - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - //emit_pinterp(c, i, mask); - emit_linterp(c, i, mask); - break; - } - } - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: - case TGSI_TOKEN_TYPE_INSTRUCTION: - default: - done = 1; - break; - } - } - - tgsi_parse_free (&parse); - - release_tmps(c); -} diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c deleted file mode 100644 index db75963932..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ /dev/null @@ -1,1076 +0,0 @@ - -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" - - - -static int get_scalar_dst_index(struct tgsi_full_instruction *inst) -{ - struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; - int i; - for (i = 0; i < 4; i++) - if (dst.WriteMask & (1<<i)) - break; - return i; -} - -static struct brw_reg alloc_tmp(struct brw_wm_compile *c) -{ - c->tmp_index++; - c->reg_index = MAX2(c->reg_index, c->tmp_index); - return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); -} - -static void release_tmps(struct brw_wm_compile *c) -{ - c->tmp_index = 0; -} - - -static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component ) -{ - switch (file) { - case TGSI_FILE_NULL: - return brw_null_reg(); - - case TGSI_FILE_SAMPLER: - /* Should never get here: - */ - assert (0); - return brw_null_reg(); - - case TGSI_FILE_IMMEDIATE: - /* These need a different path: - */ - assert(0); - return brw_null_reg(); - - - case TGSI_FILE_CONSTANT: - case TGSI_FILE_INPUT: - case TGSI_FILE_OUTPUT: - case TGSI_FILE_TEMPORARY: - case TGSI_FILE_ADDRESS: - return c->wm_regs[file][index][component]; - - default: - assert(0); - return brw_null_reg(); - } -} - - -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, - int component) -{ - return get_reg(c, - inst->FullDstRegisters[0].DstRegister.File, - inst->FullDstRegisters[0].DstRegister.Index, - component); -} - -static int get_swz( struct tgsi_src_register src, int index ) -{ - switch (index & 3) { - case 0: return src.SwizzleX; - case 1: return src.SwizzleY; - case 2: return src.SwizzleZ; - case 3: return src.SwizzleW; - default: return 0; - } -} - -static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) -{ - switch (index & 3) { - case 0: return src.ExtSwizzleX; - case 1: return src.ExtSwizzleY; - case 2: return src.ExtSwizzleZ; - case 3: return src.ExtSwizzleW; - default: return 0; - } -} - -static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct tgsi_full_src_register *src, - int index) -{ - struct brw_reg reg; - int component = index; - int neg = 0; - int abs = 0; - - if (src->SrcRegister.Negate) - neg = 1; - - component = get_swz(src->SrcRegister, component); - - /* Yes, there are multiple negates: - */ - switch (component & 3) { - case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; - case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; - case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; - case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; - } - - /* And multiple swizzles, fun isn't it: - */ - component = get_ext_swz(src->SrcRegisterExtSwz, component); - - /* Not handling indirect lookups yet: - */ - assert(src->SrcRegister.Indirect == 0); - - /* Don't know what dimension means: - */ - assert(src->SrcRegister.Dimension == 0); - - /* Will never handle any of this stuff: - */ - assert(src->SrcRegisterExtMod.Complement == 0); - assert(src->SrcRegisterExtMod.Bias == 0); - assert(src->SrcRegisterExtMod.Scale2X == 0); - - if (src->SrcRegisterExtMod.Absolute) - abs = 1; - - /* Another negate! This is a post-absolute negate, which we - * can't do. Need to clean the crap out of tgsi somehow. - */ - assert(src->SrcRegisterExtMod.Negate == 0); - - switch( component ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - reg = get_reg(c, - src->SrcRegister.File, - src->SrcRegister.Index, - component ); - - if (neg) - reg = negate(reg); - - if (abs) - reg = brw_abs(reg); - - break; - - /* XXX: this won't really work in the general case, but we know - * that the extended swizzle is only allowed in the SWZ - * instruction (right??), in which case using an immediate - * directly will work. - */ - case TGSI_EXTSWIZZLE_ZERO: - reg = brw_imm_f(0); - break; - - case TGSI_EXTSWIZZLE_ONE: - if (neg && !abs) - reg = brw_imm_f(-1.0); - else - reg = brw_imm_f(1.0); - break; - - default: - assert(0); - break; - } - - - return reg; -} - -static void emit_abs( struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - - int i; - struct brw_compile *p = &c->func; - brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i); - src = get_src_reg(c, &inst->FullSrcRegisters[0], i); - brw_MOV(p, dst, brw_abs(src)); /* NOTE */ - } - } - brw_set_saturate(p, 0); -} - - -static void emit_xpd(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - for (i = 0; i < 4; i++) { - unsigned i2 = (i+2)%3; - unsigned i1 = (i+1)%3; - if (mask & (1<<i)) { - struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i); - src0 = negate(get_src_reg(c, &inst->FullSrcRegisters[0], i2)); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); - brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); - brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); - brw_MAC(p, dst, src0, src1); - brw_set_saturate(p, 0); - } - } - brw_set_saturate(p, 0); -} - -static void emit_dp3(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_reg src0[3], src1[3], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); - } - - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, 0); -} - -static void emit_dp4(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); - } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, brw_null_reg(), src0[2], src1[2]); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MAC(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); -} - -static void emit_dph(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); - } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_ADD(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); -} - -static void emit_math1(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, unsigned func) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - brw_MOV(p, brw_message_reg(2), src0); - brw_math(p, - dst, - func, - ((inst->Instruction.Saturate != TGSI_SAT_NONE) - ? BRW_MATH_SATURATE_SATURATE - : BRW_MATH_SATURATE_NONE), - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - - -static void emit_alu2(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, - unsigned opcode) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - int i; - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - brw_alu2(p, opcode, dst, src0, src1); - } - } - brw_set_saturate(p, 0); -} - - -static void emit_alu1(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, - unsigned opcode) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - int i; - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); - brw_alu1(p, opcode, dst, src0); - } - } - if (inst->Instruction.Saturate != TGSI_SAT_NONE) - brw_set_saturate(p, 0); -} - - -static void emit_max(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} - -static void emit_min(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} - -static void emit_pow(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_MOV(p, brw_message_reg(3), src1); - - brw_math(p, - dst, - BRW_MATH_FUNCTION_POW, - (inst->Instruction.Saturate != TGSI_SAT_NONE - ? BRW_MATH_SATURATE_SATURATE - : BRW_MATH_SATURATE_NONE), - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_lrp(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg dst, tmp1, tmp2, src0, src1, src2; - int i; - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); - - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - - if (src1.nr == dst.nr) { - tmp1 = alloc_tmp(c); - brw_MOV(p, tmp1, src1); - } else - tmp1 = src1; - - src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); - if (src2.nr == dst.nr) { - tmp2 = alloc_tmp(c); - brw_MOV(p, tmp2, src2); - } else - tmp2 = src2; - - brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); - brw_MUL(p, brw_null_reg(), dst, tmp2); - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_MAC(p, dst, src0, tmp1); - brw_set_saturate(p, 0); - } - release_tmps(c); - } -} - -static void emit_kil(struct brw_wm_compile *c) -{ - struct brw_compile *p = &c->func; - struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK - brw_AND(p, depth, c->emit_mask_reg, depth); - brw_pop_insn_state(p); -} - -static void emit_mad(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg dst, src0, src1, src2; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); - brw_MUL(p, dst, src0, src1); - - brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); - brw_ADD(p, dst, dst, src2); - brw_set_saturate(p, 0); - } - } -} - -static void emit_sop(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst, unsigned cond) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg dst, src0, src1; - int i; - - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); - src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); - brw_CMP(p, brw_null_reg(), cond, src0, src1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - } - } - brw_pop_insn_state(p); -} - - -static void emit_ddx(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - unsigned nr, i; - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); - w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, interp[i]); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} - -static void emit_ddy(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - unsigned nr, i; - - src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); - nr = src0.nr; - w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, suboffset(interp[i], 1)); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} - -/* TODO - BIAS on SIMD8 not workind yet... -*/ -static void emit_txb(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_reg payload_reg = c->payload_depth[0]; - struct brw_reg dst[4], src[4]; - unsigned i; - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - -#if 0 - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - default: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), src[2]); - break; - } -#else - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); -#endif - - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - inst->TexSrcUnit + 1, /* surface */ - inst->TexSrcUnit, /* sampler */ - inst->FullDstRegisters[0].DstRegister.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); -#endif -} - -static void emit_tex(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ -#if 0 - struct brw_compile *p = &c->func; - struct brw_reg payload_reg = c->payload_depth[0]; - struct brw_reg dst[4], src[4]; - unsigned msg_len; - unsigned i, nr; - unsigned emit; - boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0; - - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); - -#if 0 - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; - nr = 2; - break; - default: - emit = WRITEMASK_XYZ; - nr = 3; - break; - } -#else - emit = WRITEMASK_XY; - nr = 2; -#endif - - msg_len = 1; - - for (i = 0; i < nr; i++) { - static const unsigned swz[4] = {0,1,2,2}; - if (emit & (1<<i)) - brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); - else - brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); - msg_len += 1; - } - - if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), src[2]); - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - inst->TexSrcUnit + 1, /* surface */ - inst->TexSrcUnit, /* sampler */ - inst->FullDstRegisters[0].DstRegister.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); - - if (shadow) - brw_MOV(p, dst[3], brw_imm_f(1.0)); -#endif -} - - - - - - - - -static void emit_fb_write(struct brw_wm_compile *c, - struct tgsi_full_instruction *inst) -{ - struct brw_compile *p = &c->func; - int nr = 2; - int channel; - int base_reg = 0; - - // src0 = output color - // src1 = payload_depth[0] - // src2 = output depth - // dst = ??? - - - - /* Reserve a space for AA - may not be needed: - */ - if (c->key.aa_dest_stencil_reg) - nr += 1; - - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; - - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); - } - - - /* Pass through control information: - */ - /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - - /* Send framebuffer write message: */ - brw_fb_WRITE(p, - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - 0, /* render surface always 0 */ - nr, - 0, - 1); - -} - - -static void brw_wm_emit_instruction( struct brw_wm_compile *c, - struct tgsi_full_instruction *inst ) -{ - struct brw_compile *p = &c->func; - -#if 0 - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); -#else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); -#endif - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - emit_abs(c, inst); - break; - case TGSI_OPCODE_ADD: - emit_alu2(c, inst, BRW_OPCODE_ADD); - break; - case TGSI_OPCODE_SUB: - assert(0); -// emit_alu2(c, inst, BRW_OPCODE_SUB); - break; - case TGSI_OPCODE_FRC: - emit_alu1(c, inst, BRW_OPCODE_FRC); - break; - case TGSI_OPCODE_FLR: - assert(0); -// emit_alu1(c, inst, BRW_OPCODE_FLR); - break; - case TGSI_OPCODE_LRP: - emit_lrp(c, inst); - break; - case TGSI_OPCODE_INT: - emit_alu1(c, inst, BRW_OPCODE_RNDD); - break; - case TGSI_OPCODE_MOV: - emit_alu1(c, inst, BRW_OPCODE_MOV); - break; - case TGSI_OPCODE_DP3: - emit_dp3(c, inst); - break; - case TGSI_OPCODE_DP4: - emit_dp4(c, inst); - break; - case TGSI_OPCODE_XPD: - emit_xpd(c, inst); - break; - case TGSI_OPCODE_DPH: - emit_dph(c, inst); - break; - case TGSI_OPCODE_RCP: - emit_math1(c, inst, BRW_MATH_FUNCTION_INV); - break; - case TGSI_OPCODE_RSQ: - emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); - break; - case TGSI_OPCODE_SIN: - emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); - break; - case TGSI_OPCODE_COS: - emit_math1(c, inst, BRW_MATH_FUNCTION_COS); - break; - case TGSI_OPCODE_EX2: - emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); - break; - case TGSI_OPCODE_LG2: - emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); - break; - case TGSI_OPCODE_MAX: - emit_max(c, inst); - break; - case TGSI_OPCODE_MIN: - emit_min(c, inst); - break; - case TGSI_OPCODE_DDX: - emit_ddx(c, inst); - break; - case TGSI_OPCODE_DDY: - emit_ddy(c, inst); - break; - case TGSI_OPCODE_SLT: - emit_sop(c, inst, BRW_CONDITIONAL_L); - break; - case TGSI_OPCODE_SLE: - emit_sop(c, inst, BRW_CONDITIONAL_LE); - break; - case TGSI_OPCODE_SGT: - emit_sop(c, inst, BRW_CONDITIONAL_G); - break; - case TGSI_OPCODE_SGE: - emit_sop(c, inst, BRW_CONDITIONAL_GE); - break; - case TGSI_OPCODE_SEQ: - emit_sop(c, inst, BRW_CONDITIONAL_EQ); - break; - case TGSI_OPCODE_SNE: - emit_sop(c, inst, BRW_CONDITIONAL_NEQ); - break; - case TGSI_OPCODE_MUL: - emit_alu2(c, inst, BRW_OPCODE_MUL); - break; - case TGSI_OPCODE_POW: - emit_pow(c, inst); - break; - case TGSI_OPCODE_MAD: - emit_mad(c, inst); - break; - case TGSI_OPCODE_TEX: - emit_tex(c, inst); - break; - case TGSI_OPCODE_TXB: - emit_txb(c, inst); - break; - case TGSI_OPCODE_TEXKILL: - emit_kil(c); - break; - case TGSI_OPCODE_IF: - assert(c->if_insn < MAX_IFSN); - c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); - break; - case TGSI_OPCODE_ELSE: - c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]); - break; - case TGSI_OPCODE_ENDIF: - assert(c->if_insn > 0); - brw_ENDIF(p, c->if_inst[--c->if_insn]); - break; - case TGSI_OPCODE_BGNSUB: - case TGSI_OPCODE_ENDSUB: - break; - case TGSI_OPCODE_CAL: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, - get_addr_reg(c->stack_index), - get_addr_reg(c->stack_index), brw_imm_d(4)); -// orig_inst = inst->Data; -// orig_inst->Data = &p->store[p->nr_insn]; - assert(0); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_pop_insn_state(p); - break; - - case TGSI_OPCODE_RET: -#if 0 - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_ADD(p, - get_addr_reg(c->stack_index), - get_addr_reg(c->stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_pop_insn_state(p); -#else - emit_fb_write(c, inst); -#endif - - break; - case TGSI_OPCODE_BGNFOR: - c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); - break; - case TGSI_OPCODE_BRK: - brw_BREAK(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case TGSI_OPCODE_CONT: - brw_CONT(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case TGSI_OPCODE_ENDFOR: - c->loop_insn--; - c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); - /* patch all the BREAK instructions from - last BGNFOR */ - while (c->inst0 > c->loop_inst[c->loop_insn]) { - c->inst0--; - if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { - c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; - c->inst0->bits3.if_else.pop_count = 0; - } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { - c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; - c->inst0->bits3.if_else.pop_count = 0; - } - } - break; - case TGSI_OPCODE_END: - emit_fb_write(c, inst); - break; - - default: - debug_printf("unsupported IR in fragment shader %d\n", - inst->Instruction.Opcode); - } -#if 0 - if (inst->CondUpdate) - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - else - brw_set_predicate_control(p, BRW_PREDICATE_NONE); -#endif -} - - - - - - -void brw_wm_glsl_emit(struct brw_wm_compile *c) -{ - struct tgsi_parse_context parse; - struct brw_compile *p = &c->func; - - brw_init_compile(&c->func); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - c->reg_index = 0; - c->if_insn = 0; - c->loop_insn = 0; - c->stack_index = brw_indirect(0,0); - - /* Do static register allocation and parameter interpolation: - */ - brw_wm_emit_decls( c ); - - /* Emit the actual program. All done with very direct translation, - * hopefully we can improve on this shortly... - */ - brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); - - tgsi_parse_init( &parse, c->fp->program.tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) ) - { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* already done */ - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* not handled yet */ - assert(0); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); - break; - - default: - assert( 0 ); - } - } - - tgsi_parse_free (&parse); - - /* Fix up call targets: - */ -#if 0 - { - unsigned nr_insns = c->fp->program.Base.NumInstructions; - unsigned insn, target_insn; - struct tgsi_full_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->fp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case TGSI_OPCODE_CAL: - target_insn = inst1->BranchTarget; - inst2 = &c->fp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } - } -#endif - - c->prog_data.total_grf = c->reg_index; - c->prog_data.total_scratch = 0; -} diff --git a/src/gallium/drivers/i965simple/brw_wm_iz.c b/src/gallium/drivers/i965simple/brw_wm_iz.c deleted file mode 100644 index 6c5f25bf39..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_iz.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_wm.h" - - -#undef P /* prompted depth */ -#undef C /* computed */ -#undef N /* non-promoted? */ - -#define P 0 -#define C 1 -#define N 2 - -const struct { - unsigned mode:2; - unsigned sd_present:1; - unsigned sd_to_rt:1; - unsigned dd_present:1; - unsigned ds_present:1; -} wm_iz_table[IZ_BIT_MAX] = -{ - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 1, 1, 0, 0 }, - { C, 1, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 1, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 1, 1, 0, 0 }, - { C, 1, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 0, 1, 0, 0 }, - { C, 1, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 1, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 0, 0, 1 }, - { C, 0, 0, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 1, 1, 0, 1 }, - { C, 1, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 1, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 0, 0, 1 }, - { C, 0, 0, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 1, 1, 0, 1 }, - { C, 1, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 1, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 1 }, - { N, 0, 1, 0, 1 }, - { N, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { C, 0, 0, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 } -}; - -void brw_wm_lookup_iz( unsigned line_aa, - unsigned lookup, - struct brw_wm_prog_key *key ) -{ - unsigned reg = 2; - - assert (lookup < IZ_BIT_MAX); - - if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) - key->computes_depth = 1; - - if (wm_iz_table[lookup].sd_present) { - key->source_depth_reg = reg; - reg += 2; - } - - if (wm_iz_table[lookup].sd_to_rt) - key->source_depth_to_render_target = 1; - - if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { - key->aa_dest_stencil_reg = reg; - key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && - line_aa == AA_SOMETIMES); - reg++; - } - - if (wm_iz_table[lookup].dd_present) { - key->dest_depth_reg = reg; - reg+=2; - } - - key->nr_depth_regs = (reg+1)/2; -} - diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c deleted file mode 100644 index 52b2909a65..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ /dev/null @@ -1,275 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -#include "util/u_math.h" -#include "util/u_memory.h" - - -#define COMPAREFUNC_ALWAYS 0 -#define COMPAREFUNC_NEVER 0x1 -#define COMPAREFUNC_LESS 0x2 -#define COMPAREFUNC_EQUAL 0x3 -#define COMPAREFUNC_LEQUAL 0x4 -#define COMPAREFUNC_GREATER 0x5 -#define COMPAREFUNC_NOTEQUAL 0x6 -#define COMPAREFUNC_GEQUAL 0x7 - -/* Samplers aren't strictly wm state from the hardware's perspective, - * but that is the only situation in which we use them in this driver. - */ - -static int intel_translate_shadow_compare_func(unsigned func) -{ - switch(func) { - case PIPE_FUNC_NEVER: - return COMPAREFUNC_ALWAYS; - case PIPE_FUNC_LESS: - return COMPAREFUNC_LEQUAL; - case PIPE_FUNC_LEQUAL: - return COMPAREFUNC_LESS; - case PIPE_FUNC_GREATER: - return COMPAREFUNC_GEQUAL; - case PIPE_FUNC_GEQUAL: - return COMPAREFUNC_GREATER; - case PIPE_FUNC_NOTEQUAL: - return COMPAREFUNC_EQUAL; - case PIPE_FUNC_EQUAL: - return COMPAREFUNC_NOTEQUAL; - case PIPE_FUNC_ALWAYS: - return COMPAREFUNC_NEVER; - } - - debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_NEVER; -} - -/* The brw (and related graphics cores) do not support GL_CLAMP. The - * Intel drivers for "other operating systems" implement GL_CLAMP as - * GL_CLAMP_TO_EDGE, so the same is done here. - */ -static unsigned translate_wrap_mode( int wrap ) -{ - switch( wrap ) { - case PIPE_TEX_WRAP_REPEAT: - return BRW_TEXCOORDMODE_WRAP; - case PIPE_TEX_WRAP_CLAMP: - return BRW_TEXCOORDMODE_CLAMP; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return BRW_TEXCOORDMODE_CLAMP_BORDER; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - return BRW_TEXCOORDMODE_MIRROR; - default: - return BRW_TEXCOORDMODE_WRAP; - } -} - - -static unsigned U_FIXED(float value, unsigned frac_bits) -{ - value *= (1<<frac_bits); - return value < 0 ? 0 : value; -} - -static int S_FIXED(float value, unsigned frac_bits) -{ - return value * (1<<frac_bits); -} - - -static unsigned upload_default_color( struct brw_context *brw, - const float *color ) -{ - struct brw_sampler_default_color sdc; - - COPY_4V(sdc.color, color); - - return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); -} - - -/* - */ -static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler, - unsigned sdc_gs_offset, - struct brw_sampler_state *sampler) -{ - memset(sampler, 0, sizeof(*sampler)); - - switch (pipe_sampler->min_mip_filter) { - case PIPE_TEX_FILTER_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - break; - case PIPE_TEX_FILTER_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - break; - case PIPE_TEX_FILTER_ANISO: - sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; - break; - default: - break; - } - - switch (pipe_sampler->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; - break; - default: - break; - } - /* Set Anisotropy: - */ - switch (pipe_sampler->mag_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; - break; - case PIPE_TEX_FILTER_LINEAR: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - case PIPE_TEX_FILTER_ANISO: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - break; - } - - if (pipe_sampler->max_anisotropy > 2.0) { - sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, - BRW_ANISORATIO_16); - } - - sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); - sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r); - sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t); - - /* Fulsim complains if I don't do this. Hardware doesn't mind: - */ -#if 0 - if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { - sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; - } -#endif - - /* Set shadow function: - */ - if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - /* Shadowing is "enabled" by emitting a particular sampler - * message (sample_c). So need to recompile WM program when - * shadow comparison is enabled on each/any texture unit. - */ - sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func); - } - - /* Set LOD bias: - */ - sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6); - - sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ - sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ - - /* Set BaseMipLevel, MaxLOD, MinLOD: - * - * XXX: I don't think that using firstLevel, lastLevel works, - * because we always setup the surface state as if firstLevel == - * level zero. Probably have to subtract firstLevel from each of - * these: - */ - sampler->ss0.base_level = U_FIXED(0, 1); - - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6); - - sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; -} - - - -/* All samplers must be uploaded in a single contiguous array, which - * complicates various things. However, this is still too confusing - - * FIXME: simplify all the different new texture state flags. - */ -static void upload_wm_samplers(struct brw_context *brw) -{ - unsigned unit; - unsigned sampler_count = 0; - - /* BRW_NEW_SAMPLER */ - for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers; - unit++) { - /* determine unit enable/disable by looking for a bound texture */ - if (brw->attribs.Texture[unit]) { - const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; - unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color); - - brw_update_sampler_state(sampler, - sdc_gs_offset, - &brw->wm.sampler[unit]); - - sampler_count = unit + 1; - } - } - - if (brw->wm.sampler_count != sampler_count) { - brw->wm.sampler_count = sampler_count; - brw->state.dirty.cache |= CACHE_NEW_SAMPLER; - } - - brw->wm.sampler_gs_offset = 0; - - if (brw->wm.sampler_count) - brw->wm.sampler_gs_offset = - brw_cache_data_sz(&brw->cache[BRW_SAMPLER], - brw->wm.sampler, - sizeof(struct brw_sampler_state) * brw->wm.sampler_count); -} - -const struct brw_tracked_state brw_wm_samplers = { - .dirty = { - .brw = BRW_NEW_SAMPLER, - .cache = 0 - }, - .update = upload_wm_samplers -}; - diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c deleted file mode 100644 index 37a9bf919c..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_state.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_wm.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -/*********************************************************************** - * WM unit - fragment programs and rasterization - */ -static void upload_wm_unit(struct brw_context *brw ) -{ - struct brw_wm_unit_state wm; - unsigned max_threads; - unsigned per_thread; - - if (BRW_DEBUG & DEBUG_SINGLE_THREAD) - max_threads = 0; - else - max_threads = 31; - - - memset(&wm, 0, sizeof(wm)); - - /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1; - wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; - wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; - wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; - - wm.wm5.max_threads = max_threads; - - per_thread = align(brw->wm.prog_data->total_scratch, 1024); - assert(per_thread <= 12 * 1024); - -#if 0 - if (brw->wm.prog_data->total_scratch) { - unsigned total = per_thread * (max_threads + 1); - - /* Scratch space -- just have to make sure there is sufficient - * allocated for the active program and current number of threads. - */ - brw->wm.scratch_buffer_size = total; - if (brw->wm.scratch_buffer && - brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) { - dri_bo_unreference(brw->wm.scratch_buffer); - brw->wm.scratch_buffer = NULL; - } - if (!brw->wm.scratch_buffer) { - brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr, - "wm scratch", - brw->wm.scratch_buffer_size, - 4096, DRM_BO_FLAG_MEM_TT); - } - } - /* XXX: Scratch buffers are not implemented correectly. - * - * The scratch offset to be programmed into wm is relative to the general - * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or - * the previous bmGenBuffers scheme), we get an offset relative to the - * start of framebuffer. Even before then, it was broken in other ways, - * so just fail for now if we hit that path. - */ - assert(brw->wm.prog_data->total_scratch == 0); -#endif - - /* CACHE_NEW_SURFACE */ - wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; - - /* BRW_NEW_CURBE_OFFSETS */ - wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; - - wm.thread3.urb_entry_read_offset = 0; - wm.thread1.depth_coef_urb_read_offset = 1; - wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - - /* CACHE_NEW_SAMPLER */ - wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; - wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - { - const struct brw_fragment_program *fp = brw->attribs.FragmentProgram; - - if (fp->UsesDepth) - wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - - if (fp->info.writes_z) - wm.wm5.program_computes_depth = 1; - - /* BRW_NEW_ALPHA_TEST */ - if (fp->info.uses_kill || - brw->attribs.DepthStencil->alpha.enabled) - wm.wm5.program_uses_killpixel = 1; - - wm.wm5.enable_8_pix = 1; - } - - wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ - wm.wm5.legacy_line_rast = 0; - wm.wm5.legacy_global_depth_bias = 0; - wm.wm5.early_depth_test = 1; /* never need to disable */ - wm.wm5.line_aa_region_width = 0; - wm.wm5.line_endcap_aa_region_width = 1; - - /* BRW_NEW_RASTERIZER */ - if (brw->attribs.Raster->poly_stipple_enable) - wm.wm5.polygon_stipple = 1; - -#if 0 - if (brw->attribs.Polygon->OffsetFill) { - wm.wm5.depth_offset = 1; - /* Something wierd going on with legacy_global_depth_bias, - * offset_constant, scaling and MRD. This value passes glean - * but gives some odd results elsewere (eg. the - * quad-offset-units test). - */ - wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; - - /* This is the only value that passes glean: - */ - wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; - } -#endif - - if (brw->attribs.Raster->line_stipple_enable) { - wm.wm5.line_stipple = 1; - } - - if (BRW_DEBUG & DEBUG_STATS) - wm.wm4.stats_enable = 1; - - brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); - - if (brw->wm.prog_data->total_scratch) { - /* - dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - (per_thread / 1024) - 1, - brw->wm.state_gs_offset + - ((char *)&wm.thread2 - (char *)&wm), - brw->wm.scratch_buffer); - */ - } else { - wm.thread2.scratch_space_base_pointer = 0; - } -} - -const struct brw_tracked_state brw_wm_unit = { - .dirty = { - .brw = (BRW_NEW_RASTERIZER | - BRW_NEW_ALPHA_TEST | - BRW_NEW_FS | - BRW_NEW_CURBE_OFFSETS), - - .cache = (CACHE_NEW_SURFACE | - CACHE_NEW_WM_PROG | - CACHE_NEW_SAMPLER) - }, - .update = upload_wm_unit -}; - diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c deleted file mode 100644 index b5b9e0e702..0000000000 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" - -static unsigned translate_tex_target( enum pipe_texture_target target ) -{ - switch (target) { - case PIPE_TEXTURE_1D: - return BRW_SURFACE_1D; - - case PIPE_TEXTURE_2D: - return BRW_SURFACE_2D; - - case PIPE_TEXTURE_3D: - return BRW_SURFACE_3D; - - case PIPE_TEXTURE_CUBE: - return BRW_SURFACE_CUBE; - - default: - assert(0); - return 0; - } -} - -static unsigned translate_tex_format( enum pipe_format pipe_format ) -{ - switch( pipe_format ) { - case PIPE_FORMAT_L8_UNORM: - return BRW_SURFACEFORMAT_L8_UNORM; - - case PIPE_FORMAT_I8_UNORM: - return BRW_SURFACEFORMAT_I8_UNORM; - - case PIPE_FORMAT_A8_UNORM: - return BRW_SURFACEFORMAT_A8_UNORM; - - case PIPE_FORMAT_A8L8_UNORM: - return BRW_SURFACEFORMAT_L8A8_UNORM; - - case PIPE_FORMAT_R8G8B8_UNORM: - assert(0); /* not supported for sampling */ - return BRW_SURFACEFORMAT_R8G8B8_UNORM; - - case PIPE_FORMAT_B8G8R8A8_UNORM: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - - case PIPE_FORMAT_R8G8B8A8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R5G6B5_UNORM: - return BRW_SURFACEFORMAT_B5G6R5_UNORM; - - case PIPE_FORMAT_A1R5G5B5_UNORM: - return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - - case PIPE_FORMAT_A4R4G4B4_UNORM: - return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - - case PIPE_FORMAT_YCBCR_REV: - return BRW_SURFACEFORMAT_YCRCB_NORMAL; - - case PIPE_FORMAT_YCBCR: - return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; -#if 0 - case PIPE_FORMAT_RGB_FXT1: - case PIPE_FORMAT_RGBA_FXT1: - return BRW_SURFACEFORMAT_FXT1; -#endif - - case PIPE_FORMAT_Z16_UNORM: - return BRW_SURFACEFORMAT_I16_UNORM; -#if 0 - case PIPE_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; - - case PIPE_FORMAT_RGBA_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM; - - case PIPE_FORMAT_RGBA_DXT3: - return BRW_SURFACEFORMAT_BC2_UNORM; - - case PIPE_FORMAT_RGBA_DXT5: - return BRW_SURFACEFORMAT_BC3_UNORM; - - case PIPE_FORMAT_SRGBA8: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; - case PIPE_FORMAT_SRGB_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; -#endif - - default: - assert(0); - return 0; - } -} - -static unsigned brw_buffer_offset(struct brw_context *brw, - struct pipe_buffer *buffer) -{ - return brw->winsys->get_buffer_offset(brw->winsys, - buffer, - 0); -} - -static -void brw_update_texture_surface( struct brw_context *brw, - unsigned unit ) -{ - const struct brw_texture *tObj = brw->attribs.Texture[unit]; - struct brw_surface_state surf; - - memset(&surf, 0, sizeof(surf)); - - surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(tObj->base.target); - surf.ss0.surface_format = translate_tex_format(tObj->base.format); - - /* This is ok for all textures with channel width 8bit or less: - */ -/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - - /* Updated in emit_reloc */ - surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); - - surf.ss2.mip_count = tObj->base.last_level; - surf.ss2.width = tObj->base.width[0] - 1; - surf.ss2.height = tObj->base.height[0] - 1; - - surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = 0; /* always zero */ - surf.ss3.pitch = tObj->stride - 1; - surf.ss3.depth = tObj->base.depth[0] - 1; - - surf.ss4.min_lod = 0; - - if (tObj->base.target == PIPE_TEXTURE_CUBE) { - surf.ss0.cube_pos_x = 1; - surf.ss0.cube_pos_y = 1; - surf.ss0.cube_pos_z = 1; - surf.ss0.cube_neg_x = 1; - surf.ss0.cube_neg_y = 1; - surf.ss0.cube_neg_z = 1; - } - - brw->wm.bind.surf_ss_offset[unit + 1] = - brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); -} - - - -#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) ) - - -static void upload_wm_surfaces(struct brw_context *brw ) -{ - unsigned i; - - { - struct brw_surface_state surf; - - /* BRW_NEW_FRAMEBUFFER - */ - struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ - struct brw_texture *tex = (struct brw_texture *)pipe_surface->texture; - - memset(&surf, 0, sizeof(surf)); - - if (pipe_surface != NULL) { - if (pipe_surface->block.size == 4) - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - - surf.ss0.surface_type = BRW_SURFACE_2D; - - surf.ss1.base_addr = brw_buffer_offset( brw, tex->buffer ); - - surf.ss2.width = pipe_surface->width - 1; - surf.ss2.height = pipe_surface->height - 1; - surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = 0; - surf.ss3.pitch = pipe_surface->stride - 1; - } else { - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - surf.ss0.surface_type = BRW_SURFACE_NULL; - } - - /* BRW_NEW_BLEND */ - surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable && - brw->attribs.Blend->blend_enable); - - - surf.ss0.writedisable_red = !(brw->attribs.Blend->colormask & PIPE_MASK_R); - surf.ss0.writedisable_green = !(brw->attribs.Blend->colormask & PIPE_MASK_G); - surf.ss0.writedisable_blue = !(brw->attribs.Blend->colormask & PIPE_MASK_B); - surf.ss0.writedisable_alpha = !(brw->attribs.Blend->colormask & PIPE_MASK_A); - - - - - brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); - - brw->wm.nr_surfaces = 1; - } - - - /* BRW_NEW_TEXTURE - */ - for (i = 0; i < brw->num_textures && i < brw->num_samplers; i++) { - const struct brw_texture *texUnit = brw->attribs.Texture[i]; - - if (texUnit && - texUnit->base.reference.count/*(texUnit->reference.count > 0) == really used */) { - - brw_update_texture_surface(brw, i); - - brw->wm.nr_surfaces = i+2; - } - else { - brw->wm.bind.surf_ss_offset[i+1] = 0; - } - } - - brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], - &brw->wm.bind ); -} - - -/* KW: Will find a different way to acheive this, see for example the - * state caches with relocs in the i915 swz driver. - */ -#if 0 -static void emit_reloc_wm_surfaces(struct brw_context *brw) -{ - int unit; - - if (brw->state.draw_region != NULL) { - /* Emit framebuffer relocation */ - dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - 0, - brw->wm.bind.surf_ss_offset[0] + - offsetof(struct brw_surface_state, ss1), - brw->state.draw_region->buffer); - } - - /* Emit relocations for texture buffers */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - - if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { - dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - 0, - brw->wm.bind.surf_ss_offset[unit + 1] + - offsetof(struct brw_surface_state, ss1), - intelObj->mt->region->buffer); - } - } -} -#endif - -const struct brw_tracked_state brw_wm_surfaces = { - .dirty = { - .brw = (BRW_NEW_FRAMEBUFFER | - BRW_NEW_BLEND | - BRW_NEW_TEXTURE), - .cache = 0 - }, - .update = upload_wm_surfaces, -}; diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore new file mode 100644 index 0000000000..257b72d7b2 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/.gitignore @@ -0,0 +1 @@ +lp_tile_soa.c diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index cd7b6356d2..cdf318844c 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -17,10 +17,13 @@ C_SOURCES = \ lp_bld_depth.c \ lp_bld_flow.c \ lp_bld_format_aos.c \ + lp_bld_format_query.c \ lp_bld_format_soa.c \ lp_bld_interp.c \ lp_bld_intr.c \ lp_bld_logic.c \ + lp_bld_pack.c \ + lp_bld_sample.c \ lp_bld_sample_soa.c \ lp_bld_swizzle.c \ lp_bld_struct.c \ @@ -54,4 +57,10 @@ C_SOURCES = \ lp_tile_cache.c \ lp_tile_soa.c +CPP_SOURCES = \ + lp_bld_misc.cpp + include ../../Makefile.template + +lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv + python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index f4a9a3b22e..f4410f8201 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,6 +9,13 @@ if not env.has_key('LLVM_VERSION'): env.Tool('udis86') +env.CodeGenerate( + target = 'lp_tile_soa.c', + script = 'lp_tile_soa.py', + source = ['#src/gallium/auxiliary/util/u_format.csv'], + command = 'python $SCRIPT $SOURCE > $TARGET' +) + llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ @@ -23,9 +30,13 @@ llvmpipe = env.ConvenienceLibrary( 'lp_bld_depth.c', 'lp_bld_flow.c', 'lp_bld_format_aos.c', + 'lp_bld_format_query.c', 'lp_bld_format_soa.c', 'lp_bld_interp.c', 'lp_bld_intr.c', + 'lp_bld_misc.cpp', + 'lp_bld_pack.c', + 'lp_bld_sample.c', 'lp_bld_sample_soa.c', 'lp_bld_struct.c', 'lp_bld_logic.c', @@ -68,7 +79,7 @@ env.Prepend(LIBS = [llvmpipe] + auxiliaries) env.Program( target = 'lp_test_format', - source = ['lp_test_format.c'], + source = ['lp_test_format.c', 'lp_test_main.c'], ) env.Program( diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index 31433318a7..9c59677a74 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -47,12 +47,16 @@ #include "util/u_memory.h" #include "util/u_debug.h" +#include "util/u_math.h" #include "util/u_string.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_intr.h" #include "lp_bld_logic.h" +#include "lp_bld_pack.h" +#include "lp_bld_debug.h" #include "lp_bld_arit.h" @@ -71,30 +75,28 @@ lp_build_min_simple(struct lp_build_context *bld, /* TODO: optimize the constant case */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating) { - if(type.width == 32) + if(type.width == 32 && util_cpu_caps.has_sse) intrinsic = "llvm.x86.sse.min.ps"; - if(type.width == 64) + if(type.width == 64 && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.min.pd"; } else { - if(type.width == 8 && !type.sign) + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pminu.b"; - if(type.width == 8 && type.sign) + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminsb"; - if(type.width == 16 && !type.sign) + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminuw"; - if(type.width == 16 && type.sign) + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmins.w"; - if(type.width == 32 && !type.sign) + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminud"; - if(type.width == 32 && type.sign) + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pminsd"; } } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -119,30 +121,28 @@ lp_build_max_simple(struct lp_build_context *bld, /* TODO: optimize the constant case */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { if(type.floating) { - if(type.width == 32) + if(type.width == 32 && util_cpu_caps.has_sse) intrinsic = "llvm.x86.sse.max.ps"; - if(type.width == 64) + if(type.width == 64 && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.max.pd"; } else { - if(type.width == 8 && !type.sign) + if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmaxu.b"; - if(type.width == 8 && type.sign) + if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxsb"; - if(type.width == 16 && !type.sign) + if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxuw"; - if(type.width == 16 && type.sign) + if(type.width == 16 && type.sign && util_cpu_caps.has_sse2) intrinsic = "llvm.x86.sse2.pmaxs.w"; - if(type.width == 32 && !type.sign) + if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxud"; - if(type.width == 32 && type.sign) + if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1) intrinsic = "llvm.x86.sse41.pmaxsd"; } } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -204,15 +204,14 @@ lp_build_add(struct lp_build_context *bld, if(a == bld->one || b == bld->one) return bld->one; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128 && + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -257,15 +256,14 @@ lp_build_sub(struct lp_build_context *bld, if(b == bld->one) return bld->zero; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128 && + if(util_cpu_caps.has_sse2 && + type.width * type.length == 128 && !type.floating && !type.fixed) { if(type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; if(type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } -#endif if(intrinsic) return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b); @@ -284,45 +282,6 @@ lp_build_sub(struct lp_build_context *bld, /** - * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. - */ -static LLVMValueRef -lp_build_unpack_shuffle(unsigned n, unsigned lo_hi) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - - assert(n <= LP_MAX_VECTOR_LENGTH); - assert(lo_hi < 2); - - for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { - elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); - elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); - } - - return LLVMConstVector(elems, n); -} - - -/** - * Build constant int vector of width 'n' and value 'c'. - */ -static LLVMValueRef -lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(n <= LP_MAX_VECTOR_LENGTH); - - for(i = 0; i < n; ++i) - elems[i] = LLVMConstInt(type, c, 0); - - return LLVMConstVector(elems, n); -} - - -/** * Normalized 8bit multiplication. * * - alpha plus one @@ -365,33 +324,30 @@ lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c) */ static LLVMValueRef lp_build_mul_u8n(LLVMBuilderRef builder, + struct lp_type i16_type, LLVMValueRef a, LLVMValueRef b) { - static LLVMValueRef c01 = NULL; - static LLVMValueRef c08 = NULL; - static LLVMValueRef c80 = NULL; + LLVMValueRef c8; LLVMValueRef ab; - if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01); - if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08); - if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80); + c8 = lp_build_int_const_scalar(i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 256 */ - b = LLVMBuildAdd(builder, b, c01, ""); + b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else - /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */ + /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); - ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), ""); - ab = LLVMBuildAdd(builder, ab, c80, ""); + ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); + ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); #endif - ab = LLVMBuildLShr(builder, ab, c08, ""); + ab = LLVMBuildLShr(builder, ab, c8, ""); return ab; } @@ -406,6 +362,8 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b) { const struct lp_type type = bld->type; + LLVMValueRef shift; + LLVMValueRef res; if(a == bld->zero) return bld->zero; @@ -419,53 +377,104 @@ lp_build_mul(struct lp_build_context *bld, return bld->undef; if(!type.floating && !type.fixed && type.norm) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 8 && type.length == 16) { - LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8); - LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16); - static LLVMValueRef ml = NULL; - static LLVMValueRef mh = NULL; - LLVMValueRef al, ah, bl, bh; - LLVMValueRef abl, abh; - LLVMValueRef ab; - - if(!ml) ml = lp_build_unpack_shuffle(16, 0); - if(!mh) mh = lp_build_unpack_shuffle(16, 1); - - /* PUNPCKLBW, PUNPCKHBW */ - al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, ""); - bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, ""); - ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, ""); - bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, ""); + if(type.width == 8) { + struct lp_type i16_type = lp_wider_type(type); + LLVMValueRef al, ah, bl, bh, abl, abh, ab; - /* NOP */ - al = LLVMBuildBitCast(bld->builder, al, i16x8, ""); - bl = LLVMBuildBitCast(bld->builder, bl, i16x8, ""); - ah = LLVMBuildBitCast(bld->builder, ah, i16x8, ""); - bh = LLVMBuildBitCast(bld->builder, bh, i16x8, ""); + lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah); + lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh); /* PMULLW, PSRLW, PADDW */ - abl = lp_build_mul_u8n(bld->builder, al, bl); - abh = lp_build_mul_u8n(bld->builder, ah, bh); + abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl); + abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh); - /* PACKUSWB */ - ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh); - - /* NOP */ - ab = LLVMBuildBitCast(bld->builder, ab, i8x16, ""); + ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh); return ab; } -#endif /* FIXME */ assert(0); } - if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstMul(a, b); + if(type.fixed) + shift = lp_build_int_const_scalar(type, type.width/2); + else + shift = NULL; + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) { + res = LLVMConstMul(a, b); + if(shift) { + if(type.sign) + res = LLVMConstAShr(res, shift); + else + res = LLVMConstLShr(res, shift); + } + } + else { + res = LLVMBuildMul(bld->builder, a, b, ""); + if(shift) { + if(type.sign) + res = LLVMBuildAShr(bld->builder, res, shift, ""); + else + res = LLVMBuildLShr(bld->builder, res, shift, ""); + } + } + + return res; +} + + +/** + * Small vector x scale multiplication optimization. + */ +LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b) +{ + LLVMValueRef factor; + + if(b == 0) + return bld->zero; + + if(b == 1) + return a; - return LLVMBuildMul(bld->builder, a, b, ""); + if(b == -1) + return LLVMBuildNeg(bld->builder, a, ""); + + if(b == 2 && bld->type.floating) + return lp_build_add(bld, a, a); + + if(util_is_pot(b)) { + unsigned shift = ffs(b) - 1; + + if(bld->type.floating) { +#if 0 + /* + * Power of two multiplication by directly manipulating the mantissa. + * + * XXX: This might not be always faster, it will introduce a small error + * for multiplication by zero, and it will produce wrong results + * for Inf and NaN. + */ + unsigned mantissa = lp_mantissa(bld->type); + factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); + a = LLVMBuildAdd(bld->builder, a, factor, ""); + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); + return a; +#endif + } + else { + factor = lp_build_const_scalar(bld->type, shift); + return LLVMBuildShl(bld->builder, a, factor, ""); + } + } + + factor = lp_build_const_scalar(bld->type, (double)b); + return lp_build_mul(bld, a, factor); } @@ -493,22 +502,43 @@ lp_build_div(struct lp_build_context *bld, if(LLVMIsConstant(a) && LLVMIsConstant(b)) return LLVMConstFDiv(a, b); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_mul(bld, a, lp_build_rcp(bld, b)); -#endif return LLVMBuildFDiv(bld->builder, a, b, ""); } +/** + * Linear interpolation. + * + * This also works for integer values with a few caveats. + * + * @sa http://www.stereopsis.com/doubleblend.html + */ LLVMValueRef lp_build_lerp(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, LLVMValueRef v1) { - return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0))); + LLVMValueRef delta; + LLVMValueRef res; + + delta = lp_build_sub(bld, v1, v0); + + res = lp_build_mul(bld, x, delta); + + res = lp_build_add(bld, v0, res); + + if(bld->type.fixed) + /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, + * but it will be wrong for other uses. Basically we need a more + * powerful lp_type, capable of further distinguishing the values + * interpretation from the value storage. */ + res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + + return res; } @@ -606,8 +636,7 @@ lp_build_abs(struct lp_build_context *bld, return a; } -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width*type.length == 128) { + if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) { switch(type.width) { case 8: return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a); @@ -617,7 +646,6 @@ lp_build_abs(struct lp_build_context *bld, return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a); } } -#endif return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, "")); } @@ -684,6 +712,8 @@ lp_build_round_sse41(struct lp_build_context *bld, assert(type.floating); assert(type.width*type.length == 128); + assert(lp_check_value(type, a)); + assert(util_cpu_caps.has_sse4_1); switch(type.width) { case 32: @@ -703,20 +733,45 @@ lp_build_round_sse41(struct lp_build_context *bld, LLVMValueRef -lp_build_round(struct lp_build_context *bld, +lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; assert(type.floating); + assert(lp_check_value(type, a)); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); -#endif + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} - /* FIXME */ - assert(0); - return bld->undef; + +LLVMValueRef +lp_build_round(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_iround(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } } @@ -728,13 +783,15 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); -#endif - - /* FIXME */ - assert(0); - return bld->undef; + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_ifloor(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } } @@ -745,59 +802,143 @@ lp_build_ceil(struct lp_build_context *bld, const struct lp_type type = bld->type; assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) + return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef res; + res = lp_build_iceil(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, vec_type, ""); + return res; + } +} -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); -#endif - /* FIXME */ - assert(0); - return bld->undef; +/** + * Convert to integer, through whichever rounding method that's fastest, + * typically truncating to zero. + */ +LLVMValueRef +lp_build_itrunc(struct lp_build_context *bld, + LLVMValueRef a) +{ + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + + assert(type.floating); + assert(lp_check_value(type, a)); + + return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); } LLVMValueRef -lp_build_trunc(struct lp_build_context *bld, - LLVMValueRef a) +lp_build_iround(struct lp_build_context *bld, + LLVMValueRef a) { const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; assert(type.floating); + assert(lp_check_value(type, a)); -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE); -#endif + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST); + } + else { + LLVMTypeRef vec_type = lp_build_vec_type(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef half; + + /* get sign bit */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + + /* sign * 0.5 */ + half = lp_build_const_scalar(type, 0.5); + half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); + half = LLVMBuildOr(bld->builder, sign, half, ""); + half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); + + res = LLVMBuildAdd(bld->builder, a, half, ""); + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); - /* FIXME */ - assert(0); - return bld->undef; + return res; } -/** - * Convert to integer, through whichever rounding method that's fastest, - * typically truncating to zero. - */ LLVMValueRef -lp_build_int(struct lp_build_context *bld, - LLVMValueRef a) +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a) { const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; assert(type.floating); + assert(lp_check_value(type, a)); - return LLVMBuildFPToSI(bld->builder, a, int_vec_type, ""); + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR); + } + else { + /* Take the sign bit and add it to 1 constant */ + LLVMTypeRef vec_type = lp_build_vec_type(type); + unsigned mantissa = lp_mantissa(type); + LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef sign; + LLVMValueRef offset; + + /* sign = a < 0 ? ~0 : 0 */ + sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + sign = LLVMBuildAnd(bld->builder, sign, mask, ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + + /* offset = -0.99999(9)f */ + offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = LLVMConstBitCast(offset, int_vec_type); + + /* offset = a < 0 ? -0.99999(9)f : 0.0f */ + offset = LLVMBuildAnd(bld->builder, offset, sign, ""); + offset = LLVMBuildBitCast(bld->builder, offset, vec_type, ""); + + res = LLVMBuildAdd(bld->builder, a, offset, ""); + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; } LLVMValueRef -lp_build_ifloor(struct lp_build_context *bld, - LLVMValueRef a) +lp_build_iceil(struct lp_build_context *bld, + LLVMValueRef a) { - a = lp_build_floor(bld, a); - a = lp_build_int(bld, a); - return a; + const struct lp_type type = bld->type; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMValueRef res; + + assert(type.floating); + assert(lp_check_value(type, a)); + + if(util_cpu_caps.has_sse4_1) { + res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL); + } + else { + assert(0); + res = bld->undef; + } + + res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, ""); + + return res; } @@ -837,11 +978,9 @@ lp_build_rcp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstFDiv(bld->one, a); - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) + /* FIXME: improve precision */ return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); -#endif return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } @@ -858,11 +997,8 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); - /* XXX: is this really necessary? */ -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width == 32 && type.length == 4) + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a); -#endif return lp_build_rcp(bld, lp_build_sqrt(bld, a)); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index d68a97c4b8..62be4b9aee 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -struct lp_type type; +struct lp_type; struct lp_build_context; @@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld, LLVMValueRef b); LLVMValueRef +lp_build_mul_imm(struct lp_build_context *bld, + LLVMValueRef a, + int b); + +LLVMValueRef lp_build_div(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); @@ -126,11 +131,18 @@ lp_build_trunc(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef -lp_build_int(struct lp_build_context *bld, - LLVMValueRef a); +lp_build_ifloor(struct lp_build_context *bld, + LLVMValueRef a); +LLVMValueRef +lp_build_iceil(struct lp_build_context *bld, + LLVMValueRef a); LLVMValueRef -lp_build_ifloor(struct lp_build_context *bld, +lp_build_iround(struct lp_build_context *bld, + LLVMValueRef a); + +LLVMValueRef +lp_build_itrunc(struct lp_build_context *bld, LLVMValueRef a); LLVMValueRef diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h index ffb302f736..cb8e1c7b00 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_const.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h @@ -42,7 +42,7 @@ #include <pipe/p_compiler.h> -struct lp_type type; +struct lp_type; unsigned diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c index 186cac70f6..9935209437 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -63,11 +63,13 @@ #include "util/u_debug.h" #include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_intr.h" #include "lp_bld_arit.h" +#include "lp_bld_pack.h" #include "lp_bld_conv.h" @@ -198,243 +200,6 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, /** - * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. - */ -static LLVMValueRef -lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i, j; - - assert(n <= LP_MAX_VECTOR_LENGTH); - assert(lo_hi < 2); - - /* TODO: cache results in a static table */ - - for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { - elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); - elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); - } - - return LLVMConstVector(elems, n); -} - - -/** - * Build shuffle vectors that match PACKxx instructions. - */ -static LLVMValueRef -lp_build_const_pack_shuffle(unsigned n) -{ - LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - assert(n <= LP_MAX_VECTOR_LENGTH); - - /* TODO: cache results in a static table */ - - for(i = 0; i < n; ++i) - elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); - - return LLVMConstVector(elems, n); -} - - -/** - * Expand the bit width. - * - * This will only change the number of bits the values are represented, not the - * values themselves. - */ -static void -lp_build_expand(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - LLVMValueRef src, - LLVMValueRef *dst, unsigned num_dsts) -{ - unsigned num_tmps; - unsigned i; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length == dst_type.length * num_dsts); - - num_tmps = 1; - dst[0] = src; - - while(src_type.width < dst_type.width) { - struct lp_type new_type = src_type; - LLVMTypeRef new_vec_type; - - new_type.width *= 2; - new_type.length /= 2; - new_vec_type = lp_build_vec_type(new_type); - - for(i = num_tmps; i--; ) { - LLVMValueRef zero; - LLVMValueRef shuffle_lo; - LLVMValueRef shuffle_hi; - LLVMValueRef lo; - LLVMValueRef hi; - - zero = lp_build_zero(src_type); - shuffle_lo = lp_build_const_unpack_shuffle(src_type.length, 0); - shuffle_hi = lp_build_const_unpack_shuffle(src_type.length, 1); - - /* PUNPCKLBW, PUNPCKHBW */ - lo = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_lo, ""); - hi = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_hi, ""); - - dst[2*i + 0] = LLVMBuildBitCast(builder, lo, new_vec_type, ""); - dst[2*i + 1] = LLVMBuildBitCast(builder, hi, new_vec_type, ""); - } - - src_type = new_type; - - num_tmps *= 2; - } - - assert(num_tmps == num_dsts); -} - - -/** - * Non-interleaved pack. - * - * This will move values as - * - * lo = __ l0 __ l1 __ l2 __.. __ ln - * hi = __ h0 __ h1 __ h2 __.. __ hn - * res = l0 l1 l2 .. ln h0 h1 h2 .. hn - * - * TODO: handle saturation consistently. - */ -static LLVMValueRef -lp_build_pack2(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - boolean clamped, - LLVMValueRef lo, - LLVMValueRef hi) -{ - LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); - LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); - LLVMValueRef shuffle; - LLVMValueRef res; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length * 2 == dst_type.length); - - assert(!src_type.floating); - assert(!dst_type.floating); - -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(src_type.width * src_type.length == 128) { - /* All X86 non-interleaved pack instructions all take signed inputs and - * saturate them, so saturate beforehand. */ - if(!src_type.sign && !clamped) { - struct lp_build_context bld; - unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; - LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); - lp_build_context_init(&bld, builder, src_type); - lo = lp_build_min(&bld, lo, dst_max); - hi = lp_build_min(&bld, hi, dst_max); - } - - switch(src_type.width) { - case 32: - if(dst_type.sign) - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); - else - /* PACKUSDW is the only instrinsic with a consistent signature */ - return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); - break; - - case 16: - if(dst_type.sign) - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); - else - res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); - break; - - default: - assert(0); - return LLVMGetUndef(dst_vec_type); - break; - } - - res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); - return res; - } -#endif - - lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); - hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); - - shuffle = lp_build_const_pack_shuffle(dst_type.length); - - res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, ""); - - return res; -} - - -/** - * Truncate the bit width. - * - * TODO: Handle saturation consistently. - */ -static LLVMValueRef -lp_build_pack(LLVMBuilderRef builder, - struct lp_type src_type, - struct lp_type dst_type, - boolean clamped, - const LLVMValueRef *src, unsigned num_srcs) -{ - LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; - unsigned i; - - /* Register width must remain constant */ - assert(src_type.width * src_type.length == dst_type.width * dst_type.length); - - /* We must not loose or gain channels. Only precision */ - assert(src_type.length * num_srcs == dst_type.length); - - for(i = 0; i < num_srcs; ++i) - tmp[i] = src[i]; - - while(src_type.width > dst_type.width) { - struct lp_type new_type = src_type; - - new_type.width /= 2; - new_type.length *= 2; - - /* Take in consideration the sign changes only in the last step */ - if(new_type.width == dst_type.width) - new_type.sign = dst_type.sign; - - num_srcs /= 2; - - for(i = 0; i < num_srcs; ++i) - tmp[i] = lp_build_pack2(builder, src_type, new_type, clamped, - tmp[2*i + 0], tmp[2*i + 1]); - - src_type = new_type; - } - - assert(num_srcs == 1); - - return tmp[0]; -} - - -/** * Generic type conversion. * * TODO: Take a precision argument, or even better, add a new precision member @@ -573,7 +338,7 @@ lp_build_conv(LLVMBuilderRef builder, if(tmp_type.width < dst_type.width) { assert(num_tmps == 1); - lp_build_expand(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); + lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts); tmp_type.width = dst_type.width; tmp_type.length = dst_type.length; num_tmps = num_dsts; @@ -693,7 +458,7 @@ lp_build_conv_mask(LLVMBuilderRef builder, } else if(src_type.width < dst_type.width) { assert(num_srcs == 1); - lp_build_expand(builder, src_type, dst_type, src[0], dst, num_dsts); + lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts); } else { assert(num_srcs == num_dsts); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h index ca378804d2..948e68fae4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -struct lp_type type; +struct lp_type; LLVMValueRef diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 21c665c4d4..98ec1cb1b9 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -210,7 +210,4 @@ lp_build_depth_test(LLVMBuilderRef builder, dst = lp_build_select(&bld, z_bitmask, src, dst); LLVMBuildStore(builder, dst, dst_ptr); } - - /* FIXME */ - assert(!state->occlusion_count); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h index 6d3f692619..970bee379f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h @@ -25,8 +25,8 @@ * **************************************************************************/ -#ifndef LP_BLD_H -#define LP_BLD_H +#ifndef LP_BLD_FORMAT_H +#define LP_BLD_FORMAT_H /** @@ -42,62 +42,34 @@ struct util_format_description; struct lp_type; -/** - * Unpack a pixel into its RGBA components. - * - * @param packed integer. - * - * @return RGBA in a 4 floats vector. - */ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef packed); +boolean +lp_format_is_rgba8(const struct util_format_description *desc); -/** - * Pack a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ -LLVMValueRef -lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef rgba); +void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled); -/** - * Load a pixel into its RGBA components. - * - * @param ptr value with the pointer to the packed pixel. Pointer type is - * irrelevant. - * - * @return RGBA in a 4 floats vector. - */ LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr); +lp_build_unpack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef packed); -/** - * Store a pixel. - * - * @param rgba 4 float vector with the unpacked components. - */ -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba); +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed); + LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets); +lp_build_pack_rgba_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + LLVMValueRef rgba); void @@ -108,12 +80,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, LLVMValueRef *rgba); -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba); - -#endif /* !LP_BLD_H */ +#endif /* !LP_BLD_FORMAT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index b9b5d84bed..5836e0173f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -25,18 +25,39 @@ * **************************************************************************/ +/** + * @file + * AoS pixel format manipulation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "util/u_cpu_detect.h" #include "util/u_format.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_logic.h" +#include "lp_bld_swizzle.h" #include "lp_bld_format.h" +/** + * Unpack a single pixel into its RGBA components. + * + * @param packed integer. + * + * @return RGBA in a 4 floats vector. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef packed) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; @@ -49,8 +70,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i; - desc = util_format_description(format); - /* FIXME: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); @@ -151,12 +170,130 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, } +/** + * Take a vector with packed pixels and unpack into a rgba8 vector. + * + * Formats with bit depth smaller than 32bits are accepted, but they must be + * padded to 32bits. + */ +LLVMValueRef +lp_build_unpack_rgba8_aos(LLVMBuilderRef builder, + const struct util_format_description *desc, + struct lp_type type, + LLVMValueRef packed) +{ + struct lp_build_context bld; + bool rgba8; + LLVMValueRef res; + unsigned i; + + lp_build_context_init(&bld, builder, type); + + /* FIXME: Support more formats */ + assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(desc->block.width == 1); + assert(desc->block.height == 1); + assert(desc->block.bits <= 32); + + assert(!type.floating); + assert(!type.fixed); + assert(type.norm); + assert(type.width == 8); + assert(type.length % 4 == 0); + + rgba8 = TRUE; + for(i = 0; i < 4; ++i) { + assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED || + desc->channel[i].type == UTIL_FORMAT_TYPE_VOID); + if(desc->channel[0].size != 8) + rgba8 = FALSE; + } + + if(rgba8) { + /* + * The pixel is already in a rgba8 format variant. All it is necessary + * is to swizzle the channels. + */ + + unsigned char swizzles[4]; + boolean zeros[4]; /* bitwise AND mask */ + boolean ones[4]; /* bitwise OR mask */ + boolean swizzles_needed = FALSE; + boolean zeros_needed = FALSE; + boolean ones_needed = FALSE; + + for(i = 0; i < 4; ++i) { + enum util_format_swizzle swizzle = desc->swizzle[i]; + + /* Initialize with the no-op case */ + swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i; + zeros[i] = TRUE; + ones[i] = FALSE; + + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_X: + case UTIL_FORMAT_SWIZZLE_Y: + case UTIL_FORMAT_SWIZZLE_Z: + case UTIL_FORMAT_SWIZZLE_W: + if(swizzle != swizzles[i]) { + swizzles[i] = swizzle; + swizzles_needed = TRUE; + } + break; + case UTIL_FORMAT_SWIZZLE_0: + zeros[i] = FALSE; + zeros_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_1: + ones[i] = TRUE; + ones_needed = TRUE; + break; + case UTIL_FORMAT_SWIZZLE_NONE: + assert(0); + break; + } + } + + res = packed; + + if(swizzles_needed) + res = lp_build_swizzle1_aos(&bld, res, swizzles); + + if(zeros_needed) { + /* Mask out zero channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, zeros); + res = LLVMBuildAnd(builder, res, mask, ""); + } + + if(ones_needed) { + /* Or one channels */ + LLVMValueRef mask = lp_build_const_mask_aos(type, ones); + res = LLVMBuildOr(builder, res, mask, ""); + } + } + else { + /* FIXME */ + assert(0); + res = lp_build_undef(type); + } + + return res; +} + + +/** + * Pack a single pixel. + * + * @param rgba 4 float vector with the unpacked components. + * + * XXX: This is mostly for reference and testing -- operating a single pixel at + * a time is rarely if ever needed. + */ LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, + const struct util_format_description *desc, LLVMValueRef rgba) { - const struct util_format_description *desc; LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; @@ -167,8 +304,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, unsigned shift; unsigned i, j; - desc = util_format_description(format); - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); assert(desc->block.width == 1); assert(desc->block.height == 1); @@ -247,57 +382,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, return packed; } - - -LLVMValueRef -lp_build_load_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - /* FIXME: Support more formats */ - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - assert(desc->block.bits <= 32); - - type = LLVMIntType(desc->block.bits); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - packed = LLVMBuildLoad(builder, ptr, ""); - - return lp_build_unpack_rgba_aos(builder, format, packed); -} - - -void -lp_build_store_rgba_aos(LLVMBuilderRef builder, - enum pipe_format format, - LLVMValueRef ptr, - LLVMValueRef rgba) -{ - const struct util_format_description *desc; - LLVMTypeRef type; - LLVMValueRef packed; - - desc = util_format_description(format); - - assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(desc->block.width == 1); - assert(desc->block.height == 1); - - type = LLVMIntType(desc->block.bits); - - packed = lp_build_pack_rgba_aos(builder, format, rgba); - - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), ""); - - LLVMBuildStore(builder, packed, ptr); -} - diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c new file mode 100644 index 0000000000..f3832d07ff --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Utility functions to make assertions about formats. + * + * This module centralizes most of logic used when determining what algorithm + * is most suitable (i.e., most efficient yet correct) for a given format. + * + * It might be possible to move some of these functions to u_format module, + * but since tiny differences in the format my render it more/less + * appropriate to a given algorithm it is impossible to make any long term + * guarantee about the semantics of these functions. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_format.h" + +#include "lp_bld_format.h" + + +/** + * Whether this format is a 4 rgba8 variant + */ +boolean +lp_format_is_rgba8(const struct util_format_description *desc) +{ + unsigned chan; + + if(desc->block.width != 1 || + desc->block.height != 1 || + desc->block.bits != 32) + return FALSE; + + for(chan = 0; chan < 4; ++chan) { + if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED && + desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) + return FALSE; + if(desc->channel[chan].size != 8) + return FALSE; + } + + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c index b5ff434e1a..64151d169d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c @@ -34,66 +34,17 @@ #include "lp_bld_format.h" -/** - * Gather elements from scatter positions in memory into a single vector. - * - * @param src_width src element width - * @param dst_width result element width (source will be expanded to fit) - * @param length length of the offsets, - * @param base_ptr base pointer, should be a i8 pointer type. - * @param offsets vector with offsets - */ -LLVMValueRef -lp_build_gather(LLVMBuilderRef builder, - unsigned length, - unsigned src_width, - unsigned dst_width, - LLVMValueRef base_ptr, - LLVMValueRef offsets) -{ - LLVMTypeRef src_type = LLVMIntType(src_width); - LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); - LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); - LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); - LLVMValueRef res; - unsigned i; - - res = LLVMGetUndef(dst_vec_type); - for(i = 0; i < length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef elem_offset; - LLVMValueRef elem_ptr; - LLVMValueRef elem; - - elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); - elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); - elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); - elem = LLVMBuildLoad(builder, elem_ptr, ""); - - assert(src_width <= dst_width); - if(src_width > dst_width) - elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); - if(src_width < dst_width) - elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); - - res = LLVMBuildInsertElement(builder, res, elem, index, ""); - } - - return res; -} - - static LLVMValueRef -lp_build_format_swizzle(struct lp_type type, - const LLVMValueRef *inputs, - enum util_format_swizzle swizzle) +lp_build_format_swizzle_chan_soa(struct lp_type type, + const LLVMValueRef *unswizzled, + enum util_format_swizzle swizzle) { switch (swizzle) { case UTIL_FORMAT_SWIZZLE_X: case UTIL_FORMAT_SWIZZLE_Y: case UTIL_FORMAT_SWIZZLE_Z: case UTIL_FORMAT_SWIZZLE_W: - return inputs[swizzle]; + return unswizzled[swizzle]; case UTIL_FORMAT_SWIZZLE_0: return lp_build_zero(type); case UTIL_FORMAT_SWIZZLE_1: @@ -108,6 +59,28 @@ lp_build_format_swizzle(struct lp_type type, void +lp_build_format_swizzle_soa(const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef *unswizzled, + LLVMValueRef *swizzled) +{ + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + enum util_format_swizzle swizzle = format_desc->swizzle[0]; + LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + swizzled[2] = swizzled[1] = swizzled[0] = depth; + swizzled[3] = lp_build_one(type); + } + else { + unsigned chan; + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle = format_desc->swizzle[chan]; + swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle); + } + } +} + + +void lp_build_unpack_rgba_soa(LLVMBuilderRef builder, const struct util_format_description *format_desc, struct lp_type type, @@ -119,7 +92,9 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, unsigned chan; /* FIXME: Support more formats */ - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH || + (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY && + format_desc->block.bits == format_desc->channel[0].size)); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); assert(format_desc->block.bits <= 32); @@ -170,39 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, start = stop; } - if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - enum util_format_swizzle swizzle = format_desc->swizzle[0]; - LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle); - rgba[2] = rgba[1] = rgba[0] = depth; - rgba[3] = lp_build_one(type); - } - else { - for (chan = 0; chan < 4; ++chan) { - enum util_format_swizzle swizzle = format_desc->swizzle[chan]; - rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle); - } - } -} - - -void -lp_build_load_rgba_soa(LLVMBuilderRef builder, - const struct util_format_description *format_desc, - struct lp_type type, - LLVMValueRef base_ptr, - LLVMValueRef offsets, - LLVMValueRef *rgba) -{ - LLVMValueRef packed; - - assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH); - assert(format_desc->block.width == 1); - assert(format_desc->block.height == 1); - assert(format_desc->block.bits <= 32); - - packed = lp_build_gather(builder, - type.length, format_desc->block.bits, type.width, - base_ptr, offsets); - - lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba); + lp_build_format_swizzle_soa(format_desc, type, inputs, rgba); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 338dbca6d1..818c0e943e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -109,32 +109,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld, /** - * Small vector x scale multiplication optimization. - * - * TODO: Should be elsewhere. - */ -static LLVMValueRef -coeff_multiply(struct lp_build_interp_soa_context *bld, - LLVMValueRef coeff, - int step) -{ - LLVMValueRef factor; - - switch(step) { - case 0: - return bld->base.zero; - case 1: - return coeff; - case 2: - return lp_build_add(&bld->base, coeff, coeff); - default: - factor = lp_build_const_scalar(bld->base.type, (double)step); - return lp_build_mul(&bld->base, coeff, factor); - } -} - - -/** * Multiply the dadx and dady with the xstep and ystep respectively. */ static void @@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld) if (mode != TGSI_INTERPOLATE_CONSTANT) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { if(mask & (1 << chan)) { - bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep); - bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep); + bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep); + bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep); } } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c index 6b6f820769..db22a8028a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c @@ -33,6 +33,8 @@ */ +#include "util/u_cpu_detect.h" + #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_intr.h" @@ -65,7 +67,7 @@ lp_build_cmp(struct lp_build_context *bld, #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { - if(type.floating) { + if(type.floating && util_cpu_caps.has_sse) { LLVMValueRef args[3]; unsigned cc; boolean swap; @@ -114,7 +116,7 @@ lp_build_cmp(struct lp_build_context *bld, res = LLVMBuildBitCast(bld->builder, res, int_vec_type, ""); return res; } - else { + else if(util_cpu_caps.has_sse2) { static const struct { unsigned swap:1; unsigned eq:1; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h index a4ee7723b5..d67500ef70 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h @@ -42,7 +42,7 @@ #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ -struct lp_type type; +struct lp_type; struct lp_build_context; diff --git a/src/gallium/drivers/i965simple/brw_batch.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp index 5f5932a488..c9acaf1f16 100644 --- a/src/gallium/drivers/i965simple/brw_batch.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,42 +18,45 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ -#ifndef BRW_BATCH_H -#define BRW_BATCH_H -#include "brw_winsys.h" +#include "llvm/Config/config.h" -#define BATCH_LOCALS +#include "pipe/p_config.h" -#define INTEL_BATCH_NO_CLIPRECTS 0x1 -#define INTEL_BATCH_CLIPRECTS 0x2 +#include "lp_bld_misc.h" -#define BEGIN_BATCH( dwords, relocs ) \ - brw->winsys->batch_start(brw->winsys, dwords, relocs) -#define OUT_BATCH( dword ) \ - brw->winsys->batch_dword(brw->winsys, dword) +#ifndef LLVM_NATIVE_ARCH -#define OUT_RELOC( buf, flags, delta ) \ - brw->winsys->batch_reloc(brw->winsys, buf, flags, delta) +namespace llvm { + extern void LinkInJIT(); +} -#define ADVANCE_BATCH() \ - brw->winsys->batch_end( brw->winsys ) -/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer. - */ -#define FLUSH_BATCH(fence) do { \ - brw->winsys->batch_flush(brw->winsys, fence); \ - brw->hardware_dirty = ~0; \ -} while (0) +void +LLVMLinkInJIT(void) +{ + llvm::LinkInJIT(); +} + + +extern "C" int X86TargetMachineModule; + + +void +LLVMInitializeNativeTarget(void) +{ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + X86TargetMachineModule = 1; +#endif +} -#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s))) #endif diff --git a/src/gallium/drivers/i965simple/brw_screen.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h index d3c70387e6..51a84c5e25 100644 --- a/src/gallium/drivers/i965simple/brw_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * + * Copyright 2009 VMware, Inc. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,27 +10,24 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#ifndef BRW_SCREEN_H -#define BRW_SCREEN_H - - -#include "pipe/p_screen.h" +#ifndef LP_BLD_MISC_H +#define LP_BLD_MISC_H #ifdef __cplusplus @@ -38,31 +35,16 @@ extern "C" { #endif -/** - * Subclass of pipe_screen - */ -struct brw_screen -{ - struct pipe_screen screen; - - uint pci_id; -}; - - -/** cast wrapper */ -static INLINE struct brw_screen * -brw_screen(struct pipe_screen *pscreen) -{ - return (struct brw_screen *) pscreen; -} +void +LLVMLinkInJIT(void); - -extern struct pipe_screen * -brw_create_screen(struct pipe_winsys *winsys, uint pci_id); +void +LLVMInitializeNativeTarget(void); #ifdef __cplusplus } #endif -#endif /* BRW_SCREEN_H */ + +#endif /* !LP_BLD_MISC_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c new file mode 100644 index 0000000000..bc360ad77a --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c @@ -0,0 +1,418 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Helper functions for packing/unpacking. + * + * Pack/unpacking is necessary for conversion between types of different + * bit width. + * + * They are also commonly used when an computation needs higher + * precision for the intermediate values. For example, if one needs the + * function: + * + * c = compute(a, b); + * + * to use more precision for intermediate results then one should implement it + * as: + * + * LLVMValueRef + * compute(LLVMBuilderRef builder struct lp_type type, LLVMValueRef a, LLVMValueRef b) + * { + * struct lp_type wide_type = lp_wider_type(type); + * LLVMValueRef al, ah, bl, bh, cl, ch, c; + * + * lp_build_unpack2(builder, type, wide_type, a, &al, &ah); + * lp_build_unpack2(builder, type, wide_type, b, &bl, &bh); + * + * cl = compute_half(al, bl); + * ch = compute_half(ah, bh); + * + * c = lp_build_pack2(bld->builder, wide_type, type, cl, ch); + * + * return c; + * } + * + * where compute_half() would do the computation for half the elements with + * twice the precision. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_debug.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_intr.h" +#include "lp_bld_arit.h" +#include "lp_bld_pack.h" + + +/** + * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions. + */ +static LLVMValueRef +lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(n <= LP_MAX_VECTOR_LENGTH); + assert(lo_hi < 2); + + /* TODO: cache results in a static table */ + + for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) { + elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0); + elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0); + } + + return LLVMConstVector(elems, n); +} + + +/** + * Build shuffle vectors that match PACKxx instructions. + */ +static LLVMValueRef +lp_build_const_pack_shuffle(unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for(i = 0; i < n; ++i) + elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0); + + return LLVMConstVector(elems, n); +} + + +/** + * Interleave vector elements. + * + * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions. + */ +LLVMValueRef +lp_build_interleave2(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a, + LLVMValueRef b, + unsigned lo_hi) +{ + LLVMValueRef shuffle; + + shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi); + + return LLVMBuildShuffleVector(builder, a, b, shuffle, ""); +} + + +/** + * Double the bit width. + * + * This will only change the number of bits the values are represented, not the + * values themselves. + */ +void +lp_build_unpack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst_lo, + LLVMValueRef *dst_hi) +{ + LLVMValueRef msb; + LLVMTypeRef dst_vec_type; + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(dst_type.width == src_type.width * 2); + assert(dst_type.length * 2 == src_type.length); + + if(dst_type.sign && src_type.sign) { + /* Replicate the sign bit in the most significant bits */ + msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); + } + else + /* Most significant bits always zero */ + msb = lp_build_zero(src_type); + + /* Interleave bits */ + if(util_cpu_caps.little_endian) { + *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0); + *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1); + } + else { + *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0); + *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1); + } + + /* Cast the result into the new type (twice as wide) */ + + dst_vec_type = lp_build_vec_type(dst_type); + + *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, ""); + *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, ""); +} + + +/** + * Expand the bit width. + * + * This will only change the number of bits the values are represented, not the + * values themselves. + */ +void +lp_build_unpack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst, unsigned num_dsts) +{ + unsigned num_tmps; + unsigned i; + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length == dst_type.length * num_dsts); + + num_tmps = 1; + dst[0] = src; + + while(src_type.width < dst_type.width) { + struct lp_type tmp_type = src_type; + + tmp_type.width *= 2; + tmp_type.length /= 2; + + for(i = num_tmps; i--; ) { + lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]); + } + + src_type = tmp_type; + + num_tmps *= 2; + } + + assert(num_tmps == num_dsts); +} + + +/** + * Non-interleaved pack. + * + * This will move values as + * + * lo = __ l0 __ l1 __ l2 __.. __ ln + * hi = __ h0 __ h1 __ h2 __.. __ hn + * res = l0 l1 l2 .. ln h0 h1 h2 .. hn + * + * This will only change the number of bits the values are represented, not the + * values themselves. + * + * It is assumed the values are already clamped into the destination type range. + * Values outside that range will produce undefined results. Use + * lp_build_packs2 instead. + */ +LLVMValueRef +lp_build_pack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi) +{ + LLVMTypeRef src_vec_type = lp_build_vec_type(src_type); + LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type); + LLVMValueRef shuffle; + LLVMValueRef res; + + dst_vec_type = lp_build_vec_type(dst_type); + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(src_type.width == dst_type.width * 2); + assert(src_type.length * 2 == dst_type.length); + + if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) { + switch(src_type.width) { + case 32: + if(dst_type.sign) { + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi); + } + else { + if (util_cpu_caps.has_sse4_1) { + /* PACKUSDW is the only instrinsic with a consistent signature */ + return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi); + } + else { + assert(0); + return LLVMGetUndef(dst_vec_type); + } + } + break; + + case 16: + if(dst_type.sign) + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi); + else + res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi); + break; + + default: + assert(0); + return LLVMGetUndef(dst_vec_type); + break; + } + + res = LLVMBuildBitCast(builder, res, dst_vec_type, ""); + return res; + } + + lo = LLVMBuildBitCast(builder, lo, dst_vec_type, ""); + hi = LLVMBuildBitCast(builder, hi, dst_vec_type, ""); + + shuffle = lp_build_const_pack_shuffle(dst_type.length); + + res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, ""); + + return res; +} + + + +/** + * Non-interleaved pack and saturate. + * + * Same as lp_build_pack2 but will saturate values so that they fit into the + * destination type. + */ +LLVMValueRef +lp_build_packs2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi) +{ + boolean clamp; + + assert(!src_type.floating); + assert(!dst_type.floating); + assert(src_type.sign == dst_type.sign); + assert(src_type.width == dst_type.width * 2); + assert(src_type.length * 2 == dst_type.length); + + clamp = TRUE; + + /* All X86 SSE non-interleaved pack instructions take signed inputs and + * saturate them, so no need to clamp for those cases. */ + if(util_cpu_caps.has_sse2 && + src_type.width * src_type.length == 128 && + src_type.sign) + clamp = FALSE; + + if(clamp) { + struct lp_build_context bld; + unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; + LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); + lp_build_context_init(&bld, builder, src_type); + lo = lp_build_min(&bld, lo, dst_max); + hi = lp_build_min(&bld, hi, dst_max); + /* FIXME: What about lower bound? */ + } + + return lp_build_pack2(builder, src_type, dst_type, lo, hi); +} + + +/** + * Truncate the bit width. + * + * TODO: Handle saturation consistently. + */ +LLVMValueRef +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs) +{ + LLVMValueRef (*pack2)(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length); + + if(clamped) + pack2 = &lp_build_pack2; + else + pack2 = &lp_build_packs2; + + for(i = 0; i < num_srcs; ++i) + tmp[i] = src[i]; + + while(src_type.width > dst_type.width) { + struct lp_type tmp_type = src_type; + + tmp_type.width /= 2; + tmp_type.length *= 2; + + /* Take in consideration the sign changes only in the last step */ + if(tmp_type.width == dst_type.width) + tmp_type.sign = dst_type.sign; + + num_srcs /= 2; + + for(i = 0; i < num_srcs; ++i) + tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]); + + src_type = tmp_type; + } + + assert(num_srcs == 1); + + return tmp[0]; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/drivers/llvmpipe/lp_bld_pack.h new file mode 100644 index 0000000000..fb2a34984a --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for packing/unpacking conversions. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#ifndef LP_BLD_PACK_H +#define LP_BLD_PACK_H + + +#include <llvm-c/Core.h> + + +struct lp_type; + + +LLVMValueRef +lp_build_interleave2(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a, + LLVMValueRef b, + unsigned lo_hi); + + +void +lp_build_unpack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst_lo, + LLVMValueRef *dst_hi); + + +void +lp_build_unpack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef src, + LLVMValueRef *dst, unsigned num_dsts); + + +LLVMValueRef +lp_build_packs2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + + +LLVMValueRef +lp_build_pack2(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef lo, + LLVMValueRef hi); + + +LLVMValueRef +lp_build_pack(LLVMBuilderRef builder, + struct lp_type src_type, + struct lp_type dst_type, + boolean clamped, + const LLVMValueRef *src, unsigned num_srcs); + + +#endif /* !LP_BLD_PACK_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c new file mode 100644 index 0000000000..4d272bea87 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Texture sampling -- common code. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "lp_bld_debug.h" +#include "lp_bld_const.h" +#include "lp_bld_arit.h" +#include "lp_bld_type.h" +#include "lp_bld_format.h" +#include "lp_bld_sample.h" + + +void +lp_sampler_static_state(struct lp_sampler_static_state *state, + const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler) +{ + memset(state, 0, sizeof *state); + + if(!texture) + return; + + if(!sampler) + return; + + state->format = texture->format; + state->target = texture->target; + state->pot_width = util_is_pot(texture->width[0]); + state->pot_height = util_is_pot(texture->height[0]); + state->pot_depth = util_is_pot(texture->depth[0]); + + state->wrap_s = sampler->wrap_s; + state->wrap_t = sampler->wrap_t; + state->wrap_r = sampler->wrap_r; + state->min_img_filter = sampler->min_img_filter; + state->min_mip_filter = sampler->min_mip_filter; + state->mag_img_filter = sampler->mag_img_filter; + if(sampler->compare_mode) { + state->compare_mode = sampler->compare_mode; + state->compare_func = sampler->compare_func; + } + state->normalized_coords = sampler->normalized_coords; + state->prefilter = sampler->prefilter; +} + + +/** + * Gather elements from scatter positions in memory into a single vector. + * + * @param src_width src element width + * @param dst_width result element width (source will be expanded to fit) + * @param length length of the offsets, + * @param base_ptr base pointer, should be a i8 pointer type. + * @param offsets vector with offsets + */ +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMTypeRef src_type = LLVMIntType(src_width); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); + LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); + LLVMValueRef res; + unsigned i; + + res = LLVMGetUndef(dst_vec_type); + for(i = 0; i < length; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef elem_offset; + LLVMValueRef elem_ptr; + LLVMValueRef elem; + + elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); + elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); + elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, elem_ptr, ""); + + assert(src_width <= dst_width); + if(src_width > dst_width) + elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); + if(src_width < dst_width) + elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); + + res = LLVMBuildInsertElement(builder, res, elem, index, ""); + } + + return res; +} + + +/** + * Compute the offset of a pixel. + * + * x, y, y_stride are vectors + */ +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef x_stride; + LLVMValueRef offset; + + x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); + + if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + LLVMValueRef x_lo, x_hi; + LLVMValueRef y_lo, y_hi; + LLVMValueRef x_stride_lo, x_stride_hi; + LLVMValueRef y_stride_lo, y_stride_hi; + LLVMValueRef x_offset_lo, x_offset_hi; + LLVMValueRef y_offset_lo, y_offset_hi; + LLVMValueRef offset_lo, offset_hi; + + x_lo = LLVMBuildAnd(bld->builder, x, bld->one, ""); + y_lo = LLVMBuildAnd(bld->builder, y, bld->one, ""); + + x_hi = LLVMBuildLShr(bld->builder, x, bld->one, ""); + y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); + + x_stride_lo = x_stride; + y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); + + x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); + y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); + + x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); + y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo); + offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo); + + x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi); + y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi); + offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi); + + offset = lp_build_add(bld, offset_hi, offset_lo); + } + else { + LLVMValueRef x_offset; + LLVMValueRef y_offset; + + x_offset = lp_build_mul(bld, x, x_stride); + y_offset = lp_build_mul(bld, y, y_stride); + + offset = lp_build_add(bld, x_offset, y_offset); + } + + return offset; +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h index 403d0e4836..8cb8210ca7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h @@ -40,7 +40,9 @@ struct pipe_texture; struct pipe_sampler_state; +struct util_format_description; struct lp_type; +struct lp_build_context; /** @@ -119,6 +121,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, const struct pipe_sampler_state *sampler); +LLVMValueRef +lp_build_gather(LLVMBuilderRef builder, + unsigned length, + unsigned src_width, + unsigned dst_width, + LLVMValueRef base_ptr, + LLVMValueRef offsets); + + +LLVMValueRef +lp_build_sample_offset(struct lp_build_context *bld, + const struct util_format_description *format_desc, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr); + + void lp_build_sample_soa(LLVMBuilderRef builder, const struct lp_sampler_static_state *static_state, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 8ca1be6f1b..47b68b71e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -27,7 +27,7 @@ /** * @file - * Texture sampling. + * Texture sampling -- SoA. * * @author Jose Fonseca <jfonseca@vmware.com> */ @@ -35,54 +35,23 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "util/u_debug.h" +#include "util/u_debug_dump.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_conv.h" #include "lp_bld_arit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" +#include "lp_bld_pack.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" -void -lp_sampler_static_state(struct lp_sampler_static_state *state, - const struct pipe_texture *texture, - const struct pipe_sampler_state *sampler) -{ - memset(state, 0, sizeof *state); - - if(!texture) - return; - - if(!sampler) - return; - - state->format = texture->format; - state->target = texture->target; - state->pot_width = util_is_pot(texture->width[0]); - state->pot_height = util_is_pot(texture->height[0]); - state->pot_depth = util_is_pot(texture->depth[0]); - - state->wrap_s = sampler->wrap_s; - state->wrap_t = sampler->wrap_t; - state->wrap_r = sampler->wrap_r; - state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; - state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; - state->compare_func = sampler->compare_func; - } - state->normalized_coords = sampler->normalized_coords; - state->prefilter = sampler->prefilter; -} - - - /** * Keep all information for sampling code generation in a single place. */ @@ -111,66 +80,61 @@ struct lp_build_sample_context static void -lp_build_sample_texel(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_ptr, - LLVMValueRef *texel) +lp_build_sample_texel_soa(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) { - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef x_stride; LLVMValueRef offset; + LLVMValueRef packed; + + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); + + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); + + packed = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); + + lp_build_unpack_rgba_soa(bld->builder, + bld->format_desc, + bld->texel_type, + packed, texel); +} - x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8); - - if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - LLVMValueRef x_lo, x_hi; - LLVMValueRef y_lo, y_hi; - LLVMValueRef x_stride_lo, x_stride_hi; - LLVMValueRef y_stride_lo, y_stride_hi; - LLVMValueRef x_offset_lo, x_offset_hi; - LLVMValueRef y_offset_lo, y_offset_hi; - LLVMValueRef offset_lo, offset_hi; - - x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, ""); - y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, ""); - - x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, ""); - y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, ""); - - x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8); - - x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8); - y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, ""); - - x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo); - y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo); - offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo); - - x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi); - y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi); - offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi); - offset = lp_build_add(int_coord_bld, offset_hi, offset_lo); - } - else { - LLVMValueRef x_offset; - LLVMValueRef y_offset; +static LLVMValueRef +lp_build_sample_packed(struct lp_build_sample_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef y_stride, + LLVMValueRef data_ptr) +{ + LLVMValueRef offset; - x_offset = lp_build_mul(int_coord_bld, x, x_stride); - y_offset = lp_build_mul(int_coord_bld, y, y_stride); + offset = lp_build_sample_offset(&bld->int_coord_bld, + bld->format_desc, + x, y, y_stride, + data_ptr); - offset = lp_build_add(int_coord_bld, x_offset, y_offset); - } + assert(bld->format_desc->block.width == 1); + assert(bld->format_desc->block.height == 1); + assert(bld->format_desc->block.bits <= bld->texel_type.width); - lp_build_load_rgba_soa(bld->builder, - bld->format_desc, - bld->texel_type, - data_ptr, - offset, - texel); + return lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset); } @@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: /* FIXME */ - _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode); + _debug_printf("warning: failed to translate texture wrap mode %s\n", + debug_dump_tex_wrap(wrap_mode, TRUE)); coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); coord = lp_build_min(int_coord_bld, coord, length_minus_one); break; @@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld, x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s); y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x, y, stride, data_ptr, texel); + lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel); } @@ -274,8 +239,8 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart); t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart); - x0 = lp_build_int(&bld->coord_bld, s_ipart); - y0 = lp_build_int(&bld->coord_bld, t_ipart); + x0 = lp_build_itrunc(&bld->coord_bld, s_ipart); + y0 = lp_build_itrunc(&bld->coord_bld, t_ipart); x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); @@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); - lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]); - lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]); - lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]); - lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]); + lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]); + lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]); + lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]); + lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]); /* TODO: Don't interpolate missing channels */ for(chan = 0; chan < 4; ++chan) { @@ -304,6 +269,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld, static void +lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, + struct lp_type dst_type, + LLVMValueRef packed, + LLVMValueRef *rgba) +{ + LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + unsigned chan; + + /* Decode the input vector components */ + for (chan = 0; chan < 4; ++chan) { + unsigned start = chan*8; + unsigned stop = start + 8; + LLVMValueRef input; + + input = packed; + + if(start) + input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + + if(stop < 32) + input = LLVMBuildAnd(builder, input, mask, ""); + + input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); + + rgba[chan] = input; + } +} + + +static void +lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef width, + LLVMValueRef height, + LLVMValueRef stride, + LLVMValueRef data_ptr, + LLVMValueRef *texel) +{ + LLVMBuilderRef builder = bld->builder; + struct lp_build_context i32, h16, u8n; + LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; + LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; + LLVMValueRef x0, x1; + LLVMValueRef y0, y1; + LLVMValueRef neighbors[2][2]; + LLVMValueRef neighbors_lo[2][2]; + LLVMValueRef neighbors_hi[2][2]; + LLVMValueRef packed, packed_lo, packed_hi; + LLVMValueRef unswizzled[4]; + + lp_build_context_init(&i32, builder, lp_type_int(32)); + lp_build_context_init(&h16, builder, lp_type_ufixed(16)); + lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(i32.type); + h16_vec_type = lp_build_vec_type(h16.type); + u8n_vec_type = lp_build_vec_type(u8n.type); + + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + + s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); + t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); + + i32_c128 = lp_build_int_const_scalar(i32.type, -128); + s = LLVMBuildAdd(builder, s, i32_c128, ""); + t = LLVMBuildAdd(builder, t, i32_c128, ""); + + i32_c8 = lp_build_int_const_scalar(i32.type, 8); + s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); + t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); + + i32_c255 = lp_build_int_const_scalar(i32.type, 255); + s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); + t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); + + x0 = s_ipart; + y0 = t_ipart; + + x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); + y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); + + x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s); + y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t); + + /* + * Transform 4 x i32 in + * + * s_fpart = {s0, s1, s2, s3} + * + * into 8 x i16 + * + * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} + * + * into two 8 x i16 + * + * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} + * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} + * + * and likewise for t_fpart. There is no risk of loosing precision here + * since the fractional parts only use the lower 8bits. + */ + + s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); + t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); + + { + LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef shuffle_lo; + LLVMValueRef shuffle_hi; + unsigned i, j; + + for(j = 0; j < h16.type.length; j += 4) { + unsigned subindex = util_cpu_caps.little_endian ? 0 : 1; + LLVMValueRef index; + + index = LLVMConstInt(elem_type, j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_lo[j + i] = index; + + index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); + for(i = 0; i < 4; ++i) + shuffles_hi[j + i] = index; + } + + shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); + shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); + + s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); + t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); + s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); + t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); + } + + /* + * Fetch the pixels as 4 x 32bit (rgba order might differ): + * + * rgba0 rgba1 rgba2 rgba3 + * + * bit cast them into 16 x u8 + * + * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 + * + * unpack them into two 8 x i16: + * + * r0 g0 b0 a0 r1 g1 b1 a1 + * r2 g2 b2 a2 r3 g3 b3 a3 + * + * The higher 8 bits of the resulting elements will be zero. + */ + + neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr); + neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr); + neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr); + neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr); + + neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); + neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); + neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); + neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); + + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); + lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); + + /* + * Linear interpolate with 8.8 fixed point. + */ + + packed_lo = lp_build_lerp_2d(&h16, + s_fpart_lo, t_fpart_lo, + neighbors_lo[0][0], + neighbors_lo[0][1], + neighbors_lo[1][0], + neighbors_lo[1][1]); + + packed_hi = lp_build_lerp_2d(&h16, + s_fpart_hi, t_fpart_hi, + neighbors_hi[0][0], + neighbors_hi[0][1], + neighbors_hi[1][0], + neighbors_hi[1][1]); + + packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); + + /* + * Convert to SoA and swizzle. + */ + + packed = LLVMBuildBitCast(builder, packed, i32_vec_type, ""); + + lp_build_rgba8_to_f32_soa(bld->builder, + bld->texel_type, + packed, unswizzled); + + lp_build_format_swizzle_soa(bld->format_desc, + bld->texel_type, unswizzled, + texel); +} + + +static void lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef p, LLVMValueRef *texel) @@ -402,7 +578,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, break; case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_ANISO: - lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); + if(lp_format_is_rgba8(bld.format_desc)) + lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); + else + lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; default: assert(0); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h index 1f6da80448..b9472127a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h @@ -40,7 +40,7 @@ #include <llvm-c/Core.h> -struct lp_type type; +struct lp_type; struct lp_build_context; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index adc81569ed..64027de6aa 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -158,14 +158,14 @@ emit_fetch( const unsigned chan_index ) { const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index]; - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); LLVMValueRef res; switch (swizzle) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch (reg->SrcRegister.File) { case TGSI_FILE_CONSTANT: { @@ -198,14 +198,6 @@ emit_fetch( } break; - case TGSI_EXTSWIZZLE_ZERO: - res = bld->base.zero; - break; - - case TGSI_EXTSWIZZLE_ONE: - res = bld->base.one; - break; - default: assert( 0 ); return bld->base.undef; @@ -394,12 +386,7 @@ emit_kil( unsigned swizzle; /* Unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - - /* Note that we test if the value is less than zero, so 1.0 and 0.0 need - * not to be tested. */ - if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE) - continue; + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); /* Check if the component has not been already tested. */ assert(swizzle < NUM_CHANNELS); @@ -488,7 +475,6 @@ emit_instruction( #endif case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); } @@ -1386,15 +1372,6 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_NOISE1: - case TGSI_OPCODE_NOISE2: - case TGSI_OPCODE_NOISE3: - case TGSI_OPCODE_NOISE4: - FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - dst0[chan_index] = bld->base.zero; - } - break; - case TGSI_OPCODE_NOP: break; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c index 606243d6c5..1320a26721 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c @@ -160,12 +160,31 @@ lp_build_int_vec_type(struct lp_type type) struct lp_type lp_int_type(struct lp_type type) { - struct lp_type int_type; + struct lp_type res_type; - memset(&int_type, 0, sizeof int_type); - int_type.width = type.width; - int_type.length = type.length; - return int_type; + memset(&res_type, 0, sizeof res_type); + res_type.width = type.width; + res_type.length = type.length; + + return res_type; +} + + +/** + * Return the type with twice the bit width (hence half the number of elements). + */ +struct lp_type +lp_wider_type(struct lp_type type) +{ + struct lp_type res_type; + + memcpy(&res_type, &type, sizeof res_type); + res_type.width *= 2; + res_type.length /= 2; + + assert(res_type.length); + + return res_type; } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h index ee5ca3483c..2fb233d335 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_type.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h @@ -43,13 +43,18 @@ /** + * Native SIMD register width. + * + * 128 for all architectures we care about. + */ +#define LP_NATIVE_VECTOR_WIDTH 128 + +/** * Several functions can only cope with vectors of length up to this value. * You may need to increase that value if you want to represent bigger vectors. */ #define LP_MAX_VECTOR_LENGTH 16 -#define LP_MAX_TYPE_WIDTH 64 - /** * The LLVM type system can't conveniently express all the things we care about @@ -134,6 +139,91 @@ struct lp_build_context }; +static INLINE struct lp_type +lp_type_float(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.floating = TRUE; + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_int(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_uint(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_unorm(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.norm = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_fixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.sign = TRUE; + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + +static INLINE struct lp_type +lp_type_ufixed(unsigned width) +{ + struct lp_type res_type; + + memset(&res_type, 0, sizeof res_type); + res_type.fixed = TRUE; + res_type.width = width; + res_type.length = LP_NATIVE_VECTOR_WIDTH / width; + + return res_type; +} + + LLVMTypeRef lp_build_elem_type(struct lp_type type); @@ -166,6 +256,10 @@ struct lp_type lp_int_type(struct lp_type type); +struct lp_type +lp_wider_type(struct lp_type type); + + void lp_build_context_init(struct lp_build_context *bld, LLVMBuilderRef builder, diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index a4b2bd8c2a..202cb8ef43 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -107,11 +107,16 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) if (llvmpipe->draw) draw_destroy( llvmpipe->draw ); - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]); + pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); + } + pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]); + pipe_texture_reference(&llvmpipe->texture[i], NULL); + } for (i = 0; i < Elements(llvmpipe->constants); i++) { if (llvmpipe->constants[i].buffer) { diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 8d5a0d4f1f..7df340554e 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -88,9 +88,6 @@ struct llvmpipe_context { /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; - /** Mapped constant buffers */ - void *mapped_constants[PIPE_SHADER_TYPES]; - /** Vertex format */ struct vertex_info vertex_info; struct vertex_info vertex_info_vbuf; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 89772e62d3..0aa13a1fc6 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,54 +45,6 @@ -static void -llvmpipe_map_constant_buffers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - uint i, size; - - for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (lp->constants[i].buffer && lp->constants[i].buffer->size) - lp->mapped_constants[i] = screen->buffer_map(screen, lp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - } - - if (lp->constants[PIPE_SHADER_VERTEX].buffer) - size = lp->constants[PIPE_SHADER_VERTEX].buffer->size; - else - size = 0; - - lp->jit_context.constants = lp->mapped_constants[PIPE_SHADER_FRAGMENT]; - - draw_set_mapped_constant_buffer(lp->draw, - lp->mapped_constants[PIPE_SHADER_VERTEX], - size); -} - - -static void -llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp) -{ - struct pipe_screen *screen = lp->pipe.screen; - uint i; - - /* really need to flush all prims since the vert/frag shaders const buffers - * are going away now. - */ - draw_flush(lp->draw); - - draw_set_mapped_constant_buffer(lp->draw, NULL, 0); - - lp->jit_context.constants = NULL; - - for (i = 0; i < 2; i++) { - if (lp->constants[i].buffer && lp->constants[i].buffer->size) - screen->buffer_unmap(screen, lp->constants[i].buffer); - lp->mapped_constants[i] = NULL; - } -} - - boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -124,7 +76,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, llvmpipe_update_derived( lp ); llvmpipe_map_transfers(lp); - llvmpipe_map_constant_buffers(lp); /* * Map vertex buffers @@ -163,7 +114,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ - llvmpipe_unmap_constant_buffers(lp); lp->dirty_render_cache = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index b5c1c95bb7..cd8381fe30 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -58,8 +58,10 @@ llvmpipe_flush( struct pipe_context *pipe, * in the hope that a later clear will wipe them out. */ for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) + if (llvmpipe->cbuf_cache[i]) { + lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); + } /* Need this call for hardware buffers before swapbuffers. * @@ -71,8 +73,10 @@ llvmpipe_flush( struct pipe_context *pipe, } else if (flags & PIPE_FLUSH_RENDER_CACHE) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) - if (llvmpipe->cbuf_cache[i]) + if (llvmpipe->cbuf_cache[i]) { + lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]); lp_flush_tile_cache(llvmpipe->cbuf_cache[i]); + } /* FIXME: untile zsbuf! */ diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index f7111c1e5c..13535dd638 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,8 +36,10 @@ #include <llvm-c/Transforms/Scalar.h> #include "util/u_memory.h" +#include "util/u_cpu_detect.h" #include "lp_screen.h" #include "lp_bld_intr.h" +#include "lp_bld_misc.h" #include "lp_jit.h" @@ -147,10 +149,16 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) { char *error = NULL; -#ifdef LLVM_NATIVE_ARCH + util_cpu_detect(); + +#if 0 + /* For simulating less capable machines */ + util_cpu_caps.has_sse3 = 0; + util_cpu_caps.has_sse4_1 = 0; +#endif + LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif screen->module = LLVMModuleCreateWithName("llvmpipe"); @@ -168,8 +176,15 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) LLVMAddTargetData(screen->target, screen->pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ + /* TODO: Add more passes */ LLVMAddConstantPropagationPass(screen->pass); - LLVMAddInstructionCombiningPass(screen->pass); + if(util_cpu_caps.has_sse4_1) { + /* FIXME: There is a bug in this pass, whereby the combination of fptosi + * and sitofp (necessary for trunc/floor/ceil/round implementation) + * somehow becomes invalid code. + */ + LLVMAddInstructionCombiningPass(screen->pass); + } LLVMAddPromoteMemoryToRegisterPass(screen->pass); LLVMAddGVNPass(screen->pass); LLVMAddCFGSimplificationPass(screen->pass); diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 2d2fc19a65..c43b3da450 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -115,6 +115,7 @@ struct setup_context { /** * Execute fragment shader for the four fragments in the quad. */ +ALIGN_STACK static void shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quads[], @@ -124,7 +125,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quad = quads[0]; const unsigned x = quad->input.x0; const unsigned y = quad->input.y0; - uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); + uint8_t *tile; uint8_t *color; void *depth; uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; @@ -150,7 +151,13 @@ shade_quads(struct llvmpipe_context *llvmpipe, mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0; /* color buffer */ - color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); + if(llvmpipe->framebuffer.nr_cbufs >= 1 && + llvmpipe->framebuffer.cbufs[0]) { + tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); + color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); + } + else + color = NULL; /* depth buffer */ if(llvmpipe->zsbuf_map) { @@ -271,11 +278,13 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) * until we codegenerate single-quad variants of the fragment pipeline * we need this hack. */ const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; - struct quad_header quads[nr_quads]; - struct quad_header *quad_ptrs[nr_quads]; + struct quad_header quads[4]; + struct quad_header *quad_ptrs[4]; int x0 = block_x(quad->input.x0); unsigned i; + assert(nr_quads == 4); + for(i = 0; i < nr_quads; ++i) { int x = x0 + 2*i; if(x == quad->input.x0) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 9faed5a0b1..2e9aa9fffe 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -83,6 +83,7 @@ #include "lp_bld_debug.h" #include "lp_screen.h" #include "lp_context.h" +#include "lp_buffer.h" #include "lp_state.h" #include "lp_quad.h" #include "lp_tex_sample.h" @@ -399,9 +400,9 @@ generate_fragment(struct llvmpipe_context *lp, #ifdef DEBUG tgsi_dump(shader->base.tokens, 0); if(key->depth.enabled) { + debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format)); debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE)); debug_printf("depth.writemask = %u\n", key->depth.writemask); - debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count); } if(key->alpha.enabled) { debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE)); @@ -419,6 +420,34 @@ generate_fragment(struct llvmpipe_context *lp, debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE)); } debug_printf("blend.colormask = 0x%x\n", key->blend.colormask); + for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) { + if(key->sampler[i].format) { + debug_printf("sampler[%u] = \n", i); + debug_printf(" .format = %s\n", + pf_name(key->sampler[i].format)); + debug_printf(" .target = %s\n", + debug_dump_tex_target(key->sampler[i].target, TRUE)); + debug_printf(" .pot = %u %u %u\n", + key->sampler[i].pot_width, + key->sampler[i].pot_height, + key->sampler[i].pot_depth); + debug_printf(" .wrap = %s %s %s\n", + debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE), + debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); + if(key->sampler[i].compare_mode) + debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); + debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); + } + } + #endif variant = CALLOC_STRUCT(lp_fragment_shader_variant); @@ -582,6 +611,11 @@ generate_fragment(struct llvmpipe_context *lp, * Translate the LLVM IR into machine code. */ + if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + LLVMDumpValue(variant->function); + abort(); + } + LLVMRunFunctionPassManager(screen->pass, variant->function); #ifdef DEBUG @@ -589,11 +623,6 @@ generate_fragment(struct llvmpipe_context *lp, debug_printf("\n"); #endif - if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { - LLVMDumpValue(variant->function); - abort(); - } - variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function); #ifdef DEBUG @@ -672,16 +701,29 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf) + const struct pipe_constant_buffer *constants) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct pipe_buffer *buffer = constants ? constants->buffer : NULL; + unsigned size = buffer ? buffer->size : 0; + const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if(shader == PIPE_SHADER_VERTEX) + draw_flush(llvmpipe->draw); + /* note: reference counting */ - pipe_buffer_reference(&llvmpipe->constants[shader].buffer, - buf ? buf->buffer : NULL); + pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); + + if(shader == PIPE_SHADER_FRAGMENT) { + llvmpipe->jit_context.constants = data; + } + + if(shader == PIPE_SHADER_VERTEX) { + draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + } llvmpipe->dirty |= LP_NEW_CONSTANTS; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 177a26b7b1..c06ce8b75c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -53,10 +53,11 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { /* flush old */ + lp_tile_cache_map_transfers(lp->cbuf_cache[i]); lp_flush_tile_cache(lp->cbuf_cache[i]); /* assign new */ - lp->framebuffer.cbufs[i] = fb->cbufs[i]; + pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]); /* update cache */ lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]); @@ -81,7 +82,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, } /* assign new */ - lp->framebuffer.zsbuf = fb->zsbuf; + pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf); /* Tell draw module how deep the Z/depth buffer is */ if (lp->framebuffer.zsbuf) { diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index a88e110c66..39d80726e6 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -56,6 +56,9 @@ #include "lp_bld_type.h" +#define LP_TEST_NUM_SAMPLES 32 + + void write_tsv_header(FILE *fp); @@ -68,17 +71,28 @@ boolean test_all(unsigned verbose, FILE *fp); +#if defined(PIPE_CC_MSVC) + +unsigned __int64 __rdtsc(); +#pragma intrinsic(__rdtsc) +#define rdtsc() __rdtsc() + +#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) + static INLINE uint64_t rdtsc(void) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) uint32_t hi, lo; __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + #else - return 0; + +#define rdtsc() 0 + #endif -} + float diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 94b661dcba..149fec1d54 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -477,8 +477,8 @@ test_one(unsigned verbose, char *error = NULL; blend_test_ptr_t blend_test_ptr; boolean success; - const unsigned n = 32; - int64_t cycles[n]; + const unsigned n = LP_TEST_NUM_SAMPLES; + int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned i, j; @@ -530,11 +530,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -595,11 +595,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; - uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8]; + uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 9dcf58e5dc..ac2a6d05e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -156,8 +156,8 @@ test_one(unsigned verbose, char *error = NULL; conv_test_ptr_t conv_test_ptr; boolean success; - const unsigned n = 32; - int64_t cycles[n]; + const unsigned n = LP_TEST_NUM_SAMPLES; + int64_t cycles[LP_TEST_NUM_SAMPLES]; double cycles_avg = 0.0; unsigned num_srcs; unsigned num_dsts; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 7d83f899e6..b2403ad521 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -35,10 +35,11 @@ #include <llvm-c/Target.h> #include <llvm-c/Transforms/Scalar.h> +#include "util/u_cpu_detect.h" #include "util/u_format.h" -#include "lp_bld_flow.h" #include "lp_bld_format.h" +#include "lp_test.h" struct pixel_test_case @@ -89,40 +90,63 @@ struct pixel_test_case test_cases[] = }; -typedef void (*load_ptr_t)(const void *, float *); +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + const struct util_format_description *desc, + boolean success) +{ + fprintf(fp, "%s\t", success ? "pass" : "fail"); + + fprintf(fp, "%s\n", desc->name); + + fflush(fp); +} + + +typedef void (*load_ptr_t)(const uint32_t packed, float *); static LLVMValueRef add_load_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; - struct lp_build_loop_state loop; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMInt32Type(); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop); + if(desc->block.bits < 32) + packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), ""); - rgba = lp_build_load_rgba_aos(builder, format, ptr); - LLVMBuildStore(builder, rgba, rgba_ptr); + rgba = lp_build_unpack_rgba_aos(builder, desc, packed); - lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop); + LLVMBuildStore(builder, rgba, rgba_ptr); LLVMBuildRetVoid(builder); @@ -131,27 +155,28 @@ add_load_rgba_test(LLVMModuleRef module, } -typedef void (*store_ptr_t)(void *, const float *); +typedef void (*store_ptr_t)(uint32_t *, const float *); static LLVMValueRef add_store_rgba_test(LLVMModuleRef module, - enum pipe_format format) + const struct util_format_description *desc) { LLVMTypeRef args[2]; LLVMValueRef func; - LLVMValueRef ptr; + LLVMValueRef packed_ptr; LLVMValueRef rgba_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; + LLVMValueRef packed; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); + args[0] = LLVMPointerType(LLVMInt32Type(), 0); args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - ptr = LLVMGetParam(func, 0); + packed_ptr = LLVMGetParam(func, 0); rgba_ptr = LLVMGetParam(func, 1); block = LLVMAppendBasicBlock(func, "entry"); @@ -160,7 +185,12 @@ add_store_rgba_test(LLVMModuleRef module, rgba = LLVMBuildLoad(builder, rgba_ptr, ""); - lp_build_store_rgba_aos(builder, format, ptr, rgba); + packed = lp_build_pack_rgba_aos(builder, desc, rgba); + + if(desc->block.bits < 32) + packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); + + LLVMBuildStore(builder, packed, packed_ptr); LLVMBuildRetVoid(builder); @@ -170,7 +200,7 @@ add_store_rgba_test(LLVMModuleRef module, static boolean -test_format(const struct pixel_test_case *test) +test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { LLVMModuleRef module = NULL; LLVMValueRef load = NULL; @@ -192,8 +222,8 @@ test_format(const struct pixel_test_case *test) module = LLVMModuleCreateWithName("test"); - load = add_load_rgba_test(module, test->format); - store = add_store_rgba_test(module, test->format); + load = add_load_rgba_test(module, desc); + store = add_store_rgba_test(module, desc); if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { LLVMDumpModule(module); @@ -229,7 +259,7 @@ test_format(const struct pixel_test_case *test) memset(unpacked, 0, sizeof unpacked); packed = 0; - load_ptr(&test->packed, unpacked); + load_ptr(test->packed, unpacked); store_ptr(&packed, unpacked); success = TRUE; @@ -255,23 +285,29 @@ test_format(const struct pixel_test_case *test) if(pass) LLVMDisposePassManager(pass); + if(fp) + write_tsv_row(fp, desc, success); + return success; } -int main(int argc, char **argv) +boolean +test_all(unsigned verbose, FILE *fp) { unsigned i; - int ret; - -#ifdef LLVM_NATIVE_ARCH - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); -#endif + bool success = TRUE; for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i) - if(!test_format(&test_cases[i])) - ret = 1; + if(!test_format(verbose, fp, &test_cases[i])) + success = FALSE; - return ret; + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, unsigned long n) +{ + return test_all(verbose, fp); } diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index f07fa256f1..314544aa9a 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -34,10 +34,25 @@ */ +#include "util/u_cpu_detect.h" + #include "lp_bld_const.h" +#include "lp_bld_misc.h" #include "lp_test.h" +#ifdef PIPE_CC_MSVC +static INLINE double +round(double x) +{ + if (x >= 0.0) + return floor(x + 0.5); + else + return ceil(x - 0.5); +} +#endif + + void dump_type(FILE *fp, struct lp_type type) @@ -365,10 +380,10 @@ int main(int argc, char **argv) n = atoi(argv[i]); } -#ifdef LLVM_NATIVE_ARCH LLVMLinkInJIT(); LLVMInitializeNativeTarget(); -#endif + + util_cpu_detect(); if(fp) { /* Warm up the caches */ diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 724d437833..a00f2495df 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -66,16 +66,24 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen, pf_get_block(lpt->base.format, &lpt->base.block); for (level = 0; level <= pt->last_level; level++) { + unsigned nblocksx, nblocksy; + pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); - lpt->stride[level] = align(pt->nblocksx[level]*pt->block.size, 16); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + + /* Allocate storage for whole quads. This is particularly important + * for depth surfaces, which are currently stored in a swizzled format. */ + nblocksx = pf_get_nblocksx(&pt->block, align(width, 2)); + nblocksy = pf_get_nblocksy(&pt->block, align(height, 2)); + + lpt->stride[level] = align(nblocksx*pt->block.size, 16); lpt->level_offset[level] = buffer_size; - buffer_size += (pt->nblocksy[level] * + buffer_size += (nblocksy * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * lpt->stride[level]); @@ -353,17 +361,9 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, if(lpt->dt) { struct llvmpipe_winsys *winsys = screen->winsys; - unsigned flags = 0; - - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - map = winsys->displaytarget_map(winsys, lpt->dt, flags); + map = winsys->displaytarget_map(winsys, lpt->dt, + pipe_transfer_buffer_flags(transfer)); if (map == NULL) return NULL; } @@ -373,7 +373,7 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) + if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. * In llvmpipe, that would mean flushing the texture cache. diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c index 0c06b659a1..ec3e002d62 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c @@ -236,47 +236,41 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc) if(!pt) return; + assert(tc->transfer_map); + /* push the tile to all positions marked as clear */ for (y = 0; y < pt->height; y += TILE_SIZE) { for (x = 0; x < pt->width; x += TILE_SIZE) { struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; - switch(tile->status) { - case LP_TILE_STATUS_UNDEFINED: - break; - - case LP_TILE_STATUS_CLEAR: { - /** - * Actually clear the tiles which were flagged as being in a clear state. - */ - - struct pipe_screen *screen = pt->texture->screen; - unsigned tw = TILE_SIZE; - unsigned th = TILE_SIZE; - void *dst; - - if (pipe_clip_tile(x, y, &tw, &th, pt)) - continue; - - dst = screen->transfer_map(screen, pt); - assert(dst); - if(!dst) - continue; - - util_fill_rect(dst, &pt->block, pt->stride, - x, y, tw, th, - tc->clear_val); - - screen->transfer_unmap(screen, pt); + if(tile->status != LP_TILE_STATUS_UNDEFINED) { + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + if (!pipe_clip_tile(x, y, &w, &h, pt)) { + switch(tile->status) { + case LP_TILE_STATUS_CLEAR: + /* Actually clear the tiles which were flagged as being in a + * clear state. */ + util_fill_rect(tc->transfer_map, &pt->block, pt->stride, + x, y, w, h, + tc->clear_val); + break; + + case LP_TILE_STATUS_DEFINED: + lp_tile_write_4ub(pt->format, + tile->color, + tc->transfer_map, pt->stride, + x, y, w, h); + break; + + default: + assert(0); + break; + } + } tile->status = LP_TILE_STATUS_UNDEFINED; - break; - } - - case LP_TILE_STATUS_DEFINED: - lp_put_tile_rgba_soa(pt, x, y, tile->color); - tile->status = LP_TILE_STATUS_UNDEFINED; - break; } } } @@ -294,6 +288,9 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE]; struct pipe_transfer *pt = tc->transfer; + assert(tc->surface); + assert(tc->transfer); + switch(tile->status) { case LP_TILE_STATUS_CLEAR: /* don't get tile from framebuffer, just clear it */ @@ -301,11 +298,22 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc, tile->status = LP_TILE_STATUS_DEFINED; break; - case LP_TILE_STATUS_UNDEFINED: - /* get new tile data from transfer */ - lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color); + case LP_TILE_STATUS_UNDEFINED: { + unsigned w = TILE_SIZE; + unsigned h = TILE_SIZE; + + x &= ~(TILE_SIZE - 1); + y &= ~(TILE_SIZE - 1); + + if (!pipe_clip_tile(x, y, &w, &h, tc->transfer)) + lp_tile_read_4ub(pt->format, + tile->color, + tc->transfer_map, tc->transfer->stride, + x, y, w, h); + tile->status = LP_TILE_STATUS_DEFINED; break; + } case LP_TILE_STATUS_DEFINED: /* nothing to do */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.c b/src/gallium/drivers/llvmpipe/lp_tile_soa.c deleted file mode 100644 index 4e4ccb31cc..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.c +++ /dev/null @@ -1,931 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * RGBA/float tile get/put functions. - * Usable both by drivers and state trackers. - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_rect.h" -#include "util/u_tile.h" -#include "lp_tile_cache.h" -#include "lp_tile_soa.h" - - -const unsigned char -tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = { - { 0, 1, 4, 5, 8, 9, 12, 13}, - { 2, 3, 6, 7, 10, 11, 14, 15} -}; - - - -/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ - -static void -a8r8g8b8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const unsigned pixel = *src++; - TILE_PIXEL(p, j, i, 0) = (pixel >> 16) & 0xff; - TILE_PIXEL(p, j, i, 1) = (pixel >> 8) & 0xff; - TILE_PIXEL(p, j, i, 2) = (pixel >> 0) & 0xff; - TILE_PIXEL(p, j, i, 3) = (pixel >> 24) & 0xff; - } - } -} - - -static void -a8r8g8b8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b, a; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - a = TILE_PIXEL(p, j, i, 3); - *dst++ = (a << 24) | (r << 16) | (g << 8) | b; - } - } -} - - -/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ - -static void -x8r8g8b8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const unsigned pixel = *src++; - TILE_PIXEL(p, j, i, 0) = (pixel >> 16) & 0xff; - TILE_PIXEL(p, j, i, 1) = (pixel >> 8) & 0xff; - TILE_PIXEL(p, j, i, 2) = (pixel >> 0) & 0xff; - TILE_PIXEL(p, j, i, 3) = 0xff; - } - } -} - - -static void -x8r8g8b8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; - } - } -} - - -/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ - -static void -b8g8r8a8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const unsigned pixel = *src++; - TILE_PIXEL(p, j, i, 0) = (pixel >> 8) & 0xff; - TILE_PIXEL(p, j, i, 1) = (pixel >> 16) & 0xff; - TILE_PIXEL(p, j, i, 2) = (pixel >> 24) & 0xff; - TILE_PIXEL(p, j, i, 3) = (pixel >> 0) & 0xff; - } - } -} - - -static void -b8g8r8a8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b, a; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - a = TILE_PIXEL(p, j, i, 3); - *dst++ = (b << 24) | (g << 16) | (r << 8) | a; - } - } -} - - -/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ - -static void -a1r5g5b5_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const ushort pixel = *src++; - TILE_PIXEL(p, j, i, 0) = ((pixel >> 10) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 1) = ((pixel >> 5) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 3) = ((pixel >> 15) ) * 255; - } - } -} - - -static void -a1r5g5b5_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b, a; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - a = TILE_PIXEL(p, j, i, 3); - r = r >> 3; /* 5 bits */ - g = g >> 3; /* 5 bits */ - b = b >> 3; /* 5 bits */ - a = a >> 7; /* 1 bit */ - *dst++ = (a << 15) | (r << 10) | (g << 5) | b; - } - } -} - - -/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ - -static void -a4r4g4b4_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const ushort pixel = *src++; - TILE_PIXEL(p, j, i, 0) = ((pixel >> 8) & 0xf) * 255 / 15; - TILE_PIXEL(p, j, i, 1) = ((pixel >> 4) & 0xf) * 255 / 15; - TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0xf) * 255 / 15; - TILE_PIXEL(p, j, i, 3) = ((pixel >> 12) ) * 255 / 15; - } - } -} - - -static void -a4r4g4b4_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, g, b, a; - r = TILE_PIXEL(p, j, i, 0); - g = TILE_PIXEL(p, j, i, 1); - b = TILE_PIXEL(p, j, i, 2); - a = TILE_PIXEL(p, j, i, 3); - r >>= 4; - g >>= 4; - b >>= 4; - a >>= 4; - *dst++ = (a << 12) | (r << 16) | (g << 4) | b; - } - } -} - - -/*** PIPE_FORMAT_R5G6B5_UNORM ***/ - -static void -r5g6b5_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - const ushort pixel = *src++; - TILE_PIXEL(p, j, i, 0) = ((pixel >> 11) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 1) = ((pixel >> 5) & 0x3f) * 255 / 63; - TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0x1f) * 255 / 31; - TILE_PIXEL(p, j, i, 3) = 255; - } - } -} - - -static void -r5g6b5_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - uint r = (uint) TILE_PIXEL(p, j, i, 0) * 31 / 255; - uint g = (uint) TILE_PIXEL(p, j, i, 1) * 63 / 255; - uint b = (uint) TILE_PIXEL(p, j, i, 2) * 31 / 255; - *dst++ = (r << 11) | (g << 5) | (b); - } - } -} - - - -/*** PIPE_FORMAT_Z16_UNORM ***/ - -/** - * Return each Z value as four floats in [0,1]. - */ -static void -z16_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - const float scale = 1.0f / 65535.0f; - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = *src++ * scale; - } - } -} - - - - -/*** PIPE_FORMAT_L8_UNORM ***/ - -static void -l8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = *src; - TILE_PIXEL(p, j, i, 3) = 255; - } - } -} - - -static void -l8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r; - r = TILE_PIXEL(p, j, i, 0); - *dst++ = (ubyte) r; - } - } -} - - - -/*** PIPE_FORMAT_A8_UNORM ***/ - -static void -a8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = 0; - TILE_PIXEL(p, j, i, 3) = *src; - } - } -} - - -static void -a8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned a; - a = TILE_PIXEL(p, j, i, 3); - *dst++ = (ubyte) a; - } - } -} - - - -/*** PIPE_FORMAT_R16_SNORM ***/ - -static void -r16_get_tile_rgba(const short *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src++) { - TILE_PIXEL(p, j, i, 0) = MAX2(src[0] >> 7, 0); - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = 0; - TILE_PIXEL(p, j, i, 3) = 255; - } - } -} - - -static void -r16_put_tile_rgba(short *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, dst++) { - dst[0] = TILE_PIXEL(p, j, i, 0) << 7; - } - } -} - - -/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ - -static void -r16g16b16a16_get_tile_rgba(const short *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src += 4) { - TILE_PIXEL(p, j, i, 0) = src[0] >> 8; - TILE_PIXEL(p, j, i, 1) = src[1] >> 8; - TILE_PIXEL(p, j, i, 2) = src[2] >> 8; - TILE_PIXEL(p, j, i, 3) = src[3] >> 8; - } - } -} - - -static void -r16g16b16a16_put_tile_rgba(short *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, dst += 4) { - dst[0] = TILE_PIXEL(p, j, i, 0) << 8; - dst[1] = TILE_PIXEL(p, j, i, 1) << 8; - dst[2] = TILE_PIXEL(p, j, i, 2) << 8; - dst[3] = TILE_PIXEL(p, j, i, 3) << 8; - } - } -} - - - -/*** PIPE_FORMAT_I8_UNORM ***/ - -static void -i8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++, src++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = *src; - } - } -} - - -static void -i8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r; - r = TILE_PIXEL(p, j, i, 0); - *dst++ = (ubyte) r; - } - } -} - - -/*** PIPE_FORMAT_A8L8_UNORM ***/ - -static void -a8l8_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - ushort ra = *src++; - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = ra & 0xff; - TILE_PIXEL(p, j, i, 3) = ra >> 8; - } - } -} - - -static void -a8l8_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - unsigned r, a; - r = TILE_PIXEL(p, j, i, 0); - a = TILE_PIXEL(p, j, i, 3); - *dst++ = (a << 8) | r; - } - } -} - - - - -/*** PIPE_FORMAT_Z32_UNORM ***/ - -/** - * Return each Z value as four floats in [0,1]. - */ -static void -z32_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - const double scale = 1.0 / (double) 0xffffffff; - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = (float) (*src++ * scale); - } - } -} - - -/*** PIPE_FORMAT_S8Z24_UNORM ***/ - -/** - * Return Z component as four float in [0,1]. Stencil part ignored. - */ -static void -s8z24_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - const double scale = 1.0 / ((1 << 24) - 1); - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = (float) (scale * (*src++ & 0xffffff)); - } - } -} - - -/*** PIPE_FORMAT_Z24S8_UNORM ***/ - -/** - * Return Z component as four float in [0,1]. Stencil part ignored. - */ -static void -z24s8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - uint8_t *p) -{ - const double scale = 1.0 / ((1 << 24) - 1); - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = (float) (scale * (*src++ >> 8)); - } - } -} - - -/*** PIPE_FORMAT_Z32_FLOAT ***/ - -/** - * Return each Z value as four floats in [0,1]. - */ -static void -z32f_get_tile_rgba(const float *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = *src++; - } - } -} - - -/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ - -/** - * Convert YCbCr (or YCrCb) to RGBA. - */ -static void -ycbcr_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p, - boolean rev) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - /* do two texels at a time */ - for (j = 0; j < (w & ~1); j += 2, src += 2) { - const ushort t0 = src[0]; - const ushort t1 = src[1]; - const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ - const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ - ubyte cb, cr; - float r, g, b; - - if (rev) { - cb = t1 & 0xff; /* chroma U */ - cr = t0 & 0xff; /* chroma V */ - } - else { - cb = t0 & 0xff; /* chroma U */ - cr = t1 & 0xff; /* chroma V */ - } - - /* even pixel: y0,cr,cb */ - r = 1.164f * (y0-16) + 1.596f * (cr-128); - g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y0-16) + 2.018f * (cb-128); - TILE_PIXEL(p, j, i, 0) = r; - TILE_PIXEL(p, j, i, 1) = g; - TILE_PIXEL(p, j, i, 2) = b; - TILE_PIXEL(p, j, i, 3) = 255; - - /* odd pixel: use y1,cr,cb */ - r = 1.164f * (y1-16) + 1.596f * (cr-128); - g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y1-16) + 2.018f * (cb-128); - TILE_PIXEL(p, j + 1, i, 0) = r; - TILE_PIXEL(p, j + 1, i, 1) = g; - TILE_PIXEL(p, j + 1, i, 2) = b; - TILE_PIXEL(p, j + 1, i, 3) = 255; - } - /* do the last texel */ - if (w & 1) { - const ushort t0 = src[0]; - const ushort t1 = src[1]; - const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ - ubyte cb, cr; - float r, g, b; - - if (rev) { - cb = t1 & 0xff; /* chroma U */ - cr = t0 & 0xff; /* chroma V */ - } - else { - cb = t0 & 0xff; /* chroma U */ - cr = t1 & 0xff; /* chroma V */ - } - - /* even pixel: y0,cr,cb */ - r = 1.164f * (y0-16) + 1.596f * (cr-128); - g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y0-16) + 2.018f * (cb-128); - TILE_PIXEL(p, j, i, 0) = r; - TILE_PIXEL(p, j, i, 1) = g; - TILE_PIXEL(p, j, i, 2) = b; - TILE_PIXEL(p, j, i, 3) = 255; - } - } -} - - -static void -fake_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - uint8_t *p) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - TILE_PIXEL(p, j, i, 0) = - TILE_PIXEL(p, j, i, 1) = - TILE_PIXEL(p, j, i, 2) = - TILE_PIXEL(p, j, i, 3) = (i ^ j) & 1 ? 255 : 0; - } - } -} - - -static void -lp_tile_raw_to_rgba_soa(enum pipe_format format, - void *src, - uint w, uint h, - uint8_t *p) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_A1R5G5B5_UNORM: - a1r5g5b5_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_A4R4G4B4_UNORM: - a4r4g4b4_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - r5g6b5_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_L8_UNORM: - l8_get_tile_rgba((ubyte *) src, w, h, p); - break; - case PIPE_FORMAT_A8_UNORM: - a8_get_tile_rgba((ubyte *) src, w, h, p); - break; - case PIPE_FORMAT_I8_UNORM: - i8_get_tile_rgba((ubyte *) src, w, h, p); - break; - case PIPE_FORMAT_A8L8_UNORM: - a8l8_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_R16_SNORM: - r16_get_tile_rgba((short *) src, w, h, p); - break; - case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_get_tile_rgba((short *) src, w, h, p); - break; - case PIPE_FORMAT_Z16_UNORM: - z16_get_tile_rgba((ushort *) src, w, h, p); - break; - case PIPE_FORMAT_Z32_UNORM: - z32_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - s8z24_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - z24s8_get_tile_rgba((unsigned *) src, w, h, p); - break; - case PIPE_FORMAT_Z32_FLOAT: - z32f_get_tile_rgba((float *) src, w, h, p); - break; - case PIPE_FORMAT_YCBCR: - ycbcr_get_tile_rgba((ushort *) src, w, h, p, FALSE); - break; - case PIPE_FORMAT_YCBCR_REV: - ycbcr_get_tile_rgba((ushort *) src, w, h, p, TRUE); - break; - default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); - fake_get_tile_rgba(src, w, h, p); - } -} - - -void -lp_get_tile_rgba_soa(struct pipe_transfer *pt, - uint x, uint y, - uint8_t *p) -{ - uint w = TILE_SIZE, h = TILE_SIZE; - void *packed; - - if (pipe_clip_tile(x, y, &w, &h, pt)) - return; - - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); - - if (!packed) - return; - - if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV) - assert((x & 1) == 0); - - pipe_get_tile_raw(pt, x, y, w, h, packed, 0); - - lp_tile_raw_to_rgba_soa(pt->format, packed, w, h, p); - - FREE(packed); -} - - -void -lp_put_tile_rgba_soa(struct pipe_transfer *pt, - uint x, uint y, - const uint8_t *p) -{ - uint w = TILE_SIZE, h = TILE_SIZE; - void *packed; - - if (pipe_clip_tile(x, y, &w, &h, pt)) - return; - - packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); - - if (!packed) - return; - - switch (pt->format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p); - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p); - break; - case PIPE_FORMAT_A1R5G5B5_UNORM: - a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - r5g6b5_put_tile_rgba((ushort *) packed, w, h, p); - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - assert(0); - break; - case PIPE_FORMAT_A4R4G4B4_UNORM: - a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p); - break; - case PIPE_FORMAT_L8_UNORM: - l8_put_tile_rgba((ubyte *) packed, w, h, p); - break; - case PIPE_FORMAT_A8_UNORM: - a8_put_tile_rgba((ubyte *) packed, w, h, p); - break; - case PIPE_FORMAT_I8_UNORM: - i8_put_tile_rgba((ubyte *) packed, w, h, p); - break; - case PIPE_FORMAT_A8L8_UNORM: - a8l8_put_tile_rgba((ushort *) packed, w, h, p); - break; - case PIPE_FORMAT_R16_SNORM: - r16_put_tile_rgba((short *) packed, w, h, p); - break; - case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_put_tile_rgba((short *) packed, w, h, p); - break; - case PIPE_FORMAT_Z16_UNORM: - /*z16_put_tile_rgba((ushort *) packed, w, h, p);*/ - break; - case PIPE_FORMAT_Z32_UNORM: - /*z32_put_tile_rgba((unsigned *) packed, w, h, p);*/ - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p);*/ - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p);*/ - break; - default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format)); - } - - pipe_put_tile_raw(pt, x, y, w, h, packed, 0); - - FREE(packed); -} - - diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 3d8c703b73..040b01865d 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -64,14 +64,18 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH]; void -lp_get_tile_rgba_soa(struct pipe_transfer *pt, - uint x, uint y, - uint8_t *p); +lp_tile_read_4ub(enum pipe_format format, + uint8_t *dst, + const void *src, unsigned src_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + void -lp_put_tile_rgba_soa(struct pipe_transfer *pt, - uint x, uint y, - const uint8_t *p); +lp_tile_write_4ub(enum pipe_format format, + const uint8_t *src, + void *dst, unsigned dst_stride, + unsigned x, unsigned y, unsigned w, unsigned h); + #ifdef __cplusplus diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py new file mode 100644 index 0000000000..004c5c979e --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python + +''' +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Pixel format accessor functions. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ +''' + + +import sys +import os.path + +sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util')) + +from u_format_access import * + + +def generate_format_read(format, dst_type, dst_native_type, dst_suffix): + '''Generate the function to read pixels from a particular format''' + + name = short_name(format) + + src_native_type = native_type(format) + + print 'static void' + print 'lp_tile_%s_read_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type) + print '{' + print ' unsigned x, y;' + print ' const uint8_t *src_row = src + y0*src_stride;' + print ' for (y = 0; y < h; ++y) {' + print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride()) + print ' for (x = 0; x < w; ++x) {' + + names = ['']*4 + if format.colorspace == 'rgb': + for i in range(4): + swizzle = format.out_swizzle[i] + if swizzle < 4: + names[swizzle] += 'rgba'[i] + elif format.colorspace == 'zs': + swizzle = format.out_swizzle[0] + if swizzle < 4: + names[swizzle] = 'z' + else: + assert False + else: + assert False + + if format.layout == ARITH: + print ' %s pixel = *src_pixel++;' % src_native_type + shift = 0; + for i in range(4): + src_type = format.in_types[i] + width = src_type.size + if names[i]: + value = 'pixel' + mask = (1 << width) - 1 + if shift: + value = '(%s >> %u)' % (value, shift) + if shift + width < format.block_size(): + value = '(%s & 0x%x)' % (value, mask) + value = conversion_expr(src_type, dst_type, dst_native_type, value) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + shift += width + elif format.layout == ARRAY: + for i in range(4): + src_type = format.in_types[i] + if names[i]: + value = '(*src_pixel++)' + value = conversion_expr(src_type, dst_type, dst_native_type, value) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + else: + assert False + + for i in range(4): + if format.colorspace == 'rgb': + swizzle = format.out_swizzle[i] + if swizzle < 4: + value = names[swizzle] + elif swizzle == SWIZZLE_0: + value = '0' + elif swizzle == SWIZZLE_1: + value = '1' + else: + assert False + elif format.colorspace == 'zs': + if i < 3: + value = 'z' + else: + value = '1' + else: + assert False + print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i]) + + print ' }' + print ' src_row += src_stride;' + print ' }' + print '}' + print + + +def generate_format_write(format, src_type, src_native_type, src_suffix): + '''Generate the function to write pixels to a particular format''' + + name = short_name(format) + + dst_native_type = native_type(format) + + print 'static void' + print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) + print '{' + print ' unsigned x, y;' + print ' uint8_t *dst_row = dst + y0*dst_stride;' + print ' for (y = 0; y < h; ++y) {' + print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) + print ' for (x = 0; x < w; ++x) {' + + inv_swizzle = [None]*4 + if format.colorspace == 'rgb': + for i in range(4): + swizzle = format.out_swizzle[i] + if swizzle < 4: + inv_swizzle[swizzle] = i + elif format.colorspace == 'zs': + swizzle = format.out_swizzle[0] + if swizzle < 4: + inv_swizzle[swizzle] = 0 + else: + assert False + + if format.layout == ARITH: + print ' %s pixel = 0;' % dst_native_type + shift = 0; + for i in range(4): + dst_type = format.in_types[i] + width = dst_type.size + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_type, dst_type, dst_native_type, value) + if shift: + value = '(%s << %u)' % (value, shift) + print ' pixel |= %s;' % value + shift += width + print ' *dst_pixel++ = pixel;' + elif format.layout == ARRAY: + for i in range(4): + dst_type = format.in_types[i] + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_type, dst_type, dst_native_type, value) + print ' *dst_pixel++ = %s;' % value + else: + assert False + + print ' }' + print ' dst_row += dst_stride;' + print ' }' + print '}' + print + + +def generate_read(formats, dst_type, dst_native_type, dst_suffix): + '''Generate the dispatch function to read pixels from any format''' + + for format in formats: + if is_format_supported(format): + generate_format_read(format, dst_type, dst_native_type, dst_suffix) + + print 'void' + print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) + print '{' + print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type + print ' switch(format) {' + for format in formats: + if is_format_supported(format): + print ' case %s:' % format.name + print ' func = &lp_tile_%s_read_%s;' % (short_name(format), dst_suffix) + print ' break;' + print ' default:' + print ' debug_printf("unsupported format\\n");' + print ' return;' + print ' }' + print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);' + print '}' + print + + +def generate_write(formats, src_type, src_native_type, src_suffix): + '''Generate the dispatch function to write pixels to any format''' + + for format in formats: + if is_format_supported(format): + generate_format_write(format, src_type, src_native_type, src_suffix) + + print 'void' + print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) + + print '{' + print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type + print ' switch(format) {' + for format in formats: + if is_format_supported(format): + print ' case %s:' % format.name + print ' func = &lp_tile_%s_write_%s;' % (short_name(format), src_suffix) + print ' break;' + print ' default:' + print ' debug_printf("unsupported format\\n");' + print ' return;' + print ' }' + print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);' + print '}' + print + + +def main(): + formats = [] + for arg in sys.argv[1:]: + formats.extend(parse(arg)) + + print '/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */' + print + # This will print the copyright message on the top of this file + print __doc__.strip() + print + print '#include "pipe/p_compiler.h"' + print '#include "util/u_format.h"' + print '#include "util/u_math.h"' + print '#include "lp_tile_soa.h"' + print + print 'const unsigned char' + print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {' + print ' { 0, 1, 4, 5, 8, 9, 12, 13},' + print ' { 2, 3, 6, 7, 10, 11, 14, 15}' + print '};' + print + + generate_clamp() + + type = Type(UNSIGNED, True, 8) + native_type = 'uint8_t' + suffix = '4ub' + + generate_read(formats, type, native_type, suffix) + generate_write(formats, type, native_type, suffix) + + +if __name__ == '__main__': + main() diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index dbe8a6e7bf..0cb66041d5 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -3,6 +3,7 @@ include $(TOP)/configs/current LIBNAME = nouveau -C_SOURCES = nouveau_screen.c +C_SOURCES = nouveau_screen.c \ + nouveau_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_context.c b/src/gallium/drivers/nouveau/nouveau_context.c new file mode 100644 index 0000000000..23443869e6 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_context.c @@ -0,0 +1,41 @@ +#include <pipe/p_defines.h> +#include <pipe/p_context.h> + +#include "nouveau/nouveau_screen.h" +#include "nouveau/nouveau_context.h" + +#include "nouveau/nouveau_bo.h" + +static unsigned int +nouveau_reference_flags(struct nouveau_bo *bo) +{ + uint32_t bo_flags; + int flags = 0; + + bo_flags = nouveau_bo_pending(bo); + if (bo_flags & NOUVEAU_BO_RD) + flags |= PIPE_REFERENCED_FOR_READ; + if (bo_flags & NOUVEAU_BO_WR) + flags |= PIPE_REFERENCED_FOR_WRITE; + + return flags; +} + +unsigned int +nouveau_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level) +{ + struct nouveau_miptree *mt = nouveau_miptree(pt); + + return nouveau_reference_flags(mt->bo); +} + +unsigned int +nouveau_is_buffer_referenced(struct pipe_context *pipe, struct pipe_buffer *pb) +{ + struct nouveau_bo *bo = nouveau_bo(pb); + + return nouveau_reference_flags(bo); +} + diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h new file mode 100644 index 0000000000..6a28d40da7 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -0,0 +1,11 @@ +#ifndef __NOUVEAU_CONTEXT_H__ +#define __NOUVEAU_CONTEXT_H__ + +unsigned int +nouveau_is_texture_referenced(struct pipe_context *, struct pipe_texture *, + unsigned face, unsigned level); + +unsigned int +nouveau_is_buffer_referenced(struct pipe_context *, struct pipe_buffer *); + +#endif diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 17166c9f51..10d984ace9 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -64,30 +64,6 @@ nv04_init_hwctx(struct nv04_context *nv04) return TRUE; } -static unsigned int -nv04_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv04_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - - struct pipe_context * nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -113,8 +89,8 @@ nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) nv04->pipe.clear = nv04_clear; nv04->pipe.flush = nv04_flush; - nv04->pipe.is_texture_referenced = nv04_is_texture_referenced; - nv04->pipe.is_buffer_referenced = nv04_is_buffer_referenced; + nv04->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv04->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv04_init_surface_functions(nv04); nv04_init_state_functions(nv04); diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h index 2842b2c90d..55326c787a 100644 --- a/src/gallium/drivers/nv04/nv04_context.h +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv04_screen *ctx = nv04->screen diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index f88e138c79..8c7eb367e2 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -1,5 +1,6 @@ #include "pipe/p_context.h" #include "pipe/p_format.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "nouveau/nouveau_winsys.h" @@ -12,10 +13,13 @@ nv04_surface_format(enum pipe_format format) { switch (format) { case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; case PIPE_FORMAT_R16_SNORM: case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_A8L8_UNORM: return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -35,6 +39,7 @@ nv04_rect_format(enum pipe_format format) case PIPE_FORMAT_A8_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_A8L8_UNORM: case PIPE_FORMAT_Z16_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -50,6 +55,10 @@ static INLINE int nv04_scaled_image_format(enum pipe_format format) { switch (format) { + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; case PIPE_FORMAT_A1R5G5B5_UNORM: return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -58,6 +67,7 @@ nv04_scaled_image_format(enum pipe_format format) return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_A8L8_UNORM: return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; default: return -1; @@ -107,17 +117,20 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); const unsigned src_pitch = ((struct nv04_surface *)src)->pitch; + /* Max width & height may not be the same on all HW, but must be POT */ const unsigned max_w = 1024; const unsigned max_h = 1024; - const unsigned sub_w = w > max_w ? max_w : w; - const unsigned sub_h = h > max_h ? max_h : h; - unsigned cx; - unsigned cy; + unsigned sub_w = w > max_w ? max_w : w; + unsigned sub_h = h > max_h ? max_h : h; + unsigned x; + unsigned y; + + /* Swizzled surfaces must be POT */ + assert(util_is_pot(dst->width) && util_is_pot(dst->height)); -#if 0 - /* That's the way she likes it */ - assert(src_pitch == ((struct nv04_surface *)dst)->pitch); -#endif + /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */ + assert(sub_w == w || util_is_pot(sub_w)); + assert(sub_h == h || util_is_pot(sub_h)); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); OUT_RELOCo(chan, dst_bo, @@ -125,41 +138,46 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); OUT_RING (chan, nv04_surface_format(dst->format) | - log2i(w) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | - log2i(h) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); - + log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | + log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); + BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); OUT_RELOCo(chan, src_bo, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); OUT_RING (chan, swzsurf->handle); - for (cy = 0; cy < h; cy += sub_h) { - for (cx = 0; cx < w; cx += sub_w) { + for (y = 0; y < h; y += sub_h) { + sub_h = MIN2(sub_h, h - y); + + for (x = 0; x < w; x += sub_w) { + sub_w = MIN2(sub_w, w - x); + + /* Must be 64-byte aligned */ + assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size) & 63)); + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx+dx, cy+dy) * - dst->texture->block.size, NOUVEAU_BO_GART | - NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); OUT_RING (chan, nv04_scaled_image_format(src->format)); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); OUT_RING (chan, 0); - OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 0); - OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 1 << 20); OUT_RING (chan, 1 << 20); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); - OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w); OUT_RING (chan, src_pitch | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); - OUT_RELOCl(chan, src_bo, src->offset + (cy+sy) * src_pitch + - (cx+sx) * src->texture->block.size, NOUVEAU_BO_GART | - NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * src->texture->block.size, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); OUT_RING (chan, 0); } } @@ -214,43 +232,6 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, } static int -nv04_surface_copy_m2mf_swizzle(struct nv04_surface_2d *ctx, - struct pipe_surface *dst, int dx, int dy, - struct pipe_surface *src, int sx, int sy) -{ - struct nouveau_channel *chan = ctx->m2mf->channel; - struct nouveau_grobj *m2mf = ctx->m2mf; - struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); - unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - unsigned dst_offset = dst->offset + nv04_swizzle_bits(dx, dy) * - dst->texture->block.size; - unsigned src_offset = src->offset + sy * src_pitch + - sx * src->texture->block.size; - - BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); - OUT_RELOCo(chan, src_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, dst_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); - OUT_RELOCl(chan, src_bo, src_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, dst_bo, dst_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, src_pitch); - OUT_RING (chan, dst_pitch); - OUT_RING (chan, 1 * src->texture->block.size); - OUT_RING (chan, 1); - OUT_RING (chan, 0x0101); - OUT_RING (chan, 0); - - return 0; -} - -static int nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, int dx, int dy, struct pipe_surface *src, int sx, int sy, int w, int h) @@ -299,61 +280,10 @@ nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, assert(src->format == dst->format); /* Setup transfer to swizzle the texture to vram if needed */ - if (src_linear && !dst_linear) { - int x,y; - - if ((w>1) && (h>1)) { - int potWidth = 1<<log2i(w); - int potHeight = 1<<log2i(h); - int remainWidth = w-potWidth; - int remainHeight = h-potHeight; - int squareDim = (potWidth>potHeight ? potHeight : potWidth); - - /* top left is always POT, but we can only swizzle squares */ - for (y=0; y<potHeight; y+=squareDim) { - for (x=0; x<potWidth; x+= squareDim) { - nv04_surface_copy_swizzle(ctx, dst, dx+x, dy+y, - src, sx+x, sy+y, - squareDim, squareDim); - } - } - - /* top right */ - if (remainWidth>0) { - nv04_surface_copy(ctx, dst, dx+potWidth, dy, - src, sx+potWidth, sy, - remainWidth, potHeight); - } - - /* bottom left */ - if (remainHeight>0) { - nv04_surface_copy(ctx, dst, dx, dy+potHeight, - src, sx, sy+potHeight, - potWidth, remainHeight); - } - - /* bottom right */ - if ((remainWidth>0) && (remainHeight>0)) { - nv04_surface_copy(ctx, dst, dx+potWidth, dy+potHeight, - src, sx+potWidth, sy+potHeight, - remainWidth, remainHeight); - } - } else if (w==1) { - /* We have a column to copy to a swizzled texture */ - for (y=0; y<h; y++) { - nv04_surface_copy_m2mf_swizzle(ctx, dst, dx, dy+y, - src, sx, sy+y); - } - } else if (h==1) { - /* We have a row to copy to a swizzled texture */ - for (x=0; x<w; x++) { - nv04_surface_copy_m2mf_swizzle(ctx, dst, dx+x, dy, - src, sx+x, sy); - } - } - - return; - } + if (src_linear && !dst_linear && w > 1 && h > 1) { + nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h); + return; + } /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback * to NV_MEMORY_TO_MEMORY_FORMAT in this case. diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 854b855d64..6618660743 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -13,22 +13,6 @@ struct nv04_transfer { bool direct; }; -static unsigned nv04_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, 0, 0, 0, - nv04_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, - nv04_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv04_screen *nvscreen = nv04_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv04_transfer_del(struct pipe_transfer *ptx) { struct nv04_transfer *tx = (struct nv04_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv04_screen *nvscreen = nv04_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv04_miptree *mt = (struct nv04_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv04_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index a127b134ec..933176fc32 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -257,29 +257,6 @@ nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) { } -static unsigned int -nv10_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv10_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -305,8 +282,8 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) nv10->pipe.clear = nv10_clear; nv10->pipe.flush = nv10_flush; - nv10->pipe.is_texture_referenced = nv10_is_texture_referenced; - nv10->pipe.is_buffer_referenced = nv10_is_buffer_referenced; + nv10->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv10->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv10_init_surface_functions(nv10); nv10_init_state_functions(nv10); diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index f1e003c953..36a6aa7a74 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv10_screen *ctx = nv10->screen diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index c06b8d34c7..8feb85e4bd 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -13,22 +13,6 @@ struct nv10_transfer { bool direct; }; -static unsigned nv10_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, 0, 0, 0, - nv10_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, - nv10_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv10_screen *nvscreen = nv10_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv10_transfer_del(struct pipe_transfer *ptx) { struct nv10_transfer *tx = (struct nv10_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv10_screen *nvscreen = nv10_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv10_miptree *mt = (struct nv10_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv10_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index b32d0d83ba..9a48739661 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -380,30 +380,6 @@ nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) { } - -static unsigned int -nv20_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv20_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -429,8 +405,8 @@ nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) nv20->pipe.clear = nv20_clear; nv20->pipe.flush = nv20_flush; - nv20->pipe.is_texture_referenced = nv20_is_texture_referenced; - nv20->pipe.is_buffer_referenced = nv20_is_buffer_referenced; + nv20->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv20->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv20_init_surface_functions(nv20); nv20_init_state_functions(nv20); diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h index fc932f1f90..a4eaa95660 100644 --- a/src/gallium/drivers/nv20/nv20_context.h +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv20_screen *ctx = nv20->screen diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 5018995596..81b4f1a917 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -13,22 +13,6 @@ struct nv20_transfer { bool direct; }; -static unsigned nv20_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, 0, 0, 0, - nv20_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, - nv20_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv20_screen *nvscreen = nv20_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv20_transfer_del(struct pipe_transfer *ptx) { struct nv20_transfer *tx = (struct nv20_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage = PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv20_screen *nvscreen = nv20_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv20_miptree *mt = (struct nv20_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv20_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index f827bdc78b..d8300fd69f 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -10,7 +10,7 @@ nv30_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv30_context *nv30 = nv30_context(pipe); - + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(rankine, 0x1fd8, 1); OUT_RING (2); @@ -31,29 +31,6 @@ nv30_destroy(struct pipe_context *pipe) FREE(nv30); } -static unsigned int -nv30_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv30_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -78,8 +55,8 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) nv30->pipe.clear = nv30_clear; nv30->pipe.flush = nv30_flush; - nv30->pipe.is_texture_referenced = nv30_is_texture_referenced; - nv30->pipe.is_buffer_referenced = nv30_is_buffer_referenced; + nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv30_init_query_functions(nv30); nv30_init_surface_functions(nv30); @@ -95,4 +72,3 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) return &nv30->pipe; } - diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 4229c0a0e1..8d49366dfc 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv30_screen *ctx = nv30->screen diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index a48ba9782b..cc0385426c 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -318,38 +318,23 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, { const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; - uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, - fsrc->SrcRegisterExtSwz.NegateY, - fsrc->SrcRegisterExtSwz.NegateZ, - fsrc->SrcRegisterExtSwz.NegateW }; + uint mask = 0; uint c; for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: mask |= (1 << c); break; - case TGSI_EXTSWIZZLE_ZERO: - zero_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; - case TGSI_EXTSWIZZLE_ONE: - one_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; default: assert(0); } - - if (!tgsi.negate && neg[c]) - neg_mask |= (1 << c); } - if (mask == MASK_ALL && !neg_mask) + if (mask == MASK_ALL) return TRUE; *src = temp(fpc); @@ -357,18 +342,6 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, if (mask) arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - if (zero_mask) - arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); - - if (one_mask) - arith(fpc, 0, STR, *src, one_mask, *src, none, none); - - if (neg_mask) { - struct nv30_sreg one = temp(fpc); - arith(fpc, 0, STR, one, neg_mask, one, none, none); - arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); - } - return FALSE; } @@ -527,12 +500,6 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, case TGSI_OPCODE_MUL: arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; - case TGSI_OPCODE_NOISE1: - case TGSI_OPCODE_NOISE2: - case TGSI_OPCODE_NOISE3: - case TGSI_OPCODE_NOISE4: - arith(fpc, sat, SFL, dst, mask, none, none, none); - break; case TGSI_OPCODE_POW: arith(fpc, sat, POW, dst, mask, src[0], src[1], none); break; diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 822e1d8def..3dd636f4ee 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -29,8 +29,8 @@ nv30_texture_formats[] = { _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), -// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), -// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), + _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X), + _(Z24S8_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), @@ -69,13 +69,13 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) tf = nv30_fragtex_format(pt->format); if (!tf) - assert(0); + return NULL; txf = tf->format; txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); - txf |= log2i(pt->width[0]) << 20; - txf |= log2i(pt->height[0]) << 24; - txf |= log2i(pt->depth[0]) << 28; + txf |= log2i(pt->width[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; + txf |= log2i(pt->height[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; + txf |= log2i(pt->depth[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; switch (pt->target) { diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 7f8054de73..17acca61ab 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -96,6 +96,11 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: { if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 41af38450b..221ae1b5f8 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -10,6 +10,22 @@ #define NV34TCL_CHIPSET_3X_MASK 0x00000010 #define NV35TCL_CHIPSET_3X_MASK 0x000001e0 +/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h + * to get the pointer to the context front buffer, so I copied nouveau_winsys here. + * nv30_screen_surface_format_supported() can then use it to enforce creating fbo + * with same number of bits everywhere. + */ +struct nouveau_winsys { + struct pipe_winsys base; + + struct pipe_screen *pscreen; + + unsigned nr_pctx; + struct pipe_context **pctx; + + struct pipe_surface *front; +}; + static int nv30_screen_get_param(struct pipe_screen *pscreen, int param) { @@ -83,6 +99,8 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, enum pipe_texture_target target, unsigned tex_usage, unsigned geom_flags) { + struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -96,8 +114,9 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, switch (format) { case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z16_UNORM: return TRUE; + case PIPE_FORMAT_Z16_UNORM: + return (front->format == PIPE_FORMAT_R5G6B5_UNORM); default: break; } diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 44b6a74715..4d6a67e56d 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -8,15 +8,15 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nouveau_channel *chan = nv30->screen->base.channel; struct nouveau_grobj *rankine = nv30->screen->rankine; struct nv04_surface *rt[2], *zeta = NULL; - uint32_t rt_enable, rt_format; - int i, colour_format = 0, zeta_format = 0; + uint32_t rt_enable = 0, rt_format = 0; + int i, colour_format = 0, zeta_format = 0, depth_only = 0; struct nouveau_stateobj *so = so_new(64, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; struct nv30_miptree *nv30mt; + int colour_bits = 32, zeta_bits = 32; - rt_enable = 0; for (i = 0; i < fb->nr_cbufs; i++) { if (colour_format) { assert(colour_format == fb->cbufs[i]->format); @@ -35,18 +35,35 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) zeta = (struct nv04_surface *)fb->zsbuf; } - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0|NV34TCL_RT_ENABLE_COLOR1)) { + /* Render to at least a colour buffer */ + if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->nr_cbufs; i++) + assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - /* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */ - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ | - log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/; + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else if (fb->zsbuf) { + depth_only = 1; + + /* Render to depth buffer only */ + if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else { + return FALSE; } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; switch (colour_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -55,6 +72,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) break; case PIPE_FORMAT_R5G6B5_UNORM: rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + colour_bits = 16; break; default: assert(0); @@ -63,6 +81,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) switch (zeta_format) { case PIPE_FORMAT_Z16_UNORM: rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + zeta_bits = 16; break; case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: @@ -73,21 +92,27 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) assert(0); } - if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { - uint32_t pitch = rt[0]->pitch; + if (colour_bits > zeta_bits) { + return FALSE; + } + + if (depth_only || (rt_enable & NV34TCL_RT_ENABLE_COLOR0)) { + struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); + uint32_t pitch = rt0->pitch; + if (zeta) { pitch |= (zeta->pitch << 16); } else { pitch |= (pitch << 16); } - nv30mt = (struct nv30_miptree *)rt[0]->base.texture; + nv30mt = (struct nv30_miptree *) rt0->base.texture; so_method(so, rankine, NV34TCL_DMA_COLOR0, 1); so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, chan->vram->handle, chan->gart->handle); so_method(so, rankine, NV34TCL_COLOR0_PITCH, 2); so_data (so, pitch); - so_reloc (so, nouveau_bo(nv30mt->buffer), rt[0]->base.offset, + so_reloc (so, nouveau_bo(nv30mt->buffer), rt0->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 2367571878..98011decf7 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -13,22 +13,6 @@ struct nv30_transfer { bool direct; }; -static unsigned nv30_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, face, level, zslice, - nv30_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, - nv30_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv30_screen *nvscreen = nv30_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv30_transfer_del(struct pipe_transfer *ptx) { struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv30_screen *nvscreen = nv30_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv30_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 8eba6a43ef..7f008274a4 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -10,7 +10,7 @@ nv40_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv40_context *nv40 = nv40_context(pipe); - + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { BEGIN_RING(curie, 0x1fd8, 1); OUT_RING (2); @@ -31,29 +31,6 @@ nv40_destroy(struct pipe_context *pipe) FREE(nv40); } -static unsigned int -nv40_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv40_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -78,8 +55,8 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) nv40->pipe.clear = nv40_clear; nv40->pipe.flush = nv40_flush; - nv40->pipe.is_texture_referenced = nv40_is_texture_referenced; - nv40->pipe.is_buffer_referenced = nv40_is_buffer_referenced; + nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; nv40_init_query_functions(nv40); nv40_init_surface_functions(nv40); @@ -95,4 +72,3 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) return &nv40->pipe; } - diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 97bc83292d..a3d594167a 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -13,6 +13,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" #define NOUVEAU_PUSH_CONTEXT(ctx) \ struct nv40_screen *ctx = nv40->screen diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 32d9ed1a7f..99277506fc 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -321,38 +321,23 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, { const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; - uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, - fsrc->SrcRegisterExtSwz.NegateY, - fsrc->SrcRegisterExtSwz.NegateZ, - fsrc->SrcRegisterExtSwz.NegateW }; + uint mask = 0; uint c; for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: mask |= (1 << c); break; - case TGSI_EXTSWIZZLE_ZERO: - zero_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; - case TGSI_EXTSWIZZLE_ONE: - one_mask |= (1 << c); - tgsi.swz[c] = SWZ_X; - break; default: assert(0); } - - if (!tgsi.negate && neg[c]) - neg_mask |= (1 << c); } - if (mask == MASK_ALL && !neg_mask) + if (mask == MASK_ALL) return TRUE; *src = temp(fpc); @@ -360,18 +345,6 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, if (mask) arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - if (zero_mask) - arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); - - if (one_mask) - arith(fpc, 0, STR, *src, one_mask, *src, none, none); - - if (neg_mask) { - struct nv40_sreg one = temp(fpc); - arith(fpc, 0, STR, one, neg_mask, one, none, none); - arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); - } - return FALSE; } @@ -568,12 +541,6 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, case TGSI_OPCODE_MUL: arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; - case TGSI_OPCODE_NOISE1: - case TGSI_OPCODE_NOISE2: - case TGSI_OPCODE_NOISE3: - case TGSI_OPCODE_NOISE4: - arith(fpc, sat, SFL, dst, mask, none, none, none); - break; case TGSI_OPCODE_POW: tmp = temp(fpc); arith(fpc, 0, LG2, tmp, MASK_X, diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index 6d92ac3db9..92caee6f38 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -13,22 +13,6 @@ struct nv40_transfer { bool direct; }; -static unsigned nv40_usage_tx_to_buf(unsigned tx_usage) -{ - switch (tx_usage) { - case PIPE_TRANSFER_READ: - return PIPE_BUFFER_USAGE_CPU_READ; - case PIPE_TRANSFER_WRITE: - return PIPE_BUFFER_USAGE_CPU_WRITE; - case PIPE_TRANSFER_READ_WRITE: - return PIPE_BUFFER_USAGE_CPU_READ_WRITE; - default: - assert(0); - } - - return -1; -} - static void nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, struct pipe_texture *template) @@ -86,7 +70,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = true; tx->surface = pscreen->get_tex_surface(pscreen, pt, face, level, zslice, - nv40_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); return &tx->base; } @@ -103,7 +87,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, - nv40_usage_tx_to_buf(usage)); + pipe_transfer_buffer_flags(&tx->base)); pipe_texture_reference(&tx_tex, NULL); @@ -114,7 +98,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { struct nv40_screen *nvscreen = nv40_screen(pscreen); struct pipe_surface *src; @@ -140,7 +124,7 @@ nv40_transfer_del(struct pipe_transfer *ptx) { struct nv40_transfer *tx = (struct nv40_transfer *)ptx; - if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { struct pipe_screen *pscreen = ptx->texture->screen; struct nv40_screen *nvscreen = nv40_screen(pscreen); struct pipe_surface *dst; @@ -170,7 +154,7 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) struct nv04_surface *ns = (struct nv04_surface *)tx->surface; struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, - nv40_usage_tx_to_buf(ptx->usage)); + pipe_transfer_buffer_flags(ptx)); return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * ptx->block.size; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 0382dbba8f..31dae2457f 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -362,38 +362,23 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, { const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); - uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; - uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, - fsrc->SrcRegisterExtSwz.NegateY, - fsrc->SrcRegisterExtSwz.NegateZ, - fsrc->SrcRegisterExtSwz.NegateW }; + uint mask = 0; uint c; for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: mask |= tgsi_mask(1 << c); break; - case TGSI_EXTSWIZZLE_ZERO: - zero_mask |= tgsi_mask(1 << c); - tgsi.swz[c] = SWZ_X; - break; - case TGSI_EXTSWIZZLE_ONE: - one_mask |= tgsi_mask(1 << c); - tgsi.swz[c] = SWZ_X; - break; default: assert(0); } - - if (!tgsi.negate && neg[c]) - neg_mask |= tgsi_mask(1 << c); } - if (mask == MASK_ALL && !neg_mask) + if (mask == MASK_ALL) return TRUE; *src = temp(vpc); @@ -401,18 +386,6 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, if (mask) arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); - if (zero_mask) - arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none); - - if (one_mask) - arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none); - - if (neg_mask) { - struct nv40_sreg one = temp(vpc); - arith(vpc, 0, OP_STR, one, neg_mask, one, none, none); - arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none); - } - return FALSE; } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index fca078b174..219e7a7862 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -33,13 +33,6 @@ nv50_flush(struct pipe_context *pipe, unsigned flags, { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *eng2d = nv50->screen->eng2d; - - /* We need this in the ddx for reliable composite, not sure what we're - * actually flushing. We generate all our own flushes with flags = 0. */ - WAIT_RING(chan, 2); - BEGIN_RING(chan, eng2d, 0x0110, 1); - OUT_RING (chan, 0); if (flags & PIPE_FLUSH_FRAME) FIRE_RING(chan); @@ -60,29 +53,6 @@ nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) { } -static unsigned int -nv50_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - -static unsigned int -nv50_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) -{ - /** - * FIXME: Optimize. - */ - - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} - struct pipe_context * nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) { @@ -108,8 +78,8 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) nv50->pipe.flush = nv50_flush; - nv50->pipe.is_texture_referenced = nv50_is_texture_referenced; - nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced; + nv50->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; screen->base.channel->user_private = nv50; screen->base.channel->flush_notify = nv50_state_flush_notify; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 4608854d71..33667e8765 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -14,6 +14,7 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_stateobj.h" +#include "nouveau/nouveau_context.h" #include "nv50_screen.h" #include "nv50_program.h" @@ -120,6 +121,7 @@ struct nv50_state { struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; + unsigned vtxelt_nr; }; struct nv50_context { @@ -152,6 +154,8 @@ struct nv50_context { unsigned sampler_nr; struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; unsigned miptree_nr; + + uint16_t vbo_fifo; }; static INLINE struct nv50_context * @@ -198,6 +202,11 @@ extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program extern boolean nv50_state_validate(struct nv50_context *nv50); extern void nv50_state_flush_notify(struct nouveau_channel *chan); +extern void nv50_so_init_sifc(struct nv50_context *nv50, + struct nouveau_stateobj *so, + struct nouveau_bo *bo, unsigned reloc, + unsigned size); + /* nv50_tex.c */ extern void nv50_tex_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 576d075318..9ccc4f5a16 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -31,9 +31,12 @@ #include "nv50_context.h" -#define NV50_SU_MAX_TEMP 64 +#define NV50_SU_MAX_TEMP 127 +#define NV50_SU_MAX_ADDR 4 //#define NV50_PROGRAM_DUMP +/* $a5 and $a6 always seem to be 0, and using $a7 gives you noise */ + /* ARL - gallium craps itself on progs/vp/arl.txt * * MSB - Like MAD, but MUL+SUB @@ -79,7 +82,8 @@ struct nv50_reg { P_ATTR, P_RESULT, P_CONST, - P_IMMD + P_IMMD, + P_ADDR } type; int index; @@ -99,6 +103,7 @@ struct nv50_pc { /* hw resources */ struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; + struct nv50_reg r_addr[NV50_SU_MAX_ADDR]; /* tgsi resources */ struct nv50_reg *temp; @@ -112,6 +117,8 @@ struct nv50_pc { struct nv50_reg *immd; float *immd_buf; int immd_nr; + struct nv50_reg **addr; + int addr_nr; struct nv50_reg *temp_temp[16]; unsigned temp_temp_nr; @@ -159,6 +166,17 @@ popcnt4(uint32_t val) } static void +terminate_mbb(struct nv50_pc *pc) +{ + int i; + + /* remove records of temporary address register values */ + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) + if (pc->r_addr[i].index < 0) + pc->r_addr[i].rhw = -1; +} + +static void alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) { int i = 0; @@ -434,6 +452,8 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) } alloc_reg(pc, dst); + if (dst->hw > 63) + set_long(pc, e); e->inst[0] |= (dst->hw << 2); } @@ -454,9 +474,96 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) e->inst[1] |= (val >> 6) << 2; } +static INLINE void +set_addr(struct nv50_program_exec *e, struct nv50_reg *a) +{ + assert(!(e->inst[0] & 0x0c000000)); + assert(!(e->inst[1] & 0x00000004)); + + e->inst[0] |= (a->hw & 3) << 26; + e->inst[1] |= (a->hw >> 2) << 2; +} + +static void +emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, uint16_t src1_val) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000 | (src1_val << 9); + e->inst[1] = 0x20000000; + set_long(pc, e); + e->inst[0] |= dst->hw << 2; + if (src0) /* otherwise will add to $a0, which is always 0 */ + set_addr(e, src0); + + emit(pc, e); +} + +static struct nv50_reg * +alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) +{ + int i; + struct nv50_reg *a_tgsi = NULL, *a = NULL; + + if (!ref) { + /* allocate for TGSI address reg */ + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { + if (pc->r_addr[i].index >= 0) + continue; + if (pc->r_addr[i].rhw >= 0 && + pc->r_addr[i].acc == pc->insn_cur) + continue; + + pc->r_addr[i].rhw = -1; + pc->r_addr[i].index = i; + return &pc->r_addr[i]; + } + assert(0); + return NULL; + } + + /* Allocate and set an address reg so we can access 'ref'. + * + * If and r_addr has index < 0, it is not reserved for TGSI, + * and index will be the negative of the TGSI addr index the + * value in rhw is relative to, or -256 if rhw is an offset + * from 0. If rhw < 0, the reg has not been initialized. + */ + for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) { + if (pc->r_addr[i].index >= 0) /* occupied for TGSI */ + continue; + if (pc->r_addr[i].rhw < 0) { /* unused */ + a = &pc->r_addr[i]; + continue; + } + if (!a && pc->r_addr[i].acc != pc->insn_cur) + a = &pc->r_addr[i]; + + if (ref->hw - pc->r_addr[i].rhw >= 128) + continue; + + if ((ref->acc >= 0 && pc->r_addr[i].index == -256) || + (ref->acc < 0 && -pc->r_addr[i].index == ref->index)) { + pc->r_addr[i].acc = pc->insn_cur; + return &pc->r_addr[i]; + } + } + assert(a); + + if (ref->acc < 0) + a_tgsi = pc->addr[ref->index]; + + emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); + + a->rhw = ref->hw & ~0x7f; + a->acc = pc->insn_cur; + a->index = a_tgsi ? -ref->index : -256; + return a; +} #define INTERP_LINEAR 0 -#define INTERP_FLAT 1 +#define INTERP_FLAT 1 #define INTERP_PERSPECTIVE 2 #define INTERP_CENTROID 4 @@ -494,10 +601,18 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, { set_long(pc, e); - e->param.index = src->hw; + e->param.index = src->hw & 127; e->param.shift = s; e->param.mask = m << (s % 32); + if (src->hw > 127) + set_addr(e, alloc_addr(pc, src)); + else + if (src->acc < 0) { + assert(src->type == P_CONST); + set_addr(e, pc->addr[src->index]); + } + e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } @@ -506,11 +621,13 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { struct nv50_program_exec *e = exec(pc); - e->inst[0] |= 0x10000000; + e->inst[0] = 0x10000000; + if (!pc->allow32) + set_long(pc, e); set_dst(pc, dst, e); - if (pc->allow32 && dst->type != P_RESULT && src->type == P_IMMD) { + if (!is_long(e) && src->type == P_IMMD) { set_immd(pc, src, e); /*XXX: 32-bit, but steals part of "half" reg space - need to * catch and handle this case if/when we do half-regs @@ -527,6 +644,8 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -586,6 +705,8 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -604,6 +725,8 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); + if (src->hw > 63) + set_long(pc, e); e->inst[0] |= (src->hw << 9); } @@ -630,7 +753,9 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); - e->inst[0] |= (src->hw << 16); + if (src->hw > 63) + set_long(pc, e); + e->inst[0] |= ((src->hw & 127) << 16); } static void @@ -658,7 +783,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } alloc_reg(pc, src); - e->inst[1] |= (src->hw << 14); + e->inst[1] |= ((src->hw & 127) << 14); } static void @@ -698,11 +823,12 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_program_exec *e = exec(pc); - e->inst[0] |= 0xb0000000; + e->inst[0] = 0xb0000000; + alloc_reg(pc, src1); check_swap_src_0_1(pc, &src0, &src1); - if (!pc->allow32 || src0->neg || src1->neg) { + if (!pc->allow32 || (src0->neg | src1->neg) || src1->hw > 63) { set_long(pc, e); e->inst[1] |= (src0->neg << 26) | (src1->neg << 27); } @@ -721,6 +847,22 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst, } static void +emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, + uint8_t s) +{ + struct nv50_program_exec *e = exec(pc); + + set_long(pc, e); + e->inst[1] |= 0xc0000000; + + e->inst[0] |= dst->hw << 2; + e->inst[0] |= s << 16; /* shift left */ + set_src_0_restricted(pc, src, e); + + emit(pc, e); +} + +static void emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { @@ -742,6 +884,7 @@ static INLINE void emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) { + assert(src0 != src1); src1->neg ^= 1; emit_add(pc, dst, src0, src1); src1->neg ^= 1; @@ -773,6 +916,7 @@ static INLINE void emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { + assert(src2 != src0 && src2 != src1); src2->neg ^= 1; emit_mad(pc, dst, src0, src1, src2); src2->neg ^= 1; @@ -837,12 +981,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) #define CVTOP_SAT 0x08 #define CVTOP_ABS 0x10 -/* 0x04 == 32 bit */ +/* 0x04 == 32 bit dst */ /* 0x40 == dst is float */ /* 0x80 == src is float */ #define CVT_F32_F32 0xc4 #define CVT_F32_S32 0x44 -#define CVT_F32_U32 0x64 #define CVT_S32_F32 0x8c #define CVT_S32_S32 0x0c #define CVT_NEG 0x20 @@ -858,7 +1001,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, set_long(pc, e); e->inst[0] |= 0xa0000000; - e->inst[1] |= 0x00004000; + e->inst[1] |= 0x00004000; /* 32 bit src */ e->inst[1] |= (cvn << 16); e->inst[1] |= (fmt << 24); set_src_0(pc, src, e); @@ -1037,20 +1180,10 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, FREE(one); } -static void +static INLINE void emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - struct nv50_program_exec *e = exec(pc); - - set_long(pc, e); - e->inst[0] |= 0xa0000000; /* delta */ - e->inst[1] |= (7 << 29); /* delta */ - e->inst[1] |= 0x04000000; /* negate arg0? probably not */ - e->inst[1] |= (1 << 14); /* src .f32 */ - set_dst(pc, dst, e); - set_src_0(pc, src, e); - - emit(pc, e); + emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG); } static void @@ -1058,22 +1191,18 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) { struct nv50_program_exec *e; const int r_pred = 1; + unsigned cvn = CVT_F32_F32; - /* Sets predicate reg ? */ - e = exec(pc); - e->inst[0] = 0xa00001fd; - e->inst[1] = 0xc4014788; - set_src_0(pc, src, e); - set_pred_wr(pc, 1, r_pred, e); if (src->neg) - e->inst[1] |= 0x20000000; - emit(pc, e); + cvn |= CVT_NEG; + /* write predicate reg */ + emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); - /* This is probably KILP */ + /* conditional discard */ e = exec(pc); - e->inst[0] = 0x000001fe; + e->inst[0] = 0x00000002; set_long(pc, e); - set_pred(pc, 1 /* LT? */, r_pred, e); + set_pred(pc, 0x1 /* LT */, r_pred, e); emit(pc, e); } @@ -1219,6 +1348,43 @@ emit_nop(struct nv50_pc *pc) } static void +emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + assert(src->type == P_TEMP); + + e->inst[0] = 0xc0140000; + e->inst[1] = 0x89800000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_src_2(pc, src, e); + + emit(pc, e); +} + +static void +emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + assert(src->type == P_TEMP); + + if (!src->neg) /* ! double negation */ + emit_neg(pc, src, src); + + e->inst[0] = 0xc0150000; + e->inst[1] = 0x8a400000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_src_2(pc, src, e); + + emit(pc, e); +} + +static void convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) { unsigned q = 0, m = ~0; @@ -1266,10 +1432,14 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) e->inst[1] |= q; } +/* Some operations support an optional negation flag. */ static boolean negate_supported(const struct tgsi_full_instruction *insn, int i) { + int s; + switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_MUL: @@ -1277,12 +1447,29 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) case TGSI_OPCODE_ADD: case TGSI_OPCODE_SUB: case TGSI_OPCODE_MAD: - return TRUE; + break; case TGSI_OPCODE_POW: - return (i == 1) ? TRUE : FALSE; + if (i == 1) + break; + return FALSE; default: return FALSE; } + + /* Watch out for possible multiple uses of an nv50_reg, we + * can't use nv50_reg::neg in these cases. + */ + for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) { + if (s == i) + continue; + if ((insn->FullSrcRegisters[s].SrcRegister.Index == + insn->FullSrcRegisters[i].SrcRegister.Index) && + (insn->FullSrcRegisters[s].SrcRegister.File == + insn->FullSrcRegisters[i].SrcRegister.File)) + return FALSE; + } + + return TRUE; } /* Return a read mask for source registers deduced from opcode & write mask. */ @@ -1357,6 +1544,16 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) return &pc->temp[dst->DstRegister.Index * 4 + c]; case TGSI_FILE_OUTPUT: return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_ADDRESS: + { + struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c]; + if (!r) { + r = alloc_addr(pc, NULL); + pc->addr[dst->DstRegister.Index * 4 + c] = r; + } + assert(r); + return r; + } case TGSI_FILE_NULL: return NULL; default: @@ -1372,16 +1569,19 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, { struct nv50_reg *r = NULL; struct nv50_reg *temp; - unsigned sgn, c; + unsigned sgn, c, swz; + + if (src->SrcRegister.File != TGSI_FILE_CONSTANT) + assert(!src->SrcRegister.Indirect); sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); - c = tgsi_util_get_full_src_register_extswizzle(src, chan); + c = tgsi_util_get_full_src_register_swizzle(src, chan); switch (c) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: switch (src->SrcRegister.File) { case TGSI_FILE_INPUT: r = &pc->attr[src->SrcRegister.Index * 4 + c]; @@ -1390,25 +1590,35 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = &pc->temp[src->SrcRegister.Index * 4 + c]; break; case TGSI_FILE_CONSTANT: - r = &pc->param[src->SrcRegister.Index * 4 + c]; + if (!src->SrcRegister.Indirect) { + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + } + /* Indicate indirection by setting r->acc < 0 and + * use the index field to select the address reg. + */ + r = MALLOC_STRUCT(nv50_reg); + swz = tgsi_util_get_src_register_swizzle( + &src->SrcRegisterInd, 0); + ctor_reg(r, P_CONST, + src->SrcRegisterInd.Index * 4 + swz, + src->SrcRegister.Index * 4 + c); + r->acc = -1; break; case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->SrcRegister.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: break; + case TGSI_FILE_ADDRESS: + r = pc->addr[src->SrcRegister.Index * 4 + c]; + assert(r); + break; default: assert(0); break; } break; - case TGSI_EXTSWIZZLE_ZERO: - r = alloc_immd(pc, 0.0); - return r; - case TGSI_EXTSWIZZLE_ONE: - if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET) - return alloc_immd(pc, -1.0); - return alloc_immd(pc, 1.0); default: assert(0); break; @@ -1433,11 +1643,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; case TGSI_UTIL_SIGN_SET: temp = temp_temp(pc); - emit_abs(pc, temp, r); - if (neg) - temp->neg = 1; - else - emit_neg(pc, temp, temp); + emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG); r = temp; break; default: @@ -1610,7 +1816,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; - rdst[c] = dst[c]; + /* rdst[c] = dst[c]; */ /* done above */ dst[c] = temp_temp(pc); } } @@ -1632,8 +1838,15 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_add(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_ARL: + assert(src[0][0]); + temp = temp_temp(pc); + emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32); + emit_arl(pc, dst[0], temp, 4); + break; case TGSI_OPCODE_BGNLOOP: pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; + terminate_mbb(pc); break; case TGSI_OPCODE_BRK: emit_branch(pc, -1, 0, NULL); @@ -1648,6 +1861,18 @@ nv50_program_tx_insn(struct nv50_pc *pc, CVTOP_CEIL, CVT_F32_F32 | CVT_RI); } break; + case TGSI_OPCODE_CMP: + pc->allow32 = FALSE; + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32); + emit_mov(pc, dst[c], src[1][c]); + set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */ + emit_mov(pc, dst[c], src[2][c]); + set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ + } + break; case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); @@ -1660,6 +1885,20 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_precossin(pc, temp, src[0][0]); emit_flop(pc, 5, brdc, temp); break; + case TGSI_OPCODE_DDX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_ddx(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_DDY: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_ddy(pc, dst[c], src[0][c]); + } + break; case TGSI_OPCODE_DP3: emit_mul(pc, temp, src[0][0], src[1][0]); emit_mad(pc, temp, src[0][1], src[1][1], temp); @@ -1691,6 +1930,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_branch(pc, -1, 0, NULL); pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + terminate_mbb(pc); break; case TGSI_OPCODE_ENDIF: pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; @@ -1703,6 +1943,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size; pc->br_join[pc->if_lvl] = NULL; } + terminate_mbb(pc); /* emit a NOP as join point, we could set it on the next * one, but would have to make sure it is long and !immd */ @@ -1713,6 +1954,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_branch(pc, -1, 0, NULL); pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl]; pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size; + terminate_mbb(pc); break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); @@ -1740,6 +1982,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, set_pred_wr(pc, 1, 0, pc->if_cond); emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]); pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; + terminate_mbb(pc); break; case TGSI_OPCODE_KIL: emit_kil(pc, src[0][0]); @@ -1784,7 +2027,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -1903,8 +2145,10 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - /* in this case we saturate later */ - if (dst[c]->type == P_TEMP && dst[c]->index < 0) + /* In this case we saturate later, and dst[c] won't + * be another temp_temp (and thus lost), since rdst + * already is TEMP (see above). */ + if (rdst[c]->type == P_TEMP && rdst[c]->index < 0) continue; emit_sat(pc, rdst[c], dst[c]); } @@ -1914,8 +2158,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!src[i][c]) continue; + src[i][c]->neg = 0; if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) FREE(src[i][c]); + else + if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST) + FREE(src[i][c]); /* indirect constant */ } } @@ -1964,10 +2212,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - k = tgsi_util_get_full_src_register_extswizzle(src, c); - - if (k > TGSI_EXTSWIZZLE_W) - continue; + k = tgsi_util_get_full_src_register_swizzle(src, c); reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; } @@ -2070,11 +2315,10 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, if (!(mask & (1 << chn))) /* src is not read */ continue; - c = tgsi_util_get_full_src_register_extswizzle(fs, chn); + c = tgsi_util_get_full_src_register_swizzle(fs, chn); s = tgsi_util_get_full_src_register_sign_mode(fs, chn); - if (c > TGSI_EXTSWIZZLE_W || - !(fd->DstRegister.WriteMask & (1 << c))) + if (!(fd->DstRegister.WriteMask & (1 << c))) continue; /* no danger if src is copied to TEMP first */ @@ -2259,8 +2503,8 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->interp_mode[i] = mode; } break; + case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: - break; case TGSI_FILE_SAMPLER: break; default: @@ -2454,6 +2698,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1; pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1; pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; + pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1; + assert(pc->addr_nr <= 2); p->cfg.high_temp = 4; @@ -2522,6 +2768,14 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) ctor_reg(&pc->param[rid], P_CONST, i, rid); } + if (pc->addr_nr) { + pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *)); + if (!pc->addr) + return FALSE; + } + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) + ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); + return TRUE; } @@ -2701,7 +2955,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) p->immd_nr, NV50_CB_PMISC); } - assert(p->param_nr <= 128); + assert(p->param_nr <= 512); if (p->param_nr) { unsigned cb; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 3b08e1b89f..c8d0f1e4d8 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -35,8 +35,14 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, { if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_UNORM: return TRUE; default: break; @@ -55,6 +61,9 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, } else { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_X8R8G8B8_SRGB: case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -66,6 +75,13 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_UNORM: return TRUE; default: break; @@ -216,7 +232,16 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) tesla_class = NV54TCL; break; case 0xa0: - tesla_class = NVA0TCL; + switch (chipset) { + case 0xa0: + case 0xaa: + case 0xac: + tesla_class = NVA0TCL; + break; + default: + tesla_class = 0x8597; + break; + } break; default: NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset); @@ -224,12 +249,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - if (tesla_class == 0) { - NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); - nv50_screen_destroy(pscreen); - return NULL; - } - ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, &screen->tesla); if (ret) { @@ -290,6 +309,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, 0x121c, 1); so_data (so, 1); + /* try to activate all/more lanes (threads) in a warp */ + so_method(so, screen->tesla, 0x1400, 1); + so_data (so, 0xf); + so_method(so, screen->tesla, 0x13bc, 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ @@ -299,7 +322,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, 8); /* constant buffers for immediates and VP/FP parameters */ - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4, &screen->constbuf_misc[0]); if (ret) { nv50_screen_destroy(pscreen); @@ -307,7 +330,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } for (i = 0; i < 2; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); @@ -316,8 +339,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) || - nouveau_resource_init(&screen->parm_heap[0], 0, 128) || - nouveau_resource_init(&screen->parm_heap[1], 0, 128)) + nouveau_resource_init(&screen->parm_heap[0], 0, 512) || + nouveau_resource_init(&screen->parm_heap[1], 0, 512)) { NOUVEAU_ERR("Error initialising constant buffers.\n"); nv50_screen_destroy(pscreen); @@ -338,7 +361,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_PMISC << 16) | 0x00000800); + so_data (so, (NV50_CB_PMISC << 16) | 0x00000200); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); @@ -362,48 +385,31 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); - /* Texture sampler/image unit setup - we abuse the constant buffer - * upload mechanism for the moment to upload data to the tex config - * blocks. At some point we *may* want to go the NVIDIA way of doing - * things? - */ - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tic); + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic); if (ret) { nv50_screen_destroy(pscreen); return NULL; } - so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_TIC << 16) | 0x0800); so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00000800); + so_data (so, 0x000007ff); - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tsc); + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc); if (ret) { nv50_screen_destroy(pscreen); return NULL; } - so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, (NV50_CB_TSC << 16) | 0x0800); so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); - so_data (so, 0x00000800); + so_data (so, 0x00000000); /* Vertex array limits - max them out */ diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 81fa3e34c5..ffaa5e29d1 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -146,6 +146,7 @@ nv50_sampler_state_create(struct pipe_context *pipe, (wrap_mode(cso->wrap_r) << 6)); switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; break; @@ -156,6 +157,7 @@ nv50_sampler_state_create(struct pipe_context *pipe, } switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MINF_LINEAR; break; @@ -183,21 +185,15 @@ nv50_sampler_state_create(struct pipe_context *pipe, else if (cso->max_anisotropy >= 12.0) tsc[0] |= (6 << 20); - else - if (cso->max_anisotropy >= 10.0) - tsc[0] |= (5 << 20); - else - if (cso->max_anisotropy >= 8.0) - tsc[0] |= (4 << 20); - else - if (cso->max_anisotropy >= 6.0) - tsc[0] |= (3 << 20); - else - if (cso->max_anisotropy >= 4.0) - tsc[0] |= (2 << 20); - else - if (cso->max_anisotropy >= 2.0) - tsc[0] |= (1 << 20); + else { + tsc[0] |= (int)(cso->max_anisotropy * 0.5f) << 20; + + if (cso->max_anisotropy >= 4.0) + tsc[1] |= NV50TSC_1_1_UNKN_ANISO_35; + else + if (cso->max_anisotropy >= 2.0) + tsc[1] |= NV50TSC_1_1_UNKN_ANISO_15; + } if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { tsc[0] |= (1 << 8); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 4ed76973c4..956a700615 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -23,6 +23,12 @@ #include "nv50_context.h" #include "nouveau/nouveau_stateobj.h" +#define NV50_CBUF_FORMAT_CASE(n) \ + case PIPE_FORMAT_##n: so_data(so, NV50TCL_RT_FORMAT_##n); break + +#define NV50_ZETA_FORMAT_CASE(n) \ + case PIPE_FORMAT_##n: so_data(so, NV50TCL_ZETA_FORMAT_##n); break + static void nv50_state_validate_fb(struct nv50_context *nv50) { @@ -54,12 +60,14 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->cbufs[i]->format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM); - break; + NV50_CBUF_FORMAT_CASE(A8R8G8B8_UNORM); + NV50_CBUF_FORMAT_CASE(X8R8G8B8_UNORM); + NV50_CBUF_FORMAT_CASE(R5G6B5_UNORM); + NV50_CBUF_FORMAT_CASE(R16G16B16A16_SNORM); + NV50_CBUF_FORMAT_CASE(R16G16B16A16_UNORM); + NV50_CBUF_FORMAT_CASE(R32G32B32A32_FLOAT); + NV50_CBUF_FORMAT_CASE(R16G16_SNORM); + NV50_CBUF_FORMAT_CASE(R16G16_UNORM); default: NOUVEAU_ERR("AIIII unknown format %s\n", pf_name(fb->cbufs[i]->format)); @@ -93,18 +101,10 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->zsbuf->format) { - case PIPE_FORMAT_Z32_FLOAT: - so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT); - break; - case PIPE_FORMAT_Z24S8_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM); - break; - case PIPE_FORMAT_X8Z24_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM); - break; - case PIPE_FORMAT_S8Z24_UNORM: - so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); - break; + NV50_ZETA_FORMAT_CASE(S8Z24_UNORM); + NV50_ZETA_FORMAT_CASE(X8Z24_UNORM); + NV50_ZETA_FORMAT_CASE(Z24S8_UNORM); + NV50_ZETA_FORMAT_CASE(Z32_FLOAT); default: NOUVEAU_ERR("AIIII unknown format %s\n", pf_name(fb->zsbuf->format)); @@ -219,6 +219,9 @@ nv50_state_flush_notify(struct nouveau_channel *chan) { struct nv50_context *nv50 = chan->user_private; + if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE)) + so_emit(chan, nv50->state.tic_upload); + so_emit_reloc_markers(chan, nv50->state.fb); so_emit_reloc_markers(chan, nv50->state.vertprog); so_emit_reloc_markers(chan, nv50->state.fragprog); @@ -230,6 +233,7 @@ boolean nv50_state_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_stateobj *so; unsigned i; @@ -351,15 +355,25 @@ scissor_uptodate: viewport_uptodate: if (nv50->dirty & NV50_NEW_SAMPLER) { - int i; - - so = so_new(nv50->sampler_nr * 8 + 3, 0); - so_method(so, tesla, NV50TCL_CB_ADDR, 1); - so_data (so, NV50_CB_TSC); - so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, - nv50->sampler_nr * 8); - for (i = 0; i < nv50->sampler_nr; i++) + unsigned i; + + so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2); + + nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM, + nv50->sampler_nr * 8 * 4); + + for (i = 0; i < nv50->sampler_nr; i++) { + if (!nv50->sampler[i]) + continue; + so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8); so_datap (so, nv50->sampler[i]->tsc, 8); + } + + so_method(so, tesla, 0x1440, 1); /* sync SIFC */ + so_data (so, 0); + so_method(so, tesla, 0x1334, 1); /* flush TSC */ + so_data (so, 0); + so_ref(so, &nv50->state.tsc_upload); so_ref(NULL, &so); } @@ -377,3 +391,33 @@ viewport_uptodate: return TRUE; } +void nv50_so_init_sifc(struct nv50_context *nv50, + struct nouveau_stateobj *so, + struct nouveau_bo *bo, unsigned reloc, unsigned size) +{ + struct nouveau_grobj *eng2d = nv50->screen->eng2d; + + so_method(so, eng2d, NV50_2D_DST_FORMAT, 2); + so_data (so, NV50_2D_DST_FORMAT_R8_UNORM); + so_data (so, 1); + so_method(so, eng2d, NV50_2D_DST_PITCH, 5); + so_data (so, 262144); + so_data (so, 65536); + so_data (so, 1); + so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0); + so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2); + so_data (so, 0); + so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); + so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); + so_data (so, size); + so_data (so, 1); + so_data (so, 0); + so_data (so, 1); + so_data (so, 0); + so_data (so, 1); + so_data (so, 0); + so_data (so, 0); + so_data (so, 0); + so_data (so, 0); +} diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 033cb50c11..52ccdaa407 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -25,105 +25,86 @@ #include "nouveau/nouveau_stateobj.h" +#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \ +{ \ + PIPE_FORMAT_##pf, \ + NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \ + NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \ + NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \ + NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \ + NV50TIC_0_0_FMT_##f \ +} + +#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f) + +struct nv50_texture_format { + enum pipe_format pf; + uint32_t hw; +}; + +#define NV50_TEX_FORMAT_LIST_SIZE \ + (sizeof(nv50_tex_format_list) / sizeof(struct nv50_texture_format)) + +static const struct nv50_texture_format nv50_tex_format_list[] = +{ + _(A8R8G8B8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8), + _(A8R8G8B8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8), + _(X8R8G8B8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8), + _(X8R8G8B8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8), + _(A1R5G5B5_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5), + _(A4R4G4B4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4), + + _(R5G6B5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5), + + _(L8_UNORM, UNORM, C0, C0, C0, ONE, 8), + _(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8), + _(I8_UNORM, UNORM, C0, C0, C0, C0, 8), + + _(A8L8_UNORM, UNORM, C0, C0, C0, C1, 8_8), + + _(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1), + _(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1), + _(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3), + _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5), + + _MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8), + + _(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16), + _(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16), + _(R32G32B32A32_FLOAT, FLOAT, C0, C1, C2, C3, 32_32_32_32), + + _(R16G16_SNORM, SNORM, C0, C1, ZERO, ONE, 16_16), + _(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16), + + _MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH) + +}; + +#undef _ +#undef _MIXED + static int nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, struct nv50_miptree *mt, int unit) { - switch (mt->base.base.format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8_8_8_8); - break; - case PIPE_FORMAT_A1R5G5B5_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_1_5_5_5); - break; - case PIPE_FORMAT_A4R4G4B4_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_4_4_4_4); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_5_6_5); - break; - case PIPE_FORMAT_L8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8); - break; - case PIPE_FORMAT_A8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8); - break; - case PIPE_FORMAT_I8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8); - break; - case PIPE_FORMAT_A8L8_UNORM: - so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_8_8); - break; - case PIPE_FORMAT_DXT1_RGB: - so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_DXT1); - break; - case PIPE_FORMAT_DXT1_RGBA: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_DXT1); - break; - case PIPE_FORMAT_DXT3_RGBA: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_DXT3); - break; - case PIPE_FORMAT_DXT5_RGBA: - so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | - NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | - NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | - NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | - NV50TIC_0_0_FMT_DXT5); - break; - default: - return 1; - } + unsigned i; + uint32_t mode; + for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++) + if (nv50_tex_format_list[i].pf == mt->base.base.format) + break; + if (i == NV50_TEX_FORMAT_LIST_SIZE) + return 1; + + mode = (nv50->sampler[unit]->normalized ? 0xd0005000 : 0x5001d000) | + (mt->base.bo->tile_mode << 22); + if (pf_type(mt->base.base.format) == PIPE_FORMAT_TYPE_SRGB) + mode |= 0x0400; + + so_data (so, nv50_tex_format_list[i].hw); so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); - if (nv50->sampler[unit]->normalized) - so_data (so, 0xd0005000 | mt->base.bo->tile_mode << 22); - else - so_data (so, 0x5001d000 | mt->base.bo->tile_mode << 22); + so_data (so, mode); so_data (so, 0x00300000); so_data (so, mt->base.base.width[0]); so_data (so, (mt->base.base.last_level << 28) | @@ -137,20 +118,24 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, void nv50_tex_validate(struct nv50_context *nv50) { + struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_stateobj *so; - int unit, push; + unsigned i, unit, push; + + push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6; + so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr + 2); - push = nv50->miptree_nr * 9 + 2; - push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2; + nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM, + nv50->miptree_nr * 8 * 4); - so = so_new(push, nv50->miptree_nr * 2); - so_method(so, tesla, NV50TCL_CB_ADDR, 1); - so_data (so, NV50_CB_TIC); - for (unit = 0; unit < nv50->miptree_nr; unit++) { + for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) { struct nv50_miptree *mt = nv50->miptree[unit]; - so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8); + if (!mt) + continue; + + so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8); if (nv50_tex_construct(nv50, so, mt, unit)) { NOUVEAU_ERR("failed tex validate\n"); so_ref(NULL, &so); @@ -158,17 +143,25 @@ nv50_tex_validate(struct nv50_context *nv50) } so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1); - so_data (so, (unit << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) | - (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | - NV50TCL_SET_SAMPLER_TEX_VALID); + so_data (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) | + (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | + NV50TCL_SET_SAMPLER_TEX_VALID); } for (; unit < nv50->state.miptree_nr; unit++) { so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1); so_data (so, - (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0); + (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0); } + /* not sure if the following really do what I think: */ + so_method(so, tesla, 0x1440, 1); /* sync SIFC */ + so_data (so, 0); + so_method(so, tesla, 0x1330, 1); /* flush TIC */ + so_data (so, 0); + so_method(so, tesla, 0x1338, 1); /* flush texture caches */ + so_data (so, 0x20); + so_ref(so, &nv50->state.tic_upload); so_ref(NULL, &so); nv50->state.miptree_nr = nv50->miptree_nr; diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h index 207fb039f7..d531e61132 100644 --- a/src/gallium/drivers/nv50/nv50_texture.h +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -38,18 +38,26 @@ #define NV50TIC_0_0_TYPEA_MASK 0x00038000 #define NV50TIC_0_0_TYPEA_UNORM 0x00010000 #define NV50TIC_0_0_TYPEA_SNORM 0x00008000 +#define NV50TIC_0_0_TYPEA_SINT 0x00018000 +#define NV50TIC_0_0_TYPEA_UINT 0x00020000 #define NV50TIC_0_0_TYPEA_FLOAT 0x00038000 #define NV50TIC_0_0_TYPEB_MASK 0x00007000 #define NV50TIC_0_0_TYPEB_UNORM 0x00002000 #define NV50TIC_0_0_TYPEB_SNORM 0x00001000 +#define NV50TIC_0_0_TYPEB_SINT 0x00003000 +#define NV50TIC_0_0_TYPEB_UINT 0x00004000 #define NV50TIC_0_0_TYPEB_FLOAT 0x00007000 #define NV50TIC_0_0_TYPEG_MASK 0x00000e00 #define NV50TIC_0_0_TYPEG_UNORM 0x00000400 #define NV50TIC_0_0_TYPEG_SNORM 0x00000200 +#define NV50TIC_0_0_TYPEG_SINT 0x00000600 +#define NV50TIC_0_0_TYPEG_UINT 0x00000800 #define NV50TIC_0_0_TYPEG_FLOAT 0x00000e00 #define NV50TIC_0_0_TYPER_MASK 0x000001c0 #define NV50TIC_0_0_TYPER_UNORM 0x00000080 #define NV50TIC_0_0_TYPER_SNORM 0x00000040 +#define NV50TIC_0_0_TYPER_SINT 0x000000c0 +#define NV50TIC_0_0_TYPER_UINT 0x00000100 #define NV50TIC_0_0_TYPER_FLOAT 0x000001c0 #define NV50TIC_0_0_FMT_MASK 0x0000003f #define NV50TIC_0_0_FMT_32_32_32_32 0x00000001 @@ -57,6 +65,7 @@ #define NV50TIC_0_0_FMT_32_32 0x00000004 #define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 #define NV50TIC_0_0_FMT_2_10_10_10 0x00000009 +#define NV50TIC_0_0_FMT_16_16 0x0000000c #define NV50TIC_0_0_FMT_32 0x0000000f #define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 /* #define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 */ @@ -65,12 +74,16 @@ #define NV50TIC_0_0_FMT_8_8 0x00000018 #define NV50TIC_0_0_FMT_16 0x0000001b #define NV50TIC_0_0_FMT_8 0x0000001d +#define NV50TIC_0_0_FMT_5_9_9_9 0x00000020 #define NV50TIC_0_0_FMT_10_11_11 0x00000021 #define NV50TIC_0_0_FMT_DXT1 0x00000024 #define NV50TIC_0_0_FMT_DXT3 0x00000025 #define NV50TIC_0_0_FMT_DXT5 0x00000026 #define NV50TIC_0_0_FMT_RGTC1 0x00000027 #define NV50TIC_0_0_FMT_RGTC2 0x00000028 +#define NV50TIC_0_0_FMT_24_8 0x00000029 +#define NV50TIC_0_0_FMT_32_DEPTH 0x0000002f +#define NV50TIC_0_0_FMT_32_8 0x00000030 #define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff #define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 @@ -133,6 +146,8 @@ #define NV50TSC_1_1_MIPF_NEAREST 0x00000080 #define NV50TSC_1_1_MIPF_LINEAR 0x000000c0 #define NV50TSC_1_1_LOD_BIAS_MASK 0x01fff000 +#define NV50TSC_1_1_UNKN_ANISO_15 0x10000000 +#define NV50TSC_1_1_UNKN_ANISO_35 0x18000000 #define NV50TSC_1_2_MIN_LOD_MASK 0x00000f00 #define NV50TSC_1_2_MAX_LOD_MASK 0x00f00000 diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index bb7731855c..9c289026bb 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -161,7 +161,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - if (usage != PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_READ) { nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset, tx->level_pitch, tx->level_tiling, x, y, @@ -183,7 +183,7 @@ nv50_transfer_del(struct pipe_transfer *ptx) struct nv50_transfer *tx = (struct nv50_transfer *)ptx; struct nv50_miptree *mt = nv50_miptree(ptx->texture); - if (ptx->usage != PIPE_TRANSFER_READ) { + if (ptx->usage & PIPE_TRANSFER_WRITE) { struct pipe_screen *pscreen = ptx->texture->screen; nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, 0, 0, diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index eeed148c7b..db54380241 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -26,6 +26,18 @@ #include "nv50_context.h" +static boolean +nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned); + +static boolean +nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned); + +static boolean +nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned); + +static boolean +nv50_push_arrays(struct nv50_context *, unsigned, unsigned); + static INLINE unsigned nv50_prim(unsigned mode) { @@ -132,6 +144,7 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_channel *chan = nv50->screen->tesla->channel; struct nouveau_grobj *tesla = nv50->screen->tesla; + boolean ret; nv50_state_validate(nv50); @@ -139,24 +152,25 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, OUT_RING (chan, 0); BEGIN_RING(chan, tesla, 0x142c, 1); OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, 0x1440, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, 0x1334, 1); - OUT_RING (chan, 0); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); OUT_RING (chan, nv50_prim(mode)); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); - OUT_RING (chan, start); - OUT_RING (chan, count); + + if (nv50->vbo_fifo) + ret = nv50_push_arrays(nv50, start, count); + else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + ret = TRUE; + } BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - pipe->flush(pipe, 0, NULL); - return TRUE; + return ret; } -static INLINE void +static INLINE boolean nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned start, unsigned count) { @@ -165,6 +179,9 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, map += start; + if (nv50->vbo_fifo) + return nv50_push_elements_u08(nv50, map, count); + if (count & 1) { BEGIN_RING(chan, tesla, 0x15e8, 1); OUT_RING (chan, map[0]); @@ -183,9 +200,10 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, count -= nr; map += nr; } + return TRUE; } -static INLINE void +static INLINE boolean nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned start, unsigned count) { @@ -194,6 +212,9 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, map += start; + if (nv50->vbo_fifo) + return nv50_push_elements_u16(nv50, map, count); + if (count & 1) { BEGIN_RING(chan, tesla, 0x15e8, 1); OUT_RING (chan, map[0]); @@ -212,9 +233,10 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, count -= nr; map += nr; } + return TRUE; } -static INLINE void +static INLINE boolean nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, unsigned start, unsigned count) { @@ -223,6 +245,9 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, map += start; + if (nv50->vbo_fifo) + return nv50_push_elements_u32(nv50, map, count); + while (count) { unsigned nr = count > 2047 ? 2047 : count; @@ -232,6 +257,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, count -= nr; map += nr; } + return TRUE; } boolean @@ -244,6 +270,7 @@ nv50_draw_elements(struct pipe_context *pipe, struct nouveau_grobj *tesla = nv50->screen->tesla; struct pipe_screen *pscreen = pipe->screen; void *map; + boolean ret; map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); @@ -258,23 +285,25 @@ nv50_draw_elements(struct pipe_context *pipe, OUT_RING (chan, nv50_prim(mode)); switch (indexSize) { case 1: - nv50_draw_elements_inline_u08(nv50, map, start, count); + ret = nv50_draw_elements_inline_u08(nv50, map, start, count); break; case 2: - nv50_draw_elements_inline_u16(nv50, map, start, count); + ret = nv50_draw_elements_inline_u16(nv50, map, start, count); break; case 4: - nv50_draw_elements_inline_u32(nv50, map, start, count); + ret = nv50_draw_elements_inline_u32(nv50, map, start, count); break; default: assert(0); + ret = FALSE; + break; } BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); pipe_buffer_unmap(pscreen, indexBuffer); - pipe->flush(pipe, 0, NULL); - return TRUE; + + return ret; } static INLINE boolean @@ -341,17 +370,24 @@ nv50_vbo_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr; - unsigned i; + unsigned i, n_ve; /* don't validate if Gallium took away our buffers */ if (nv50->vtxbuf_nr == 0) return; + nv50->vbo_fifo = 0; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) + if (nv50->vtxbuf[i].stride && + !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) + nv50->vbo_fifo = 0xffff; + + n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; - vtxbuf = so_new(nv50->vtxelt_nr * 7, nv50->vtxelt_nr * 4); - vtxfmt = so_new(nv50->vtxelt_nr + 1, 0); - so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), - nv50->vtxelt_nr); + vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4); + vtxfmt = so_new(n_ve + 1, 0); + so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); for (i = 0; i < nv50->vtxelt_nr; i++) { struct pipe_vertex_element *ve = &nv50->vtxelt[i]; @@ -367,10 +403,19 @@ nv50_vbo_validate(struct nv50_context *nv50) so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); so_data (vtxbuf, 0); + + nv50->vbo_fifo &= ~(1 << i); continue; } so_data(vtxfmt, hw | i); + if (nv50->vbo_fifo) { + so_method(vtxbuf, tesla, + NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); + so_data (vtxbuf, 0); + continue; + } + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); so_data (vtxbuf, 0x20000000 | vb->stride); so_reloc (vtxbuf, bo, vb->buffer_offset + @@ -389,6 +434,13 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); } + for (; i < n_ve; ++i) { + so_data (vtxfmt, 0x7e080010); + + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); + so_data (vtxbuf, 0); + } + nv50->state.vtxelt_nr = nv50->vtxelt_nr; so_ref (vtxfmt, &nv50->state.vtxfmt); so_ref (vtxbuf, &nv50->state.vtxbuf); @@ -398,3 +450,320 @@ nv50_vbo_validate(struct nv50_context *nv50) so_ref (NULL, &vtxattr); } +typedef void (*pfn_push)(struct nouveau_channel *, void *); + +struct nv50_vbo_emitctx +{ + pfn_push push[16]; + void *map[16]; + unsigned stride[16]; + unsigned nr_ve; + unsigned vtx_dwords; + unsigned vtx_max; +}; + +static INLINE void +emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit) +{ + unsigned i; + + for (i = 0; i < emit->nr_ve; ++i) { + emit->push[i](chan, emit->map[i]); + emit->map[i] += emit->stride[i]; + } +} + +static INLINE void +emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit, + uint32_t vi) +{ + unsigned i; + + for (i = 0; i < emit->nr_ve; ++i) + emit->push[i](chan, emit->map[i] + emit->stride[i] * vi); +} + +static INLINE boolean +nv50_map_vbufs(struct nv50_context *nv50) +{ + int i; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) { + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; + unsigned size, delta; + + if (nouveau_bo(vb->buffer)->map) + continue; + + size = vb->stride * (vb->max_index + 1); + delta = vb->buffer_offset; + + if (!size) + size = vb->buffer->size - vb->buffer_offset; + + if (nouveau_bo_map_range(nouveau_bo(vb->buffer), + delta, size, NOUVEAU_BO_RD)) + break; + } + + if (i == nv50->vtxbuf_nr) + return TRUE; + for (; i >= 0; --i) + nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); + return FALSE; +} + +static INLINE void +nv50_unmap_vbufs(struct nv50_context *nv50) +{ + unsigned i; + + for (i = 0; i < nv50->vtxbuf_nr; ++i) + if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) + nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); +} + +static void +emit_b32_1(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b32_2(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); +} + +static void +emit_b32_3(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); +} + +static void +emit_b32_4(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); + OUT_RING(chan, v[3]); +} + +static void +emit_b16_1(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b16_3(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, (v[1] << 16) | v[0]); + OUT_RING(chan, v[2]); +} + +static void +emit_b08_1(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b08_3(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); +} + +static boolean +emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, + unsigned start) +{ + unsigned i; + + if (nv50_map_vbufs(nv50) == FALSE) + return FALSE; + + emit->nr_ve = 0; + emit->vtx_dwords = 0; + + for (i = 0; i < nv50->vtxelt_nr; ++i) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned n, type, size; + + ve = &nv50->vtxelt[i]; + vb = &nv50->vtxbuf[ve->vertex_buffer_index]; + if (!(nv50->vbo_fifo & (1 << i))) + continue; + n = emit->nr_ve++; + + emit->stride[n] = vb->stride; + emit->map[n] = nouveau_bo(vb->buffer)->map + + (start * vb->stride + ve->src_offset); + + type = pf_type(ve->src_format); + size = pf_size_x(ve->src_format) << pf_exp2(ve->src_format); + + assert(ve->nr_components > 0 && ve->nr_components <= 4); + + /* It shouldn't be necessary to push the implicit 1s + * for case 3 and size 8 cases 1, 2, 3. + */ + switch (size) { + default: + NOUVEAU_ERR("unsupported vtxelt size: %u\n", size); + return FALSE; + case 32: + switch (ve->nr_components) { + case 1: emit->push[n] = emit_b32_1; break; + case 2: emit->push[n] = emit_b32_2; break; + case 3: emit->push[n] = emit_b32_3; break; + case 4: emit->push[n] = emit_b32_4; break; + } + emit->vtx_dwords += ve->nr_components; + break; + case 16: + switch (ve->nr_components) { + case 1: emit->push[n] = emit_b16_1; break; + case 2: emit->push[n] = emit_b32_1; break; + case 3: emit->push[n] = emit_b16_3; break; + case 4: emit->push[n] = emit_b32_2; break; + } + emit->vtx_dwords += (ve->nr_components + 1) >> 1; + break; + case 8: + switch (ve->nr_components) { + case 1: emit->push[n] = emit_b08_1; break; + case 2: emit->push[n] = emit_b16_1; break; + case 3: emit->push[n] = emit_b08_3; break; + case 4: emit->push[n] = emit_b32_1; break; + } + emit->vtx_dwords += 1; + break; + } + } + + emit->vtx_max = 512 / emit->vtx_dwords; + + return TRUE; +} + +static boolean +nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_vbo_emitctx emit; + + if (emit_prepare(nv50, &emit, start) == FALSE) + return FALSE; + + while (count) { + unsigned i, dw, nr = MIN2(count, emit.vtx_max); + dw = nr * emit.vtx_dwords; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + for (i = 0; i < nr; ++i) + emit_vtx_next(chan, &emit); + + count -= nr; + } + nv50_unmap_vbufs(nv50); + + return TRUE; +} + +static boolean +nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_vbo_emitctx emit; + + if (emit_prepare(nv50, &emit, 0) == FALSE) + return FALSE; + + while (count) { + unsigned i, dw, nr = MIN2(count, emit.vtx_max); + dw = nr * emit.vtx_dwords; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + for (i = 0; i < nr; ++i) + emit_vtx(chan, &emit, *map++); + + count -= nr; + } + nv50_unmap_vbufs(nv50); + + return TRUE; +} + +static boolean +nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_vbo_emitctx emit; + + if (emit_prepare(nv50, &emit, 0) == FALSE) + return FALSE; + + while (count) { + unsigned i, dw, nr = MIN2(count, emit.vtx_max); + dw = nr * emit.vtx_dwords; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + for (i = 0; i < nr; ++i) + emit_vtx(chan, &emit, *map++); + + count -= nr; + } + nv50_unmap_vbufs(nv50); + + return TRUE; +} + +static boolean +nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_vbo_emitctx emit; + + if (emit_prepare(nv50, &emit, 0) == FALSE) + return FALSE; + + while (count) { + unsigned i, dw, nr = MIN2(count, emit.vtx_max); + dw = nr * emit.vtx_dwords; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); + for (i = 0; i < nr; ++i) + emit_vtx(chan, &emit, *map++); + + count -= nr; + } + nv50_unmap_vbufs(nv50); + + return TRUE; +} diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 93c2152edc..f73d80de88 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -4,8 +4,6 @@ include $(TOP)/configs/current LIBNAME = r300 C_SOURCES = \ - r3xx_fs.c \ - r5xx_fs.c \ r300_chipset.c \ r300_clear.c \ r300_context.c \ @@ -20,14 +18,11 @@ C_SOURCES = \ r300_state_derived.c \ r300_state_invariant.c \ r300_vs.c \ - r300_surface.c \ r300_texture.c \ r300_tgsi_to_rc.c LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include + -I$(TOP)/src/mesa/drivers/dri/r300/compiler COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 493d7b28bc..97989040d2 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -1,12 +1,14 @@ Import('*') +r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') + env = env.Clone() +# add the paths for r300compiler +env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa']) r300 = env.ConvenienceLibrary( target = 'r300', source = [ - 'r3xx_fs.c', - 'r5xx_fs.c', 'r300_chipset.c', 'r300_clear.c', 'r300_context.c', @@ -21,9 +23,9 @@ r300 = env.ConvenienceLibrary( 'r300_state_derived.c', 'r300_state_invariant.c', 'r300_vs.c', - 'r300_surface.c', 'r300_texture.c', - ]) + 'r300_tgsi_to_rc.c', + ] + r300compiler) + r300compiler Export('r300') diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index d138866d33..51fdb82ff3 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_chipset.h" + #include "util/u_debug.h" /* r300_chipset: A file all to itself for deducing the various properties of @@ -31,7 +32,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) { /* Reasonable defaults */ caps->num_vert_fpus = 4; - caps->has_tcl = getenv("RADEON_NO_TCL") ? FALSE : TRUE; + caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 322d4a57e4..0633a8b8a7 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -33,9 +33,11 @@ struct r300_capabilities { /* Chipset family */ int family; /* The number of vertex floating-point units */ - int num_vert_fpus; + unsigned num_vert_fpus; /* The number of fragment pipes */ - int num_frag_pipes; + unsigned num_frag_pipes; + /* The number of z pipes */ + unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; /* Whether or not this is an RV515 or newer; R500s have many differences diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c index 8b9cb819ae..02d6d504fc 100644 --- a/src/gallium/drivers/r300/r300_clear.c +++ b/src/gallium/drivers/r300/r300_clear.c @@ -21,6 +21,9 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_clear.h" +#include "r300_context.h" + +#include "util/u_clear.h" /* Clears currently bound buffers. */ void r300_clear(struct pipe_context* pipe, diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_clear.h index cd5900565e..b8fcdf273c 100644 --- a/src/gallium/drivers/r300/r300_clear.h +++ b/src/gallium/drivers/r300/r300_clear.h @@ -23,9 +23,7 @@ #ifndef R300_CLEAR_H #define R300_CLEAR_H -#include "util/u_clear.h" - -#include "r300_context.h" +struct pipe_context; void r300_clear(struct pipe_context* pipe, unsigned buffers, diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 9cc455135d..e45564b54e 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -20,90 +20,52 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_context.h" +#include "draw/draw_context.h" -#include "r300_flush.h" -#include "r300_state_invariant.h" +#include "pipe/p_inlines.h" -static boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) -{ - struct r300_context* r300 = r300_context(pipe); - int i; - - for (i = 0; i < r300->vertex_buffer_count; i++) { - void* buf = pipe_buffer_map(pipe->screen, - r300->vertex_buffers[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(r300->draw, i, buf); - } - - if (indexBuffer) { - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(r300->draw, indexSize, - minIndex, maxIndex, indices); - } else { - draw_set_mapped_element_buffer(r300->draw, 0, NULL); - } +#include "tgsi/tgsi_scan.h" - draw_set_mapped_constant_buffer(r300->draw, - r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].count * - (sizeof(float) * 4)); +#include "util/u_hash_table.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" - draw_arrays(r300->draw, mode, start, count); - - for (i = 0; i < r300->vertex_buffer_count; i++) { - pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer); - draw_set_mapped_vertex_buffer(r300->draw, i, NULL); - } - - if (indexBuffer) { - pipe_buffer_unmap(pipe->screen, indexBuffer); - draw_set_mapped_element_buffer_range(r300->draw, 0, start, - start + count - 1, NULL); - } - - return TRUE; -} +#include "r300_clear.h" +#include "r300_context.h" +#include "r300_flush.h" +#include "r300_query.h" +#include "r300_render.h" +#include "r300_screen.h" +#include "r300_state_derived.h" +#include "r300_state_invariant.h" +#include "r300_winsys.h" -static boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count) +static enum pipe_error r300_clear_hash_table(void* key, void* value, + void* data) { - return r300_draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + FREE(key); + FREE(value); + return PIPE_OK; } -static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) +static void r300_destroy_context(struct pipe_context* context) { - return r300_draw_elements(pipe, NULL, 0, mode, start, count); -} - -static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); struct r300_query* query, * temp; + util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table, + NULL); + util_hash_table_destroy(r300->shader_hash_table); + draw_destroy(r300->draw); /* Free the OQ BO. */ context->screen->buffer_destroy(r300->oqbo); /* If there are any queries pending or not destroyed, remove them now. */ - if (r300->query_list) { - foreach_s(query, temp, r300->query_list) { - remove_from_list(query); - FREE(query); - } + foreach_s(query, temp, &r300->query_list) { + remove_from_list(query); + FREE(query); } FREE(r300->blend_color_state); @@ -114,26 +76,30 @@ static void r300_destroy_context(struct pipe_context* context) { } static unsigned int -r300_is_texture_referenced( struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level) +r300_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level) { - /** - * FIXME: Optimize. - */ + struct pipe_buffer* buf; + + r300_get_texture_buffer(texture, &buf, NULL); - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + return pipe->is_buffer_referenced(pipe, buf); } static unsigned int -r300_is_buffer_referenced( struct pipe_context *pipe, - struct pipe_buffer *buf) +r300_is_buffer_referenced(struct pipe_context *pipe, + struct pipe_buffer *buf) { - /** - * FIXME: Optimize. - */ + /* XXX */ + return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; +} + +static void r300_flush_cb(void *data) +{ + struct r300_context* const cs_context_copy = data; - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; + cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -147,7 +113,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->winsys = r300_winsys; r300->context.winsys = (struct pipe_winsys*)r300_winsys; - r300->context.screen = r300_screen(screen); + r300->context.screen = screen; r300_init_debug(r300); @@ -157,11 +123,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.draw_arrays = r300_draw_arrays; r300->context.draw_elements = r300_draw_elements; - r300->context.draw_range_elements = r300_draw_range_elements; + r300->context.draw_range_elements = r300_swtcl_draw_range_elements; r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; + r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, + r300_shader_key_compare); + r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); @@ -185,13 +154,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_query_functions(r300); - r300_init_surface_functions(r300); + /* r300_init_surface_functions(r300); */ r300_init_state_functions(r300); r300_emit_invariant_state(r300); + + r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw++; - + make_empty_list(&r300->query_list); return &r300->context; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 52b1c9a6b2..4d73567bbe 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -23,21 +23,10 @@ #ifndef R300_CONTEXT_H #define R300_CONTEXT_H -#include "draw/draw_context.h" #include "draw/draw_vertex.h" #include "pipe/p_context.h" -#include "tgsi/tgsi_scan.h" - -#include "util/u_memory.h" -#include "util/u_simple_list.h" - -#include "r300_clear.h" -#include "r300_query.h" -#include "r300_screen.h" -#include "r300_winsys.h" - struct r300_fragment_shader; struct r300_vertex_shader; @@ -62,7 +51,6 @@ struct r300_dsa_state { uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */ uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ - uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ }; @@ -124,6 +112,10 @@ struct r300_viewport_state { uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */ }; +struct r300_ztop_state { + uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ +}; + #define R300_NEW_BLEND 0x00000001 #define R300_NEW_BLEND_COLOR 0x00000002 #define R300_NEW_CLIP 0x00000004 @@ -141,7 +133,8 @@ struct r300_viewport_state { #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VIEWPORT 0x10000000 -#define R300_NEW_KITCHEN_SINK 0x1fffffff +#define R300_NEW_QUERY 0x20000000 +#define R300_NEW_KITCHEN_SINK 0x3fffffff /* The next several objects are not pure Radeon state; they inherit from * various Gallium classes. */ @@ -172,6 +165,10 @@ struct r300_query { unsigned int count; /* The offset of this query into the query buffer, in bytes. */ unsigned offset; + /* if we've flushed the query */ + boolean flushed; + /* if begin has been emitted */ + boolean begin_emitted; /* Linked list members. */ struct r300_query* prev; struct r300_query* next; @@ -184,6 +181,9 @@ struct r300_texture { /* Offsets into the buffer. */ unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + /* Size of one zslice or face based on the texture target */ + unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + /** * If non-zero, override the natural texture layout with * a custom stride (in bytes). @@ -237,7 +237,14 @@ struct r300_context { /* Occlusion query buffer. */ struct pipe_buffer* oqbo; /* Query list. */ - struct r300_query* query_list; + struct r300_query *query_current; + struct r300_query query_list; + + /* Shader hash table. Used to store vertex formatting information, which + * depends on the combination of both currently loaded shaders. */ + struct util_hash_table* shader_hash_table; + /* Vertex formatting information. */ + struct r300_vertex_format* vertex_info; /* Various CSO state objects. */ /* Blend state. */ @@ -269,12 +276,16 @@ struct r300_context { /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; - /* Vertex information. */ - struct r300_vertex_format vertex_info; + /* Vertex elements for Gallium. */ + struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; + int vertex_element_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ struct r300_viewport_state* viewport_state; + /* ZTOP state. */ + struct r300_ztop_state ztop_state; + /* Bitmask of dirty state objects. */ uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ @@ -312,6 +323,8 @@ void r300_init_surface_functions(struct r300_context* r300); #define DBG_VP 0x0000004 #define DBG_CS 0x0000008 #define DBG_DRAW 0x0000010 +#define DBG_TEX 0x0000020 +#define DBG_FALL 0x0000040 /*@}*/ static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 0a7e470363..883f0a02dc 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -68,11 +68,17 @@ } while (0) #define OUT_CS(value) do { \ + if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \ + DBG(cs_context_copy, DBG_CS, "r300: writing %08x\n", value); \ + } \ cs_winsys->write_cs_dword(cs_winsys, (value)); \ cs_count--; \ } while (0) #define OUT_CS_32F(value) do { \ + if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \ + DBG(cs_context_copy, DBG_CS, "r300: writing %f\n", value); \ + } \ cs_winsys->write_cs_dword(cs_winsys, fui(value)); \ cs_count--; \ } while (0) @@ -82,8 +88,9 @@ DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \ value, register); \ assert(register); \ - OUT_CS(CP_PACKET0(register, 0)); \ - OUT_CS(value); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \ + cs_winsys->write_cs_dword(cs_winsys, value); \ + cs_count -= 2; \ } while (0) /* Note: This expects count to be the number of registers, @@ -93,7 +100,8 @@ DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ - OUT_CS(CP_PACKET0(register, ((count) - 1))); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \ + cs_count--; \ } while (0) #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ @@ -101,9 +109,9 @@ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ - OUT_CS(offset); \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ - cs_count -= 2; \ + cs_count -= 3; \ } while (0) #define END_CS do { \ @@ -131,24 +139,26 @@ DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \ count, register); \ assert(register); \ - OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \ + cs_count--; \ } while (0) #define CP_PACKET3(op, count) \ (RADEON_CP_PACKET3 | (op) | ((count) << 16)) #define OUT_CS_PKT3(op, count) do { \ - OUT_CS(CP_PACKET3(op, count)); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \ + cs_count--; \ } while (0) #define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \ "offset %d\n", bo, offset); \ assert(bo); \ - OUT_CS(offset); \ - OUT_CS(count); \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ + cs_winsys->write_cs_dword(cs_winsys, count); \ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ - cs_count -= 2; \ + cs_count -= 4; \ } while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 15308dda1d..421253ca72 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -37,6 +37,8 @@ static struct debug_option debug_options[] = { { "vp", DBG_VP, "Vertex program handling" }, { "cs", DBG_CS, "Command submissions" }, { "draw", DBG_DRAW, "Draw and emit" }, + { "tex", DBG_TEX, "Textures" }, + { "fall", DBG_FALL, "Fallbacks" }, { "all", ~0, "Convenience option that enables all debug flags" }, @@ -48,6 +50,8 @@ void r300_init_debug(struct r300_context * ctx) { const char * options = debug_get_option("RADEON_DEBUG", 0); boolean printhint = false; + size_t length; + struct debug_option * opt; if (options) { while(*options) { @@ -56,8 +60,7 @@ void r300_init_debug(struct r300_context * ctx) continue; } - size_t length = strcspn(options, " ,"); - struct debug_option * opt; + length = strcspn(options, " ,"); for(opt = debug_options; opt->name; ++opt) { if (!strncmp(options, opt->name, length)) { @@ -81,7 +84,7 @@ void r300_init_debug(struct r300_context * ctx) if (printhint || ctx->debug & DBG_HELP) { debug_printf("You can enable debug output by setting the RADEON_DEBUG environment variable\n" "to a comma-separated list of debug options. Available options are:\n"); - for(struct debug_option * opt = debug_options; opt->name; ++opt) { + for(opt = debug_options; opt->name; ++opt) { debug_printf(" %s: %s\n", opt->name, opt->description); } } diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 77ce431cdc..2a8e4a9f41 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -22,10 +22,16 @@ /* r300_emit: Functions for emitting state. */ -#include "r300_emit.h" +#include "util/u_math.h" +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_emit.h" #include "r300_fs.h" +#include "r300_screen.h" #include "r300_state_derived.h" +#include "r300_state_inlines.h" +#include "r300_texture.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, @@ -96,19 +102,23 @@ void r300_emit_dsa_state(struct r300_context* r300, struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 8 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); - /* XXX figure out the r300 counterpart for this */ - if (r300screen->caps->is_r500) { - /* OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); */ - } + + /* not needed since we use the 8bit alpha ref */ + /*if (r300screen->caps->is_r500) { + OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); + }*/ + OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); OUT_CS(dsa->z_buffer_control); OUT_CS(dsa->z_stencil_control); OUT_CS(dsa->stencil_ref_mask); - OUT_CS_REG(R300_ZB_ZTOP, dsa->z_buffer_top); + OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); + + /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { - /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */ + OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); } END_CS; } @@ -212,7 +222,7 @@ void r300_emit_fragment_program_code(struct r300_context* r300, } if (constants->Count) { - OUT_CS_ONE_REG(R300_PFS_PARAM_0_X, constants->Count * 4); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4); for(i = 0; i < constants->Count; ++i) { const float * data = get_shader_constant(r300, &constants->Constants[i], externals); OUT_CS(pack_float24(data[0])); @@ -320,31 +330,37 @@ void r300_emit_fb_state(struct r300_context* r300, END_CS; } -void r300_emit_query_begin(struct r300_context* r300, - struct r300_query* query) +static void r300_emit_query_start(struct r300_context *r300) { + struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; + struct r300_query *query = r300->query_current; CS_LOCALS(r300); + if (!query) + return; + /* XXX This will almost certainly not return good results * for overlapping queries. */ - BEGIN_CS(2); + BEGIN_CS(4); + if (caps->family == CHIP_FAMILY_RV530) { + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + } else { + OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + } OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; + query->begin_emitted = TRUE; } -void r300_emit_query_end(struct r300_context* r300, - struct r300_query* query) + +static void r300_emit_query_finish(struct r300_context *r300, + struct r300_query *query) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - debug_printf("r300: There wasn't room for the OQ buffer!?" - " Oh noes!\n"); - } - assert(caps->num_frag_pipes); + BEGIN_CS(6 * caps->num_frag_pipes + 2); /* I'm not so sure I like this switch, but it's hard to be elegant * when there's so many special cases... @@ -381,7 +397,7 @@ void r300_emit_query_end(struct r300_context* r300, OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), 0, RADEON_GEM_DOMAIN_GTT, 0); - break; + break; default: debug_printf("r300: Implementation error: Chipset reports %d" " pixel pipes!\n", caps->num_frag_pipes); @@ -391,7 +407,55 @@ void r300_emit_query_end(struct r300_context* r300, /* And, finally, reset it to normal... */ OUT_CS_REG(R300_SU_REG_DEST, 0xF); END_CS; +} + +static void rv530_emit_query_single(struct r300_context *r300, + struct r300_query *query) +{ + CS_LOCALS(r300); + + BEGIN_CS(8); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_CS; +} + +static void rv530_emit_query_double(struct r300_context *r300, + struct r300_query *query) +{ + CS_LOCALS(r300); + + BEGIN_CS(14); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_CS; +} +void r300_emit_query_end(struct r300_context* r300) +{ + struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; + struct r300_query *query = r300->query_current; + + if (!query) + return; + + if (query->begin_emitted == FALSE) + return; + + if (caps->family == CHIP_FAMILY_RV530) { + if (caps->num_z_pipes == 2) + rv530_emit_query_double(r300, query); + else + rv530_emit_query_single(r300, query); + } else + r300_emit_query_finish(r300, query); } void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) @@ -472,10 +536,18 @@ void r300_emit_texture(struct r300_context* r300, struct r300_texture* tex, unsigned offset) { + uint32_t filter0 = sampler->filter0; CS_LOCALS(r300); + /* to emulate 1D textures through 2D ones correctly */ + if (tex->tex.height[0] == 1) { + filter0 &= ~R300_TX_WRAP_T_MASK; + filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); + } + BEGIN_CS(16); - OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0); + OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | + (offset << 28)); OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); @@ -494,7 +566,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300) DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, - r300->vertex_info.vinfo.size); + r300->vertex_info->vinfo.size); /* Set the pointer to our vertex buffer. The emitted values are this: * PACKET3 [3D_LOAD_VBPNTR] * COUNT [1] @@ -505,8 +577,8 @@ void r300_emit_vertex_buffer(struct r300_context* r300) BEGIN_CS(7); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); OUT_CS(1); - OUT_CS(r300->vertex_info.vinfo.size | - (r300->vertex_info.vinfo.size << 8)); + OUT_CS(r300->vertex_info->vinfo.size | + (r300->vertex_info->vinfo.size << 8)); OUT_CS(r300->vbo_offset); OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; @@ -518,30 +590,30 @@ void r300_emit_vertex_format_state(struct r300_context* r300) CS_LOCALS(r300); BEGIN_CS(26); - OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info.vinfo.size); + OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(r300->vertex_info.vinfo.hwfmt[0]); - OUT_CS(r300->vertex_info.vinfo.hwfmt[1]); + OUT_CS(r300->vertex_info->vinfo.hwfmt[0]); + OUT_CS(r300->vertex_info->vinfo.hwfmt[1]); OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(r300->vertex_info.vinfo.hwfmt[2]); - OUT_CS(r300->vertex_info.vinfo.hwfmt[3]); + OUT_CS(r300->vertex_info->vinfo.hwfmt[2]); + OUT_CS(r300->vertex_info->vinfo.hwfmt[3]); /* for (i = 0; i < 4; i++) { * debug_printf("hwfmt%d: 0x%08x\n", i, - * r300->vertex_info.vinfo.hwfmt[i]); + * r300->vertex_info->vinfo.hwfmt[i]); * } */ OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); for (i = 0; i < 8; i++) { - OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]); + OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]); /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i, - * r300->vertex_info.vap_prog_stream_cntl[i]); */ + * r300->vertex_info->vap_prog_stream_cntl[i]); */ } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); for (i = 0; i < 8; i++) { - OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]); + OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]); /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, - * r300->vertex_info.vap_prog_stream_cntl_ext[i]); */ + * r300->vertex_info->vap_prog_stream_cntl_ext[i]); */ } END_CS; } @@ -658,6 +730,9 @@ void r300_emit_dirty_state(struct r300_context* r300) r300_update_derived_state(r300); + /* Clean out BOs. */ + r300->winsys->reset_bos(r300->winsys); + /* XXX check size */ validate: /* Color buffers... */ @@ -683,7 +758,8 @@ validate: /* ...textures... */ for (i = 0; i < r300->texture_count; i++) { tex = r300->textures[i]; - assert(tex && tex->buffer && "texture is marked, but NULL!"); + if (!tex) + continue; if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { r300->context.flush(&r300->context, 0, NULL); @@ -717,6 +793,11 @@ validate: goto validate; } + if (r300->dirty_state & R300_NEW_QUERY) { + r300_emit_query_start(r300); + r300->dirty_state &= ~R300_NEW_QUERY; + } + if (r300->dirty_state & R300_NEW_BLEND) { r300_emit_blend_state(r300, r300->blend_state); r300->dirty_state &= ~R300_NEW_BLEND; @@ -770,12 +851,13 @@ validate: if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { - if (r300->dirty_state & - ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { - r300_emit_texture(r300, - r300->sampler_states[i], - r300->textures[i], - i); + if (r300->dirty_state & + ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { + if (r300->textures[i]) + r300_emit_texture(r300, + r300->sampler_states[i], + r300->textures[i], + i); r300->dirty_state &= ~((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i)); dirty_tex++; diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index c4002b8e5d..02ac5bebbd 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -23,13 +23,6 @@ #ifndef R300_EMIT_H #define R300_EMIT_H -#include "util/u_math.h" - -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_screen.h" -#include "r300_state_inlines.h" - struct rX00_fragment_program_code; struct r300_vertex_program_code; @@ -58,8 +51,8 @@ void r300_emit_fb_state(struct r300_context* r300, void r300_emit_query_begin(struct r300_context* r300, struct r300_query* query); -void r300_emit_query_end(struct r300_context* r300, - struct r300_query* query); + +void r300_emit_query_end(struct r300_context* r300); void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 0dff1c6f4f..14a08241fc 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -20,29 +20,48 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "draw/draw_context.h" +#include "draw/draw_private.h" + +#include "util/u_simple_list.h" + +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_emit.h" #include "r300_flush.h" +#include "r300_state_invariant.h" static void r300_flush(struct pipe_context* pipe, unsigned flags, struct pipe_fence_handle** fence) { - struct r300_context* r300 = r300_context(pipe); - CS_LOCALS(r300); + struct r300_context *r300 = r300_context(pipe); + struct r300_query *query; + CS_LOCALS(r300); /* We probably need to flush Draw, but we may have been called from - * within Draw. This feels kludgy, but it might be the best thing. */ - if (!r300->draw->flushing) { + * within Draw. This feels kludgy, but it might be the best thing. + * + * Of course, the best thing is to kill Draw with fire. :3 */ + if (r300->draw && !r300->draw->flushing) { draw_flush(r300->draw); } + r300_emit_query_end(r300); + if (r300->dirty_hw) { FLUSH_CS; r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; } + /* reset flushed query */ + foreach(query, &r300->query_list) { + query->flushed = TRUE; + } } + void r300_init_flush_functions(struct r300_context* r300) { r300->context.flush = r300_flush; diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h index 9a83d89daa..0e9e6106bb 100644 --- a/src/gallium/drivers/r300/r300_flush.h +++ b/src/gallium/drivers/r300/r300_flush.h @@ -23,13 +23,6 @@ #ifndef R300_FLUSH_H #define R300_FLUSH_H -#include "draw/draw_private.h" - -#include "pipe/p_context.h" - -#include "r300_context.h" -#include "r300_cs.h" - void r300_init_flush_functions(struct r300_context* r300); #endif /* R300_FLUSH_H */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index a0e848a59a..29ddc84c41 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -21,10 +21,14 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_fs.h" +#include "tgsi/tgsi_dump.h" +#include "r300_context.h" +#include "r300_screen.h" +#include "r300_fs.h" #include "r300_tgsi_to_rc.h" +#include "radeon_code.h" #include "radeon_compiler.h" static void find_output_registers(struct r300_fragment_program_compiler * compiler, @@ -126,9 +130,9 @@ void r300_translate_fragment_shader(struct r300_context* r300, /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { - /* Todo: Fail gracefully */ - fprintf(stderr, "r300 FP: Compiler error\n"); - abort(); + /* XXX failover maybe? */ + DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n", + compiler.Base.ErrorMsg); } /* And, finally... */ diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 9fab789402..e831c30301 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -24,11 +24,9 @@ #ifndef R300_FS_H #define R300_FS_H -#include "tgsi/tgsi_dump.h" +#include "pipe/p_state.h" -#include "r300_context.h" -#include "r3xx_fs.h" -#include "r5xx_fs.h" +#include "tgsi/tgsi_scan.h" #include "radeon_code.h" @@ -48,4 +46,10 @@ struct r300_fragment_shader { void r300_translate_fragment_shader(struct r300_context* r300, struct r300_fragment_shader* fs); - #endif /* R300_FS_H */ +static inline boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs) +{ + if (!fs) + return FALSE; + return (fs->code.writes_depth) ? TRUE : FALSE; +} +#endif /* R300_FS_H */ diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 2880d34877..ca00b043c5 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -20,17 +20,23 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_query.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "r300_context.h" +#include "r300_screen.h" +#include "r300_cs.h" #include "r300_emit.h" +#include "r300_query.h" +#include "r300_reg.h" -static struct pipe_query* r300_create_query(struct pipe_context* pipe, +static struct pipe_query *r300_create_query(struct pipe_context *pipe, unsigned query_type) { - struct r300_context* r300 = r300_context(pipe); - struct r300_screen* r300screen = r300_screen(r300->context.screen); - unsigned query_size = r300screen->caps->num_frag_pipes * 4; - struct r300_query* q, * qptr; + struct r300_context *r300 = r300_context(pipe); + struct r300_screen *r300screen = r300_screen(r300->context.screen); + unsigned query_size; + struct r300_query *q, *qptr; q = CALLOC_STRUCT(r300_query); @@ -39,13 +45,16 @@ static struct pipe_query* r300_create_query(struct pipe_context* pipe, q->active = FALSE; - if (!r300->query_list) { - r300->query_list = q; - } else if (!is_empty_list(r300->query_list)) { - qptr = last_elem(r300->query_list); + if (r300screen->caps->family == CHIP_FAMILY_RV530) + query_size = r300screen->caps->num_z_pipes * sizeof(uint32_t); + else + query_size = r300screen->caps->num_frag_pipes * sizeof(uint32_t); + + if (!is_empty_list(&r300->query_list)) { + qptr = last_elem(&r300->query_list); q->offset = qptr->offset + query_size; - insert_at_tail(r300->query_list, q); } + insert_at_tail(&r300->query_list, q); /* XXX */ if (q->offset >= 4096) { @@ -71,24 +80,26 @@ static void r300_begin_query(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_query* q = (struct r300_query*)query; + assert(r300->query_current == NULL); + map = pipe->screen->buffer_map(pipe->screen, r300->oqbo, PIPE_BUFFER_USAGE_CPU_WRITE); map += q->offset / 4; - *map = ~0; + *map = ~0U; pipe->screen->buffer_unmap(pipe->screen, r300->oqbo); - r300_emit_dirty_state(r300); - r300_emit_query_begin(r300, q); + q->flushed = FALSE; + r300->query_current = q; + r300->dirty_state |= R300_NEW_QUERY; } static void r300_end_query(struct pipe_context* pipe, - struct pipe_query* query) + struct pipe_query* query) { struct r300_context* r300 = r300_context(pipe); - struct r300_query* q = (struct r300_query*)query; - r300_emit_dirty_state(r300); - r300_emit_query_end(r300, q); + r300_emit_query_end(r300); + r300->query_current = NULL; } static boolean r300_get_query_result(struct pipe_context* pipe, @@ -98,28 +109,36 @@ static boolean r300_get_query_result(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_query* q = (struct r300_query*)query; + struct r300_query *q = (struct r300_query*)query; unsigned flags = PIPE_BUFFER_USAGE_CPU_READ; uint32_t* map; - uint32_t temp; - unsigned i; + uint32_t temp = 0; + unsigned i, num_results; - if (wait) { + if (q->flushed == FALSE) pipe->flush(pipe, 0, NULL); - } else { + if (!wait) { flags |= PIPE_BUFFER_USAGE_DONTBLOCK; } map = pipe->screen->buffer_map(pipe->screen, r300->oqbo, flags); + if (!map) + return FALSE; map += q->offset / 4; - for (i = 0; i < r300screen->caps->num_frag_pipes; i++) { - if (*map == ~0) { + + if (r300screen->caps->family == CHIP_FAMILY_RV530) + num_results = r300screen->caps->num_z_pipes; + else + num_results = r300screen->caps->num_frag_pipes; + + for (i = 0; i < num_results; i++) { + if (*map == ~0U) { /* Looks like our results aren't ready yet. */ if (wait) { debug_printf("r300: Despite waiting, OQ results haven't" " come in yet.\n"); } - temp = ~0; + temp = ~0U; break; } temp += *map; @@ -127,7 +146,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe, } pipe->screen->buffer_unmap(pipe->screen, r300->oqbo); - if (temp == ~0) { + if (temp == ~0U) { /* Our results haven't been written yet... */ return FALSE; } diff --git a/src/gallium/drivers/r300/r300_query.h b/src/gallium/drivers/r300/r300_query.h index 4f50e8f844..48876da312 100644 --- a/src/gallium/drivers/r300/r300_query.h +++ b/src/gallium/drivers/r300/r300_query.h @@ -23,10 +23,6 @@ #ifndef R300_QUERY_H #define R300_QUERY_H -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_reg.h" - struct r300_context; static INLINE struct r300_query* r300_query(struct pipe_query* q) diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 3abff5db62..babc3c709e 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -348,6 +348,27 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_WRITE_ENA_W 8 # define R300_SWIZZLE1_SHIFT 16 +# define R300_VAP_SWIZZLE_X001 \ + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Y_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ + (0xf << R300_WRITE_ENA_SHIFT)) + +# define R300_VAP_SWIZZLE_XY01 \ + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ + (0xf << R300_WRITE_ENA_SHIFT)) + +# define R300_VAP_SWIZZLE_XYZ1 \ + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \ + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \ + (0xf << R300_WRITE_ENA_SHIFT)) + # define R300_VAP_SWIZZLE_XYZW \ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ @@ -1172,6 +1193,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* SU Depth Offset value */ #define R300_SU_DEPTH_OFFSET 0x42c4 +#define R300_SU_REG_DEST 0x42c8 +# define R300_RASTER_PIPE_SELECT_0 (1 << 0) +# define R300_RASTER_PIPE_SELECT_1 (1 << 1) +# define R300_RASTER_PIPE_SELECT_2 (1 << 2) +# define R300_RASTER_PIPE_SELECT_3 (1 << 3) +# define R300_RASTER_PIPE_SELECT_ALL 0xf + /* BEGIN: Rasterization / Interpolators - many guesses */ @@ -2095,6 +2123,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_FG_ALPHA_VALUE 0x4be0 # define R500_FG_ALPHA_VALUE_MASK 0x0000ffff +#define RV530_FG_ZBREG_DEST 0x4be8 +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) /* gap */ /* Fragment program parameters in 7.16 floating point */ @@ -2384,6 +2416,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_Z_WRITE_ENABLE (1 << 2) # define R300_Z_SIGNED_COMPARE (1 << 3) # define R300_STENCIL_FRONT_BACK (1 << 4) +# define R500_STENCIL_ZSIGNED_MAGNITUDE (1 << 5) +# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6) #define R300_ZB_ZSTENCILCNTL 0x4f04 /* functions */ @@ -3313,10 +3347,6 @@ enum { #define R200_3D_DRAW_IMMD_2 0xC0003500 -/* XXX Oh look, stuff not brought over from docs yet */ - -#define R300_SU_REG_DEST 0x42C8 - #endif /* _R300_REG_H */ /* *INDENT-ON* */ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 737396d8d9..c36350d29e 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -20,19 +20,224 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "draw/draw_pipe.h" +#include "draw/draw_context.h" #include "draw/draw_vbuf.h" + +#include "pipe/p_inlines.h" + #include "util/u_memory.h" +#include "util/u_prim.h" #include "r300_cs.h" #include "r300_context.h" #include "r300_emit.h" #include "r300_reg.h" +#include "r300_render.h" #include "r300_state_derived.h" /* r300_render: Vertex and index buffer primitive emission. */ #define R300_MAX_VBO_SIZE (1024 * 1024) +static uint32_t r300_translate_primitive(unsigned prim) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + return R300_VAP_VF_CNTL__PRIM_POINTS; + case PIPE_PRIM_LINES: + return R300_VAP_VF_CNTL__PRIM_LINES; + case PIPE_PRIM_LINE_LOOP: + return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: + return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: + return R300_VAP_VF_CNTL__PRIM_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: + return R300_VAP_VF_CNTL__PRIM_QUADS; + case PIPE_PRIM_QUAD_STRIP: + return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + case PIPE_PRIM_POLYGON: + return R300_VAP_VF_CNTL__PRIM_POLYGON; + default: + return 0; + } +} + +/* This is the fast-path drawing & emission for HW TCL. */ +boolean r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); + uint32_t prim = r300_translate_primitive(mode); + struct pipe_vertex_buffer* aos = r300->vertex_buffers; + unsigned aos_count = r300->vertex_buffer_count; + short* indices; + unsigned packet_size; + unsigned i; + bool invalid = FALSE; + + CS_LOCALS(r300); + + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; + } + +validate: + for (i = 0; i < aos_count; i++) { + if (!r300->winsys->add_buffer(r300->winsys, aos[i].buffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + pipe->flush(pipe, 0, NULL); + goto validate; + } + } + if (!r300->winsys->validate(r300->winsys)) { + pipe->flush(pipe, 0, NULL); + if (invalid) { + /* Well, hell. */ + debug_printf("r300: Stuck in validation loop, gonna quit now."); + exit(1); + } + invalid = TRUE; + goto validate; + } + + r300_emit_dirty_state(r300); + + packet_size = (aos_count >> 1) * 3 + (aos_count & 1) * 2; + + BEGIN_CS(3 + packet_size + (aos_count * 2)); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); + OUT_CS(aos_count); + for (i = 0; i < aos_count - 1; i += 2) { + OUT_CS(aos[i].stride | + (aos[i].stride << 8) | + (aos[i + 1].stride << 16) | + (aos[i + 1].stride << 24)); + OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride); + OUT_CS(aos[i + 1].buffer_offset + start * 4 * aos[i + 1].stride); + } + if (aos_count & 1) { + OUT_CS(aos[i].stride | (aos[i].stride << 8)); + OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride); + } + for (i = 0; i < aos_count; i++) { + OUT_CS_RELOC(aos[i].buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + } + END_CS; + + if (indexBuffer) { + indices = (short*)pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + + /* Set the starting point. */ + indices += start; + + BEGIN_CS(2 + (count+1)/2); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count + 1)/2); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | prim); + for (i = 0; i < count - 1; i += 2) { + OUT_CS(indices[i + 1] << 16 | indices[i]); + } + if (count % 2) { + OUT_CS(indices[count - 1]); + } + END_CS; + } else { + BEGIN_CS(2); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | + prim); + END_CS; + } + + return TRUE; +} + +/* Simple helpers for context setup. Should probably be moved to util. */ +boolean r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) +{ + return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); +} + +boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count) +{ + return pipe->draw_elements(pipe, NULL, 0, mode, start, count); +} + +/**************************************************************************** + * The rest of this file is for SW TCL rendering only. Please be polite and * + * keep these functions separated so that they are easier to locate. ~C. * + ***************************************************************************/ + +/* Draw-based drawing for SW TCL chipsets. */ +boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); + int i; + + if (!u_trim_pipe_prim(mode, &count)) { + return FALSE; + } + + for (i = 0; i < r300->vertex_buffer_count; i++) { + void* buf = pipe_buffer_map(pipe->screen, + r300->vertex_buffers[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } + + if (indexBuffer) { + void* indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(r300->draw, indexSize, + minIndex, maxIndex, indices); + } else { + draw_set_mapped_element_buffer(r300->draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(r300->draw, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * + (sizeof(float) * 4)); + + draw_arrays(r300->draw, mode, start, count); + + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } + + if (indexBuffer) { + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(r300->draw, 0, start, + start + count - 1, NULL); + } + + return TRUE; +} + +/* Object for rendering using Draw. */ struct r300_render { /* Parent class */ struct vbuf_render base; @@ -67,7 +272,7 @@ r300_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return &r300->vertex_info.vinfo; + return &r300->vertex_info->vinfo; } static boolean r300_render_allocate_vertices(struct vbuf_render* render, @@ -79,12 +284,14 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, struct pipe_screen* screen = r300->context.screen; size_t size = (size_t)vertex_size * (size_t)count; - if (size + r300render->vbo_offset > r300render->vbo_size) + if (size + r300render->vbo_offset > r300render->vbo_size) { + pipe_buffer_reference(&r300->vbo, NULL); r300render->vbo = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, R300_MAX_VBO_SIZE); + r300render->vbo_offset = 0; r300render->vbo_size = R300_MAX_VBO_SIZE; } @@ -117,7 +324,7 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max); END_CS; - r300render->vbo_max_used = MAX2(r300render->vbo_max_used, + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, r300render->vertex_size * (max + 1)); pipe_buffer_unmap(screen, r300render->vbo); } @@ -125,67 +332,22 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, static void r300_render_release_vertices(struct vbuf_render* render) { struct r300_render* r300render = r300_render(render); - struct r300_context* r300 = r300render->r300; r300render->vbo_offset += r300render->vbo_max_used; r300render->vbo_max_used = 0; - r300->vbo = NULL; } static boolean r300_render_set_primitive(struct vbuf_render* render, unsigned prim) { struct r300_render* r300render = r300_render(render); - r300render->prim = prim; - switch (prim) { - case PIPE_PRIM_POINTS: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POINTS; - break; - case PIPE_PRIM_LINES: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINES; - break; - case PIPE_PRIM_LINE_LOOP: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_LOOP; - break; - case PIPE_PRIM_LINE_STRIP: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_STRIP; - break; - case PIPE_PRIM_TRIANGLES: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLES; - break; - case PIPE_PRIM_TRIANGLE_STRIP: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; - break; - case PIPE_PRIM_TRIANGLE_FAN: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; - break; - case PIPE_PRIM_QUADS: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUADS; - break; - case PIPE_PRIM_QUAD_STRIP: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; - break; - case PIPE_PRIM_POLYGON: - r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POLYGON; - break; - default: - return FALSE; - break; - } + r300render->prim = prim; + r300render->hwprim = r300_translate_primitive(prim); return TRUE; } -static void r300_prepare_render(struct r300_render* render, unsigned count) -{ - struct r300_context* r300 = render->r300; - - CS_LOCALS(r300); - - r300_emit_dirty_state(r300); -} - static void r300_render_draw_arrays(struct vbuf_render* render, unsigned start, unsigned count) @@ -195,7 +357,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); - r300_prepare_render(r300render, count); + r300_emit_dirty_state(r300); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -212,22 +374,11 @@ static void r300_render_draw(struct vbuf_render* render, { struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; - struct pipe_screen* screen = r300->context.screen; - struct pipe_buffer* index_buffer; - void* index_map; int i; - uint32_t index; CS_LOCALS(r300); - r300_prepare_render(r300render, count); - - /* Send our indices into an index buffer. */ - index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - count * 2); - if (!index_buffer) { - return; - } + r300_emit_dirty_state(r300); BEGIN_CS(2 + (count+1)/2); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h new file mode 100644 index 0000000000..3d8f47ba75 --- /dev/null +++ b/src/gallium/drivers/r300/r300_render.h @@ -0,0 +1,52 @@ +/* + * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_RENDER_H +#define R300_RENDER_H + +boolean r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +boolean r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count); + +boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count); + +boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +#endif /* R300_RENDER_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 8296d56840..6efa17cbaf 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -20,7 +20,14 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_simple_screen.h" + +#include "r300_context.h" #include "r300_screen.h" +#include "r300_texture.h" +#include "r300_winsys.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -73,13 +80,10 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) struct r300_screen* r300screen = r300_screen(pscreen); switch (param) { - /* XXX cases marked "IN THEORY" are possible on the hardware, - * but haven't been implemented yet. */ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: - /* IN THEORY */ return 0; case PIPE_CAP_TWO_SIDED_STENCIL: if (r300screen->caps->is_r500) { @@ -88,16 +92,26 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) return 0; } case PIPE_CAP_GLSL: - if (r300screen->caps->is_r500) { - return 1; - } else { - return 0; - } + /* I'll be frank. This is a lie. + * + * We don't truly support GLSL on any of this driver's chipsets. + * To be fair, no chipset supports the full GLSL specification + * to the best of our knowledge, but some of the less esoteric + * features are still missing here. + * + * Rather than cripple ourselves intentionally, I'm going to set + * this flag, and as Gallium's interface continues to change, I + * hope that this single monolithic GLSL enable can slowly get + * split down into many different pieces and the state tracker + * will handle fallbacks transparently, like it should. + * + * ~ C. + */ + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: - /* IN THEORY */ - return 0; + return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 4; case PIPE_CAP_OCCLUSION_QUERY: @@ -105,32 +119,13 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - if (r300screen->caps->is_r500) { - /* 13 == 4096x4096 */ - return 13; - } else { - /* 12 == 2048x2048 */ - return 12; - } case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - /* So, technically, the limit is the same as above, but some math - * shows why this is silly. Assuming RGBA, 4cpp, we can see that - * 4096*4096*4096 = 64.0 GiB exactly, so it's not exactly - * practical. However, if at some point a game really wants this, - * then we can remove or raise this limit. */ - if (r300screen->caps->is_r500) { - /* 9 == 256x256x256 */ - return 9; - } else { - /* 8 == 128*128*128 */ - return 8; - } case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: if (r300screen->caps->is_r500) { - /* 13 == 4096x4096 */ + /* 13 == 4096 */ return 13; } else { - /* 12 == 2048x2048 */ + /* 12 == 2048 */ return 12; } case PIPE_CAP_TEXTURE_MIRROR_CLAMP: @@ -138,10 +133,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - /* XXX guessing (what a terrible guess) */ - return 2; + return 0; case PIPE_CAP_TGSI_CONT_SUPPORTED: - /* XXX */ return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; @@ -179,19 +172,22 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, - boolean is_r500) +static boolean check_tex_format(enum pipe_format format, uint32_t usage, + boolean is_r500) { + uint32_t retval = 0; + switch (format) { /* Supported formats. */ /* Colorbuffer */ case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: - return usage & + retval = usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY); + break; /* Texture */ case PIPE_FORMAT_A8R8G8B8_SRGB: @@ -201,31 +197,37 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_YCBCR: - return usage & PIPE_TEXTURE_USAGE_SAMPLER; + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + retval = usage & PIPE_TEXTURE_USAGE_SAMPLER; + break; /* Colorbuffer or texture */ case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: case PIPE_FORMAT_I8_UNORM: - return usage & + retval = usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_PRIMARY | PIPE_TEXTURE_USAGE_SAMPLER); + break; - /* Z buffer */ + /* Z buffer or texture */ case PIPE_FORMAT_Z16_UNORM: - return usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL; - + case PIPE_FORMAT_Z24X8_UNORM: /* Z buffer with stencil or texture */ case PIPE_FORMAT_Z24S8_UNORM: - return usage & + retval = usage & (PIPE_TEXTURE_USAGE_DEPTH_STENCIL | PIPE_TEXTURE_USAGE_SAMPLER); + break; /* Definitely unsupported formats. */ /* Non-usable Z buffer/stencil formats. */ - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: debug_printf("r300: Note: Got unsupported format: %s in %s\n", @@ -235,7 +237,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, /* XXX These don't even exist case PIPE_FORMAT_A32R32G32B32: case PIPE_FORMAT_A16R16G16B16: */ - /* XXX Insert YUV422 packed VYUY and YVYU here */ /* XXX What the deuce is UV88? (r3xx accel page 14) debug_printf("r300: Warning: Got unimplemented format: %s in %s\n", pf_name(format), __FUNCTION__); @@ -259,10 +260,15 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, break; } - return FALSE; + /* If usage was a mask that contained multiple bits, and not all of them + * are supported, this will catch that and return FALSE. + * e.g. usage = 2 | 4; retval = 4; (retval >= usage) == FALSE + * + * This also returns FALSE for any unknown formats. + */ + return (retval >= usage); } -/* XXX moar targets */ static boolean r300_is_format_supported(struct pipe_screen* pscreen, enum pipe_format format, enum pipe_texture_target target, @@ -270,15 +276,13 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen, unsigned geom_flags) { switch (target) { + case PIPE_TEXTURE_1D: /* handle 1D textures as 2D ones */ case PIPE_TEXTURE_2D: - return check_tex_2d_format(format, tex_usage, - r300_screen(pscreen)->caps->is_r500); - case PIPE_TEXTURE_1D: case PIPE_TEXTURE_3D: case PIPE_TEXTURE_CUBE: - debug_printf("r300: Implementation error: Unsupported format " - "target: %d\n", target); - break; + return check_tex_format(format, tex_usage, + r300_screen(pscreen)->caps->is_r500); + default: debug_printf("r300: Fatal: This is not a format target: %d\n", target); @@ -298,22 +302,9 @@ r300_get_tex_transfer(struct pipe_screen *screen, { struct r300_texture *tex = (struct r300_texture *)texture; struct r300_transfer *trans; - unsigned offset; /* in bytes */ + unsigned offset; - /* XXX Add support for these things */ - if (texture->target == PIPE_TEXTURE_CUBE) { - debug_printf("PIPE_TEXTURE_CUBE is not yet supported.\n"); - /* offset = tex->image_offset[level][face]; */ - } - else if (texture->target == PIPE_TEXTURE_3D) { - debug_printf("PIPE_TEXTURE_3D is not yet supported.\n"); - /* offset = tex->image_offset[level][zslice]; */ - } - else { - offset = tex->offset[level]; - assert(face == 0); - assert(zslice == 0); - } + offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ trans = CALLOC_STRUCT(r300_transfer); if (trans) { @@ -328,6 +319,12 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans->transfer.nblocksy = texture->nblocksy[level]; trans->transfer.stride = r300_texture_get_stride(tex, level); trans->transfer.usage = usage; + + /* XXX not sure whether it's required to set these two, + the driver doesn't use them */ + trans->transfer.zslice = zslice; + trans->transfer.face = face; + trans->offset = offset; } return &trans->transfer; @@ -345,16 +342,9 @@ static void* r300_transfer_map(struct pipe_screen* screen, { struct r300_texture* tex = (struct r300_texture*)transfer->texture; char* map; - unsigned flags = 0; - - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - map = pipe_buffer_map(screen, tex->buffer, flags); + map = pipe_buffer_map(screen, tex->buffer, + pipe_transfer_buffer_flags(transfer)); if (!map) { return NULL; @@ -390,6 +380,7 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) caps->pci_id = r300_winsys->pci_id; caps->num_frag_pipes = r300_winsys->gb_pipes; + caps->num_z_pipes = r300_winsys->z_pipes; r300_parse_chipset(caps); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 2a0e41fbc3..41df31f670 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -23,14 +23,9 @@ #ifndef R300_SCREEN_H #define R300_SCREEN_H -#include "pipe/p_inlines.h" #include "pipe/p_screen.h" -#include "util/u_memory.h" -#include "util/u_simple_screen.h" #include "r300_chipset.h" -#include "r300_texture.h" -#include "r300_winsys.h" struct r300_screen { /* Parent class */ diff --git a/src/gallium/drivers/r300/r300_shader_inlines.h b/src/gallium/drivers/r300/r300_shader_inlines.h deleted file mode 100644 index a04f45b03e..0000000000 --- a/src/gallium/drivers/r300/r300_shader_inlines.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> - * Joakim Sindholt <opensource@zhasha.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_SHADER_INLINES_H -#define R300_SHADER_INLINES_H - -/* TGSI constants. TGSI is like XML: If it can't solve your problems, you're - * not using enough of it. */ -static const struct tgsi_full_src_register r300_constant_zero = { - .SrcRegister.Extended = TRUE, - .SrcRegister.File = TGSI_FILE_NULL, - .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ZERO, -}; - -static const struct tgsi_full_src_register r300_constant_one = { - .SrcRegister.Extended = TRUE, - .SrcRegister.File = TGSI_FILE_NULL, - .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ONE, -}; - -#endif /* R300_SHADER_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 88cb9af6fb..5db8c69dec 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -20,8 +20,10 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "util/u_debug.h" +#include "draw/draw_context.h" + #include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_pack_color.h" #include "tgsi/tgsi_parse.h" @@ -31,6 +33,7 @@ #include "r300_context.h" #include "r300_reg.h" +#include "r300_screen.h" #include "r300_state_inlines.h" #include "r300_fs.h" #include "r300_vs.h" @@ -46,23 +49,47 @@ static void* r300_create_blend_state(struct pipe_context* pipe, { struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); - if (state->blend_enable) { - /* XXX for now, always do separate alpha... - * is it faster to do it with one reg? */ + if (state->blend_enable) + { + unsigned eqRGB = state->rgb_func; + unsigned srcRGB = state->rgb_src_factor; + unsigned dstRGB = state->rgb_dst_factor; + + unsigned eqA = state->alpha_func; + unsigned srcA = state->alpha_src_factor; + unsigned dstA = state->alpha_dst_factor; + + /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, + * this is just the crappy D3D naming */ blend->blend_control = R300_ALPHA_BLEND_ENABLE | - R300_SEPARATE_ALPHA_ENABLE | - R300_READ_ENABLE | - r300_translate_blend_function(state->rgb_func) | - (r300_translate_blend_factor(state->rgb_src_factor) << - R300_SRC_BLEND_SHIFT) | - (r300_translate_blend_factor(state->rgb_dst_factor) << - R300_DST_BLEND_SHIFT); - blend->alpha_blend_control = - r300_translate_blend_function(state->alpha_func) | - (r300_translate_blend_factor(state->alpha_src_factor) << - R300_SRC_BLEND_SHIFT) | - (r300_translate_blend_factor(state->alpha_dst_factor) << - R300_DST_BLEND_SHIFT); + r300_translate_blend_function(eqRGB) | + ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | + ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); + + /* optimization: some operations do not require the destination color */ + if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || + eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || + dstRGB != PIPE_BLENDFACTOR_ZERO || + dstA != PIPE_BLENDFACTOR_ZERO || + srcRGB == PIPE_BLENDFACTOR_DST_COLOR || + srcRGB == PIPE_BLENDFACTOR_DST_ALPHA || + srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR || + srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcA == PIPE_BLENDFACTOR_DST_ALPHA || + srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) + blend->blend_control |= R300_READ_ENABLE; + + /* XXX implement the optimization with DISCARD_SRC_PIXELS*/ + /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */ + + /* separate alpha */ + if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { + blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE; + blend->alpha_blend_control = + r300_translate_blend_function(eqA) | + (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | + (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); + } } /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ @@ -96,25 +123,29 @@ static void r300_delete_blend_state(struct pipe_context* pipe, FREE(state); } +/* Convert float to 10bit integer */ +static unsigned float_to_fixed10(float f) +{ + return CLAMP((unsigned)(f * 1023.9f), 0, 1023); +} + /* Set blend color. * Setup both R300 and R500 registers, figure out later which one to write. */ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); - ubyte ur, ug, ub, ua; - - ur = float_to_ubyte(color->color[0]); - ug = float_to_ubyte(color->color[1]); - ub = float_to_ubyte(color->color[2]); - ua = float_to_ubyte(color->color[3]); util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &r300->blend_color_state->blend_color); - /* XXX this is wrong */ - r300->blend_color_state->blend_color_red_alpha = ur | (ua << 16); - r300->blend_color_state->blend_color_green_blue = ub | (ug << 16); + /* XXX if FP16 blending is enabled, we should use the FP16 format */ + r300->blend_color_state->blend_color_red_alpha = + float_to_fixed10(color->color[0]) | + (float_to_fixed10(color->color[3]) << 16); + r300->blend_color_state->blend_color_green_blue = + float_to_fixed10(color->color[2]) | + (float_to_fixed10(color->color[1]) << 16); r300->dirty_state |= R300_NEW_BLEND_COLOR; } @@ -167,6 +198,8 @@ static void* r300_create_dsa_state(struct pipe_context* pipe, const struct pipe_depth_stencil_alpha_state* state) { + struct r300_capabilities *caps = + r300_screen(r300_context(pipe)->context.screen)->caps; struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); /* Depth test setup. */ @@ -211,9 +244,16 @@ static void* (r300_translate_stencil_op(state->stencil[1].zfail_op) << R300_S_BACK_ZFAIL_OP_SHIFT); - dsa->stencil_ref_bf = (state->stencil[1].ref_value) | - (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) | - (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT); + /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ + if (caps->is_r500) + { + dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK; + dsa->stencil_ref_bf = (state->stencil[1].ref_value) | + (state->stencil[1].valuemask << + R300_STENCILMASK_SHIFT) | + (state->stencil[1].writemask << + R300_STENCILWRITEMASK_SHIFT); + } } } @@ -222,11 +262,13 @@ static void* dsa->alpha_function = r300_translate_alpha_function(state->alpha.func) | R300_FG_ALPHA_FUNC_ENABLE; - dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f, - 0, 1023); - } else { - /* XXX need to fix this to be dynamically set - dsa->z_buffer_top = R300_ZTOP_ENABLE; */ + + /* XXX figure out why emitting 10bit alpha ref causes CS to dump */ + /* always use 8bit alpha ref */ + dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value); + + if (caps->is_r500) + dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT; } return (void*)dsa; @@ -309,7 +351,7 @@ static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) { struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; rc_constants_destroy(&fs->code.constants); - FREE(fs->state.tokens); + FREE((void*)fs->state.tokens); FREE(shader); } @@ -677,7 +719,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) rc_constants_destroy(&vs->code.constants); draw_delete_vertex_shader(r300->draw, vs->draw); - FREE(vs->state.tokens); + FREE((void*)vs->state.tokens); FREE(shader); } else { draw_delete_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 5f6b225d34..7d000e9e2d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -20,15 +20,47 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_state_derived.h" +#include "draw/draw_context.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "r300_context.h" #include "r300_fs.h" +#include "r300_screen.h" +#include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_vs.h" /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ +struct r300_shader_key { + struct r300_vertex_shader* vs; + struct r300_fragment_shader* fs; +}; + +struct r300_shader_derived_value { + struct r300_vertex_format* vformat; + struct r300_rs_block* rs_block; +}; + +unsigned r300_shader_key_hash(void* key) { + struct r300_shader_key* shader_key = (struct r300_shader_key*)key; + unsigned vs = (unsigned)shader_key->vs; + unsigned fs = (unsigned)shader_key->fs; + + return (vs << 16) | (fs & 0xffff); +} + +int r300_shader_key_compare(void* key1, void* key2) { + struct r300_shader_key* shader_key1 = (struct r300_shader_key*)key1; + struct r300_shader_key* shader_key2 = (struct r300_shader_key*)key2; + + return (shader_key1->vs == shader_key2->vs) && + (shader_key1->fs == shader_key2->fs); +} + /* Set up the vs_tab and routes. */ static void r300_vs_tab_routes(struct r300_context* r300, struct r300_vertex_format* vformat) @@ -53,7 +85,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte) { for (i = 0; i < info->num_inputs; i++) { - switch (info->input_semantic_name[i]) { + switch (r300->vs->code.inputs[i]) { case TGSI_SEMANTIC_POSITION: pos = TRUE; tab[i] = 0; @@ -63,10 +95,12 @@ static void r300_vs_tab_routes(struct r300_context* r300, cols++; break; case TGSI_SEMANTIC_PSIZE: + assert(psize == FALSE); psize = TRUE; tab[i] = 15; break; case TGSI_SEMANTIC_FOG: + assert(fog == FALSE); fog = TRUE; /* Fall through */ case TGSI_SEMANTIC_GENERIC: @@ -125,7 +159,9 @@ static void r300_vs_tab_routes(struct r300_context* r300, vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ - if (!pos) { + /* We need to add vertex position attribute only for SW TCL case, + * for HW TCL case it could be generated by vertex shader */ + if (!pos && !r300screen->caps->has_tcl) { debug_printf("r300: Forcing vertex position attribute emit...\n"); /* Make room for the position attribute * at the beginning of the tab. */ @@ -164,7 +200,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, vinfo->hwfmt[3] |= (4 << (3 * i)); } - for (i; i < texs; i++) { + for (; i < texs; i++) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); @@ -188,8 +224,9 @@ static void r300_vertex_psc(struct r300_context* r300, struct r300_screen* r300screen = r300_screen(r300->context.screen); struct vertex_info* vinfo = &vformat->vinfo; int* tab = vformat->vs_tab; - uint32_t temp; - int i, attrib_count; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i, attrib_count; /* Vertex shaders have no semantics on their inputs, * so PSC should just route stuff based on their info, @@ -210,64 +247,43 @@ static void r300_vertex_psc(struct r300_context* r300, } for (i = 0; i < attrib_count; i++) { - /* Make sure we have a proper destination for our attribute */ + /* Make sure we have a proper destination for our attribute. */ assert(tab[i] != -1); - /* Add the attribute to the PSC table. */ - temp = r300screen->caps->has_tcl ? - R300_DATA_TYPE_FLOAT_4 : - translate_vertex_data_type(vinfo->attrib[i].emit); - temp |= tab[i] << R300_DST_VEC_LOC_SHIFT; + format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + + /* Obtain the type of data in this attribute. */ + type = r300_translate_vertex_data_type(format) | + tab[i] << R300_DST_VEC_LOC_SHIFT; + /* Obtain the swizzle for this attribute. Note that the default + * swizzle in the hardware is not XYZW! */ + swizzle = r300_translate_vertex_data_swizzle(format); + + /* Add the attribute to the PSC table. */ if (i & 1) { - vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff; - vformat->vap_prog_stream_cntl[i >> 1] |= temp << 16; + vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= - (R300_VAP_SWIZZLE_XYZW << 16); + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; } else { - vformat->vap_prog_stream_cntl[i >> 1] &= 0xffff0000; - vformat->vap_prog_stream_cntl[i >> 1] |= temp << 0; + vformat->vap_prog_stream_cntl[i >> 1] |= type << 0; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= - (R300_VAP_SWIZZLE_XYZW << 0); + vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0; } } /* Set the last vector in the PSC. */ - i--; + if (i) { + i -= 1; + } vformat->vap_prog_stream_cntl[i >> 1] |= (R300_LAST_VEC << (i & 1 ? 16 : 0)); } -/* Update the vertex format. */ -static void r300_update_vertex_format(struct r300_context* r300) -{ - struct r300_vertex_format vformat; - int i; - - memset(&vformat, 0, sizeof(struct r300_vertex_format)); - for (i = 0; i < 16; i++) { - vformat.vs_tab[i] = -1; - vformat.fs_tab[i] = -1; - } - - r300_vs_tab_routes(r300, &vformat); - - r300_vertex_psc(r300, &vformat); - - if (memcmp(&r300->vertex_info, &vformat, - sizeof(struct r300_vertex_format))) { - memcpy(&r300->vertex_info, &vformat, - sizeof(struct r300_vertex_format)); - r300->dirty_state |= R300_NEW_VERTEX_FORMAT; - } -} - /* Set up the mappings from GB to US, for RS block. */ -static void r300_update_fs_tab(struct r300_context* r300) +static void r300_update_fs_tab(struct r300_context* r300, + struct r300_vertex_format* vformat) { - struct r300_vertex_format* vformat = &r300->vertex_info; struct tgsi_shader_info* info = &r300->fs->info; int i, cols = 0, texs = 0, cols_emitted = 0; int* tab = vformat->fs_tab; @@ -329,53 +345,39 @@ static void r300_update_fs_tab(struct r300_context* r300) /* Set up the RS block. This is the part of the chipset that actually does * the rasterization of vertices into fragments. This is also the part of the * chipset that locks up if any part of it is even slightly wrong. */ -static void r300_update_rs_block(struct r300_context* r300) +static void r300_update_rs_block(struct r300_context* r300, + struct r300_rs_block* rs) { - struct r300_rs_block* rs = r300->rs_block; struct tgsi_shader_info* info = &r300->fs->info; - int* tab = r300->vertex_info.fs_tab; - int col_count = 0, fp_offset = 0, i, memory_pos, tex_count = 0; - - memset(rs, 0, sizeof(struct r300_rs_block)); + int col_count = 0, fp_offset = 0, i, tex_count = 0; + int rs_tex_comp = 0; if (r300_screen(r300->context.screen)->caps->is_r500) { for (i = 0; i < info->num_inputs; i++) { - assert(tab[i] != -1); - memory_pos = tab[i] * 4; switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: rs->ip[col_count] |= - R500_RS_COL_PTR(memory_pos) | + R500_RS_COL_PTR(col_count) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); col_count++; break; case TGSI_SEMANTIC_GENERIC: rs->ip[tex_count] |= - R500_RS_SEL_S(memory_pos) | - R500_RS_SEL_T(memory_pos + 1) | - R500_RS_SEL_R(memory_pos + 2) | - R500_RS_SEL_Q(memory_pos + 3); + R500_RS_SEL_S(rs_tex_comp) | + R500_RS_SEL_T(rs_tex_comp + 1) | + R500_RS_SEL_R(rs_tex_comp + 2) | + R500_RS_SEL_Q(rs_tex_comp + 3); tex_count++; + rs_tex_comp += 4; break; default: break; } } - if (col_count == 0) { - rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); - } - - if (tex_count == 0) { - rs->ip[0] |= - R500_RS_SEL_S(R500_RS_IP_PTR_K0) | - R500_RS_SEL_T(R500_RS_IP_PTR_K0) | - R500_RS_SEL_R(R500_RS_IP_PTR_K0) | - R500_RS_SEL_Q(R500_RS_IP_PTR_K1); - } - /* Rasterize at least one color, or bad things happen. */ if ((col_count == 0) && (tex_count == 0)) { + rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001); col_count++; } @@ -392,23 +394,22 @@ static void r300_update_rs_block(struct r300_context* r300) } } else { for (i = 0; i < info->num_inputs; i++) { - assert(tab[i] != -1); - memory_pos = tab[i] * 4; switch (info->input_semantic_name[i]) { case TGSI_SEMANTIC_COLOR: rs->ip[col_count] |= - R300_RS_COL_PTR(memory_pos) | + R300_RS_COL_PTR(col_count) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); col_count++; break; case TGSI_SEMANTIC_GENERIC: rs->ip[tex_count] |= - R300_RS_TEX_PTR(memory_pos) | + R300_RS_TEX_PTR(rs_tex_comp) | R300_RS_SEL_S(R300_RS_SEL_C0) | R300_RS_SEL_T(R300_RS_SEL_C1) | R300_RS_SEL_R(R300_RS_SEL_C2) | R300_RS_SEL_Q(R300_RS_SEL_C3); tex_count++; + rs_tex_comp+=4; break; default: break; @@ -445,21 +446,110 @@ static void r300_update_rs_block(struct r300_context* r300) } } - rs->count = (tex_count * 4) | (col_count << R300_IC_COUNT_SHIFT) | + rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0); } +/* Update the vertex format. */ +static void r300_update_derived_shader_state(struct r300_context* r300) +{ + struct r300_shader_key* key; + struct r300_vertex_format* vformat; + struct r300_rs_block* rs_block; + struct r300_shader_derived_value* value; + int i; + + /* + key = CALLOC_STRUCT(r300_shader_key); + key->vs = r300->vs; + key->fs = r300->fs; + + value = (struct r300_shader_derived_value*) + util_hash_table_get(r300->shader_hash_table, (void*)key); + if (value) { + //vformat = value->vformat; + rs_block = value->rs_block; + + FREE(key); + } else { + rs_block = CALLOC_STRUCT(r300_rs_block); + value = CALLOC_STRUCT(r300_shader_derived_value); + + r300_update_rs_block(r300, rs_block); + + //value->vformat = vformat; + value->rs_block = rs_block; + util_hash_table_set(r300->shader_hash_table, + (void*)key, (void*)value); + } */ + + /* XXX This will be refactored ASAP. */ + vformat = CALLOC_STRUCT(r300_vertex_format); + rs_block = CALLOC_STRUCT(r300_rs_block); + + for (i = 0; i < 16; i++) { + vformat->vs_tab[i] = -1; + vformat->fs_tab[i] = -1; + } + + r300_vs_tab_routes(r300, vformat); + r300_vertex_psc(r300, vformat); + r300_update_fs_tab(r300, vformat); + + r300_update_rs_block(r300, rs_block); + + FREE(r300->vertex_info); + FREE(r300->rs_block); + + r300->vertex_info = vformat; + r300->rs_block = rs_block; + r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK); +} + +static void r300_update_ztop(struct r300_context* r300) +{ + r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; + + /* This is important enough that I felt it warranted a comment. + * + * According to the docs, these are the conditions where ZTOP must be + * disabled: + * 1) Alpha testing enabled + * 2) Texture kill instructions in fragment shader + * 3) Chroma key culling enabled + * 4) W-buffering enabled + * + * The docs claim that for the first three cases, if no ZS writes happen, + * then ZTOP can be used. + * + * Additionally, the following conditions require disabled ZTOP: + * ~) Depth writes in fragment shader + * ~) Outstanding occlusion queries + * + * ~C. + */ + if (r300->dsa_state->alpha_function) { + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->fs->info.uses_kill) { + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { + r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + } +} + void r300_update_derived_state(struct r300_context* r300) { if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { - r300_update_vertex_format(r300); + (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) { + r300_update_derived_shader_state(r300); } - if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) { - r300_update_fs_tab(r300); - r300_update_rs_block(r300); + if (r300->dirty_state & + (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { + r300_update_ztop(r300); } } diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h index 71a4a47b00..05ad535e2d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.h +++ b/src/gallium/drivers/r300/r300_state_derived.h @@ -25,6 +25,10 @@ struct r300_context; +unsigned r300_shader_key_hash(void* key); + +int r300_shader_key_compare(void* key1, void* key2); + void r300_update_derived_state(struct r300_context* r300); #endif /* R300_STATE_DERIVED_H */ diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 91b93fc367..ec11a41253 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -24,6 +24,8 @@ #ifndef R300_STATE_INLINES_H #define R300_STATE_INLINES_H +#include "draw/draw_vertex.h" + #include "pipe/p_format.h" #include "r300_reg.h" @@ -51,6 +53,7 @@ static INLINE uint32_t r300_translate_blend_function(int blend_func) return R300_COMB_FCN_MAX; default: debug_printf("r300: Unknown blend function %d\n", blend_func); + assert(0); break; } return 0; @@ -98,6 +101,7 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact) case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */ default: debug_printf("r300: Unknown blend factor %d\n", blend_fact); + assert(0); break; } return 0; @@ -127,6 +131,7 @@ static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func) default: debug_printf("r300: Unknown depth/stencil function %d\n", zs_func); + assert(0); break; } return 0; @@ -153,6 +158,7 @@ static INLINE uint32_t r300_translate_stencil_op(int s_op) return R300_ZS_INVERT; default: debug_printf("r300: Unknown stencil op %d", s_op); + assert(0); break; } return 0; @@ -179,6 +185,7 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func) return R300_FG_ALPHA_FUNC_ALWAYS; default: debug_printf("r300: Unknown alpha function %d", alpha_func); + assert(0); break; } return 0; @@ -207,6 +214,7 @@ static INLINE uint32_t r300_translate_wrap(int wrap) return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; default: debug_printf("r300: Unknown texture wrap %d", wrap); + assert(0); return 0; } } @@ -226,6 +234,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) break; default: debug_printf("r300: Unknown texture filter %d\n", min); + assert(0); break; } switch (mag) { @@ -240,6 +249,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) break; default: debug_printf("r300: Unknown texture filter %d\n", mag); + assert(0); break; } switch (mip) { @@ -254,6 +264,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) break; default: debug_printf("r300: Unknown texture filter %d\n", mip); + assert(0); break; } @@ -277,6 +288,8 @@ static INLINE uint32_t r300_anisotropy(float max_aniso) /* Buffer formats. */ +/* Colorbuffer formats. This is the unswizzled format of the RB3D block's + * output. For the swizzling of the targets, check the shader's format. */ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) { switch (format) { @@ -292,7 +305,9 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) return R300_COLOR_FORMAT_ARGB4444; /* 32-bit buffers */ case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: return R300_COLOR_FORMAT_ARGB8888; /* XXX Not in pipe_format case PIPE_FORMAT_A32R32G32B32: @@ -309,17 +324,21 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) debug_printf("r300: Implementation error: " "Got unsupported color format %s in %s\n", pf_name(format), __FUNCTION__); + assert(0); break; } return 0; } +/* Depthbuffer and stencilbuffer. Thankfully, we only support two flavors. */ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) { switch (format) { /* 16-bit depth, no stencil */ case PIPE_FORMAT_Z16_UNORM: return R300_DEPTHFORMAT_16BIT_INT_Z; + /* 24-bit depth, ignored stencil */ + case PIPE_FORMAT_Z24X8_UNORM: /* 24-bit depth, 8-bit stencil */ case PIPE_FORMAT_Z24S8_UNORM: return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; @@ -327,25 +346,36 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) debug_printf("r300: Implementation error: " "Got unsupported ZS format %s in %s\n", pf_name(format), __FUNCTION__); + assert(0); break; } return 0; } -/* Translate pipe_format into US_OUT_FMT. +/* Shader output formats. This is essentially the swizzle from the shader + * to the RB3D block. + * * Note that formats are stored from C3 to C0. */ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) { switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + /* XXX */ case PIPE_FORMAT_Z24S8_UNORM: return R300_US_OUT_FMT_C4_8 | R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_A | R300_C1_SEL_B | + R300_C2_SEL_G | R300_C3_SEL_R; default: debug_printf("r300: Implementation error: " "Got unsupported output format %s in %s\n", pf_name(format), __FUNCTION__); + assert(0); return R300_US_OUT_FMT_UNUSED; } return 0; @@ -372,31 +402,57 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) return 0; } -static INLINE uint32_t translate_vertex_data_type(int type) { - switch (type) { - case EMIT_1F: - case EMIT_1F_PSIZE: +/* Translate pipe_formats into PSC vertex types. */ +static INLINE uint16_t +r300_translate_vertex_data_type(enum pipe_format format) { + switch (format) { + case PIPE_FORMAT_R32_FLOAT: return R300_DATA_TYPE_FLOAT_1; break; - case EMIT_2F: + case PIPE_FORMAT_R32G32_FLOAT: return R300_DATA_TYPE_FLOAT_2; break; - case EMIT_3F: + case PIPE_FORMAT_R32G32B32_FLOAT: return R300_DATA_TYPE_FLOAT_3; break; - case EMIT_4F: + case PIPE_FORMAT_R32G32B32A32_FLOAT: return R300_DATA_TYPE_FLOAT_4; break; - case EMIT_4UB: - return R300_DATA_TYPE_BYTE; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return R300_DATA_TYPE_BYTE | + R300_NORMALIZE; break; default: debug_printf("r300: Implementation error: " - "Bad vertex data type!\n"); + "Bad vertex data format %s!\n", pf_name(format)); assert(0); break; } + return 0; +} +static INLINE uint16_t +r300_translate_vertex_data_swizzle(enum pipe_format format) { + switch (format) { + case PIPE_FORMAT_R32_FLOAT: + return R300_VAP_SWIZZLE_X001; + break; + case PIPE_FORMAT_R32G32_FLOAT: + return R300_VAP_SWIZZLE_XY01; + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + return R300_VAP_SWIZZLE_XYZ1; + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R8G8B8A8_UNORM: + return R300_VAP_SWIZZLE_XYZW; + break; + default: + debug_printf("r300: Implementation error: " + "Bad vertex data format %s!\n", pf_name(format)); + assert(0); + break; + } return 0; } diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 3865730d63..4865f16058 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -21,9 +21,12 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_reg.h" +#include "r300_screen.h" #include "r300_state_invariant.h" - struct pipe_viewport_state r300_viewport_identity = { .scale = {1.0, 1.0, 1.0, 1.0}, .translate = {0.0, 0.0, 0.0, 0.0}, diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h index 5bea6779fe..05cff0d6df 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ b/src/gallium/drivers/r300/r300_state_invariant.h @@ -23,11 +23,7 @@ #ifndef R300_STATE_INVARIANT_H #define R300_STATE_INVARIANT_H -#include "r300_chipset.h" -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_reg.h" -#include "r300_state_inlines.h" +struct r300_context; void r300_emit_invariant_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c deleted file mode 100644 index cc6288cb51..0000000000 --- a/src/gallium/drivers/r300/r300_surface.c +++ /dev/null @@ -1,372 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * Joakim Sindholt <opensource@zhasha.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r300_surface.h" - -static void r300_surface_setup(struct r300_context* r300, - struct r300_texture* dest, - unsigned x, unsigned y, - unsigned w, unsigned h) -{ - struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; - unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size; - CS_LOCALS(r300); - - r300_emit_blend_state(r300, &blend_clear_state); - r300_emit_blend_color_state(r300, &blend_color_clear_state); - r300_emit_dsa_state(r300, &dsa_clear_state); - r300_emit_rs_state(r300, &rs_clear_state); - - BEGIN_CS(26); - - /* Viewport setup */ - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F((float)w); - OUT_CS_32F((float)x); - OUT_CS_32F((float)h); - OUT_CS_32F((float)y); - OUT_CS_32F(1.0); - OUT_CS_32F(0.0); - - OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | - R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | - R300_VTX_XY_FMT | R300_VTX_Z_FMT); - - /* Pixel scissors. */ - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - if (caps->is_r500) { - OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((w - 1) << R300_SCISSORS_X_SHIFT) | ((h - 1) << R300_SCISSORS_Y_SHIFT)); - } else { - /* Non-R500 chipsets have an offset of 1440 in their scissors. */ - OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | - ((y + 1440) << R300_SCISSORS_Y_SHIFT)); - OUT_CS((((w - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((h - 1) + 1440) << R300_SCISSORS_Y_SHIFT)); - } - - /* Flush colorbuffer and blend caches. */ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - - /* Setup colorbuffer. */ - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0, 1); - OUT_CS_RELOC(dest->buffer, pixpitch | - r300_translate_colorformat(dest->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); - - END_CS; -} - -/* Provides pipe_context's "surface_fill". Commonly used for clearing - * buffers. */ -static void r300_surface_fill(struct pipe_context* pipe, - struct pipe_surface* dest, - unsigned x, unsigned y, - unsigned w, unsigned h, - unsigned color) -{ - int i; - float r, g, b, a, depth; - struct r300_context* r300 = r300_context(pipe); - struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; - struct r300_texture* tex = (struct r300_texture*)dest->texture; - unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size; - boolean invalid = FALSE; - CS_LOCALS(r300); - - a = (float)((color >> 24) & 0xff) / 255.0f; - r = (float)((color >> 16) & 0xff) / 255.0f; - g = (float)((color >> 8) & 0xff) / 255.0f; - b = (float)((color >> 0) & 0xff) / 255.0f; - debug_printf("r300: Filling surface %p at (%d,%d)," - " dimensions %dx%d (pixel pitch %d), color 0x%x\n", - dest, x, y, w, h, pixpitch, color); - - /* Fallback? */ - if (FALSE) { -fallback: - debug_printf("r300: Falling back on surface clear..."); - util_surface_fill(pipe, dest, x, y, w, h, color); - return; - } - - /* Make sure our target BO is okay. */ -validate: - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - if (!r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - if (invalid) { - debug_printf("r300: Stuck in validation loop, gonna fallback."); - goto fallback; - } - invalid = TRUE; - goto validate; - } - - r300_surface_setup(r300, tex, x, y, w, h); - - /* Vertex shader setup */ - if (caps->has_tcl) { - r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0); - } else { - BEGIN_CS(4); - OUT_CS_REG(R300_VAP_CNTL_STATUS, -#ifdef PIPE_ARCH_BIG_ENDIAN - R300_VC_32BIT_SWAP | -#endif - R300_VAP_TCL_BYPASS); - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) | - R300_PVS_NUM_CNTLRS(5) | - R300_PVS_NUM_FPUS(caps->num_vert_fpus) | - R300_PVS_VF_MAX_VTX_NUM(12)); - END_CS; - } - - /* Fragment shader setup */ - if (caps->is_r500) { - r500_emit_fragment_program_code(r300, &r5xx_passthrough_fragment_shader, 0); - r300_emit_rs_block_state(r300, &r5xx_rs_block_clear_state); - } else { - r300_emit_fragment_program_code(r300, &r3xx_passthrough_fragment_shader, 0); - r300_emit_rs_block_state(r300, &r3xx_rs_block_clear_state); - } - - BEGIN_CS(26); - - /* VAP stream control, mapping from input memory to PVS/RS memory */ - if (caps->has_tcl) { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); - } else { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); - } - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, - (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) | - (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT)); - - /* VAP format controls */ - OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, - R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); - OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x0); - - /* Disable textures */ - OUT_CS_REG(R300_TX_ENABLE, 0x0); - - /* The size of the point we're about to draw, in sixths of pixels */ - OUT_CS_REG(R300_GA_POINT_SIZE, - ((h * 6) & R300_POINTSIZE_Y_MASK) | - ((w * 6) << R300_POINTSIZE_X_SHIFT)); - - /* Vertex size. */ - OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); - - /* Packet3 with our point vertex */ - OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); - OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | - (1 << R300_PRIM_NUM_VERTICES_SHIFT)); - /* Position */ - OUT_CS_32F(0.5); - OUT_CS_32F(0.5); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - /* Color */ - OUT_CS_32F(r); - OUT_CS_32F(g); - OUT_CS_32F(b); - OUT_CS_32F(a); - - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); - - END_CS; - - r300->dirty_hw++; -} - -static void r300_surface_copy(struct pipe_context* pipe, - struct pipe_surface* dest, - unsigned destx, unsigned desty, - struct pipe_surface* src, - unsigned srcx, unsigned srcy, - unsigned w, unsigned h) -{ - struct r300_context* r300 = r300_context(pipe); - struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; - struct r300_texture* srctex = (struct r300_texture*)src->texture; - struct r300_texture* desttex = (struct r300_texture*)dest->texture; - unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size; - boolean invalid = FALSE; - float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty; - CS_LOCALS(r300); - - debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d)," - " dimensions %dx%d (pixel pitch %d)\n", - src, srcx, srcy, dest, destx, desty, w, h, pixpitch); - - if ((srctex->buffer == desttex->buffer) && - ((destx < srcx + w) || (srcx < destx + w)) && - ((desty < srcy + h) || (srcy < desty + h))) { -fallback: - debug_printf("r300: Falling back on surface_copy\n"); - util_surface_copy(pipe, FALSE, dest, destx, desty, src, - srcx, srcy, w, h); - } - - /* Add our target BOs to the list. */ -validate: - if (!r300->winsys->add_buffer(r300->winsys, srctex->buffer, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - if (!r300->winsys->add_buffer(r300->winsys, desttex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - if (!r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - if (invalid) { - debug_printf("r300: Stuck in validation loop, gonna fallback."); - goto fallback; - } - invalid = TRUE; - goto validate; - } - - r300_surface_setup(r300, desttex, destx, desty, w, h); - - /* Setup the texture. */ - r300_emit_texture(r300, &r300_sampler_copy_state, srctex, 0); - - /* Flush and enable. */ - r300_flush_textures(r300); - - /* Vertex shader setup */ - if (caps->has_tcl) { - r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0); - } else { - BEGIN_CS(4); - OUT_CS_REG(R300_VAP_CNTL_STATUS, -#ifdef PIPE_ARCH_BIG_ENDIAN - R300_VC_32BIT_SWAP | -#endif - R300_VAP_TCL_BYPASS); - OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) | - R300_PVS_NUM_CNTLRS(5) | - R300_PVS_NUM_FPUS(caps->num_vert_fpus) | - R300_PVS_VF_MAX_VTX_NUM(12)); - END_CS; - } - - /* Fragment shader setup */ - if (caps->is_r500) { - r500_emit_fragment_program_code(r300, &r5xx_texture_fragment_shader, 0); - r300_emit_rs_block_state(r300, &r5xx_rs_block_copy_state); - } else { - r300_emit_fragment_program_code(r300, &r3xx_texture_fragment_shader, 0); - r300_emit_rs_block_state(r300, &r3xx_rs_block_copy_state); - } - - BEGIN_CS(30); - /* VAP stream control, mapping from input memory to PVS/RS memory */ - if (caps->has_tcl) { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT)); - } else { - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, - (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | - ((R300_LAST_VEC | (6 << R300_DST_VEC_LOC_SHIFT) | - R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT)); - } - OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, - (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) | - (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT)); - - /* VAP format controls */ - OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, - R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT); - /* Two components of texture 0 */ - OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x2); - - /* Vertex size. */ - OUT_CS_REG(R300_VAP_VTX_SIZE, 0x4); - - /* Packet3 with our texcoords */ - OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 16); - OUT_CS(R300_PRIM_TYPE_QUADS | R300_PRIM_WALK_RING | - (4 << R300_PRIM_NUM_VERTICES_SHIFT)); - /* (x , y ) */ - OUT_CS_32F(fdestx / dest->width); - OUT_CS_32F(fdesty / dest->height); - OUT_CS_32F(fsrcx / src->width); - OUT_CS_32F(fsrcy / src->height); - /* (x , y + h) */ - OUT_CS_32F(fdestx / dest->width); - OUT_CS_32F((fdesty + h) / dest->height); - OUT_CS_32F(fsrcx / src->width); - OUT_CS_32F((fsrcy + h) / src->height); - /* (x + w, y + h) */ - OUT_CS_32F((fdestx + w) / dest->width); - OUT_CS_32F((fdesty + h) / dest->height); - OUT_CS_32F((fsrcx + w) / src->width); - OUT_CS_32F((fsrcy + h) / src->height); - /* (x + w, y ) */ - OUT_CS_32F((fdestx + w) / dest->width); - OUT_CS_32F(fdesty / dest->height); - OUT_CS_32F((fsrcx + w) / src->width); - OUT_CS_32F(fsrcy / src->height); - - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); - - END_CS; - - r300->dirty_hw++; -} - -void r300_init_surface_functions(struct r300_context* r300) -{ - r300->context.surface_fill = r300_surface_fill; - r300->context.surface_copy = r300_surface_copy; -} diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h deleted file mode 100644 index f9e98b2ec9..0000000000 --- a/src/gallium/drivers/r300/r300_surface.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_SURFACE_H -#define R300_SURFACE_H - -#include "pipe/p_context.h" -#include "pipe/p_screen.h" - -#include "util/u_rect.h" - -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_emit.h" -#include "r300_fs.h" -#include "r300_vs.h" -#include "r300_state_inlines.h" - -static struct r300_blend_state blend_clear_state = { - .blend_control = 0x0, - .alpha_blend_control = 0x0, - .rop = 0x0, - .dither = 0x0, -}; - -static struct r300_blend_color_state blend_color_clear_state = { - .blend_color = 0x0, - .blend_color_red_alpha = 0x0, - .blend_color_green_blue = 0x0, -}; - -static struct r300_dsa_state dsa_clear_state = { - .alpha_function = 0x0, - .alpha_reference = 0x0, - .z_buffer_control = 0x0, - .z_stencil_control = 0x0, - .stencil_ref_mask = R300_STENCILWRITEMASK_MASK, - .z_buffer_top = R300_ZTOP_ENABLE, - .stencil_ref_bf = 0x0, -}; - -static struct r300_rs_state rs_clear_state = { - .point_minmax = 0x36000006, - .line_control = 0x00030006, - .depth_scale_front = 0x0, - .depth_offset_front = 0x0, - .depth_scale_back = 0x0, - .depth_offset_back = 0x0, - .polygon_offset_enable = 0x0, - .cull_mode = 0x0, - .line_stipple_config = 0x3BAAAAAB, - .line_stipple_value = 0x0, - .color_control = R300_SHADE_MODEL_FLAT, -}; - -static struct r300_rs_block r3xx_rs_block_clear_state = { - .ip[0] = R500_RS_SEL_S(R300_RS_SEL_C0) | - R500_RS_SEL_T(R300_RS_SEL_C0) | - R500_RS_SEL_R(R300_RS_SEL_C0) | - R500_RS_SEL_Q(R300_RS_SEL_K1), - .inst[0] = R300_RS_INST_COL_CN_WRITE, - .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN, - .inst_count = 0, -}; - -static struct r300_rs_block r5xx_rs_block_clear_state = { - .ip[0] = R500_RS_SEL_S(R500_RS_IP_PTR_K0) | - R500_RS_SEL_T(R500_RS_IP_PTR_K0) | - R500_RS_SEL_R(R500_RS_IP_PTR_K0) | - R500_RS_SEL_Q(R500_RS_IP_PTR_K1), - .inst[0] = R500_RS_INST_COL_CN_WRITE, - .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN, - .inst_count = 0, -}; - -/* The following state is used for surface_copy only. */ - -static struct r300_rs_block r3xx_rs_block_copy_state = { - .ip[0] = R500_RS_SEL_S(R300_RS_SEL_K0) | - R500_RS_SEL_T(R300_RS_SEL_K0) | - R500_RS_SEL_R(R300_RS_SEL_K0) | - R500_RS_SEL_Q(R300_RS_SEL_K1), - .inst[0] = R300_RS_INST_COL_CN_WRITE, - .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN, - .inst_count = R300_RS_TX_OFFSET(0), -}; - -static struct r300_rs_block r5xx_rs_block_copy_state = { - .ip[0] = R500_RS_SEL_S(0) | - R500_RS_SEL_T(1) | - R500_RS_SEL_R(R500_RS_IP_PTR_K0) | - R500_RS_SEL_Q(R500_RS_IP_PTR_K1), - .inst[0] = R500_RS_INST_TEX_CN_WRITE, - .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN, - .inst_count = R300_RS_TX_OFFSET(0), -}; - -static struct r300_sampler_state r300_sampler_copy_state = { - .filter0 = R300_TX_WRAP_S(R300_TX_CLAMP) | - R300_TX_WRAP_T(R300_TX_CLAMP) | - R300_TX_MAG_FILTER_NEAREST | - R300_TX_MIN_FILTER_NEAREST, -}; - -#endif /* R300_SURFACE_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ce60ded7ca..3e90fea6c8 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -20,43 +20,83 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "pipe/p_screen.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "r300_context.h" #include "r300_texture.h" +#include "r300_screen.h" -static void r300_setup_texture_state(struct r300_texture* tex) +static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; + unsigned stride; state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) | - R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) | - R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | - R300_TX_NUM_LEVELS(pt->last_level) | - R300_TX_PITCH_EN; + R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff); + + if (!util_is_power_of_two(pt->width[0]) || + !util_is_power_of_two(pt->height[0])) { + + /* rectangles love this */ + state->format0 |= R300_TX_PITCH_EN; + + stride = r300_texture_get_stride(tex, 0) / pt->block.size; + state->format2 = (stride - 1) & 0x1fff; + } + else { + /* power of two textures (3D, mipmaps, and no pitch) */ + state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) | + R300_TX_NUM_LEVELS(pt->last_level & 0xf); + } - /* XXX */ state->format1 = r300_translate_texformat(pt->format); if (pt->target == PIPE_TEXTURE_CUBE) { - state->format1 |= R300_TX_FORMAT_CUBIC_MAP; + state->format1 |= R300_TX_FORMAT_CUBIC_MAP; } if (pt->target == PIPE_TEXTURE_3D) { - state->format1 |= R300_TX_FORMAT_3D; + state->format1 |= R300_TX_FORMAT_3D; } - state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1; - - /* Assume (somewhat foolishly) that oversized textures will - * not be permitted by the state tracker. */ - if (pt->width[0] > 2048) { - state->format2 |= R500_TXWIDTH_BIT11; - } - if (pt->height[0] > 2048) { - state->format2 |= R500_TXHEIGHT_BIT11; + /* large textures on r500 */ + if (is_r500) + { + if (pt->width[0] > 2048) { + state->format2 |= R500_TXWIDTH_BIT11; + } + if (pt->height[0] > 2048) { + state->format2 |= R500_TXHEIGHT_BIT11; + } } + assert(is_r500 || (pt->width[0] <= 2048 && pt->height[0] <= 2048)); debug_printf("r300: Set texture state (%dx%d, %d levels)\n", pt->width[0], pt->height[0], pt->last_level); } +unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, + unsigned zslice, unsigned face) +{ + unsigned offset = tex->offset[level]; + + switch (tex->tex.target) { + case PIPE_TEXTURE_3D: + assert(face == 0); + return offset + zslice * tex->layer_size[level]; + + case PIPE_TEXTURE_CUBE: + assert(zslice == 0); + return offset + face * tex->layer_size[level]; + + default: + assert(zslice == 0 && face == 0); + return offset; + } +} + /** * Return the stride, in bytes, of the texture images of the given texture * at the given level. @@ -67,7 +107,8 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) return tex->stride_override; if (level > tex->tex.last_level) { - debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); + debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, + level, tex->tex.last_level); return 0; } @@ -77,7 +118,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size; + int stride, size, layer_size; int i; for (i = 0; i <= base->last_level; i++) { @@ -90,16 +131,17 @@ static void r300_setup_miptree(struct r300_texture* tex) base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); - /* Radeons enjoy things in multiples of 64. - * - * XXX - * POT, uncompressed, unmippmapped textures can be aligned to 32, - * instead of 64. */ stride = r300_texture_get_stride(tex, i); - size = stride * base->nblocksy[i] * base->depth[i]; + layer_size = stride * base->nblocksy[i]; + + if (base->target == PIPE_TEXTURE_CUBE) + size = layer_size * 6; + else + size = layer_size * base->depth[i]; tex->offset[i] = align(tex->size, 32); tex->size = tex->offset[i] + size; + tex->layer_size[i] = layer_size; debug_printf("r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes)\n", @@ -125,7 +167,7 @@ static struct pipe_texture* r300_setup_miptree(tex); - r300_setup_texture_state(tex); + r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, @@ -159,8 +201,7 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface); unsigned offset; - /* XXX this is certainly dependent on tex target */ - offset = tex->offset[level]; + offset = r300_texture_get_offset(tex, level, zslice, face); if (surface) { pipe_reference_init(&surface->reference, 1); @@ -170,6 +211,10 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, surface->height = texture->height[level]; surface->offset = offset; surface->usage = flags; + surface->zslice = zslice; + surface->texture = texture; + surface->face = face; + surface->level = level; } return surface; @@ -189,13 +234,6 @@ static struct pipe_texture* { struct r300_texture* tex; - /* XXX we should start doing mips now... */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth[0] != 1) { - return NULL; - } - tex = CALLOC_STRUCT(r300_texture); if (!tex) { return NULL; @@ -207,14 +245,62 @@ static struct pipe_texture* tex->stride_override = *stride; - /* XXX */ - r300_setup_texture_state(tex); + r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); pipe_buffer_reference(&tex->buffer, buffer); return (struct pipe_texture*)tex; } +static struct pipe_video_surface * +r300_video_surface_create(struct pipe_screen *screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + struct r300_video_surface *r300_vsfc; + struct pipe_texture template; + + assert(screen); + assert(width && height); + + r300_vsfc = CALLOC_STRUCT(r300_video_surface); + if (!r300_vsfc) + return NULL; + + pipe_reference_init(&r300_vsfc->base.reference, 1); + r300_vsfc->base.screen = screen; + r300_vsfc->base.chroma_format = chroma_format; + r300_vsfc->base.width = width; + r300_vsfc->base.height = height; + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_X8R8G8B8_UNORM; + template.last_level = 0; + template.width[0] = util_next_power_of_two(width); + template.height[0] = util_next_power_of_two(height); + template.depth[0] = 1; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | + PIPE_TEXTURE_USAGE_RENDER_TARGET; + + r300_vsfc->tex = screen->texture_create(screen, &template); + if (!r300_vsfc->tex) + { + FREE(r300_vsfc); + return NULL; + } + + return &r300_vsfc->base; +} + +static void r300_video_surface_destroy(struct pipe_video_surface *vsfc) +{ + struct r300_video_surface *r300_vsfc = r300_video_surface(vsfc); + pipe_texture_reference(&r300_vsfc->tex, NULL); + FREE(r300_vsfc); +} + void r300_init_screen_texture_functions(struct pipe_screen* screen) { screen->texture_create = r300_texture_create; @@ -222,6 +308,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) screen->get_tex_surface = r300_get_tex_surface; screen->tex_surface_destroy = r300_tex_surface_destroy; screen->texture_blanket = r300_texture_blanket; + + screen->video_surface_create = r300_video_surface_create; + screen->video_surface_destroy= r300_video_surface_destroy; } boolean r300_get_texture_buffer(struct pipe_texture* texture, diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 78ee0f1611..55ceb1a513 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -23,11 +23,8 @@ #ifndef R300_TEXTURE_H #define R300_TEXTURE_H -#include "pipe/p_screen.h" +#include "pipe/p_video_state.h" -#include "util/u_math.h" - -#include "r300_context.h" #include "r300_reg.h" struct r300_texture; @@ -36,6 +33,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen); unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level); +unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, + unsigned zslice, unsigned face); + /* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) { @@ -43,11 +43,28 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) /* X8 */ case PIPE_FORMAT_I8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X8); + case PIPE_FORMAT_L8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, ONE, X8); + /* X16 */ + case PIPE_FORMAT_R16_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, X16); + case PIPE_FORMAT_R16_SNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, X16) | + R300_TX_FORMAT_SIGNED; + case PIPE_FORMAT_Z16_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, X16); + /* Y8X8 */ + case PIPE_FORMAT_A8L8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8); /* W8Z8Y8X8 */ case PIPE_FORMAT_A8R8G8B8_UNORM: return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); case PIPE_FORMAT_R8G8B8A8_UNORM: return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case PIPE_FORMAT_X8R8G8B8_UNORM: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + case PIPE_FORMAT_R8G8B8X8_UNORM: + return R300_EASY_TX_FORMAT(Y, Z, ONE, X, W8Z8Y8X8); case PIPE_FORMAT_A8R8G8B8_SRGB: return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; @@ -71,12 +88,9 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) R300_TX_FORMAT_YUV_TO_RGB; /* W24_FP */ case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); - /* Z5_Y6_X5 */ - case PIPE_FORMAT_R16_SNORM: - return R300_EASY_TX_FORMAT(X, X, X, X, Z5Y6X5); - case PIPE_FORMAT_Z16_UNORM: - return R300_EASY_TX_FORMAT(X, X, X, X, X16); + default: debug_printf("r300: Implementation error: " "Got unsupported texture format %s in %s\n", @@ -87,6 +101,18 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format) return 0; } +struct r300_video_surface +{ + struct pipe_video_surface base; + struct pipe_texture *tex; +}; + +static INLINE struct r300_video_surface * +r300_video_surface(struct pipe_video_surface *pvs) +{ + return (struct r300_video_surface *)pvs; +} + #ifndef R300_WINSYS_H boolean r300_get_texture_buffer(struct pipe_texture* texture, diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 0913ca1bd5..589f1984ee 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -33,150 +33,145 @@ static unsigned translate_opcode(unsigned opcode) { switch(opcode) { - case TGSI_OPCODE_ARL: return OPCODE_ARL; - case TGSI_OPCODE_MOV: return OPCODE_MOV; - case TGSI_OPCODE_LIT: return OPCODE_LIT; - case TGSI_OPCODE_RCP: return OPCODE_RCP; - case TGSI_OPCODE_RSQ: return OPCODE_RSQ; - case TGSI_OPCODE_EXP: return OPCODE_EXP; - case TGSI_OPCODE_LOG: return OPCODE_LOG; - case TGSI_OPCODE_MUL: return OPCODE_MUL; - case TGSI_OPCODE_ADD: return OPCODE_ADD; - case TGSI_OPCODE_DP3: return OPCODE_DP3; - case TGSI_OPCODE_DP4: return OPCODE_DP4; - case TGSI_OPCODE_DST: return OPCODE_DST; - case TGSI_OPCODE_MIN: return OPCODE_MIN; - case TGSI_OPCODE_MAX: return OPCODE_MAX; - case TGSI_OPCODE_SLT: return OPCODE_SLT; - case TGSI_OPCODE_SGE: return OPCODE_SGE; - case TGSI_OPCODE_MAD: return OPCODE_MAD; - case TGSI_OPCODE_SUB: return OPCODE_SUB; - case TGSI_OPCODE_LRP: return OPCODE_LRP; - /* case TGSI_OPCODE_CND: return OPCODE_CND; */ - case TGSI_OPCODE_DP2A: return OPCODE_DP2A; + case TGSI_OPCODE_ARL: return RC_OPCODE_ARL; + case TGSI_OPCODE_MOV: return RC_OPCODE_MOV; + case TGSI_OPCODE_LIT: return RC_OPCODE_LIT; + case TGSI_OPCODE_RCP: return RC_OPCODE_RCP; + case TGSI_OPCODE_RSQ: return RC_OPCODE_RSQ; + case TGSI_OPCODE_EXP: return RC_OPCODE_EXP; + case TGSI_OPCODE_LOG: return RC_OPCODE_LOG; + case TGSI_OPCODE_MUL: return RC_OPCODE_MUL; + case TGSI_OPCODE_ADD: return RC_OPCODE_ADD; + case TGSI_OPCODE_DP3: return RC_OPCODE_DP3; + case TGSI_OPCODE_DP4: return RC_OPCODE_DP4; + case TGSI_OPCODE_DST: return RC_OPCODE_DST; + case TGSI_OPCODE_MIN: return RC_OPCODE_MIN; + case TGSI_OPCODE_MAX: return RC_OPCODE_MAX; + case TGSI_OPCODE_SLT: return RC_OPCODE_SLT; + case TGSI_OPCODE_SGE: return RC_OPCODE_SGE; + case TGSI_OPCODE_MAD: return RC_OPCODE_MAD; + case TGSI_OPCODE_SUB: return RC_OPCODE_SUB; + case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; + /* case TGSI_OPCODE_CND: return RC_OPCODE_CND; */ + /* case TGSI_OPCODE_CND0: return RC_OPCODE_CND0; */ + /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ /* gap */ - case TGSI_OPCODE_FRC: return OPCODE_FRC; - /* case TGSI_OPCODE_CLAMP: return OPCODE_CLAMP; */ - case TGSI_OPCODE_FLR: return OPCODE_FLR; - /* case TGSI_OPCODE_ROUND: return OPCODE_ROUND; */ - case TGSI_OPCODE_EX2: return OPCODE_EX2; - case TGSI_OPCODE_LG2: return OPCODE_LG2; - case TGSI_OPCODE_POW: return OPCODE_POW; - case TGSI_OPCODE_XPD: return OPCODE_XPD; + case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; + /* case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */ + case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; + /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */ + case TGSI_OPCODE_EX2: return RC_OPCODE_EX2; + case TGSI_OPCODE_LG2: return RC_OPCODE_LG2; + case TGSI_OPCODE_POW: return RC_OPCODE_POW; + case TGSI_OPCODE_XPD: return RC_OPCODE_XPD; /* gap */ - case TGSI_OPCODE_ABS: return OPCODE_ABS; - case TGSI_OPCODE_RCC: return OPCODE_RCC; - case TGSI_OPCODE_DPH: return OPCODE_DPH; - case TGSI_OPCODE_COS: return OPCODE_COS; - case TGSI_OPCODE_DDX: return OPCODE_DDX; - case TGSI_OPCODE_DDY: return OPCODE_DDY; - /* case TGSI_OPCODE_KILP: return OPCODE_KILP; */ - case TGSI_OPCODE_PK2H: return OPCODE_PK2H; - case TGSI_OPCODE_PK2US: return OPCODE_PK2US; - case TGSI_OPCODE_PK4B: return OPCODE_PK4B; - case TGSI_OPCODE_PK4UB: return OPCODE_PK4UB; - case TGSI_OPCODE_RFL: return OPCODE_RFL; - case TGSI_OPCODE_SEQ: return OPCODE_SEQ; - case TGSI_OPCODE_SFL: return OPCODE_SFL; - case TGSI_OPCODE_SGT: return OPCODE_SGT; - case TGSI_OPCODE_SIN: return OPCODE_SIN; - case TGSI_OPCODE_SLE: return OPCODE_SLE; - case TGSI_OPCODE_SNE: return OPCODE_SNE; - case TGSI_OPCODE_STR: return OPCODE_STR; - case TGSI_OPCODE_TEX: return OPCODE_TEX; - case TGSI_OPCODE_TXD: return OPCODE_TXD; - case TGSI_OPCODE_TXP: return OPCODE_TXP; - case TGSI_OPCODE_UP2H: return OPCODE_UP2H; - case TGSI_OPCODE_UP2US: return OPCODE_UP2US; - case TGSI_OPCODE_UP4B: return OPCODE_UP4B; - case TGSI_OPCODE_UP4UB: return OPCODE_UP4UB; - case TGSI_OPCODE_X2D: return OPCODE_X2D; - case TGSI_OPCODE_ARA: return OPCODE_ARA; - case TGSI_OPCODE_ARR: return OPCODE_ARR; - case TGSI_OPCODE_BRA: return OPCODE_BRA; - case TGSI_OPCODE_CAL: return OPCODE_CAL; - case TGSI_OPCODE_RET: return OPCODE_RET; - case TGSI_OPCODE_SSG: return OPCODE_SSG; - case TGSI_OPCODE_CMP: return OPCODE_CMP; - case TGSI_OPCODE_SCS: return OPCODE_SCS; - case TGSI_OPCODE_TXB: return OPCODE_TXB; - /* case TGSI_OPCODE_NRM: return OPCODE_NRM; */ - /* case TGSI_OPCODE_DIV: return OPCODE_DIV; */ - case TGSI_OPCODE_DP2: return OPCODE_DP2; - case TGSI_OPCODE_TXL: return OPCODE_TXL; - case TGSI_OPCODE_BRK: return OPCODE_BRK; - case TGSI_OPCODE_IF: return OPCODE_IF; - /* case TGSI_OPCODE_LOOP: return OPCODE_LOOP; */ - /* case TGSI_OPCODE_REP: return OPCODE_REP; */ - case TGSI_OPCODE_ELSE: return OPCODE_ELSE; - case TGSI_OPCODE_ENDIF: return OPCODE_ENDIF; - case TGSI_OPCODE_ENDLOOP: return OPCODE_ENDLOOP; - /* case TGSI_OPCODE_ENDREP: return OPCODE_ENDREP; */ - case TGSI_OPCODE_PUSHA: return OPCODE_PUSHA; - case TGSI_OPCODE_POPA: return OPCODE_POPA; - /* case TGSI_OPCODE_CEIL: return OPCODE_CEIL; */ - /* case TGSI_OPCODE_I2F: return OPCODE_I2F; */ - case TGSI_OPCODE_NOT: return OPCODE_NOT; - case TGSI_OPCODE_TRUNC: return OPCODE_TRUNC; - /* case TGSI_OPCODE_SHL: return OPCODE_SHL; */ - /* case TGSI_OPCODE_SHR: return OPCODE_SHR; */ - case TGSI_OPCODE_AND: return OPCODE_AND; - case TGSI_OPCODE_OR: return OPCODE_OR; - /* case TGSI_OPCODE_MOD: return OPCODE_MOD; */ - case TGSI_OPCODE_XOR: return OPCODE_XOR; - /* case TGSI_OPCODE_SAD: return OPCODE_SAD; */ - /* case TGSI_OPCODE_TXF: return OPCODE_TXF; */ - /* case TGSI_OPCODE_TXQ: return OPCODE_TXQ; */ - case TGSI_OPCODE_CONT: return OPCODE_CONT; - /* case TGSI_OPCODE_EMIT: return OPCODE_EMIT; */ - /* case TGSI_OPCODE_ENDPRIM: return OPCODE_ENDPRIM; */ - /* case TGSI_OPCODE_BGNLOOP2: return OPCODE_BGNLOOP2; */ - case TGSI_OPCODE_BGNSUB: return OPCODE_BGNSUB; - /* case TGSI_OPCODE_ENDLOOP2: return OPCODE_ENDLOOP2; */ - case TGSI_OPCODE_ENDSUB: return OPCODE_ENDSUB; - case TGSI_OPCODE_NOISE1: return OPCODE_NOISE1; - case TGSI_OPCODE_NOISE2: return OPCODE_NOISE2; - case TGSI_OPCODE_NOISE3: return OPCODE_NOISE3; - case TGSI_OPCODE_NOISE4: return OPCODE_NOISE4; - case TGSI_OPCODE_NOP: return OPCODE_NOP; + case TGSI_OPCODE_ABS: return RC_OPCODE_ABS; + /* case TGSI_OPCODE_RCC: return RC_OPCODE_RCC; */ + case TGSI_OPCODE_DPH: return RC_OPCODE_DPH; + case TGSI_OPCODE_COS: return RC_OPCODE_COS; + case TGSI_OPCODE_DDX: return RC_OPCODE_DDX; + case TGSI_OPCODE_DDY: return RC_OPCODE_DDY; + /* case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; */ + /* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */ + /* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */ + /* case TGSI_OPCODE_PK4B: return RC_OPCODE_PK4B; */ + /* case TGSI_OPCODE_PK4UB: return RC_OPCODE_PK4UB; */ + /* case TGSI_OPCODE_RFL: return RC_OPCODE_RFL; */ + case TGSI_OPCODE_SEQ: return RC_OPCODE_SEQ; + case TGSI_OPCODE_SFL: return RC_OPCODE_SFL; + case TGSI_OPCODE_SGT: return RC_OPCODE_SGT; + case TGSI_OPCODE_SIN: return RC_OPCODE_SIN; + case TGSI_OPCODE_SLE: return RC_OPCODE_SLE; + case TGSI_OPCODE_SNE: return RC_OPCODE_SNE; + /* case TGSI_OPCODE_STR: return RC_OPCODE_STR; */ + case TGSI_OPCODE_TEX: return RC_OPCODE_TEX; + case TGSI_OPCODE_TXD: return RC_OPCODE_TXD; + case TGSI_OPCODE_TXP: return RC_OPCODE_TXP; + /* case TGSI_OPCODE_UP2H: return RC_OPCODE_UP2H; */ + /* case TGSI_OPCODE_UP2US: return RC_OPCODE_UP2US; */ + /* case TGSI_OPCODE_UP4B: return RC_OPCODE_UP4B; */ + /* case TGSI_OPCODE_UP4UB: return RC_OPCODE_UP4UB; */ + /* case TGSI_OPCODE_X2D: return RC_OPCODE_X2D; */ + /* case TGSI_OPCODE_ARA: return RC_OPCODE_ARA; */ + /* case TGSI_OPCODE_ARR: return RC_OPCODE_ARR; */ + /* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */ + /* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */ + /* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */ + /* case TGSI_OPCODE_SSG: return RC_OPCODE_SSG; */ + case TGSI_OPCODE_CMP: return RC_OPCODE_CMP; + case TGSI_OPCODE_SCS: return RC_OPCODE_SCS; + case TGSI_OPCODE_TXB: return RC_OPCODE_TXB; + /* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */ + /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */ + /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */ + case TGSI_OPCODE_TXL: return RC_OPCODE_TXL; + /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ + case TGSI_OPCODE_IF: return RC_OPCODE_IF; + /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ + /* case TGSI_OPCODE_REP: return RC_OPCODE_REP; */ + case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; + case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; + /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ + /* case TGSI_OPCODE_ENDREP: return RC_OPCODE_ENDREP; */ + /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ + /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ + /* case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; */ + /* case TGSI_OPCODE_I2F: return RC_OPCODE_I2F; */ + /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ + /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */ + /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ + /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */ + /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ + /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ + /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ + /* case TGSI_OPCODE_XOR: return RC_OPCODE_XOR; */ + /* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */ + /* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */ + /* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */ + /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */ + /* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */ + /* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */ + /* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */ + /* case TGSI_OPCODE_BGNSUB: return RC_OPCODE_BGNSUB; */ + /* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */ + /* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */ + case TGSI_OPCODE_NOP: return RC_OPCODE_NOP; /* gap */ - case TGSI_OPCODE_NRM4: return OPCODE_NRM4; - /* case TGSI_OPCODE_CALLNZ: return OPCODE_CALLNZ; */ - /* case TGSI_OPCODE_IFC: return OPCODE_IFC; */ - /* case TGSI_OPCODE_BREAKC: return OPCODE_BREAKC; */ - case TGSI_OPCODE_KIL: return OPCODE_KIL; - case TGSI_OPCODE_END: return OPCODE_END; - case TGSI_OPCODE_SWZ: return OPCODE_SWZ; + /* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */ + /* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */ + /* case TGSI_OPCODE_IFC: return RC_OPCODE_IFC; */ + /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */ + case TGSI_OPCODE_KIL: return RC_OPCODE_KIL; } fprintf(stderr, "Unknown opcode: %i\n", opcode); - abort(); + return RC_OPCODE_ILLEGAL_OPCODE; } static unsigned translate_saturate(unsigned saturate) { switch(saturate) { - case TGSI_SAT_NONE: return SATURATE_OFF; - case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE; + default: + fprintf(stderr, "Unknown saturate mode: %i\n", saturate); + /* fall-through */ + case TGSI_SAT_NONE: return RC_SATURATE_NONE; + case TGSI_SAT_ZERO_ONE: return RC_SATURATE_ZERO_ONE; } - - fprintf(stderr, "Unknown saturate mode: %i\n", saturate); - abort(); } static unsigned translate_register_file(unsigned file) { switch(file) { - case TGSI_FILE_CONSTANT: return PROGRAM_CONSTANT; - case TGSI_FILE_IMMEDIATE: return PROGRAM_CONSTANT; - case TGSI_FILE_INPUT: return PROGRAM_INPUT; - case TGSI_FILE_OUTPUT: return PROGRAM_OUTPUT; - case TGSI_FILE_TEMPORARY: return PROGRAM_TEMPORARY; - case TGSI_FILE_ADDRESS: return PROGRAM_ADDRESS; + case TGSI_FILE_CONSTANT: return RC_FILE_CONSTANT; + case TGSI_FILE_IMMEDIATE: return RC_FILE_CONSTANT; + case TGSI_FILE_INPUT: return RC_FILE_INPUT; + case TGSI_FILE_OUTPUT: return RC_FILE_OUTPUT; + default: + fprintf(stderr, "Unhandled register file: %i\n", file); + /* fall-through */ + case TGSI_FILE_TEMPORARY: return RC_FILE_TEMPORARY; + case TGSI_FILE_ADDRESS: return RC_FILE_ADDRESS; } - - fprintf(stderr, "Unhandled register file: %i\n", file); - abort(); } static int translate_register_index( @@ -192,7 +187,7 @@ static int translate_register_index( static void transform_dstreg( struct tgsi_to_rc * ttr, - struct prog_dst_register * dst, + struct rc_dst_register * dst, struct tgsi_full_dst_register * src) { dst->File = translate_register_file(src->DstRegister.File); @@ -203,77 +198,73 @@ static void transform_dstreg( static void transform_srcreg( struct tgsi_to_rc * ttr, - struct prog_src_register * dst, + struct rc_src_register * dst, struct tgsi_full_src_register * src) { dst->File = translate_register_file(src->SrcRegister.File); dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index); dst->RelAddr = src->SrcRegister.Indirect; - dst->Swizzle = tgsi_util_get_full_src_register_extswizzle(src, 0); - dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 1) << 3; - dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 2) << 6; - dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 3) << 9; + dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0); + dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3; + dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6; + dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9; dst->Abs = src->SrcRegisterExtMod.Absolute; - dst->Negate = - src->SrcRegisterExtSwz.NegateX | - (src->SrcRegisterExtSwz.NegateY << 1) | - (src->SrcRegisterExtSwz.NegateZ << 2) | - (src->SrcRegisterExtSwz.NegateW << 3); - dst->Negate ^= src->SrcRegister.Negate ? NEGATE_XYZW : 0; + dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0; } static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src) { switch(src.Texture) { case TGSI_TEXTURE_1D: - dst->I.TexSrcTarget = TEXTURE_1D_INDEX; + dst->U.I.TexSrcTarget = RC_TEXTURE_1D; break; case TGSI_TEXTURE_2D: - dst->I.TexSrcTarget = TEXTURE_2D_INDEX; + dst->U.I.TexSrcTarget = RC_TEXTURE_2D; break; case TGSI_TEXTURE_3D: - dst->I.TexSrcTarget = TEXTURE_3D_INDEX; + dst->U.I.TexSrcTarget = RC_TEXTURE_3D; break; case TGSI_TEXTURE_CUBE: - dst->I.TexSrcTarget = TEXTURE_CUBE_INDEX; + dst->U.I.TexSrcTarget = RC_TEXTURE_CUBE; break; case TGSI_TEXTURE_RECT: - dst->I.TexSrcTarget = TEXTURE_RECT_INDEX; + dst->U.I.TexSrcTarget = RC_TEXTURE_RECT; break; case TGSI_TEXTURE_SHADOW1D: - dst->I.TexSrcTarget = TEXTURE_1D_INDEX; - dst->I.TexShadow = 1; + dst->U.I.TexSrcTarget = RC_TEXTURE_1D; + dst->U.I.TexShadow = 1; break; case TGSI_TEXTURE_SHADOW2D: - dst->I.TexSrcTarget = TEXTURE_2D_INDEX; - dst->I.TexShadow = 1; + dst->U.I.TexSrcTarget = RC_TEXTURE_2D; + dst->U.I.TexShadow = 1; break; case TGSI_TEXTURE_SHADOWRECT: - dst->I.TexSrcTarget = TEXTURE_RECT_INDEX; - dst->I.TexShadow = 1; + dst->U.I.TexSrcTarget = RC_TEXTURE_RECT; + dst->U.I.TexShadow = 1; break; } } static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src) { + struct rc_instruction * dst; + int i; + if (src->Instruction.Opcode == TGSI_OPCODE_END) return; - struct rc_instruction * dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); - int i; - - dst->I.Opcode = translate_opcode(src->Instruction.Opcode); - dst->I.SaturateMode = translate_saturate(src->Instruction.Saturate); + dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev); + dst->U.I.Opcode = translate_opcode(src->Instruction.Opcode); + dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate); if (src->Instruction.NumDstRegs) - transform_dstreg(ttr, &dst->I.DstReg, &src->FullDstRegisters[0]); + transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]); for(i = 0; i < src->Instruction.NumSrcRegs; ++i) { if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER) - dst->I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; + dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index; else - transform_srcreg(ttr, &dst->I.SrcReg[i], &src->FullSrcRegisters[i]); + transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]); } /* Texturing. */ diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 12a6e37be6..eca85879a7 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -35,6 +35,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { struct r300_vertex_shader * vs = c->UserData; struct tgsi_shader_info* info = &vs->info; + struct tgsi_parse_context parser; + struct tgsi_full_declaration * decl; boolean pointsize = false; int out_colors = 0; int colors = 0; @@ -62,8 +64,6 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) } } - struct tgsi_parse_context parser; - tgsi_parse_init(&parser, vs->state.tokens); while (!tgsi_parse_end_of_tokens(&parser)) { @@ -72,7 +72,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) continue; - struct tgsi_full_declaration * decl = &parser.FullToken.FullDeclaration; + decl = &parser.FullToken.FullDeclaration; if (decl->Declaration.File != TGSI_FILE_OUTPUT) continue; @@ -146,89 +146,3 @@ void r300_translate_vertex_shader(struct r300_context* r300, rc_destroy(&compiler.Base); vs->translated = TRUE; } - - -/* XXX get these to r300_reg */ -#define R300_PVS_DST_OPCODE(x) ((x) << 0) -# define R300_VE_DOT_PRODUCT 1 -# define R300_VE_MULTIPLY 2 -# define R300_VE_ADD 3 -# define R300_VE_MAXIMUM 7 -# define R300_VE_SET_LESS_THAN 10 -#define R300_PVS_DST_MATH_INST (1 << 6) -# define R300_ME_RECIP_DX 6 -#define R300_PVS_DST_MACRO_INST (1 << 7) -# define R300_PVS_MACRO_OP_2CLK_MADD 0 -#define R300_PVS_DST_REG_TYPE(x) ((x) << 8) -# define R300_PVS_DST_REG_TEMPORARY 0 -# define R300_PVS_DST_REG_A0 1 -# define R300_PVS_DST_REG_OUT 2 -# define R300_PVS_DST_REG_OUT_REPL_X 3 -# define R300_PVS_DST_REG_ALT_TEMPORARY 4 -# define R300_PVS_DST_REG_INPUT 5 -#define R300_PVS_DST_OFFSET(x) ((x) << 13) -#define R300_PVS_DST_WE(x) ((x) << 20) -#define R300_PVS_DST_WE_XYZW (0xf << 20) - -#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0) -# define R300_PVS_SRC_REG_TEMPORARY 0 -# define R300_PVS_SRC_REG_INPUT 1 -# define R300_PVS_SRC_REG_CONSTANT 2 -# define R300_PVS_SRC_REG_ALT_TEMPORARY 3 -#define R300_PVS_SRC_OFFSET(x) ((x) << 5) -#define R300_PVS_SRC_SWIZZLE(x) ((x) << 13) -# define R300_PVS_SRC_SELECT_X 0 -# define R300_PVS_SRC_SELECT_Y 1 -# define R300_PVS_SRC_SELECT_Z 2 -# define R300_PVS_SRC_SELECT_W 3 -# define R300_PVS_SRC_SELECT_FORCE_0 4 -# define R300_PVS_SRC_SELECT_FORCE_1 5 -# define R300_PVS_SRC_SWIZZLE_XYZW \ - ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \ - (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13) -# define R300_PVS_SRC_SWIZZLE_ZERO \ - ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \ - (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \ - (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13) -# define R300_PVS_SRC_SWIZZLE_ONE \ - ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \ - (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \ - (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13) -#define R300_PVS_MODIFIER_X (1 << 25) -#define R300_PVS_MODIFIER_Y (1 << 26) -#define R300_PVS_MODIFIER_Z (1 << 27) -#define R300_PVS_MODIFIER_W (1 << 28) -#define R300_PVS_NEGATE_XYZW \ - (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \ - R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W) - -struct r300_vertex_program_code r300_passthrough_vertex_shader = { - .length = 8, /* two instructions */ - - /* MOV out[0], in[0] */ - .body.d[0] = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW, - .body.d[1] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW, - .body.d[2] = R300_PVS_SRC_SWIZZLE_ZERO, - .body.d[3] = 0x0, - - /* MOV out[1], in[1] */ - .body.d[4] = R300_PVS_DST_OPCODE(R300_VE_ADD) | - R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) | - R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW, - .body.d[5] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) | - R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW, - .body.d[6] = R300_PVS_SRC_SWIZZLE_ZERO, - .body.d[7] = 0x0, - - .inputs[0] = 0, - .inputs[1] = 1, - .outputs[0] = 0, - .outputs[1] = 1, - - .InputsRead = 3, - .OutputsWritten = 3 -}; - diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index f18ad75a47..864a6146b2 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -48,6 +48,9 @@ struct r300_winsys { /* GB pipe count */ uint32_t gb_pipes; + /* Z pipe count (rv530 only) */ + uint32_t z_pipes; + /* GART size. */ uint32_t gart_size; @@ -92,6 +95,12 @@ struct r300_winsys { /* Flush the CS. */ void (*flush_cs)(struct r300_winsys* winsys); + + /* winsys flush - callback from winsys when flush required */ + void (*set_flush_cb)(struct r300_winsys *winsys, + void (*flush_cb)(void *), void *data); + + void (*reset_bos)(struct r300_winsys *winsys); }; struct pipe_context* r300_create_context(struct pipe_screen* screen, diff --git a/src/gallium/drivers/r300/r3xx_fs.c b/src/gallium/drivers/r300/r3xx_fs.c deleted file mode 100644 index c1c1194d58..0000000000 --- a/src/gallium/drivers/r300/r3xx_fs.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * Joakim Sindholt <opensource@zhasha.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r3xx_fs.h" - -#include "r300_reg.h" - -struct rX00_fragment_program_code r3xx_passthrough_fragment_shader = { - .code.r300.alu.length = 1, - .code.r300.tex.length = 0, - - .code.r300.config = 0, - .code.r300.pixsize = 0, - .code.r300.code_offset = 0, - .code.r300.code_addr[3] = R300_RGBA_OUT, - - .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; - -struct rX00_fragment_program_code r3xx_texture_fragment_shader = { - .code.r300.alu.length = 1, - .code.r300.tex.length = 1, - - .code.r300.config = R300_PFS_CNTL_FIRST_NODE_HAS_TEX, - .code.r300.pixsize = 0, - .code.r300.code_offset = 0, - .code.r300.code_addr[3] = R300_RGBA_OUT, - - .code.r300.tex.inst[0] = R300_TEX_OP_LD << R300_TEX_INST_SHIFT, - - .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; diff --git a/src/gallium/drivers/r300/r3xx_fs.h b/src/gallium/drivers/r300/r3xx_fs.h deleted file mode 100644 index 51cd245724..0000000000 --- a/src/gallium/drivers/r300/r3xx_fs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * Joakim Sindholt <opensource@zhasha.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R3XX_FS_H -#define R3XX_FS_H - -#include "radeon_code.h" - -struct rX00_fragment_program_code r3xx_passthrough_fragment_shader; -struct rX00_fragment_program_code r3xx_texture_fragment_shader; - -#endif /* R3XX_FS_H */ diff --git a/src/gallium/drivers/r300/r5xx_fs.c b/src/gallium/drivers/r300/r5xx_fs.c deleted file mode 100644 index f072deab0d..0000000000 --- a/src/gallium/drivers/r300/r5xx_fs.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * Joakim Sindholt <opensource@zhasha.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r5xx_fs.h" - -#include "r300_reg.h" - -/* XXX this all should find its way back to r300_reg */ -/* Swizzle tools */ -#define R500_SWIZZLE_ZERO 4 -#define R500_SWIZZLE_HALF 5 -#define R500_SWIZZLE_ONE 6 -#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) -#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) -#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) -#define R500_SWIZ_MOD_NEG 1 -#define R500_SWIZ_MOD_ABS 2 -#define R500_SWIZ_MOD_NEG_ABS 3 -/* Swizzles for inst2 */ -#define R500_SWIZ_TEX_STRQ(x) ((x) << 8) -#define R500_SWIZ_TEX_RGBA(x) ((x) << 24) -/* Swizzles for inst3 */ -#define R500_SWIZ_RGB_A(x) ((x) << 2) -#define R500_SWIZ_RGB_B(x) ((x) << 15) -/* Swizzles for inst4 */ -#define R500_SWIZ_ALPHA_A(x) ((x) << 14) -#define R500_SWIZ_ALPHA_B(x) ((x) << 21) -/* Swizzle for inst5 */ -#define R500_SWIZ_RGBA_C(x) ((x) << 14) -#define R500_SWIZ_ALPHA_C(x) ((x) << 27) -/* Writemasks */ -#define R500_TEX_WMASK(x) ((x) << 11) -#define R500_ALU_WMASK(x) ((x) << 11) -#define R500_ALU_OMASK(x) ((x) << 15) -#define R500_W_OMASK (1 << 31) - -struct rX00_fragment_program_code r5xx_passthrough_fragment_shader = { - .code.r500.max_temp_idx = 0, - .code.r500.inst_end = 0, - - .code.r500.inst[0].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .code.r500.inst[0].inst1 = - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, - .code.r500.inst[0].inst2 = - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, - .code.r500.inst[0].inst3 = - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, - .code.r500.inst[0].inst4 = - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, - .code.r500.inst[0].inst5 = - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0, -}; - -struct rX00_fragment_program_code r5xx_texture_fragment_shader = { - .code.r500.max_temp_idx = 0, - .code.r500.inst_end = 1, - - .code.r500.inst[0].inst0 = R500_INST_TYPE_TEX | - R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .code.r500.inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | - R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED, - .code.r500.inst[0].inst2 = R500_TEX_SRC_ADDR(0) | - R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | - R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A | - R500_TEX_DST_ADDR(0) | - R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | - R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A, - .code.r500.inst[0].inst3 = 0x0, - .code.r500.inst[0].inst4 = 0x0, - .code.r500.inst[0].inst5 = 0x0, - - .code.r500.inst[1].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, - .code.r500.inst[1].inst1 = - R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST, - .code.r500.inst[1].inst2 = - R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST, - .code.r500.inst[1].inst3 = - R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, - .code.r500.inst[1].inst4 = - R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A, - .code.r500.inst[1].inst5 = - R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0, -}; diff --git a/src/gallium/drivers/r300/r5xx_fs.h b/src/gallium/drivers/r300/r5xx_fs.h deleted file mode 100644 index a4addde32b..0000000000 --- a/src/gallium/drivers/r300/r5xx_fs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * Joakim Sindholt <opensource@zhasha.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R5XX_FS_H -#define R5XX_FS_H - -#include "radeon_code.h" - -struct rX00_fragment_program_code r5xx_passthrough_fragment_shader; -struct rX00_fragment_program_code r5xx_texture_fragment_shader; - -#endif /* R5XX_FS_H */ diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 6ab3753762..bcb887a0b2 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -31,6 +31,7 @@ C_SOURCES = \ sp_tex_sample.c \ sp_tex_tile_cache.c \ sp_tile_cache.c \ - sp_surface.c + sp_surface.c \ + sp_video_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 950c3d9955..aac9edf44e 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -33,6 +33,7 @@ softpipe = env.ConvenienceLibrary( 'sp_tex_tile_cache.c', 'sp_texture.c', 'sp_tile_cache.c', + 'sp_video_context.c', ]) -Export('softpipe')
\ No newline at end of file +Export('softpipe') diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index e1e31ab047..94d000a5ac 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -94,12 +94,17 @@ softpipe_destroy( struct pipe_context *pipe ) softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); softpipe->quad.blend->destroy( softpipe->quad.blend ); - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL); + } sp_destroy_tile_cache(softpipe->zsbuf_cache); + pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { sp_destroy_tex_tile_cache(softpipe->tex_cache[i]); + pipe_texture_reference(&softpipe->texture[i], NULL); + } for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 43a195c8ef..a735573d6f 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -85,7 +85,7 @@ struct softpipe_context { /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; - + /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; @@ -96,14 +96,13 @@ struct softpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /* The reduced version of the primitive supplied by the state - * tracker. - */ + /** The reduced version of the primitive supplied by the state tracker */ unsigned reduced_api_prim; - /* The reduced primitive after unfilled triangles, wide-line - * decomposition, etc, are taken into account. This is the - * primitive actually rasterized. + /** + * The reduced primitive after unfilled triangles, wide-line decomposition, + * etc, are taken into account. This is the primitive type that's actually + * rasterized. */ unsigned reduced_prim; @@ -117,7 +116,6 @@ struct softpipe_context { struct quad_stage *shade; struct quad_stage *depth_test; struct quad_stage *blend; - struct quad_stage *first; /**< points to one of the above stages */ } quad; @@ -135,10 +133,10 @@ struct softpipe_context { struct draw_stage *vbuf; boolean dirty_render_cache; - + struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; struct softpipe_tile_cache *zsbuf_cache; - + unsigned tex_timestamp; struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; @@ -159,4 +157,3 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe); #endif /* SP_CONTEXT_H */ - diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 9d3e4670ee..f912950658 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -138,7 +138,7 @@ fs_sse_run( const struct sp_fragment_shader *base, machine->Consts, (const float (*)[4])shader->immediates, machine->InterpCoefs - // , &machine->QuadPos + /*, &machine->QuadPos*/ ); quad->inout.mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e603c20fc4..5fbac06a53 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -138,7 +138,7 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct setup_context *setup_ctx = cvbr->setup; - setup_prepare( setup_ctx ); + sp_setup_prepare( setup_ctx ); cvbr->softpipe->reduced_prim = u_reduced_prim(prim); cvbr->prim = prim; @@ -171,14 +171,14 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - setup_point( setup_ctx, + sp_setup_point( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -186,7 +186,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } @@ -194,12 +194,12 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); } if (nr) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, indices[nr-1], stride), get_vert(vertex_buffer, indices[0], stride) ); } @@ -208,7 +208,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLES: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -216,7 +216,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 3) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -227,7 +227,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-1], stride), get_vert(vertex_buffer, indices[i-(i&1)], stride), get_vert(vertex_buffer, indices[i-2], stride) ); @@ -235,7 +235,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i+(i&1)-2], stride), get_vert(vertex_buffer, indices[i-(i&1)-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -246,7 +246,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride) ); @@ -254,7 +254,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -265,11 +265,11 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUADS: if (softpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -277,12 +277,12 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 4) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -293,11 +293,11 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) case PIPE_PRIM_QUAD_STRIP: if (softpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride)); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-3], stride) ); @@ -305,11 +305,11 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) } else { for (i = 3; i < nr; i += 2) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-0], stride) ); @@ -324,7 +324,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) * flatshade_first state makes no difference. */ for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-0], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[0], stride) ); @@ -355,14 +355,14 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - setup_point( setup_ctx, + sp_setup_point( setup_ctx, get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_LINES: for (i = 1; i < nr; i += 2) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -370,7 +370,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i ++) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } @@ -378,12 +378,12 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_LINE_LOOP: for (i = 1; i < nr; i ++) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); } if (nr) { - setup_line( setup_ctx, + sp_setup_line( setup_ctx, get_vert(vertex_buffer, nr-1, stride), get_vert(vertex_buffer, 0, stride) ); } @@ -392,7 +392,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLES: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 3) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-2, stride) ); @@ -400,7 +400,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 3) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -411,7 +411,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_STRIP: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i++) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-1, stride), get_vert(vertex_buffer, i-(i&1), stride), get_vert(vertex_buffer, i-2, stride) ); @@ -419,7 +419,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i++) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i+(i&1)-2, stride), get_vert(vertex_buffer, i-(i&1)-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -430,7 +430,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_TRIANGLE_FAN: if (softpipe->rasterizer->flatshade_first) { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride) ); @@ -438,7 +438,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, 0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -449,11 +449,11 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUADS: if (softpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 4) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -461,11 +461,11 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 4) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -476,11 +476,11 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) case PIPE_PRIM_QUAD_STRIP: if (softpipe->rasterizer->flatshade_first) { for (i = 3; i < nr; i += 2) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, i-3, stride) ); @@ -488,11 +488,11 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } else { for (i = 3; i < nr; i += 2) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-0, stride) ); @@ -507,7 +507,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) * flatshade_first state makes no difference. */ for (i = 2; i < nr; i += 1) { - setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride), get_vert(vertex_buffer, 0, stride) ); @@ -525,7 +525,7 @@ static void sp_vbuf_destroy(struct vbuf_render *vbr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - setup_destroy_context(cvbr->setup); + sp_setup_destroy_context(cvbr->setup); FREE(cvbr); } @@ -556,7 +556,7 @@ sp_create_vbuf_backend(struct softpipe_context *sp) cvbr->softpipe = sp; - cvbr->setup = setup_create_context(cvbr->softpipe); + cvbr->setup = sp_setup_create_context(cvbr->softpipe); return &cvbr->base; } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index e243c63fa2..0ad0b98654 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -946,15 +946,15 @@ choose_blend_quad(struct quad_stage *qs, qs->run = blend_noop; } else if (!softpipe->blend->logicop_enable && - softpipe->blend->colormask == 0xf) + softpipe->blend->colormask == 0xf && + softpipe->framebuffer.nr_cbufs == 1) { if (!blend->blend_enable) { qs->run = single_output_color; } else if (blend->rgb_src_factor == blend->alpha_src_factor && blend->rgb_dst_factor == blend->alpha_dst_factor && - blend->rgb_func == blend->alpha_func && - softpipe->framebuffer.nr_cbufs == 1) + blend->rgb_func == blend->alpha_func) { if (blend->alpha_func == PIPE_BLEND_ADD) { if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE && diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index ade125662a..00fb52a64f 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -106,6 +106,7 @@ struct setup_context { #endif unsigned winding; /* which winding to cull */ + unsigned nr_vertex_attrs; }; @@ -268,8 +269,8 @@ static void print_vertex(const struct setup_context *setup, const float (*v)[4]) { int i; - debug_printf(" Vertex: (%p)\n", v); - for (i = 0; i < setup->quad[0].nr_attrs; i++) { + debug_printf(" Vertex: (%p)\n", (void *) v); + for (i = 0; i < setup->nr_vertex_attrs; i++) { debug_printf(" %d: %f %f %f %f\n", i, v[i][0], v[i][1], v[i][2], v[i][3]); if (util_is_inf_or_nan(v[i][0])) { @@ -696,7 +697,7 @@ calc_det( const float (*v0)[4], /** * Do setup for triangle rasterization, then render the triangle. */ -void setup_tri( struct setup_context *setup, +void sp_setup_tri( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -915,7 +916,7 @@ plot(struct setup_context *setup, int x, int y) * to handle stippling and wide lines. */ void -setup_line(struct setup_context *setup, +sp_setup_line(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { @@ -1045,7 +1046,7 @@ point_persp_coeff(const struct setup_context *setup, * XXX could optimize a lot for 1-pixel points. */ void -setup_point( struct setup_context *setup, +sp_setup_point( struct setup_context *setup, const float (*v0)[4] ) { struct softpipe_context *softpipe = setup->softpipe; @@ -1246,7 +1247,7 @@ setup_point( struct setup_context *setup, } } -void setup_prepare( struct setup_context *setup ) +void sp_setup_prepare( struct setup_context *setup ) { struct softpipe_context *sp = setup->softpipe; @@ -1254,6 +1255,9 @@ void setup_prepare( struct setup_context *setup ) softpipe_update_derived(sp); } + /* Note: nr_attrs is only used for debugging (vertex printing) */ + setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw); + sp->quad.first->begin( sp->quad.first ); if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && @@ -1270,7 +1274,7 @@ void setup_prepare( struct setup_context *setup ) -void setup_destroy_context( struct setup_context *setup ) +void sp_setup_destroy_context( struct setup_context *setup ) { FREE( setup ); } @@ -1279,7 +1283,7 @@ void setup_destroy_context( struct setup_context *setup ) /** * Create a new primitive setup/render stage. */ -struct setup_context *setup_create_context( struct softpipe_context *softpipe ) +struct setup_context *sp_setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); unsigned i; diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h index d54f334428..9c8844d2e8 100644 --- a/src/gallium/drivers/softpipe/sp_setup.h +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -31,23 +31,23 @@ struct setup_context; struct softpipe_context; void -setup_tri( struct setup_context *setup, +sp_setup_tri( struct setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ); void -setup_line(struct setup_context *setup, +sp_setup_line(struct setup_context *setup, const float (*v0)[4], const float (*v1)[4]); void -setup_point( struct setup_context *setup, +sp_setup_point( struct setup_context *setup, const float (*v0)[4] ); -struct setup_context *setup_create_context( struct softpipe_context *softpipe ); -void setup_prepare( struct setup_context *setup ); -void setup_destroy_context( struct setup_context *setup ); +struct setup_context *sp_setup_create_context( struct softpipe_context *softpipe ); +void sp_setup_prepare( struct setup_context *setup ); +void sp_setup_destroy_context( struct setup_context *setup ); #endif diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index c8f55c3cec..bc0e201130 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -56,7 +56,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, sp_flush_tile_cache(sp->cbuf_cache[i]); /* assign new */ - sp->framebuffer.cbufs[i] = fb->cbufs[i]; + pipe_surface_reference(&sp->framebuffer.cbufs[i], fb->cbufs[i]); /* update cache */ sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); @@ -71,7 +71,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, sp_flush_tile_cache(sp->zsbuf_cache); /* assign new */ - sp->framebuffer.zsbuf = fb->zsbuf; + pipe_surface_reference(&sp->framebuffer.zsbuf, fb->zsbuf); /* update cache */ sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 49b51afda7..7caf2928b4 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -111,6 +111,9 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, } +/** + * Create new pipe_texture given the template information. + */ static struct pipe_texture * softpipe_texture_create(struct pipe_screen *screen, const struct pipe_texture *template) @@ -145,6 +148,9 @@ softpipe_texture_create(struct pipe_screen *screen, } +/** + * Create a new pipe_texture which wraps an existing buffer. + */ static struct pipe_texture * softpipe_texture_blanket(struct pipe_screen * screen, const struct pipe_texture *base, @@ -188,6 +194,9 @@ softpipe_texture_destroy(struct pipe_texture *pt) } +/** + * Get a pipe_surface "view" into a texture. + */ static struct pipe_surface * softpipe_get_tex_surface(struct pipe_screen *screen, struct pipe_texture *pt, @@ -248,6 +257,9 @@ softpipe_get_tex_surface(struct pipe_screen *screen, } +/** + * Free a pipe_surface which was created with softpipe_get_tex_surface(). + */ static void softpipe_tex_surface_destroy(struct pipe_surface *surf) { @@ -261,6 +273,18 @@ softpipe_tex_surface_destroy(struct pipe_surface *surf) } +/** + * Geta pipe_transfer object which is used for moving data in/out of + * a texture object. + * \param face one of PIPE_TEX_FACE_x or 0 + * \param level texture mipmap level + * \param zslice 2D slice of a 3D texture + * \param usage one of PIPE_TRANSFER_READ/WRITE/READ_WRITE + * \param x X position of region to read/write + * \param y Y position of region to read/write + * \param width width of region to read/write + * \param height height of region to read/write + */ static struct pipe_transfer * softpipe_get_tex_transfer(struct pipe_screen *screen, struct pipe_texture *texture, @@ -310,6 +334,10 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, } +/** + * Free a pipe_transfer object which was created with + * softpipe_get_tex_transfer(). + */ static void softpipe_tex_transfer_destroy(struct pipe_transfer *transfer) { @@ -323,33 +351,27 @@ softpipe_tex_transfer_destroy(struct pipe_transfer *transfer) } +/** + * Create memory mapping for given pipe_transfer object. + */ static void * softpipe_transfer_map( struct pipe_screen *screen, struct pipe_transfer *transfer ) { ubyte *map, *xfer_map; struct softpipe_texture *spt; - unsigned flags = 0; assert(transfer->texture); spt = softpipe_texture(transfer->texture); - if (transfer->usage != PIPE_TRANSFER_READ) { - flags |= PIPE_BUFFER_USAGE_CPU_WRITE; - } - - if (transfer->usage != PIPE_TRANSFER_WRITE) { - flags |= PIPE_BUFFER_USAGE_CPU_READ; - } - - map = pipe_buffer_map(screen, spt->buffer, flags); + map = pipe_buffer_map(screen, spt->buffer, pipe_transfer_buffer_flags(transfer)); if (map == NULL) return NULL; /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) { + if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. */ @@ -364,6 +386,9 @@ softpipe_transfer_map( struct pipe_screen *screen, } +/** + * Unmap memory mapping for given pipe_transfer object. + */ static void softpipe_transfer_unmap(struct pipe_screen *screen, struct pipe_transfer *transfer) @@ -375,13 +400,66 @@ softpipe_transfer_unmap(struct pipe_screen *screen, pipe_buffer_unmap( screen, spt->buffer ); - if (transfer->usage != PIPE_TRANSFER_READ) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { /* Mark the texture as dirty to expire the tile caches. */ spt->timestamp++; } } +static struct pipe_video_surface* +softpipe_video_surface_create(struct pipe_screen *screen, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + struct softpipe_video_surface *sp_vsfc; + struct pipe_texture template; + + assert(screen); + assert(width && height); + + sp_vsfc = CALLOC_STRUCT(softpipe_video_surface); + if (!sp_vsfc) + return NULL; + + pipe_reference_init(&sp_vsfc->base.reference, 1); + sp_vsfc->base.screen = screen; + sp_vsfc->base.chroma_format = chroma_format; + /*sp_vsfc->base.surface_format = PIPE_VIDEO_SURFACE_FORMAT_VUYA;*/ + sp_vsfc->base.width = width; + sp_vsfc->base.height = height; + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_X8R8G8B8_UNORM; + template.last_level = 0; + /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */ + template.width[0] = util_next_power_of_two(width); + template.height[0] = util_next_power_of_two(height); + template.depth[0] = 1; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + sp_vsfc->tex = screen->texture_create(screen, &template); + if (!sp_vsfc->tex) { + FREE(sp_vsfc); + return NULL; + } + + return &sp_vsfc->base; +} + + +static void +softpipe_video_surface_destroy(struct pipe_video_surface *vsfc) +{ + struct softpipe_video_surface *sp_vsfc = softpipe_video_surface(vsfc); + + pipe_texture_reference(&sp_vsfc->tex, NULL); + FREE(sp_vsfc); +} + + void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { @@ -396,9 +474,16 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen) screen->tex_transfer_destroy = softpipe_tex_transfer_destroy; screen->transfer_map = softpipe_transfer_map; screen->transfer_unmap = softpipe_transfer_unmap; + + screen->video_surface_create = softpipe_video_surface_create; + screen->video_surface_destroy = softpipe_video_surface_destroy; } +/** + * Return pipe_buffer handle and stride for given texture object. + * XXX used for??? + */ boolean softpipe_get_texture_buffer( struct pipe_texture *texture, struct pipe_buffer **buf, diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 2537ab6a40..2ef64e1e7c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -30,6 +30,7 @@ #include "pipe/p_state.h" +#include "pipe/p_video_state.h" struct pipe_context; @@ -62,6 +63,15 @@ struct softpipe_transfer unsigned long offset; }; +struct softpipe_video_surface +{ + struct pipe_video_surface base; + + /* The data is held here: + */ + struct pipe_texture *tex; +}; + /** cast wrappers */ static INLINE struct softpipe_texture * @@ -76,6 +86,12 @@ softpipe_transfer(struct pipe_transfer *pt) return (struct softpipe_transfer *) pt; } +static INLINE struct softpipe_video_surface * +softpipe_video_surface(struct pipe_video_surface *pvs) +{ + return (struct softpipe_video_surface *) pvs; +} + extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index de47970985..65872cecc4 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -96,6 +96,16 @@ sp_create_tile_cache( struct pipe_screen *screen ) tc->entries[pos].addr.bits.invalid = 1; } tc->last_tile = &tc->entries[0]; /* any tile */ + + /* XXX this code prevents valgrind warnings about use of uninitialized + * memory in programs that don't clear the surface before rendering. + * However, it breaks clearing in other situations (such as in + * progs/tests/drawbuffers, see bug 24402). + */ +#if 0 && TILE_CLEAR_OPTIMIZATION + /* set flags to indicate all the tiles are cleared */ + memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); +#endif } return tc; } diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c new file mode 100644 index 0000000000..cae2d3efc5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -0,0 +1,304 @@ +/************************************************************************** + * + * Copyright 2009 Younes Manton. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "sp_video_context.h" +#include <pipe/p_inlines.h> +#include <util/u_memory.h> +#include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" + +static void +sp_mpeg12_destroy(struct pipe_video_context *vpipe) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + + /* Asserted in softpipe_delete_fs_state() for some reason */ + ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->pipe->bind_fs_state(ctx->pipe, NULL); + + ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend); + ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast); + ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); + + pipe_video_surface_reference(&ctx->decode_target, NULL); + vl_compositor_cleanup(&ctx->compositor); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + + FREE(ctx); +} + +static void +sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe, + struct pipe_video_surface *past, + struct pipe_video_surface *future, + unsigned num_macroblocks, + struct pipe_macroblock *macroblocks, + struct pipe_fence_handle **fence) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks; + + assert(vpipe); + assert(num_macroblocks); + assert(macroblocks); + assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); + assert(ctx->decode_target); + + vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer, + softpipe_video_surface(ctx->decode_target)->tex, + past ? softpipe_video_surface(past)->tex : NULL, + future ? softpipe_video_surface(future)->tex : NULL, + num_macroblocks, mpeg12_macroblocks, fence); +} + +static void +sp_mpeg12_clear_surface(struct pipe_video_context *vpipe, + unsigned x, unsigned y, + unsigned width, unsigned height, + unsigned value, + struct pipe_surface *surface) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(surface); + + ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value); +} + +static void +sp_mpeg12_render_picture(struct pipe_video_context *vpipe, + /*struct pipe_surface *backround, + struct pipe_video_rect *backround_area,*/ + struct pipe_video_surface *src_surface, + enum pipe_mpeg12_picture_type picture_type, + /*unsigned num_past_surfaces, + struct pipe_video_surface *past_surfaces, + unsigned num_future_surfaces, + struct pipe_video_surface *future_surfaces,*/ + struct pipe_video_rect *src_area, + struct pipe_surface *dst_surface, + struct pipe_video_rect *dst_area, + /*unsigned num_layers, + struct pipe_surface *layers, + struct pipe_video_rect *layer_src_areas, + struct pipe_video_rect *layer_dst_areas*/ + struct pipe_fence_handle **fence) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(src_surface); + assert(src_area); + assert(dst_surface); + assert(dst_area); + + vl_compositor_render(&ctx->compositor, softpipe_video_surface(src_surface)->tex, + picture_type, src_area, dst_surface->texture, dst_area, fence); +} + +static void +sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe, + struct pipe_video_surface *dt) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + assert(dt); + + pipe_video_surface_reference(&ctx->decode_target, dt); +} + +static void sp_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe, const float *mat) +{ + struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe; + + assert(vpipe); + + vl_compositor_set_csc_matrix(&ctx->compositor, mat); +} + +static bool +init_pipe_state(struct sp_mpeg12_context *ctx) +{ + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + unsigned i; + + assert(ctx); + + rast.flatshade = 1; + rast.flatshade_first = 0; + rast.light_twoside = 0; + rast.front_winding = PIPE_WINDING_CCW; + rast.cull_mode = PIPE_WINDING_CW; + rast.fill_cw = PIPE_POLYGON_MODE_FILL; + rast.fill_ccw = PIPE_POLYGON_MODE_FILL; + rast.offset_cw = 0; + rast.offset_ccw = 0; + rast.scissor = 0; + rast.poly_smooth = 0; + rast.poly_stipple_enable = 0; + rast.point_sprite = 0; + rast.point_size_per_vertex = 0; + rast.multisample = 0; + rast.line_smooth = 0; + rast.line_stipple_enable = 0; + rast.line_stipple_factor = 0; + rast.line_stipple_pattern = 0; + rast.line_last_pixel = 0; + rast.bypass_vs_clip_and_viewport = 0; + rast.line_width = 1; + rast.point_smooth = 0; + rast.point_size = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + /*rast.sprite_coord_mode[i] = ;*/ + ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast); + ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast); + + blend.blend_enable = 0; + blend.rgb_func = PIPE_BLEND_ADD; + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_func = PIPE_BLEND_ADD; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.colormask = PIPE_MASK_RGBA; + blend.dither = 0; + ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend); + ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend); + + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + for (i = 0; i < 2; ++i) { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].ref_value = 0; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa); + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa); + + return true; +} + +static struct pipe_video_context * +sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + struct sp_mpeg12_context *ctx; + + assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12); + + ctx = CALLOC_STRUCT(sp_mpeg12_context); + + if (!ctx) + return NULL; + + ctx->base.profile = profile; + ctx->base.chroma_format = chroma_format; + ctx->base.width = width; + ctx->base.height = height; + + ctx->base.screen = screen; + ctx->base.destroy = sp_mpeg12_destroy; + ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks; + ctx->base.clear_surface = sp_mpeg12_clear_surface; + ctx->base.render_picture = sp_mpeg12_render_picture; + ctx->base.set_decode_target = sp_mpeg12_set_decode_target; + ctx->base.set_csc_matrix = sp_mpeg12_set_csc_matrix; + + ctx->pipe = softpipe_create(screen); + if (!ctx->pipe) { + FREE(ctx); + return NULL; + } + + /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture */ + if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe, + width, height, chroma_format, + VL_MPEG12_MC_RENDERER_BUFFER_PICTURE, + /* TODO: Use XFER_NONE when implemented */ + VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, + true)) { + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) { + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + if (!init_pipe_state(ctx)) { + vl_compositor_cleanup(&ctx->compositor); + vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer); + ctx->pipe->destroy(ctx->pipe); + FREE(ctx); + return NULL; + } + + return &ctx->base; +} + +struct pipe_video_context * +sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height) +{ + assert(screen); + assert(width && height); + + switch (u_reduce_video_profile(profile)) { + case PIPE_VIDEO_CODEC_MPEG12: + return sp_mpeg12_create(screen, profile, + chroma_format, + width, height); + default: + return NULL; + } +} diff --git a/src/gallium/drivers/i965simple/brw_draw.h b/src/gallium/drivers/softpipe/sp_video_context.h index 62fe0d5d0e..ccbd1ffe4c 100644 --- a/src/gallium/drivers/i965simple/brw_draw.h +++ b/src/gallium/drivers/softpipe/sp_video_context.h @@ -1,6 +1,6 @@ - /************************************************************************** +/************************************************************************** * - * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2009 Younes Manton. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,31 +25,33 @@ * **************************************************************************/ -#ifndef BRW_DRAW_H -#define BRW_DRAW_H +#ifndef SP_VIDEO_CONTEXT_H +#define SP_VIDEO_CONTEXT_H -#include "pipe/p_context.h" +#include <pipe/p_video_context.h> +#include <vl/vl_mpeg12_mc_renderer.h> +#include <vl/vl_compositor.h> -struct brw_context; +struct pipe_screen; +struct pipe_context; +struct pipe_video_surface; +struct sp_mpeg12_context +{ + struct pipe_video_context base; + struct pipe_context *pipe; + struct pipe_video_surface *decode_target; + struct vl_mpeg12_mc_renderer mc_renderer; + struct vl_compositor compositor; + void *rast; + void *dsa; + void *blend; +}; -void brw_init_draw_functions( struct brw_context *brw ); +struct pipe_video_context * +sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile, + enum pipe_video_chroma_format chroma_format, + unsigned width, unsigned height); - -boolean brw_upload_vertices( struct brw_context *brw, - unsigned min_index, - unsigned max_index ); - -boolean brw_upload_indices(struct brw_context *brw, - const struct pipe_buffer *index_buffer, - int ib_size, int start, int count); - -boolean brw_upload_vertex_buffers( struct brw_context *brw ); -boolean brw_upload_vertex_elements( struct brw_context *brw ); - -unsigned brw_translate_surface_format( unsigned id ); - - - -#endif +#endif /* SP_VIDEO_CONTEXT_H */ diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index 781ca5d3bc..48d1c4051c 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -150,7 +150,9 @@ trace_drm_destroy(struct drm_api *_api) { struct trace_drm_api *tr_api = trace_drm_api(_api); struct drm_api *api = tr_api->api; - api->destroy(api); + + if (api->destroy) + api->destroy(api); free(tr_api); } diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index e85ac15edc..81e0a6f3b0 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -44,7 +44,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define sleep Sleep -#elif defined(PIPE_OS_LINUX) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) void usleep(int); # define sleep usleep #else diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 26f1c04594..7da9bd3866 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -366,7 +366,8 @@ trace_screen_get_tex_transfer(struct pipe_screen *_screen, trace_dump_call_end(); - result = trace_transfer_create(tr_tex, result); + if (result) + result = trace_transfer_create(tr_tex, result); return result; } @@ -403,7 +404,7 @@ trace_screen_transfer_map(struct pipe_screen *_screen, map = screen->transfer_map(screen, transfer); if(map) { - if(transfer->usage != PIPE_TRANSFER_READ) { + if(transfer->usage & PIPE_TRANSFER_WRITE) { assert(!tr_trans->map); tr_trans->map = map; } |