diff options
Diffstat (limited to 'src/gallium/drivers')
140 files changed, 3452 insertions, 4018 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d5f5c7bbba..aa29dcb394 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -358,6 +358,7 @@ struct cell_spu_function_info /** This is the object passed to spe_create_thread() */ +PIPE_ALIGN_TYPE(16, struct cell_init_info { unsigned id; @@ -370,7 +371,7 @@ struct cell_init_info uint *buffer_status; /**< points at cell_context->buffer_status */ struct cell_spu_function_info *spu_functions; -} ALIGN16_ATTRIB; +}); #endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 5c3188e7f9..3fb6a3227c 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - struct cell_fence fence ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_fence fence; struct cell_buffer_node *head; }; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; /** Associated with each command/batch buffer is a list of pipe_buffers diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 01bea0f8cc..3fa8b975d3 100644 --- 
a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp) * * XXX should the element buffer be specified/bound with a separate function? */ -static boolean +static void cell_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -145,29 +145,27 @@ cell_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ cell_unmap_constant_buffers(sp); - - return TRUE; } -static boolean +static void cell_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return cell_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } -static boolean +static void cell_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return cell_draw_elements(pipe, NULL, 0, mode, start, count); + cell_draw_elements(pipe, NULL, 0, mode, start, count); } diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 5c0179d954..55bd85bde2 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; @@ -405,8 +404,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: 
spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; break; @@ -418,8 +415,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; break; @@ -547,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d86d8e09a5..d2166a4901 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d; unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i; unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 8605679940..0ca92af248 100644 --- 
a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16) struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d25..98919c43ff 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51..b18f4c22ef 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ @@ -107,10 +108,11 @@ struct spu_framebuffer uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; @@ -123,20 +125,22 @@ struct spu_texture_level vector signed int mask_s, mask_t, mask_r; /** texcoord clamp limits */ vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); /** * All SPU global/context state will 
be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,8 +159,8 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) tile_t ctile; + PIPE_ALIGN_VAR(16) tile_t ztile; /** Read depth/stencil tiles? */ boolean read_depth_stencil; @@ -165,8 +169,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; + PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +179,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +191,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073ab..14987e3c3a 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte 
vertex_data[CELL_BUFFER_SIZE]; const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a5..087963960d 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) static const qword +fetch_shuffle_data[5] = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* Fetch four attributes for four vertices. 
diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d39..3e9804bf8e 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; + PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16) unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]; struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) unsigned char +immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]; void diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 37184eac7b..46e4338d98 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ 
b/src/gallium/drivers/failover/fo_context.c @@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe ) } +void failover_fail_over( struct failover_context *failover ) +{ + failover->dirty = TRUE; + failover->mode = FO_SW; +} + -static boolean failover_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) +static void failover_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, + unsigned start, + unsigned count) { struct failover_context *failover = failover_context( pipe ); @@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe, /* Try hardware: */ if (failover->mode == FO_HW) { - if (!failover->hw->draw_elements( failover->hw, - indexBuffer, - indexSize, - prim, - start, - count )) { - - failover->hw->flush( failover->hw, ~0, NULL ); - failover->mode = FO_SW; - } + failover->hw->draw_elements( failover->hw, + indexBuffer, + indexSize, + prim, + start, + count ); } /* Possibly try software: */ if (failover->mode == FO_SW) { - if (failover->dirty) + if (failover->dirty) { + failover->hw->flush( failover->hw, ~0, NULL ); failover_state_emit( failover ); + } failover->sw->draw_elements( failover->sw, indexBuffer, @@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe, */ failover->sw->flush( failover->sw, ~0, NULL ); } - - return TRUE; } -static boolean failover_draw_arrays( struct pipe_context *pipe, +static void failover_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return failover_draw_elements(pipe, NULL, 0, prim, start, count); + failover_draw_elements(pipe, NULL, 0, prim, start, count); } static unsigned int diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h index a8ce997a1f..533122b69d 100644 --- 
a/src/gallium/drivers/failover/fo_winsys.h +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -36,10 +36,13 @@ struct pipe_context; +struct failover_context; struct pipe_context *failover_create( struct pipe_context *hw, struct pipe_context *sw ); +void failover_fail_over( struct failover_context *failover ); + #endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 949f046350..89feeade75 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -45,7 +45,7 @@ */ -static boolean +static void i915_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } - - return TRUE; } -static boolean +static void i915_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) { - return i915_draw_range_elements(pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - prim, start, count); + i915_draw_range_elements(pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count); } -static boolean +static void i915_draw_arrays(struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return i915_draw_elements(pipe, NULL, 0, prim, start, count); + i915_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 1528afc859..5f5b6f8e18 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -74,8 +74,6 @@ static unsigned translate_img_filter( unsigned filter ) return FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return FILTER_ANISOTROPIC; default: 
assert(0); return FILTER_NEAREST; @@ -221,6 +219,9 @@ i915_create_sampler_state(struct pipe_context *pipe, minFilt = translate_img_filter( sampler->min_img_filter ); magFilt = translate_img_filter( sampler->mag_img_filter ); + if (sampler->max_anisotropy > 1.0) + minFilt = magFilt = FILTER_ANISOTROPIC; + if (sampler->max_anisotropy > 2.0) { cso->state[0] |= SS2_MAX_ANISO_4; } diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 852fd22982..ea8d39adaf 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -176,7 +176,7 @@ try_draw_range_elements(struct brw_context *brw, } -static boolean +static void brw_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -228,29 +228,27 @@ brw_draw_range_elements(struct pipe_context *pipe, ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); assert(ret == 0); } - - return TRUE; } -static boolean +static void brw_draw_elements(struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned mode, unsigned start, unsigned count) { - return brw_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - mode, - start, count ); + brw_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + mode, + start, count ); } -static boolean +static void brw_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return brw_draw_elements(pipe, NULL, 0, mode, start, count); + brw_draw_elements(pipe, NULL, 0, mode, start, count); } diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index 5ddc63f57e..81712798a5 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -48,8 +48,6 @@ static GLuint translate_img_filter( unsigned filter ) return BRW_MAPFILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return 
BRW_MAPFILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return BRW_MAPFILTER_ANISOTROPIC; default: assert(0); return BRW_MAPFILTER_NEAREST; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 7e57d0306b..8f983a60ae 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -691,7 +691,7 @@ static void emit_xpd( struct brw_compile *p, { GLuint i; - assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X); + assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W); for (i = 0 ; i < 3; i++) { if (mask & (1<<i)) { diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index bdbaae5987..9f5b4e6323 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -45,7 +45,7 @@ identity_destroy(struct pipe_context *_pipe) free(id_pipe); } -static boolean +static void identity_draw_arrays(struct pipe_context *_pipe, unsigned prim, unsigned start, @@ -54,13 +54,13 @@ identity_draw_arrays(struct pipe_context *_pipe, struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - return pipe->draw_arrays(pipe, - prim, - start, - count); + pipe->draw_arrays(pipe, + prim, + start, + count); } -static boolean +static void identity_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -73,15 +73,15 @@ identity_draw_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_elements(pipe, - indexBuffer, - indexSize, - prim, - start, - count); + pipe->draw_elements(pipe, + indexBuffer, + indexSize, + prim, + start, + count); } -static boolean +static void identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -96,14 +96,14 @@ identity_draw_range_elements(struct pipe_context *_pipe, struct 
pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, - minIndex, - maxIndex, - mode, - start, - count); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, + minIndex, + maxIndex, + mode, + start, + count); } static struct pipe_query * diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index e038a5229e..7c6e46006b 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -50,7 +50,6 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 0c3f00fd58..72d9f39658 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -59,27 +59,16 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.5 or greater. LLVM 2.6 is preferred. + - LLVM 2.6. - On Debian based distributions do: + For Linux, on a recent Debian based distribution do: aptitude install llvm-dev - There is a typo in one of the llvm 2.5 headers, that may cause compilation - errors. To fix it apply the change: - - --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 - +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 - @@ -831,7 +831,7 @@ - template<typename T> - inline T **unwrap(LLVMValueRef *Vals, unsigned Length) { - #if DEBUG - - for (LLVMValueRef *I = Vals, E = Vals + Length; I != E; ++I) - + for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I) - cast<T>(*I); - #endif - return reinterpret_cast<T**>(Vals); - + For Windows download pre-built MSVC 9.0 or MinGW binaries from + http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment + variable to the extracted path. 
+ - scons (optional) - udis86, http://udis86.sourceforge.net/ (optional): @@ -95,9 +84,9 @@ Requirements Building ======== -To build everything invoke scons as: +To build everything on Linux invoke scons as: - scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false -k + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false Alternatively, you can build it with GNU make, if you prefer, by invoking it as @@ -105,12 +94,15 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as but the rest of these instructions assume that scons is used. +For Windows everything is the same except the winsys: + + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false Using ===== -Building will create a drop-in alternative for libGL.so. To use it set the -environment variables: +On Linux, building will create a drop-in alternative for libGL.so. To use it +set the environment variables: export LD_LIBRARY_PATH=$PWD/build/linux-x86_64-debug/lib:$LD_LIBRARY_PATH @@ -121,6 +113,11 @@ or For performance evaluation pass debug=no to scons, and use the corresponding lib directory without the "-debug" suffix. +On Windows, building will create a drop-in alternative for opengl32.dll. To use +it put it in the same directory as the application. It can also be used by +replacing the native ICD driver, but it's quite an advanced usage, so if you +need to ask, don't even try it. 
+ Unit testing ============ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 3ca676647c..6bb545a501 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -66,7 +66,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp index d3f78c06d9..6e79438ead 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -59,3 +59,17 @@ LLVMInitializeNativeTarget(void) #endif + + +/* + * Hack to allow the linking of release LLVM static libraries on a debug build. + * + * See also: + * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d + */ +#if defined(_MSC_VER) && defined(_DEBUG) +#include <crtdefs.h> +extern "C" { + _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} +} +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c index af70ddc6ab..9003e108c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -69,8 +69,8 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->min_img_filter = sampler->min_img_filter; state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; + state->compare_mode = sampler->compare_mode; + if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { state->compare_func = sampler->compare_func; } state->normalized_coords = sampler->normalized_coords; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 47b68b71e2..5ee8d556a6 
100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -488,7 +488,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef res; unsigned chan; - if(!bld->static_state->compare_mode) + if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) return; /* TODO: Compare before swizzling, to avoid redundant computations */ @@ -577,7 +577,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: if(lp_format_is_rgba8(bld.format_desc)) lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); else diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 61b033c9fc..fb1eda4423 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -361,6 +361,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, if (projected) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } + for (i = num_coords; i < 3; i++) { + coords[i] = bld->base.undef; + } bld->sampler->emit_fetch_texel(bld->sampler, bld->base.builder, diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 37587d4f79..1cc3c9227c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -256,22 +256,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen); - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i]; - llvmpipe->tgsi.vert_samplers_list[i] = 
&llvmpipe->tgsi.vert_samplers[i]; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - llvmpipe->tgsi.frag_samplers[i].cache = llvmpipe->tex_cache[i]; - llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i]; - } - /* * Create drawing context and plug our rendering stage into it. */ @@ -279,10 +263,7 @@ llvmpipe_create( struct pipe_screen *screen ) if (!llvmpipe->draw) goto fail; - draw_texture_samplers(llvmpipe->draw, - PIPE_MAX_VERTEX_SAMPLERS, - (struct tgsi_sampler **) - llvmpipe->tgsi.vert_samplers_list); + /* FIXME: devise alternative to draw_texture_samplers */ if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index cc4d5ad5fd..6411797cf5 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -115,14 +115,6 @@ struct llvmpipe_context { unsigned line_stipple_counter; - /** TGSI exec things */ - struct { - struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; - } tgsi; - /** The primitive drawing context */ struct draw_context *draw; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index a96c2cad9d..c152b4413f 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,11 +45,11 @@ -boolean +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); + llvmpipe_draw_elements(pipe, 
NULL, 0, mode, start, count); } @@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -122,20 +122,18 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ lp->dirty_render_cache = TRUE; - - return TRUE; } -boolean +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + llvmpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index bce3baec16..4ef0783f3e 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -79,25 +79,22 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[5]; + LLVMTypeRef elem_types[4]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ - elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ + elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, 
screen->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers, - screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, - screen->target, context_type, 2); + screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, - screen->target, context_type, 3); + screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, LP_JIT_CONTEXT_TEXTURES_INDEX); @@ -109,24 +106,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->context_ptr_type = LLVMPointerType(context_type, 0); } - /* fetch_texel - */ - { - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[3]; - LLVMValueRef fetch_texel; - - ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - arg_types[1] = LLVMInt32Type(); /* unit */ - arg_types[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* store */ - - fetch_texel = lp_declare_intrinsic(screen->module, "fetch_texel", - ret_type, arg_types, Elements(arg_types)); - - LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_fetch_texel_soa); - } - #ifdef DEBUG LLVMDumpModule(screen->module); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 58f716ede2..277b690c02 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -41,7 +41,6 @@ #include "pipe/p_state.h" -struct tgsi_sampler; struct llvmpipe_screen; @@ -78,8 +77,6 @@ struct lp_jit_context { const float *constants; - struct tgsi_sampler **samplers; - float alpha_ref_value; /* FIXME: store (also?) 
in floats */ @@ -92,16 +89,13 @@ struct lp_jit_context #define lp_jit_context_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 0, "constants") -#define lp_jit_context_samplers(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "samplers") - #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "alpha_ref_value") + lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 3, "blend_color") + lp_build_struct_get(_builder, _ptr, 2, "blend_color") -#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 +#define LP_JIT_CONTEXT_TEXTURES_INDEX 3 #define lp_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") @@ -118,12 +112,6 @@ typedef void void *color, void *depth); -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ); - - void lp_jit_screen_cleanup(struct llvmpipe_screen *screen); diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index 7eb05de77a..c3a48700a4 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -31,6 +31,7 @@ #ifndef LP_QUAD_H #define LP_QUAD_H +#include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "tgsi/tgsi_exec.h" @@ -83,7 +84,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + PIPE_ALIGN_VAR(16) float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; }; @@ -92,9 +93,9 @@ struct quad_header_output */ struct quad_interp_coef { - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) 
float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b18f17c0cd..0b2d3a2801 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -117,7 +117,7 @@ struct setup_context { /** * Execute fragment shader for the four fragments in the quad. */ -ALIGN_STACK +PIPE_ALIGN_STACK static void shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quads[], @@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, uint8_t *tile; uint8_t *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 5cee7bf74b..7020da145f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -56,7 +56,6 @@ #define LP_NEW_QUERY 0x4000 -struct tgsi_sampler; struct vertex_info; struct pipe_context; struct llvmpipe_context; @@ -197,14 +196,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp); void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); -boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); -boolean llvmpipe_draw_elements(struct pipe_context *pipe, +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c 
b/src/gallium/drivers/llvmpipe/lp_state_derived.c index acfd7be5f7..6c1ef6bc42 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -192,36 +192,6 @@ compute_cliprect(struct llvmpipe_context *lp) } -static void -update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) -{ - unsigned i; - - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i]; - llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i]; - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - } - - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] ); - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.frag_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - } - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] ); - } - - llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; -} - /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. 
*/ @@ -237,8 +207,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) } if (llvmpipe->dirty & (LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) - update_tgsi_samplers( llvmpipe ); + LP_NEW_TEXTURE)) { + /* TODO */ + } if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index f2b8c36264..b73ca2d41e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -453,8 +453,8 @@ generate_fragment(struct llvmpipe_context *lp, debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); debug_printf(" .mag_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode) - debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); } @@ -550,13 +550,8 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); -#if 0 - /* C texture sampling */ - sampler = lp_c_sampler_soa_create(context_ptr); -#else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); -#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 29fff91981..6c29e8d8ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,7 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -531,11 +531,11 
@@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index faddfb9677..c1abee424c 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned 
src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 23ea9ebbe7..2b258f1052 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 9ad1bde956..cb59a94464 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -31,64 +31,11 @@ #include <llvm-c/Core.h> -#include "tgsi/tgsi_exec.h" - -struct llvmpipe_tex_tile_cache; struct lp_sampler_static_state; /** - * Subclass of tgsi_sampler - */ -struct lp_shader_sampler -{ - struct tgsi_sampler base; /**< base class */ - - unsigned processor; - - /* For lp_get_samples_2d_linear_POT: - */ - unsigned xpot; - unsigned ypot; - unsigned level; - - const struct pipe_texture *texture; - const struct pipe_sampler_state *sampler; - - struct llvmpipe_tex_tile_cache *cache; -}; - - - -static INLINE struct lp_shader_sampler * -lp_shader_sampler(const struct tgsi_sampler *sampler) -{ - return (struct lp_shader_sampler *) sampler; -} - - - -extern void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float 
t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - - -/** - * Texture sampling code generator that just calls lp_get_samples C function - * for the actual sampling computation. - * - * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. - */ -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr); - - -/** * Pure-LLVM texture sampling code generator. * * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c deleted file mode 100644 index 68520fa4f0..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ /dev/null @@ -1,1713 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture sampling - * - * Authors: - * Brian Paul - */ - -#include "lp_context.h" -#include "lp_quad.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_sample.h" -#include "lp_tex_cache.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -/* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. - * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). - */ -#define FRAC(f) ((f) - util_ifloor(f)) - - -/** - * Linear interpolation macro - */ -static INLINE float -lerp(float a, float v0, float v1) -{ - return v0 + a * (v1 - v0); -} - - -/** - * Do 2D/biliner interpolation of float values. - * v00, v10, v01 and v11 are typically four texture samples in a square/box. - * a and b are the horizontal and vertical interpolants. - * It's important that this function is inlined when compiled with - * optimization! If we find that's not true on some systems, convert - * to a macro. - */ -static INLINE float -lerp_2d(float a, float b, - float v00, float v10, float v01, float v11) -{ - const float temp0 = lerp(a, v00, v10); - const float temp1 = lerp(a, v01, v11); - return lerp(b, temp0, temp1); -} - - -/** - * As above, but 3D interpolation of 8 values. 
- */ -static INLINE float -lerp_3d(float a, float b, float c, - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111) -{ - const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); - const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); - return lerp(c, temp0, temp1); -} - - - -/** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. - */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) - - -/** - * Apply texture coord wrapping mode and return integer texture indexes - * for a vector of four texcoords (S or T or P). - * \param wrapMode PIPE_TEX_WRAP_x - * \param s the incoming texcoords - * \param size the texture image size - * \param icoord returns the integer texcoords - * \return integer texture index - */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * 
size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); - } -} - - -/** - * Used to compute texel locations for linear sampling for four texcoords. 
- * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoords - * \param size the texture image size - * \param icoord0 returns first texture indexes - * \param icoord1 returns second texture indexes (usually icoord0 + 1) - * \param w returns blend factor/weight between texture indexes - * \param icoord returns the computed integer texture coords - */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; 
- w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords - * Only a subset of wrap modes supported. - */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords. - * Only a subset of wrap modes supported. 
- */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); - } -} - - -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = 
PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - - return face; -} - - -/** - * Examine the quad's texture coordinates to compute the partial - * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. - */ -static float -compute_lambda(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - float rho, lambda; - - if (samp->processor == TGSI_PROCESSOR_VERTEX) - return lodbias; - - assert(sampler->normalized_coords); - - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width0; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height0; - rho = MAX2(rho, max); - } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth0; - rho = MAX2(rho, max); - } - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; -} - - -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. 
Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ -static void -choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. 
- */ - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } - } - else { - *imgFilter = sampler->mag_img_filter; - } - } - else { - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; - } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ - } - } - } -} - - -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image - * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture - * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to - * - * XXX maybe move this into lp_tile_cache.c and merge with the - * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... 
- */ -static void -get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, - const uint8_t *out[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - out[0] = &tile->color[y ][x ][0]; - out[1] = &tile->color[y ][x+1][0]; - out[2] = &tile->color[y+1][x ][0]; - out[3] = &tile->color[y+1][x+1][0]; -} - -static INLINE const uint8_t * -get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - return &tile->color[y][x][0]; -} - - -static void -get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, - int x0, int y0, - int x1, int y1, - const uint8_t *out[4]) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - unsigned tx = (i & 1) ? x1 : x0; - unsigned ty = (i >> 1) ? 
y1 : y0; - - out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); - } -} - -static void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (x < 0 || x >= (int) u_minify(texture->width0, level) || - y < 0 || y >= (int) u_minify(texture->height0, level) || - z < 0 || z >= (int) u_minify(texture->depth0, level)) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; - } - else { - const unsigned tx = x % TEX_TILE_SIZE; - const unsigned ty = y % TEX_TILE_SIZE; - const struct llvmpipe_cached_tex_tile *tile; - - tile = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, z, face, level)); - - rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]); - rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]); - rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]); - rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]); - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } - } -} - - -/** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. 
- * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] - */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) -{ - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; - } - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; -} - - -/** - * As above, but do four z/texture comparisons. - */ -static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) -{ - int j, k0, k1, k2, k3; - float val; - - /* compare four texcoords vs. 
four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k0 = k1 = k2 = k3 = 1; - break; - case PIPE_FUNC_NEVER: - k0 = k1 = k2 = k3 = 0; - break; - default: - k0 = k1 = k2 = k3 = 0; - assert(0); - break; - } - - /* convert four pass/fail values to an intensity in [0,1] */ - val = 0.25F * (k0 + k1 + k2 + k3); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for (j = 0; j < 4; j++) { - rgba[0][j] = rgba[1][j] = rgba[2][j] = val; - rgba[3][j] = 1.0F; - } -} - - - -static void -lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ - unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ - - for (j 
= 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - float xw = u - (float)uflr; - float yw = v - (float)vflr; - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *tx[4]; - - - /* Can we fetch all four at once: - */ - if (x0 < xmax && y0 < ymax) - { - get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); - } - else - { - unsigned x1 = (x0 + 1) & (xpot - 1); - unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level, - x0, y0, x1, y1, tx); - } - - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw, yw, - ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]), - ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c])); - } - } -} - - -static void -lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; 
- unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int x0, y0; - const uint8_t *out; - - x0 = util_ifloor(u); - if (x0 < 0) - x0 = 0; - else if (x0 > xpot - 1) - x0 = xpot - 1; - - y0 = util_ifloor(v); - if (y0 < 0) - y0 = 0; - else if (y0 > ypot - 1) - y0 = ypot - 1; - - out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - int level0; - float lambda; - - lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - level0 = (int)lambda; - - if (lambda < 0.0) { - samp->level = 0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else if (level0 >= texture->last_level) { - samp->level = texture->last_level; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else { - float levelBlend = lambda - level0; - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; - - samp->level = level0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba0 ); - - samp->level = level0+1; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba1 ); - - for (j = 0; j < QUAD_SIZE; j++) { - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } - } - } -} - -/** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... 
- */ -static void -lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend = 0.0f; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, 
faces[j], level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - - /* XXX: This is incorrect -- will often end up with (x0 - * == x1 && y0 == y1), meaning that we fetch the same - * texel four times and linearly interpolate between - * identical values. The correct approach would be to - * call linear_texcoord again for the second level. - */ - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static INLINE void -lp_get_samples_1d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, tzero, NULL, - lodbias, rgba, 
faces); -} - - -static INLINE void -lp_get_samples_2d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, t, p, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; - int width, height, depth; - float levelBlend = 0.0f; - const uint face = 0; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - depth = u_minify(texture->depth0, level0); - - assert(width > 0); - assert(height > 0); - assert(depth > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case 
PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - 
rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static void -lp_get_samples_cube(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; - for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); - } - lp_get_samples_2d_common(sampler, ssss, tttt, NULL, - lodbias, rgba, faces); -} - - -static void -lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - const uint face = 0; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } 
- } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - } - } - break; - default: - assert(0); - } -} - - -/** - * Error condition handler - */ -static INLINE void -lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - int i,j; - - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - rgba[i][j] = 1.0; -} - -/** - * Called via tgsi_sampler::get_samples() when using a sampler for the - * first time. Determine the actual sampler function, link it in and - * call it. - */ -void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - /* Default to the 'undefined' case: - */ - tgsi_sampler->get_samples = lp_get_samples_null; - - if (!texture) { - assert(0); /* is this legal?? 
*/ - goto out; - } - - if (!sampler->normalized_coords) { - assert (texture->target == PIPE_TEXTURE_2D); - tgsi_sampler->get_samples = lp_get_samples_rect; - goto out; - } - - switch (texture->target) { - case PIPE_TEXTURE_1D: - tgsi_sampler->get_samples = lp_get_samples_1d; - break; - case PIPE_TEXTURE_2D: - tgsi_sampler->get_samples = lp_get_samples_2d; - break; - case PIPE_TEXTURE_3D: - tgsi_sampler->get_samples = lp_get_samples_3d; - break; - case PIPE_TEXTURE_CUBE: - tgsi_sampler->get_samples = lp_get_samples_cube; - break; - default: - assert(0); - break; - } - - /* Do this elsewhere: - */ - samp->xpot = util_unsigned_logbase2( samp->texture->width0 ); - samp->ypot = util_unsigned_logbase2( samp->texture->height0 ); - - /* Try to hook in a faster sampler. Ultimately we'll have to - * code-generate these. Luckily most of this looks like it is - * orthogonal state within the sampler. - */ - if (texture->target == PIPE_TEXTURE_2D && - sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && - sampler->compare_mode == FALSE && - sampler->normalized_coords) - { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - samp->level = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; - break; - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; - break; - default: - break; - } - } - else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; - break; - default: - break; - } - } - } - else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case 
PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; - break; - default: - break; - } - } - } - } - else if (0) { - _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", - texture->target, PIPE_TEXTURE_2D, - sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, - sampler->min_img_filter, sampler->mag_img_filter, - sampler->wrap_s, sampler->wrap_t, - sampler->compare_mode, FALSE, - sampler->normalized_coords, TRUE); - } - -out: - tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); -} - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -#include "lp_bld_type.h" -#include "lp_bld_intr.h" -#include "lp_bld_tgsi.h" - - -struct lp_c_sampler_soa -{ - struct lp_build_sampler_soa base; - - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -static void -lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) -{ - FREE(sampler); -} - - -static void -lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, - LLVMBuilderRef builder, - struct lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - 
LLVMValueRef *texel) -{ - struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!sampler->samplers_ptr) - sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); - - if(!sampler->store_ptr) - sampler->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = sampler->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = sampler->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr) -{ - struct lp_c_sampler_soa *sampler; - - sampler = CALLOC_STRUCT(lp_c_sampler_soa); - if(!sampler) - return NULL; - - sampler->base.destroy = lp_c_sampler_soa_destroy; - sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; - sampler->context_ptr = context_ptr; - - return &sampler->base; -} - diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h deleted file mode 100644 index 9c235080a5..0000000000 --- a/src/gallium/drivers/nouveau/nouveau_push.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef __NOUVEAU_PUSH_H__ -#define __NOUVEAU_PUSH_H__ - -#include "nouveau/nouveau_winsys.h" - -#ifndef NOUVEAU_PUSH_CONTEXT -#error undefined push context -#endif - -#define OUT_RING(data) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - 
(*pc->base.channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - memcpy(pc->base.channel->pushbuf->cur, (src), (size) * 4); \ - pc->base.channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING_NI(obj,mthd,size) do { \ - BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ -} while(0) - -static inline void -DO_FIRE_RING(struct nouveau_channel *chan, struct pipe_fence_handle **fence) -{ - nouveau_pushbuf_flush(chan, 0); - if (fence) - *fence = NULL; -} - -#define FIRE_RING(fence) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - DO_FIRE_RING(pc->base.channel, fence); \ -} while(0) - -#define OUT_RELOC(bo,data,flags,vor,tor) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, nouveau_bo(bo), \ - (data), 0, (flags), (vor), (tor)); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - pc->base.channel->vram->handle, \ - pc->base.channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), 
(delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ -#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ - (flags), 0, 0); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 0437af3725..7ebc94ed6c 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -127,8 +127,18 @@ nouveau_screen_bo_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map(bo, nouveau_screen_map_flags(usage)); if (ret) { debug_printf("map failed: %d\n", ret); @@ -143,11 +153,22 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned offset, unsigned length, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); uint32_t flags = nouveau_screen_map_flags(usage); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map_range(bo, offset, length, flags); if (ret) { + nouveau_bo_unmap(bo); if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY) 
debug_printf("map_range failed: %d\n", ret); return NULL; diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index ebfc67ad1c..a7927d88df 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -5,6 +5,9 @@ struct nouveau_screen { struct pipe_screen base; struct nouveau_device *device; struct nouveau_channel *channel; + + int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen, + struct pipe_buffer *pb, unsigned usage); }; static inline struct nouveau_screen * diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 9aee9e4956..e844f6abb3 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -3,41 +3,95 @@ #include "util/u_debug.h" +#ifdef DEBUG +#define DEBUG_NOUVEAU_STATEOBJ +#endif /* DEBUG */ + struct nouveau_stateobj_reloc { struct nouveau_bo *bo; - unsigned offset; - unsigned packet; + struct nouveau_grobj *gr; + uint32_t push_offset; + uint32_t mthd; - unsigned data; + uint32_t data; unsigned flags; unsigned vor; unsigned tor; }; +struct nouveau_stateobj_start { + struct nouveau_grobj *gr; + uint32_t mthd; + uint32_t size; + unsigned offset; +}; + struct nouveau_stateobj { struct pipe_reference reference; - unsigned *push; + struct nouveau_stateobj_start *start; struct nouveau_stateobj_reloc *reloc; - unsigned *cur; - unsigned cur_packet; + /* Common memory pool for data. 
*/ + uint32_t *pool; + unsigned pool_cur; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + unsigned start_alloc; + unsigned reloc_alloc; + unsigned pool_alloc; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + unsigned total; /* includes begin_ring */ + unsigned cur; /* excludes begin_ring, offset from "cur_start" */ + unsigned cur_start; unsigned cur_reloc; }; +static INLINE void +so_dump(struct nouveau_stateobj *so) +{ + unsigned i, nr, total = 0; + + for (i = 0; i < so->cur_start; i++) { + if (so->start[i].gr->subc > -1) + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | (so->start[i].gr->subc << 13) + | so->start[i].mthd); + else + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | so->start[i].mthd); + for (nr = 0; nr < so->start[i].size; nr++, total++) + debug_printf("+0x%04x: 0x%08x\n", total, + so->pool[so->start[i].offset + nr]); + } +} + static INLINE struct nouveau_stateobj * -so_new(unsigned push, unsigned reloc) +so_new(unsigned start, unsigned push, unsigned reloc) { struct nouveau_stateobj *so; so = MALLOC(sizeof(struct nouveau_stateobj)); pipe_reference_init(&so->reference, 1); - so->push = MALLOC(sizeof(unsigned) * push); - so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); + so->total = so->cur = so->cur_start = so->cur_reloc = 0; - so->cur = so->push; - so->cur_reloc = so->cur_packet = 0; +#ifdef DEBUG_NOUVEAU_STATEOBJ + so->start_alloc = start; + so->reloc_alloc = reloc; + so->pool_alloc = push; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start)); + so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc)); + so->pool = MALLOC(push * sizeof(uint32_t)); + so->pool_cur = 0; + + if (!so->start || !so->reloc || !so->pool) { + debug_printf("malloc failed\n"); + assert(0); + } return so; } @@ -48,63 +102,128 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) struct nouveau_stateobj *so = *pso; int i; - if 
(pipe_reference(&(*pso)->reference, &ref->reference)) { - free(so->push); + if (pipe_reference(&(*pso)->reference, &ref->reference)) { + FREE(so->start); for (i = 0; i < so->cur_reloc; i++) nouveau_bo_ref(NULL, &so->reloc[i].bo); - free(so->reloc); - free(so); + FREE(so->reloc); + FREE(so->pool); + FREE(so); } *pso = ref; } static INLINE void -so_data(struct nouveau_stateobj *so, unsigned data) +so_data(struct nouveau_stateobj *so, uint32_t data) { - (*so->cur++) = (data); - so->cur_packet += 4; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur >= so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data; } static INLINE void -so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size) +so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size) { - so->cur_packet += (4 * size); +#ifdef DEBUG_NOUVEAU_STATEOBJ + if ((so->cur + size) > so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + while (size--) - (*so->cur++) = (*data++); + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = + *data++; } static INLINE void so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, unsigned mthd, unsigned size) { - so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); - so_data(so, (gr->subc << 13) | (size << 18) | mthd); + struct nouveau_stateobj_start *start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start_alloc <= so->cur_start) { + debug_printf("exceeding num_start size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + start = so->start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) { + debug_printf("previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = start; + start[so->cur_start].gr 
= gr; + start[so->cur_start].mthd = mthd; + start[so->cur_start].size = size; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->pool_alloc < (size + so->pool_cur)) { + debug_printf("exceeding num_pool size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + start[so->cur_start].offset = so->pool_cur; + so->pool_cur += size; + + so->cur_start++; + /* The 1 is for *this* begin_ring. */ + so->total += so->cur + 1; + so->cur = 0; } static INLINE void so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo, unsigned data, unsigned flags, unsigned vor, unsigned tor) { - struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; - - r->bo = NULL; - nouveau_bo_ref(bo, &r->bo); - r->offset = so->cur - so->push; - r->packet = so->cur_packet; - r->data = data; - r->flags = flags; - r->vor = vor; - r->tor = tor; + struct nouveau_stateobj_reloc *r; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->reloc_alloc <= so->cur_reloc) { + debug_printf("exceeding num_reloc size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + r = so->reloc; + + so->reloc = r; + r[so->cur_reloc].bo = NULL; + nouveau_bo_ref(bo, &(r[so->cur_reloc].bo)); + r[so->cur_reloc].gr = so->start[so->cur_start-1].gr; + r[so->cur_reloc].push_offset = so->total + so->cur; + r[so->cur_reloc].data = data; + r[so->cur_reloc].flags = flags; + r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd + + (so->cur << 2); + r[so->cur_reloc].vor = vor; + r[so->cur_reloc].tor = tor; + so_data(so, data); + so->cur_reloc++; } -static INLINE void -so_dump(struct nouveau_stateobj *so) +/* Determine if this buffer object is referenced by this state object. 
*/ +static INLINE boolean +so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo) { - unsigned i, nr = so->cur - so->push; + int i; + + for (i = 0; i < so->cur_reloc; i++) + if (so->reloc[i].bo == bo) + return true; - for (i = 0; i < nr; i++) - debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]); + return false; } static INLINE void @@ -114,75 +233,93 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so) unsigned nr, i; int ret = 0; - nr = so->cur - so->push; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start[so->cur_start - 1].size > so->cur) { + debug_printf("emit: previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* We cannot update total in case we so_emit again. */ + nr = so->total + so->cur; + /* This will flush if we need space. * We don't actually need the marker. */ if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) { debug_printf("so_emit failed marker emit with error %d\n", ret); - return; + assert(0); + } + + /* Submit data. This will ensure proper binding of objects. 
*/ + for (i = 0; i < so->cur_start; i++) { + BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size); + OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size); } - pb->remaining -= nr; - memcpy(pb->cur, so->push, nr * 4); for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset, - r->bo, r->data, 0, r->flags, - r->vor, r->tor))) { + if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur - nr + + r->push_offset, r->bo, r->data, + 0, r->flags, r->vor, r->tor))) { debug_printf("so_emit failed reloc with error %d\n", ret); - goto out; + assert(0); } } -out: - pb->cur += nr; } static INLINE void so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so) { struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *gr = NULL; unsigned i; int ret = 0; if (!so) return; - i = so->cur_reloc << 1; - /* This will flush if we need space. - * We don't actually need the marker. - */ - if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) { - debug_printf("so_emit_reloc_markers failed marker emit with" \ - "error %d\n", ret); - return; - } - pb->remaining -= i; - + /* If we need to flush in flush notify, then we have a problem anyway. 
*/ for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, - r->packet, 0, - (r->flags & (NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | - NOUVEAU_BO_RDWR)) | - NOUVEAU_BO_DUMMY, 0, 0))) { - debug_printf("so_emit_reloc_markers failed reloc" \ - "with error %d\n", ret); - pb->remaining += ((so->cur_reloc - i) << 1); - return; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (r->mthd & 0x40000000) { + debug_printf("error: NI mthd 0x%08X\n", r->mthd); + continue; } - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, - r->data, 0, - r->flags | NOUVEAU_BO_DUMMY, - r->vor, r->tor))) { - debug_printf("so_emit_reloc_markers failed reloc" \ - "with error %d\n", ret); - pb->remaining += ((so->cur_reloc - i) << 1) - 1; - return; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* The object needs to be bound and the system must know the + * subchannel is being used. Otherwise it will discard it. + */ + if (gr != r->gr) { + BEGIN_RING(chan, r->gr, 0x100, 1); + OUT_RING(chan, 0); + gr = r->gr; + } + + /* Some relocs really don't like to be hammered, + * NOUVEAU_BO_DUMMY makes sure it only + * happens when needed. 
+ */ + ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) | + r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART + | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); } + + ret = OUT_RELOC(chan, r->bo, r->data, r->flags | + NOUVEAU_BO_DUMMY, r->vor, r->tor); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); + } + + pb->remaining -= 2; } } diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 770733a4a1..edd96859cf 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -10,10 +10,14 @@ nv04_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv04->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -30,32 +34,36 @@ nv04_destroy(struct pipe_context *pipe) static boolean nv04_init_hwctx(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + // requires a valid handle -// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); // OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); - OUT_RING(0); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); + OUT_RING(chan, 0); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(0x40182800); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, 0x40182800); // OUT_RING(1<<20/*no cull*/); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); // 
OUT_RING(0x24|(1<<6)|(1<<8)); - OUT_RING(0x120001a4); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); - OUT_RING(0x332213a1); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); - OUT_RING(0x11001010); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); - OUT_RING(0x0); -// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); + OUT_RING(chan, 0x120001a4); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); + OUT_RING(chan, 0x332213a1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); + OUT_RING(chan, 0x11001010); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); + OUT_RING(chan, 0x0); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); // OUT_RING(SCREEN_OFFSET); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); - OUT_RING(0xff000000); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); + OUT_RING(chan, 0xff000000); - FIRE_RING (NULL); + FIRE_RING (chan); return TRUE; } diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h index 55326c787a..fe3b527423 100644 --- a/src/gallium/drivers/nv04/nv04_context.h +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv04_screen *ctx = nv04->screen -#include "nouveau/nouveau_push.h" - #include "nv04_state.h" #define NOUVEAU_ERR(fmt, args...) 
\ @@ -141,9 +137,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04); extern void nv04_state_tex_update(struct nv04_context *nv04); /* nv04_vbo.c */ -extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv04_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv04_draw_elements( struct pipe_context *pipe, +extern void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index 25395edfd7..0b795ea243 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -93,33 +93,45 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render, static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) { - BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RINGp(buffer + VERTEX_SIZE * v4,8); - OUT_RINGp(buffer + VERTEX_SIZE * v5,8); - OUT_RING(0xFEDCBA); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v5,8); + OUT_RING(chan, 0xFEDCBA); } static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* 
buffer, ushort v0, ushort v1, ushort v2) { - BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RING(0xFED); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RING(chan, 0xFED); } static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) { - BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RING(0xFECEDC); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RING(chan, 0xFECEDC); } static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) @@ -156,7 +168,10 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con { const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + 
struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; for(i = 0; i<nr_indices; i+=14) @@ -166,15 +181,15 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con if (numvert<3) break; - BEGIN_RING( fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8); for(j = 0; j<numvert; j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 ); - BEGIN_RING_NI( fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); for(j = 0; j<numtri/2; j++ ) - OUT_RING(striptbl[j]); + OUT_RING(chan, striptbl[j]); if (numtri%2) - OUT_RING(striptbl[numtri/2]&0xFFF); + OUT_RING(chan, striptbl[numtri/2]&0xFFF); } } @@ -182,11 +197,14 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const { const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); - OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8); for(i = 1; i<nr_indices; i+=14) { @@ -195,16 +213,16 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const if (numvert < 3) break; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), 
numvert*8); for(j=0;j<numvert;j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - BEGIN_RING_NI(fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); for(j = 0; j<numtri/2; j++) - OUT_RING(fantbl[j]); + OUT_RING(chan, fantbl[j]); if (numtri%2) - OUT_RING(fantbl[numtri/2]&0xFFF); + OUT_RING(chan, fantbl[numtri/2]&0xFFF); } } diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index bd98ae091f..b8d6dc560f 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -57,13 +57,19 @@ static uint32_t nv04_blend_func(uint32_t f) static void nv04_emit_control(struct nv04_context* nv04) { uint32_t control = nv04->dsa->control; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, control); } static void nv04_emit_blend(struct nv04_context* nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; uint32_t blend; blend=0x4; // texture MODULATE_ALPHA @@ -75,19 +81,23 @@ static void nv04_emit_blend(struct nv04_context* nv04) blend|=(nv04_blend_func(nv04->blend->b_src)<<24); blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); - OUT_RING(blend); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING(chan, blend); } static void nv04_emit_sampler(struct nv04_context *nv04, int unit) { struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; struct pipe_texture *pt = 
&nv04mt->base; - - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING(nv04->sampler[unit]->filter); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING(chan, nv04->sampler[unit]->filter); } static void nv04_state_emit_framebuffer(struct nv04_context* nv04) @@ -97,6 +107,10 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv04_miptree *nv04mt = 0; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; + struct nouveau_bo *bo; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -128,24 +142,29 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) assert(0); } - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); - OUT_RING(rt_format); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + OUT_RING(chan, rt_format); nv04mt = (struct nv04_miptree *)rt->base.texture; + bo = nouveau_bo(nv04mt->buffer); /* FIXME pitches have to be aligned ! 
*/ - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt->pitch|(zeta->pitch<<16)); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt->pitch|(zeta->pitch<<16)); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (fb->zsbuf) { nv04mt = (struct nv04_miptree *)zeta->base.texture; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } } void nv04_emit_hw_state(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; int i; if (nv04->dirty & NV04_NEW_VERTPROG) { @@ -163,8 +182,8 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (nv04->dirty & NV04_NEW_CONTROL) { nv04->dirty &= ~NV04_NEW_CONTROL; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(nv04->dsa->control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, nv04->dsa->control); } if (nv04->dirty & NV04_NEW_BLEND) { @@ -205,12 +224,12 @@ nv04_emit_hw_state(struct nv04_context *nv04) unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch; unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt_pitch|(zeta_pitch<<16)); - OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt_pitch|(zeta_pitch<<16)); + OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if 
(nv04->zeta) { - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } /* Texture images */ @@ -218,9 +237,10 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (!(nv04->fp_samplers & (1 << i))) continue; struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); } } diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index 099ab10043..3484771814 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv04_draw_elements( struct pipe_context *pipe, +void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv04_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv04_draw_arrays( 
struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { printf("coucou in draw arrays\n"); - return nv04_draw_elements(pipe, NULL, 0, prim, start, count); + nv04_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 0dadeb03dd..1ecb73d06e 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -10,10 +10,14 @@ nv10_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv10->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,225 +35,226 @@ static void nv10_init_hwctx(struct nv10_context *nv10) { struct nv10_screen *screen = nv10->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; float projectionmatrix[16]; - BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, celsius, 
NV10TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0x7ff<<16)|0x800); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); for (i=1;i<8;i++) { - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, 0x290, 1); - OUT_RING ((0x10<<16)|1); - BEGIN_RING(celsius, 0x3f4, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, 0x290, 1); + OUT_RING (chan, (0x10<<16)|1); + BEGIN_RING(chan, celsius, 0x3f4, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); if (nv10->screen->celsius->grclass != NV10TCL) { /* For nv11, nv17 */ - BEGIN_RING(celsius, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, celsius, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); /* Set state */ - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (0x207); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); - OUT_RING (0); - OUT_RING (0); - - BEGIN_RING(celsius, 
NV10TCL_RC_IN_ALPHA(0), 12); - OUT_RING (0x30141010); - OUT_RING (0); - OUT_RING (0x20040000); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0x18000000); - OUT_RING (0x300e0300); - OUT_RING (0x0c091c80); - - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); - OUT_RING (1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); - OUT_RING (1); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x8006); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); - OUT_RING (0xff); - OUT_RING (0x207); - OUT_RING (0); - OUT_RING (0xff); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1d01); - BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (0x201); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 
2); - OUT_RING (0x1b02); - OUT_RING (0x1b02); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (0x405); - OUT_RING (0x901); - BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + + BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12); + OUT_RING (chan, 0x30141010); + OUT_RING (chan, 0); + OUT_RING (chan, 0x20040000); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0x18000000); + OUT_RING (chan, 0x300e0300); + OUT_RING (chan, 0x0c091c80); + + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x8006); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1d01); + BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + 
OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, 0x201); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, 0x1b02); + OUT_RING (chan, 0x1b02); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, 0x405); + OUT_RING (chan, 0x901); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8); for (i=0;i<8;i++) { - OUT_RING (0); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RING (0x3fc00000); /* -1.50 */ - OUT_RING (0xbdb8aa0a); /* -0.09 */ - OUT_RING (0); /* 0.00 */ + BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RING (chan, 0x3fc00000); /* -1.50 */ + OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */ + OUT_RING (chan, 0); /* 0.00 */ - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, 
NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); - OUT_RING (0x802); - OUT_RING (2); + BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2); + OUT_RING (chan, 0x802); + OUT_RING (chan, 2); /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when * using texturing, except when using the texture matrix */ - BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); - OUT_RING (6); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (0x01010101); + BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); + OUT_RING (chan, 6); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x01010101); /* Set vertex component */ - BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); - OUT_RINGf (0.0); - BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, 
NV10TCL_VERTEX_TX1_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1); + OUT_RINGf (chan, 0.0); + BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); memset(projectionmatrix, 0, sizeof(projectionmatrix)); - BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 1.0; projectionmatrix[3*4+3] = 1.0; for (i=0;i<16;i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); - OUT_RING (0.0); - OUT_RINGf (16777216.0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RING (chan, 0.0); + OUT_RINGf (chan, 16777216.0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (-2048.0); - OUT_RINGf (-2048.0); - OUT_RINGf (16777215.0 * 0.5); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RING (chan, 0); - FIRE_RING (NULL); + FIRE_RING (chan); } struct pipe_context * diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 36a6aa7a74..ab4b825487 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv10_screen *ctx = nv10->screen -#include "nouveau/nouveau_push.h" - #include "nv10_state.h" #define NOUVEAU_ERR(fmt, args...) 
\ @@ -144,9 +140,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10); extern void nv10_state_tex_update(struct nv10_context *nv10); /* nv10_vbo.c */ -extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv10_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv10_draw_elements( struct pipe_context *pipe, +extern void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index 906fdfeeb9..c1f7ccb9ab 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -52,6 +52,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; struct pipe_texture *pt = &nv10mt->base; struct nv10_texture_format *tf; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; uint32_t txf, txs, txp; tf = nv10_fragtex_format(pt->format); @@ -82,15 +85,15 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) return; } - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width0 << 16) | pt->height0); - OUT_RING (ps->bcol); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | 
NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv10_fragtex_bind(struct nv10_context *nv10) { #if 0 struct nv10_fragment_program *fp = nv10->fragprog.active; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; unsigned samplers, unit; samplers = nv10->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv10_fragtex_bind(struct nv10_context *nv10) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv10->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 7ba9777a22..c5dbe43dbc 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -67,12 +67,15 @@ struct nv10_vbuf_render { void nv10_vtxbuf_bind( struct nv10_context* nv10 ) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; for(i = 0; i < 8; i++) { - BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv10->vtxbuf*/); - BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv10->vtxbuf*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1); + OUT_RING(chan, 0/*XXX*/); } } @@ -163,19 +166,22 @@ nv10_vbuf_render_draw( struct vbuf_render *render, { struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); struct nv10_context *nv10 = nv10_render->nv10; + struct nv10_screen *screen = 
nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int push, i; nv10_emit_hw_state(nv10); - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv10_render->hwprim); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv10_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -183,16 +189,16 @@ nv10_vbuf_render_draw( struct vbuf_render *render, // XXX too big/small ? check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 6a39ddeaac..69a6dab866 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -180,7 +180,6 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->celsius, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git 
a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 2577ab73b5..30a596ca60 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -4,25 +4,32 @@ static void nv10_state_emit_blend(struct nv10_context* nv10) { struct nv10_blend_state *b = nv10->blend; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (b->b_enable); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (chan, b->b_enable); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv10_state_emit_blend_color(struct nv10_context* nv10) { struct pipe_blend_color *c = nv10->blend_color; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -31,60 +38,66 @@ static void nv10_state_emit_blend_color(struct nv10_context* nv10) static void nv10_state_emit_rast(struct nv10_context* nv10) { struct nv10_rasterizer_state *r = nv10->rast; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct 
nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv10_state_emit_dsa(struct nv10_context* nv10) { struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(celsius, 
NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); #if 0 - BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (d->alpha.func); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv10_state_emit_viewport(struct nv10_context* nv10) @@ -108,6 +121,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) int colour_format = 0, zeta_format = 0; struct nv10_miptree *nv10mt = 0; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; colour_format = fb->cbufs[0]->format; @@ -144,11 +161,11 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) } if (zeta) { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + 
OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv10mt = (struct nv10_miptree *)rt->base.texture; @@ -160,13 +177,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) nv10->zeta = nv10mt->buffer; } - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - OUT_RING (rt_format); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0 | 0x08000800); - OUT_RING (((h - 1) << 16) | 0 | 0x08000800); + BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); + OUT_RING (chan, rt_format); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0 | 0x08000800); + OUT_RING (chan, ((h - 1) << 16) | 0 | 0x08000800); } static void nv10_vertex_layout(struct nv10_context *nv10) @@ -201,6 +218,10 @@ static void nv10_vertex_layout(struct nv10_context *nv10) void nv10_emit_hw_state(struct nv10_context *nv10) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + struct nouveau_bo *rt_bo; int i; if (nv10->dirty & NV10_NEW_VERTPROG) { @@ -269,38 +290,41 @@ nv10_emit_hw_state(struct nv10_context *nv10) */ /* Render target */ + rt_bo = nouveau_bo(nv10->rt[0]); // XXX figre out who's who for NV10TCL_DMA_* and fill accordingly -// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); -// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1); +// OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv10->zeta) { + struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta); // XXX -// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 
1); -// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1); +// OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv10->zeta + lma_offset);*/ +/* BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv10->fp_samplers & (1 << i))) continue; - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv10->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, NV10TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 0d26141248..9180c72c9b 100644 --- 
a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv10_draw_elements( struct pipe_context *pipe, +void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -65,14 +65,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv10_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv10_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { - return nv10_draw_elements(pipe, NULL, 0, prim, start, count); + nv10_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index 6a147a4159..5b80af2d22 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -10,10 +10,14 @@ nv20_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv20->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,348 +35,352 @@ static void nv20_init_hwctx(struct nv20_context *nv20) { struct nv20_screen *screen = nv20->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; float projectionmatrix[16]; - const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL); + const boolean is_nv25tcl = (kelvin->grclass == NV25TCL); - BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2); - 
OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); /* TEXTURE1 */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); /* ZETA */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); /* TEXTURE1 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); /* ZETA */ - BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1); - OUT_RING (0); /* renouveau: beef0351, unique */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1); + OUT_RING (chan, 0); /* renouveau: beef0351, unique */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0xfff << 16) | 0x0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x17e0, 3); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); + BEGIN_RING(chan, kelvin, 0x17e0, 3); + OUT_RINGf 
(chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); } else { - BEGIN_RING(kelvin, 0x1e68, 1); - OUT_RING (0x4b800000); /* 16777216.000000 */ - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL); + BEGIN_RING(chan, kelvin, 0x1e68, 1); + OUT_RING (chan, 0x4b800000); /* 16777216.000000 */ + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL); } - BEGIN_RING(kelvin, 0x290, 1); - OUT_RING ((0x10 << 16) | 1); - BEGIN_RING(kelvin, 0x9fc, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x1d80, 1); - OUT_RING (1); - BEGIN_RING(kelvin, 0x9f8, 1); - OUT_RING (4); - BEGIN_RING(kelvin, 0x17ec, 3); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, 0x290, 1); + OUT_RING (chan, (0x10 << 16) | 1); + BEGIN_RING(chan, kelvin, 0x9fc, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x1d80, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, 0x9f8, 1); + OUT_RING (chan, 4); + BEGIN_RING(chan, kelvin, 0x17ec, 3); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 0.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d88, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d88, 1); + OUT_RING (chan, 3); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1); - OUT_RING (chan->vram->handle); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1); + OUT_RING (chan, chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1); + OUT_RING (chan, chan->vram->handle); } - BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1); - OUT_RING (0); /* renouveau: beef1e10 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1); + OUT_RING (chan, 0); /* renouveau: beef1e10 */ - BEGIN_RING(kelvin, 0x1e98, 1); - 
OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1e98, 1); + OUT_RING (chan, 0); #if 0 if (is_nv25tcl) { - BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ - OUT_RING (NvDmaFB); /* renouveau: beef0201 */ + BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ + OUT_RING (chan, NvDmaFB); /* renouveau: beef0201 */ - BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ } #endif - BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, kelvin, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); /* error: ILLEGAL_MTHD, PROTECTION_FAULT - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); - OUT_RINGf (512.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 512.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x022c, 2); - OUT_RING (0x280); - OUT_RING (0x07d28000); + BEGIN_RING(chan, kelvin, 0x022c, 2); + OUT_RING (chan, 0x280); + OUT_RING (chan, 0x07d28000); } /* * illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c2c, 1); - OUT_RING (0); */ + BEGIN_RING(chan, NvSub3D, 0x1c2c, 1); + OUT_RING (chan, 0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1da4, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1da4, 1); + OUT_RING (chan, 0); } /* * crashes with illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c18, 1); - OUT_RING (0x200); */ + BEGIN_RING(chan, NvSub3D, 0x1c18, 1); + OUT_RING (chan, 0x200); */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING ((0 << 16) | 0); 
- OUT_RING ((0 << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, (0 << 16) | 0); + OUT_RING (chan, (0 << 16) | 0); /* *** Set state *** */ - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */ + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_ALPHA_FUNC_REF */ for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4); - OUT_RING (0x30d410d0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1); - OUT_RING (0x00011101); - BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2); - OUT_RING (0x130e0300); - OUT_RING (0x0c091c80); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4); - OUT_RING (0x20c400c0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); - OUT_RING (0x035125a0); - OUT_RING (0); - OUT_RING (0x40002000); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); - OUT_RING (0xffff0000); - - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - 
OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4); - OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE); - OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO); - OUT_RING (0); /* NV20TCL_BLEND_COLOR */ - OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RING (0xff); - OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */ - OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */ - OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP); - - BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (0); - OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY); - BEGIN_RING(kelvin, 0x17cc, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4); + OUT_RING (chan, 0x30d410d0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1); + OUT_RING (chan, 0x00011101); + BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2); + OUT_RING (chan, 0x130e0300); + OUT_RING (chan, 0x0c091c80); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4); + OUT_RING (chan, 0x20c400c0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); + OUT_RING (chan, 0x035125a0); + OUT_RING (chan, 0); + OUT_RING (chan, 
0x40002000); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); + OUT_RING (chan, 0xffff0000); + + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, NV20TCL_BLEND_FUNC_SRC_ONE); + OUT_RING (chan, NV20TCL_BLEND_FUNC_DST_ZERO); + OUT_RING (chan, 0); /* NV20TCL_BLEND_COLOR */ + OUT_RING (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RING (chan, 0xff); + OUT_RING (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_STENCIL_FUNC_REF */ + OUT_RING (chan, 0xff); /* NV20TCL_STENCIL_FUNC_MASK */ + OUT_RING (chan, NV20TCL_STENCIL_OP_FAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP); + + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY); + BEGIN_RING(chan, kelvin, 0x17cc, 1); + OUT_RING (chan, 0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 1); } - BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1); - OUT_RING (0x00020000); - BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), + BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1); + OUT_RING (chan, 0x00020000); + BEGIN_RING(chan, kelvin, 
NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { - OUT_RING(0xffffffff); + OUT_RING(chan, 0xffffffff); } - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (NV20TCL_DEPTH_FUNC_LESS); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RINGf (0.0); - OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, NV20TCL_DEPTH_FUNC_LESS); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); if (!is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 3); } - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 
1); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); if (!is_nv25tcl) { - OUT_RING (8); + OUT_RING (chan, 8); } else { - OUT_RINGf (1.0); + OUT_RINGf (chan, 1.0); } if (!is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POINT_SMOOTH_ENABLE */ } else { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x0a1c, 1); - OUT_RING (0x800); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x0a1c, 1); + OUT_RING (chan, 0x800); } - BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL); - OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (NV20TCL_CULL_FACE_BACK); - OUT_RING (NV20TCL_FRONT_FACE_CCW); - BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1); - OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, NV20TCL_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV20TCL_POLYGON_MODE_BACK_FILL); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, NV20TCL_CULL_FACE_BACK); + OUT_RING (chan, NV20TCL_FRONT_FACE_CCW); + BEGIN_RING(chan, kelvin, 
NV20TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1); + OUT_RING (chan, NV20TCL_SHADE_MODEL_SMOOTH); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { - OUT_RING(0); + OUT_RING(chan, 0); } - BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RINGf (1.5); - OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ - OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ - BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2); - OUT_RING (NV20TCL_FOG_MODE_EXP_2); - OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG); - BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.FOG_COLOR */ - BEGIN_RING(kelvin, NV20TCL_ENGINE, 1); - OUT_RING (NV20TCL_ENGINE_FIXED); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RINGf (chan, 1.5); + OUT_RINGf (chan, -0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ + OUT_RINGf (chan, 0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ + BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2); + OUT_RING (chan, NV20TCL_FOG_MODE_EXP_SIGNED); + OUT_RING (chan, NV20TCL_FOG_COORD_FOG); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.FOG_COLOR */ + BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1); + OUT_RING (chan, NV20TCL_ENGINE_FIXED); for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); - OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0); - OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); 
OUT_RINGf(1.0); + BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); for (i = 4; i < 16; ++i) { - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 1.0); } - BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (0x00010101); - BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x00010101); + BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1); + OUT_RING (chan, 0); memset(projectionmatrix, 0, sizeof(projectionmatrix)); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 16777215.0; projectionmatrix[3*4+3] = 1.0; - BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); for (i = 0; i < 16; i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); - OUT_RINGf (0.0); - OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ - OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ - OUT_RINGf (0.0); - OUT_RINGf (16777215.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */ + OUT_RINGf 
(chan, 0.0); /* y-offset, h/2 + 0.030762 */ + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777215.0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); - OUT_RINGf (0.0); /* no effect?, w/2 */ - OUT_RINGf (0.0); /* no effect?, h/2 */ - OUT_RINGf (16777215.0 * 0.5); - OUT_RINGf (65535.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (chan, 0.0); /* no effect?, w/2 */ + OUT_RINGf (chan, 0.0); /* no effect?, h/2 */ + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RINGf (chan, 65535.0); - FIRE_RING (NULL); + FIRE_RING (chan); } struct pipe_context * diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h index a4eaa95660..c7dfadaa31 100644 --- a/src/gallium/drivers/nv20/nv20_context.h +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv20_screen *ctx = nv20->screen -#include "nouveau/nouveau_push.h" - #include "nv20_state.h" #define NOUVEAU_ERR(fmt, args...) 
\ @@ -143,9 +139,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20); extern void nv20_state_tex_update(struct nv20_context *nv20); /* nv20_vbo.c */ -extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv20_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv20_draw_elements( struct pipe_context *pipe, +extern void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c index 2db4a4015a..dedbec73f3 100644 --- a/src/gallium/drivers/nv20/nv20_fragtex.c +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -52,6 +52,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; struct pipe_texture *pt = &nv20mt->base; struct nv20_texture_format *tf; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t txf, txs, txp; tf = nv20_fragtex_format(pt->format); @@ -82,15 +85,15 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) return; } - BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width0 << 16) | pt->height0); - OUT_RING (ps->bcol); + BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 
1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv20_fragtex_bind(struct nv20_context *nv20) { #if 0 struct nv20_fragment_program *fp = nv20->fragprog.active; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; unsigned samplers, unit; samplers = nv20->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv20_fragtex_bind(struct nv20_context *nv20) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv20->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c index ddfcdb8057..2e145672da 100644 --- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -81,12 +81,15 @@ nv20_vbuf_render(struct vbuf_render *render) void nv20_vtxbuf_bind( struct nv20_context* nv20 ) { #if 0 + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv20->vtxbuf*/); - BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv20->vtxbuf*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1); + OUT_RING(chan, 0/*XXX*/); } #endif } @@ -202,6 +205,9 @@ nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) static unsigned nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) { 
+ struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t hwfmt = 0; unsigned fields; @@ -231,8 +237,8 @@ nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) return 0; } - BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1); - OUT_RING(hwfmt); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1); + OUT_RING(chan, hwfmt); return fields; } @@ -262,6 +268,9 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; struct vertex_info *vinfo = &nv20->vertex_info; unsigned nr_fields; int max_push; @@ -270,29 +279,29 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, nr_fields = nv20__emit_vertex_array_format(nv20); - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); max_push = 1200 / nr_fields; while (nr_indices) { int i; int push = MIN2(nr_indices, max_push); - BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); + BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); for (i = 0; i < push; i++) { /* XXX: fixme to handle other than floats? 
*/ int f = nr_fields; float *attrv = (float*)&data[indices[i] * vsz]; while (f-- > 0) - OUT_RINGf(*attrv++); + OUT_RINGf(chan, *attrv++); } nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP); } static void @@ -301,20 +310,23 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int push, i; NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); - BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv20_render->pbuffer, 0, + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -322,16 +334,16 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, // XXX too big/small ? 
check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } static void diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index a0973f1ebd..d091335063 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -176,7 +176,6 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->kelvin, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 63cba1f412..6bbd1fdae9 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -5,27 +5,34 @@ static void nv20_state_emit_blend(struct nv20_context* nv20) { struct nv20_blend_state *b = nv20->blend; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (b->b_enable); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, b->b_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 
2); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv20_state_emit_blend_color(struct nv20_context* nv20) { struct pipe_blend_color *c = nv20->blend_color; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -34,63 +41,69 @@ static void nv20_state_emit_blend_color(struct nv20_context* nv20) static void nv20_state_emit_rast(struct nv20_context* nv20) { struct nv20_rasterizer_state *r = nv20->rast; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); 
- BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv20_state_emit_dsa(struct nv20_context* nv20) { struct nv20_depth_stencil_alpha_state *d = nv20->dsa; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); #if 0 - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING 
(d->alpha.func); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv20_state_emit_viewport(struct nv20_context* nv20) @@ -101,9 +114,13 @@ static void nv20_state_emit_scissor(struct nv20_context* nv20) { /* NV20TCL_SCISSOR_* is probably a software method */ /* struct pipe_scissor_state *s = nv20->scissor; - BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + + BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2); + OUT_RING (chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (chan, ((s->maxy - s->miny) << 16) | s->miny);*/ } static void nv20_state_emit_framebuffer(struct nv20_context* nv20) @@ -113,6 +130,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv20_miptree *nv20mt = 0; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -150,11 +170,11 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) } if (zeta) { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv20mt = (struct nv20_miptree *)rt->base.texture; 
@@ -166,13 +186,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) nv20->zeta = nv20mt->buffer; } - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); /*NV20TCL_RT_VERT */ - OUT_RING (rt_format); /* NV20TCL_RT_FORMAT */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */ + OUT_RING (chan, rt_format); /* NV20TCL_RT_FORMAT */ + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0); + OUT_RING (chan, ((h - 1) << 16) | 0); } static void nv20_vertex_layout(struct nv20_context *nv20) @@ -293,6 +313,10 @@ static void nv20_vertex_layout(struct nv20_context *nv20) void nv20_emit_hw_state(struct nv20_context *nv20) { + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + struct nouveau_bo *rt_bo; int i; if (nv20->dirty & NV20_NEW_VERTPROG) { @@ -361,36 +385,39 @@ nv20_emit_hw_state(struct nv20_context *nv20) */ /* Render target */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + rt_bo = nouveau_bo(nv20->rt[0]); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv20->zeta) { - BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1); - OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + struct nouveau_bo *zeta_bo = 
nouveau_bo(nv20->zeta); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1); + OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv20->zeta + lma_offset);*/ +/* BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv20->fp_samplers & (1 << i))) continue; - BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer); + BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format, + BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv20->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, NV20TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index 4bf461eba9..52991a0d85 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean 
nv20_draw_elements( struct pipe_context *pipe, +void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe, } draw_flush(nv20->draw); - return TRUE; } -boolean nv20_draw_arrays( struct pipe_context *pipe, +void nv20_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return nv20_draw_elements(pipe, NULL, 0, prim, start, count); + nv20_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 38b39159f1..54572e9ab3 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -10,15 +10,20 @@ nv30_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 864ddaeb59..e59449287b 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv30_screen *ctx = nv30->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include 
"nv30_state.h" @@ -198,9 +194,9 @@ extern struct nv30_state_entry nv30_state_fragtex; extern struct nv30_state_entry nv30_state_vbo; /* nv30_vbo.c */ -extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv30_draw_elements(struct pipe_context *pipe, +extern void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index d1ff18e2df..2d565cb631 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -837,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); - so = so_new(8, 1); + so = so_new(4, 4, 1); so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index b3293ee700..9893567891 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -106,7 +106,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(1, 8, 2); so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -135,7 +135,7 @@ nv30_fragtex_validate(struct nv30_context *nv30) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); diff 
--git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index 1d1c8a484e..e27e9ccbf6 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -41,6 +41,9 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_query *q = nv30_query(pq); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(0); nouveau_notifier_reset(nv30->screen->query, q->object->start); - BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -69,12 +72,15 @@ static void nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_query *q = nv30_query(pq); - BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 760467f736..9ed48178dc 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -233,7 +233,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct 
nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->rankine, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -270,7 +269,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static rankine initialisation */ - so = so_new(128, 0); + so = so_new(36, 60, 0); so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index e6321b480f..a80dfb0488 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -14,7 +14,7 @@ nv30_blend_state_create(struct pipe_context *pipe, struct nv30_context *nv30 = nv30_context(pipe); struct nouveau_grobj *rankine = nv30->screen->rankine; struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); @@ -300,7 +300,7 @@ nv30_rasterizer_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(9, 19, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; /*XXX: ignored: @@ -435,7 +435,7 @@ nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(5, 21, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c 
index 64cf9ae93a..c36d58c040 100644 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -18,7 +18,7 @@ struct nv30_state_entry nv30_state_blend = { static boolean nv30_state_blend_colour_validate(struct nv30_context *nv30) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv30->blend_colour; so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 6f6d1740d6..2ed2ea55e8 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -10,7 +10,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nv04_surface *rt[2], *zeta = NULL; uint32_t rt_enable = 0, rt_format = 0; int i, colour_format = 0, zeta_format = 0, depth_only = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(12, 18, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c index 3ac7a8471e..ba61a9e24a 100644 --- a/src/gallium/drivers/nv30/nv30_state_scissor.c +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -12,7 +12,7 @@ nv30_state_scissor_validate(struct nv30_context *nv30) return FALSE; nv30->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); if (nv30->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c index d0c791ac08..ed520a4f43 100644 --- a/src/gallium/drivers/nv30/nv30_state_stipple.c +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -14,14 +14,14 @@ 
nv30_state_stipple_validate(struct nv30_context *nv30) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv30->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c index c3eb413dac..2d7781292b 100644 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -19,7 +19,7 @@ nv30_state_viewport_validate(struct nv30_context *nv30) return FALSE; nv30->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(3, 10, 0); if (!bypass) { so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index e32b8141af..1c5db03ea2 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -163,19 +163,21 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; nv30_vbo_set_idxbuf(nv30, NULL, 0); if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } while (count) { @@ -186,17 +188,17 @@ nv30_draw_arrays(struct pipe_context *pipe, vc = 
nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,15 +208,15 @@ nv30_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; @@ -228,7 +230,9 @@ static INLINE void nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING 
(elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | 
elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv30_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv30_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void nv30_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct 
nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; while (count) { @@ -412,17 +421,17 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -461,7 +468,7 @@ nv30_draw_elements(struct pipe_context *pipe, if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } if (idxbuf) { @@ -472,7 +479,6 @@ nv30_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -485,9 +491,9 @@ nv30_vbo_validate(struct nv30_context *nv30) unsigned 
vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); for (hw = 0; hw < nv30->vtxelt_nr; hw++) { @@ -500,7 +506,7 @@ nv30_vbo_validate(struct nv30_context *nv30) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 5d60984622..e77a5be3f2 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -650,7 +650,9 @@ static boolean nv30_vertprog_validate(struct nv30_context *nv30) { struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -684,7 +686,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) assert(0); } - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_ref(so, &vp->so); @@ -770,9 +772,9 @@ nv30_vertprog_validate(struct nv30_context *nv30) 4 * sizeof(float)); } - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -788,11 +790,11 @@ nv30_vertprog_validate(struct nv30_context *nv30) vp->insns[i].data[2], 
vp->insns[i].data[3]); } #endif - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index d56c7a6b49..f79ae4db84 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -10,15 +10,20 @@ nv40_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 83fcf1785d..e219bb537a 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv40_screen *ctx = nv40->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include "nv40_state.h" @@ -183,7 +179,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); /* nv40_draw.c */ extern struct draw_stage 
*nv40_draw_render_stage(struct nv40_context *nv40); -extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, +extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned ib_size, unsigned mode, unsigned start, unsigned count); @@ -219,9 +215,9 @@ extern struct nv40_state_entry nv40_state_vbo; extern struct nv40_state_entry nv40_state_vtxfmt; /* nv40_vbo.c */ -extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv40_draw_elements(struct pipe_context *pipe, +extern void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 3875bc3545..d826f8c2f5 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -31,6 +31,9 @@ nv40_render_stage(struct draw_stage *stage) static INLINE void nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) { + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; for (i = 0; i < nv40->swtnl.nr_attribs; i++) { @@ -41,30 +44,30 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) case EMIT_OMIT: break; case EMIT_1F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (fui(v->data[idx][0])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); break; case EMIT_2F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); break; case EMIT_3F: - 
BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); break; case EMIT_4F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); - OUT_RING (fui(v->data[idx][3])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); break; case EMIT_4UB: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), float_to_ubyte(v->data[idx][1]), float_to_ubyte(v->data[idx][2]), float_to_ubyte(v->data[idx][3]))); @@ -82,7 +85,11 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, { struct nv40_render_stage *rs = nv40_render_stage(stage); struct nv40_context *nv40 = rs->nv40; - struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf; + + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *curie = screen->curie; unsigned i; /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ @@ -91,19 +98,19 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, NOUVEAU_ERR("AIII, missed flush\n"); assert(0); } - FIRE_RING(NULL); + FIRE_RING(chan); nv40_state_emit(nv40); } /* Switch primitive modes if necessary */ if (rs->prim != mode) { if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING 
(NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (mode); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, mode); rs->prim = mode; } @@ -115,8 +122,8 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, * off the primitive now. */ if (pb->remaining < ((count * 20) + 6)) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -144,10 +151,13 @@ nv40_render_flush(struct draw_stage *draw, unsigned flags) { struct nv40_render_stage *rs = nv40_render_stage(draw); struct nv40_context *nv40 = rs->nv40; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -226,7 +236,7 @@ nv40_draw_render_stage(struct nv40_context *nv40) return &render->stage; } -boolean +void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned idxbuf_size, unsigned mode, unsigned start, unsigned count) @@ -237,7 +247,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, void *map; if (!nv40_state_validate_swtnl(nv40)) - return FALSE; + return; nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); @@ -278,8 +288,6 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, draw_flush(nv40->draw); pipe->flush(pipe, 0, NULL); - - return TRUE; } static INLINE void diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index bb9c85cc43..1237066c39 100644 
--- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -919,7 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv40_fragprog_upload(nv40, fp); - so = so_new(4, 1); + so = so_new(2, 2, 1); so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 44abc84596..aad9198210 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -108,7 +108,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(2, 9, 2); so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -139,7 +139,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 7874aedd42..8ed4a67dd0 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -41,6 +41,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) 
assert(0); nouveau_notifier_reset(nv40->screen->query, q->object->start); - BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -70,11 +73,14 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; - BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index d01e712805..9e55e5a089 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -215,7 +215,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->curie, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -252,7 +251,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static curie initialisation */ - so = so_new(128, 0); + so = so_new(16, 25, 0); so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index ed55d29aff..ed0ca9e02c 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ 
b/src/gallium/drivers/nv40/nv40_state.c @@ -16,7 +16,7 @@ nv40_blend_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nouveau_grobj *curie = nv40->screen->curie; struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); @@ -310,7 +310,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(8, 18, 0); struct nouveau_grobj *curie = nv40->screen->curie; /*XXX: ignored: @@ -445,7 +445,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(4, 21, 0); struct nouveau_grobj *curie = nv40->screen->curie; so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c index 8cd05ce66e..3ff00a37f6 100644 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -18,7 +18,7 @@ struct nv40_state_entry nv40_state_blend = { static boolean nv40_state_blend_colour_validate(struct nv40_context *nv40) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv40->blend_colour; so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 789ed16126..13fe854915 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ 
-54,9 +54,10 @@ nv40_state_do_validate(struct nv40_context *nv40, void nv40_state_emit(struct nv40_context *nv40) { - struct nouveau_channel *chan = nv40->screen->base.channel; struct nv40_state *state = &nv40->state; struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; uint64_t states; @@ -80,10 +81,10 @@ nv40_state_emit(struct nv40_context *nv40) if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | (1ULL << NV40_STATE_FRAGTEX0))) { - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 1); } state->dirty = 0; diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 1c7a7cd64f..a58fe9ddb1 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -19,7 +19,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) struct nv04_surface *rt[4], *zeta; uint32_t rt_enable, rt_format; int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(18, 24, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index cf58d33906..753a505e93 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40) return FALSE; nv40->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); if (nv40->state.scissor_enabled) { so_data (so, ((s->maxx - 
s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index b51024ad9b..2b371ebfec 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -14,14 +14,14 @@ nv40_state_stipple_validate(struct nv40_context *nv40) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv40->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 665d2d5fca..9919ba1d0b 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -19,7 +19,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40) return FALSE; nv40->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(2, 9, 0); if (!bypass) { so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index af3fcf6a34..a777898f68 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -164,18 +164,21 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; 
nv40_vbo_set_idxbuf(nv40, NULL, 0); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } while (count) { @@ -186,17 +189,17 @@ nv40_draw_arrays(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,29 +209,30 @@ nv40_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } pipe->flush(pipe, 0, NULL); - return TRUE; } static INLINE void nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + 
FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv40_draw_elements_u16(struct nv40_context *nv40, 
void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv40_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void 
nv40_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; while (count) { @@ -412,17 +421,17 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -459,8 +466,9 @@ nv40_draw_elements(struct pipe_context *pipe, idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } if 
(idxbuf) { @@ -471,7 +479,6 @@ nv40_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -484,9 +491,9 @@ nv40_vbo_validate(struct nv40_context *nv40) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); for (hw = 0; hw < nv40->vtxelt_nr; hw++) { @@ -499,7 +506,7 @@ nv40_vbo_validate(struct nv40_context *nv40) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index d9fc31006f..8d80fcad38 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -834,7 +834,9 @@ static boolean nv40_vertprog_validate(struct nv40_context *nv40) { struct pipe_screen *pscreen = nv40->pipe.screen; - struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; struct nv40_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -884,7 +886,7 @@ check_gpu_resources: assert(0); } - so = so_new(7, 0); + so = so_new(3, 4, 0); so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); @@ -974,9 +976,9 @@ check_gpu_resources: 4 * sizeof(float)); } - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + 
OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -993,11 +995,11 @@ check_gpu_resources: NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); } #endif - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 5578a5838f..cbd4c3ff86 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -191,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ -extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv50_draw_elements(struct pipe_context *pipe, +extern void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3f1edf0a13..cecb1efc90 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -145,7 +145,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); FREE(mt); return NULL; @@ -188,7 +188,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); unsigned 
l; - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2d0b1818ef..069f815938 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -96,7 +96,11 @@ struct nv50_reg { #define NV50_MOD_NEG 1 #define NV50_MOD_ABS 2 +#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS) #define NV50_MOD_SAT 4 +#define NV50_MOD_I32 8 + +/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */ /* STACK: Conditionals and loops have to use the (per warp) stack. * Stack entries consist of an entry type (divergent path, join at), @@ -134,6 +138,7 @@ struct nv50_pc { uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ struct nv50_reg *temp_temp[16]; + struct nv50_program_exec *temp_temp_exec[16]; unsigned temp_temp_nr; /* broadcast and destination replacement regs */ @@ -241,7 +246,8 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); } static INLINE struct nv50_reg * @@ -281,7 +287,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); return NULL; } @@ -343,23 +350,29 @@ free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) } static struct nv50_reg * -temp_temp(struct nv50_pc *pc) +temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { if (pc->temp_temp_nr >= 16) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + pc->temp_temp_exec[pc->temp_temp_nr] = e; return pc->temp_temp[pc->temp_temp_nr++]; } +/* This *must* be called for all nv50_program_exec that have been + * given as argument to temp_temp, or the temps will be leaked ! 
+ */ static void -kill_temp_temp(struct nv50_pc *pc) +kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { int i; for (i = 0; i < pc->temp_temp_nr; i++) - free_temp(pc, pc->temp_temp[i]); - pc->temp_temp_nr = 0; + if (pc->temp_temp_exec[i] == e) + free_temp(pc, pc->temp_temp[i]); + if (!e) + pc->temp_temp_nr = 0; } static int @@ -421,6 +434,8 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e) p->exec_head = e; p->exec_tail = e; p->exec_size += (e->inst[0] & 1) ? 2 : 1; + + kill_temp_temp(pc, e); } static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); @@ -776,7 +791,7 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_reg *temp; if (src->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } @@ -795,7 +810,7 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) e->inst[1] |= 0x00200000; } else if (src->type == P_CONST || src->type == P_IMMD) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -811,7 +826,7 @@ static void set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -819,7 +834,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x00800000)); if (e->inst[0] & 0x01000000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -841,7 +856,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) set_long(pc, e); if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; 
@@ -849,7 +864,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x01000000)); if (e->inst[0] & 0x00800000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -864,6 +879,26 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } static void +set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh, + struct nv50_program_exec *e, int pos) +{ + struct nv50_reg *r = src; + + alloc_reg(pc, r); + if (r->type != P_TEMP) { + r = temp_temp(pc, e); + emit_mov(pc, r, src); + } + + if (r->hw > (NV50_SU_MAX_TEMP / 2)) { + NOUVEAU_ERR("out of low GPRs\n"); + abort(); + } + + e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32); +} + +static void emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred) { struct nv50_program_exec *e = exec(pc); @@ -967,6 +1002,13 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, emit(pc, e); } +#define NV50_MAX_F32 0x880 +#define NV50_MAX_S32 0x08c +#define NV50_MAX_U32 0x084 +#define NV50_MIN_F32 0x8a0 +#define NV50_MIN_S32 0x0ac +#define NV50_MIN_U32 0x0a4 + static void emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -974,8 +1016,8 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_program_exec *e = exec(pc); set_long(pc, e); - e->inst[0] |= 0xb0000000; - e->inst[1] |= (sub << 29); + e->inst[0] |= 0x30000000 | ((sub & 0x800) << 20); + e->inst[1] |= (sub << 24); check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, e); @@ -1039,6 +1081,69 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, } static void +emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000; + 
e->inst[1] = 0x0402c000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_1(pc, src, e); + + emit(pc, e); +} + +static void +emit_shift(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4000000; + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (src1->type == P_IMMD) { + e->inst[1] |= (1 << 20); + e->inst[0] |= (pc->immd_buf[src1->hw] & 0x7f) << 16; + } else + set_src_1(pc, src1, e); + + if (dir != TGSI_OPCODE_SHL) + e->inst[1] |= (1 << 29); + + if (dir == TGSI_OPCODE_ISHR) + e->inst[1] |= (1 << 27); + + emit(pc, e); +} + +static void +emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, int s) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4100000; + if (s < 0) { + e->inst[1] |= 1 << 29; + s = -s; + } + e->inst[1] |= ((s & 0x7f) << 16); + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { @@ -1142,36 +1247,41 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, e); } -#define CVTOP_RN 0x01 -#define CVTOP_FLOOR 0x03 -#define CVTOP_CEIL 0x05 -#define CVTOP_TRUNC 0x07 -#define CVTOP_SAT 0x08 -#define CVTOP_ABS 0x10 - -/* 0x04 == 32 bit dst */ -/* 0x40 == dst is float */ -/* 0x80 == src is float */ -#define CVT_F32_F32 0xc4 -#define CVT_F32_S32 0x44 -#define CVT_S32_F32 0x8c -#define CVT_S32_S32 0x0c -#define CVT_NEG 0x20 -#define CVT_RI 0x08 +#define CVT_RN (0x00 << 16) +#define CVT_FLOOR (0x02 << 16) +#define CVT_CEIL (0x04 << 16) +#define CVT_TRUNC (0x06 << 16) +#define CVT_SAT (0x08 << 16) +#define CVT_ABS (0x10 << 16) + +#define CVT_X32_X32 0x04004000 +#define CVT_X32_S32 0x04014000 +#define CVT_F32_F32 ((0xc0 << 
24) | CVT_X32_X32) +#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32) +#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32) +#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32) +#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32) +#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32) +#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32) +#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32) + +#define CVT_NEG 0x20000000 +#define CVT_RI 0x08000000 static void emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, - int wp, unsigned cvn, unsigned fmt) + int wp, uint32_t cvn) { struct nv50_program_exec *e; e = exec(pc); - set_long(pc, e); - e->inst[0] |= 0xa0000000; - e->inst[1] |= 0x00004000; /* 32 bit src */ - e->inst[1] |= (cvn << 16); - e->inst[1] |= (fmt << 24); + if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG; + if (src->mod & NV50_MOD_ABS) cvn |= CVT_ABS; + + e->inst[0] = 0xa0000000; + e->inst[1] = cvn; + set_long(pc, e); set_src_0(pc, src, e); if (wp >= 0) @@ -1196,10 +1306,12 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, * 0x6 = GE * 0x7 = set condition code ? (used before bra.lt/le/gt/ge) * 0x8 = unordered bit (allows NaN) + * + * mode = 0x04 (u32), 0x0c (s32), 0x80 (f32) */ static void emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, - struct nv50_reg *src0, struct nv50_reg *src1) + struct nv50_reg *src0, struct nv50_reg *src1, uint8_t mode) { static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; @@ -1214,16 +1326,10 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, if (dst && dst->type != P_TEMP) dst = alloc_temp(pc, NULL); - /* set.u32 */ set_long(pc, e); - e->inst[0] |= 0xb0000000; + e->inst[0] |= 0x30000000 | (mode << 24); e->inst[1] |= 0x60000000 | (ccode << 14); - /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but - * that doesn't seem to match what the hw actually does - e->inst[1] |= 0x04000000; << breaks things, u32 by default ? 
- */ - if (wp >= 0) set_pred_wr(pc, 1, wp, e); if (dst) @@ -1238,33 +1344,146 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, emit(pc, e); - /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ - if (rdst) - emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); + if (rdst && mode == 0x80) /* convert to float ? */ + emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32); if (rdst && rdst != dst) free_temp(pc, dst); } -static INLINE unsigned -map_tgsi_setop_cc(unsigned op) +static INLINE void +map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty) { switch (op) { - case TGSI_OPCODE_SLT: return 0x1; - case TGSI_OPCODE_SGE: return 0x6; - case TGSI_OPCODE_SEQ: return 0x2; - case TGSI_OPCODE_SGT: return 0x4; - case TGSI_OPCODE_SLE: return 0x3; - case TGSI_OPCODE_SNE: return 0xd; + case TGSI_OPCODE_SLT: *cc = 0x1; *ty = 0x80; break; + case TGSI_OPCODE_SGE: *cc = 0x6; *ty = 0x80; break; + case TGSI_OPCODE_SEQ: *cc = 0x2; *ty = 0x80; break; + case TGSI_OPCODE_SGT: *cc = 0x4; *ty = 0x80; break; + case TGSI_OPCODE_SLE: *cc = 0x3; *ty = 0x80; break; + case TGSI_OPCODE_SNE: *cc = 0xd; *ty = 0x80; break; + + case TGSI_OPCODE_ISLT: *cc = 0x1; *ty = 0x0c; break; + case TGSI_OPCODE_ISGE: *cc = 0x6; *ty = 0x0c; break; + case TGSI_OPCODE_USEQ: *cc = 0x2; *ty = 0x04; break; + case TGSI_OPCODE_USGE: *cc = 0x6; *ty = 0x04; break; + case TGSI_OPCODE_USLT: *cc = 0x1; *ty = 0x04; break; + case TGSI_OPCODE_USNE: *cc = 0x5; *ty = 0x04; break; default: assert(0); - return 0; + return; + } +} + +static void +emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *rsrc1) +{ + struct nv50_program_exec *e = exec(pc); + struct nv50_reg *src1; + + e->inst[0] = 0x20000000; + + alloc_reg(pc, rsrc1); + check_swap_src_0_1(pc, &src0, &rsrc1); + + src1 = rsrc1; + if (src0->mod & rsrc1->mod & NV50_MOD_NEG) { + src1 = temp_temp(pc, e); + emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32); + } + + if (!pc->allow32 || src1->hw 
> 63 || + (src1->type != P_TEMP && src1->type != P_IMMD)) + set_long(pc, e); + + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (is_long(e)) { + e->inst[1] |= 1 << 26; + set_src_2(pc, src1, e); + } else { + e->inst[0] |= 0x8000; + if (src1->type == P_IMMD) + set_immd(pc, src1, e); + else + set_src_1(pc, src1, e); } + + if (src0->mod & NV50_MOD_NEG) + e->inst[0] |= 1 << 28; + else + if (src1->mod & NV50_MOD_NEG) + e->inst[0] |= 1 << 22; + + emit(pc, e); +} + +static void +emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1, + struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x60000000; + if (!pc->allow32) + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + + emit(pc, e); +} + +static void +emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x50000000; + if (!pc->allow32) + set_long(pc, e); + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != dst->type) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + if (is_long(e)) + e->inst[1] |= 0x0c << 24; + else + e->inst[0] |= 0x81 << 8; + + emit(pc, e); } static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, 
struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI); + emit_cvt(pc, dst, src, -1, CVT_FLOOR | CVT_F32_F32 | CVT_RI); } static void @@ -1282,15 +1501,9 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, } static INLINE void -emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); -} - -static INLINE void emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); + emit_cvt(pc, dst, src, -1, CVT_SAT | CVT_F32_F32); } static void @@ -1308,18 +1521,18 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, if (mask & (3 << 1)) { tmp[0] = alloc_temp(pc, NULL); - emit_minmax(pc, 4, tmp[0], src[0], zero); + emit_minmax(pc, NV50_MAX_F32, tmp[0], src[0], zero); } if (mask & (1 << 2)) { set_pred_wr(pc, 1, 0, pc->p->exec_tail); - tmp[1] = temp_temp(pc); - emit_minmax(pc, 4, tmp[1], src[1], zero); + tmp[1] = temp_temp(pc, NULL); + emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero); - tmp[3] = temp_temp(pc); - emit_minmax(pc, 4, tmp[3], src[3], neg128); - emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + tmp[3] = temp_temp(pc, NULL); + emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128); + emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128); emit_pow(pc, dst[2], tmp[1], tmp[3]); emit_mov(pc, dst[2], zero); @@ -1347,12 +1560,6 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, FREE(one); } -static INLINE void -emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG); -} - static void emit_kil(struct nv50_pc *pc, struct nv50_reg *src) { @@ -1364,14 +1571,9 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) set_long(pc, e); /* sets cond code to ALWAYS */ if (src) { - unsigned cvn = CVT_F32_F32; - set_pred(pc, 0x1 /* cc = LT */, r_pred, e); - - if (src->mod & NV50_MOD_NEG) - cvn |= 
CVT_NEG; - /* write predicate reg */ - emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); + /* write to predicate reg */ + emit_cvt(pc, NULL, src, r_pred, CVT_F32_F32); } emit(pc, e); @@ -1474,8 +1676,8 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], src[1]->mod |= NV50_MOD_ABS; src[2]->mod |= NV50_MOD_ABS; - emit_minmax(pc, 4, t[2], src[0], src[1]); - emit_minmax(pc, 4, t[2], src[2], t[2]); + emit_minmax(pc, NV50_MAX_F32, t[2], src[0], src[1]); + emit_minmax(pc, NV50_MAX_F32, t[2], src[2], t[2]); src[0]->mod = mod[0]; src[1]->mod = mod[1]; @@ -1778,6 +1980,21 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) q = 0x0403c000; m = 0xffff7fff; break; + case 0x2: + case 0x3: + /* ADD, SUB, SUBR b32 */ + m = ~(0x8000 | (127 << 16)); + q = ((e->inst[0] & (~m)) >> 2) | (1 << 26); + break; + case 0x5: + /* SAD */ + m = ~(0x81 << 8); + q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12); + break; + case 0x6: + /* MAD u16 */ + q = (e->inst[0] & (0x7f << 2)) << 12; + break; case 0x8: /* INTERP (move centroid, perspective and flat bits) */ m = ~0x03000100; @@ -1814,8 +2031,8 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) } /* Some operations support an optional negation flag. 
*/ -static boolean -negate_supported(const struct tgsi_full_instruction *insn, int i) +static int +get_supported_mods(const struct tgsi_full_instruction *insn, int i) { switch (insn->Instruction.Opcode) { case TGSI_OPCODE_ADD: @@ -1835,9 +2052,36 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) case TGSI_OPCODE_SCS: case TGSI_OPCODE_SIN: case TGSI_OPCODE_SUB: - return TRUE; + return NV50_MOD_NEG; + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_INEG: /* tgsi src sign toggle/set would be stupid */ + return NV50_MOD_ABS; + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: + return NV50_MOD_NEG | NV50_MOD_ABS; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: + return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32; + case TGSI_OPCODE_UADD: + return NV50_MOD_NEG | NV50_MOD_I32; + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_USHR: + return NV50_MOD_I32; default: - return FALSE; + return 0; } } @@ -1944,11 +2188,11 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) static struct nv50_reg * tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, - boolean neg) + int mod) { struct nv50_reg *r = NULL; - struct nv50_reg *temp; - unsigned sgn, c, swz; + struct nv50_reg *temp = NULL; + unsigned sgn, c, swz, cvn; if (src->Register.File != TGSI_FILE_CONSTANT) assert(!src->Register.Indirect); @@ -1988,7 +2232,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = &pc->immd[src->Register.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: - break; + return NULL; case TGSI_FILE_ADDRESS: r = pc->addr[src->Register.Index * 4 + c]; assert(r); @@ -2003,35 +2247,34 @@ tgsi_src(struct 
nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; } + cvn = (mod & NV50_MOD_I32) ? CVT_S32_S32 : CVT_F32_F32; + switch (sgn) { - case TGSI_UTIL_SIGN_KEEP: - break; case TGSI_UTIL_SIGN_CLEAR: - temp = temp_temp(pc); - emit_abs(pc, temp, r); - r = temp; - break; - case TGSI_UTIL_SIGN_TOGGLE: - if (neg) - r->mod = NV50_MOD_NEG; - else { - temp = temp_temp(pc); - emit_neg(pc, temp, r); - r = temp; - } + r->mod = NV50_MOD_ABS; break; case TGSI_UTIL_SIGN_SET: - temp = temp_temp(pc); - emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG); - r = temp; + r->mod = NV50_MOD_NEG_ABS; + break; + case TGSI_UTIL_SIGN_TOGGLE: + r->mod = NV50_MOD_NEG; break; default: - assert(0); + assert(!r->mod && sgn == TGSI_UTIL_SIGN_KEEP); break; } - if (r && r->acc >= 0 && r != temp) - return reg_instance(pc, r); + if ((r->mod & mod) != r->mod) { + temp = temp_temp(pc, NULL); + emit_cvt(pc, temp, r, -1, cvn); + r->mod = 0; + r = temp; + } else + r->mod |= mod & NV50_MOD_I32; + + assert(r); + if (r->acc >= 0 && r != temp) + return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2195,22 +2438,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fs = &inst->Src[i]; unsigned src_mask; - boolean neg_supp; + int mod_supp; src_mask = nv50_tgsi_src_mask(inst, i); - neg_supp = negate_supported(inst, i); + mod_supp = get_supported_mods(inst, i); if (fs->Register.File == TGSI_FILE_SAMPLER) unit = fs->Register.Index; for (c = 0; c < 4; c++) if (src_mask & (1 << c)) - src[i][c] = tgsi_src(pc, c, fs, neg_supp); + src[i][c] = tgsi_src(pc, c, fs, mod_supp); } brdc = temp = pc->r_brdc; if (brdc && brdc->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (sat) brdc = temp; } else @@ -2219,7 +2462,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; /* rdst[c] = dst[c]; */ /* done above */ - dst[c] = 
temp_temp(pc); + dst[c] = temp_temp(pc, NULL); } } @@ -2230,7 +2473,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_abs(pc, dst[c], src[0][c]); + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_ABS | CVT_F32_F32); } break; case TGSI_OPCODE_ADD: @@ -2252,8 +2496,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_ARL: assert(src[0][0]); - temp = temp_temp(pc); - emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32); + temp = temp_temp(pc, NULL); + emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); emit_arl(pc, dst[0], temp, 4); break; case TGSI_OPCODE_BGNLOOP: @@ -2282,7 +2526,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_CEIL, CVT_F32_F32 | CVT_RI); + CVT_CEIL | CVT_F32_F32 | CVT_RI); } break; case TGSI_OPCODE_CMP: @@ -2290,7 +2534,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32); + emit_cvt(pc, NULL, src[0][c], 1, CVT_F32_F32); emit_mov(pc, dst[c], src[1][c]); set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */ emit_mov(pc, dst[c], src[2][c]); @@ -2309,7 +2553,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_COS, brdc, temp); @@ -2397,8 +2641,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, struct nv50_reg *t[2]; assert(!temp); - t[0] = temp_temp(pc); - t[1] = temp_temp(pc); + t[0] = temp_temp(pc, NULL); + t[1] = temp_temp(pc, NULL); if (mask & 0x6) emit_mov(pc, t[0], src[0][0]); @@ -2419,6 +2663,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[3], 1.0f); } break; + case TGSI_OPCODE_F2I: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + 
CVT_TRUNC | CVT_S32_F32); + } + break; + case TGSI_OPCODE_F2U: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_TRUNC | CVT_U32_F32); + } + break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -2427,7 +2687,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_FRC: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2435,14 +2695,42 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_sub(pc, dst[c], src[0][c], temp); } break; + case TGSI_OPCODE_I2F: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_S32); + } + break; case TGSI_OPCODE_IF: assert(pc->if_lvl < NV50_MAX_COND_NESTING); - emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN, - CVT_F32_F32); + emit_cvt(pc, NULL, src[0][0], 0, CVT_ABS | CVT_F32_F32); pc->if_join[pc->if_lvl] = emit_joinat(pc); pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);; terminate_mbb(pc); break; + case TGSI_OPCODE_IMAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x08c, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_IMIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x0ac, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_INEG: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_S32_S32 | CVT_NEG); + } + break; case TGSI_OPCODE_KIL: assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]); emit_kil(pc, src[0][0]); @@ -2463,13 +2751,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, { struct nv50_reg *t[2]; - t[0] = temp_temp(pc); + t[0] = temp_temp(pc, NULL); if (mask & (1 << 1)) - t[1] = temp_temp(pc); + t[1] = temp_temp(pc, NULL); else t[1] = t[0]; - emit_abs(pc, t[0], src[0][0]); + emit_cvt(pc, t[0], src[0][0], -1, CVT_ABS | CVT_F32_F32); 
emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]); if (mask & (1 << 2)) emit_mov(pc, dst[2], t[1]); @@ -2488,7 +2776,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_LRP: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2507,14 +2795,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + emit_minmax(pc, 0x880, dst[c], src[0][c], src[1][c]); } break; case TGSI_OPCODE_MIN: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + emit_minmax(pc, 0x8a0, dst[c], src[0][c], src[1][c]); } break; case TGSI_OPCODE_MOV: @@ -2531,10 +2819,19 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mul(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_NOT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_not(pc, dst[c], src[0][c]); + } + break; case TGSI_OPCODE_POW: emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: @@ -2543,11 +2840,20 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; src[0][0]->mod |= NV50_MOD_ABS; emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); break; + case TGSI_OPCODE_SAD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_sad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } + break; case TGSI_OPCODE_SCS: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) @@ -2559,6 +2865,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (mask & (1 << 3)) emit_mov_immdval(pc, dst[3], 1.0); break; + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_ISHR: + 
case TGSI_OPCODE_USHR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_shift(pc, dst[c], src[0][c], src[1][c], + inst->Instruction.Opcode); + } + break; case TGSI_OPCODE_SIN: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); @@ -2566,7 +2882,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_SIN, brdc, temp); @@ -2577,12 +2893,23 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_SGT: case TGSI_OPCODE_SLE: case TGSI_OPCODE_SNE: - i = map_tgsi_setop_cc(inst->Instruction.Opcode); + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + { + uint8_t cc, ty; + + map_tgsi_setop_hw(inst->Instruction.Opcode, &cc, &ty); + for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); + emit_set(pc, cc, dst[c], -1, src[0][c], src[1][c], ty); } + } break; case TGSI_OPCODE_SUB: for (c = 0; c < 4; c++) { @@ -2612,11 +2939,72 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_TRUNC, CVT_F32_F32 | CVT_RI); + CVT_TRUNC | CVT_F32_F32 | CVT_RI); + } + break; + case TGSI_OPCODE_U2F: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32); + } + break; + case TGSI_OPCODE_UADD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_add_b32(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_UMAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x084, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_UMIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]); + } + 
break; + case TGSI_OPCODE_UMAD: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0, + temp); + emit_add_b32(pc, dst[c], temp, src[2][c]); + } + } + break; + case TGSI_OPCODE_UMUL: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0, + temp); + } + } + break; case TGSI_OPCODE_XPD: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & (1 << 0)) { emit_mul(pc, temp, src[0][2], src[1][1]); emit_msb(pc, dst[0], src[0][1], src[1][2], temp); @@ -2670,7 +3058,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } } - kill_temp_temp(pc); + kill_temp_temp(pc, NULL); pc->reg_instance_nr = 0; return TRUE; @@ -2679,7 +3067,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, static void prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) { - struct nv50_reg *reg = NULL; + struct nv50_reg *r, *reg = NULL; const struct tgsi_full_src_register *src; const struct tgsi_dst_register *dst; unsigned i, c, k, mask; @@ -2725,7 +3113,15 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) continue; k = tgsi_util_get_full_src_register_swizzle(src, c); - reg[src->Register.Index * 4 + k].acc = pc->insn_nr; + r = &reg[src->Register.Index * 4 + k]; + + /* If used before written, pre-allocate the reg, + * lest we overwrite results from a subroutine. 
+ */ + if (!r->acc && r->type == P_TEMP) + alloc_reg(pc, r); + + r->acc = pc->insn_nr; } } } @@ -2814,7 +3210,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { unsigned chn, mask = nv50_tgsi_src_mask(insn, i); - boolean neg_supp = negate_supported(insn, i); + int ms = get_supported_mods(insn, i); fs = &insn->Src[i]; if (fs->Register.File != fd->Register.File || @@ -2832,10 +3228,12 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, if (!(fd->Register.WriteMask & (1 << c))) continue; - /* no danger if src is copied to TEMP first */ - if ((s != TGSI_UTIL_SIGN_KEEP) && - (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) - continue; + if (s == TGSI_UTIL_SIGN_TOGGLE && !(ms & NV50_MOD_NEG)) + continue; + if (s == TGSI_UTIL_SIGN_CLEAR && !(ms & NV50_MOD_ABS)) + continue; + if ((s == TGSI_UTIL_SIGN_SET) && ((ms & 3) != 3)) + continue; rdep[c] |= nv50_tgsi_dst_revdep( insn->Instruction.Opcode, i, chn); @@ -2859,7 +3257,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (is_scalar_op(insn.Instruction.Opcode)) { pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); if (!pc->r_brdc) - pc->r_brdc = temp_temp(pc); + pc->r_brdc = temp_temp(pc, NULL); return nv50_program_tx_insn(pc, &insn); } pc->r_brdc = NULL; @@ -3224,6 +3622,8 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->insn_pos) + FREE(pc->insn_pos); FREE(pc); } @@ -3579,7 +3979,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(13, 2); + so = so_new(5, 8, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3615,7 +4015,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(64, 2); + so = so_new(6, 7, 2); 
so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3635,12 +4035,13 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } -static void +static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { struct nv50_program *fp = nv50->fragprog; struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; + uint32_t origin = 0x00000010; /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in @@ -3674,7 +4075,9 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) if (mode == PIPE_SPRITE_COORD_NONE) { m += n; continue; - } + } else + if (mode == PIPE_SPRITE_COORD_LOWER_LEFT) + origin = 0; } /* this is either PointCoord or replaced by sprite coords */ @@ -3685,6 +4088,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) ++m; } } + return origin; } static int @@ -3783,7 +4187,7 @@ nv50_linkage_validate(struct nv50_context *nv50) } /* now fill the stateobj */ - so = so_new(64, 0); + so = so_new(7, 57, 0); n = (m + 3) / 4; so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); @@ -3801,7 +4205,9 @@ nv50_linkage_validate(struct nv50_context *nv50) so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { - nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1); + so_data (so, + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff)); so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); so_datap (so, pcrd, 8); diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 5d9e18218a..5a4ab3508b 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -111,7 +111,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { ret = nouveau_bo_map(q->bo, 
NOUVEAU_BO_RD | - wait ? 0 : NOUVEAU_BO_NOWAIT); + (wait ? 0 : NOUVEAU_BO_NOWAIT)); if (ret) return false; q->result = ((uint32_t *)q->bo->map)[1]; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 7e039ea82e..28e2b35dea 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -189,6 +189,28 @@ nv50_screen_destroy(struct pipe_screen *pscreen) FREE(screen); } +static int +nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, + unsigned usage) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *ctx = screen->cur_ctx; + + if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX)) + return 0; + + /* Our vtxbuf got mapped, it can no longer be considered part of current + * state, remove it to avoid emitting reloc markers. + */ + if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf, + nouveau_bo(pb))) { + so_ref(NULL, &ctx->state.vtxbuf); + ctx->dirty |= NV50_NEW_ARRAYS; + } + + return 0; +} + struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { @@ -216,6 +238,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->is_format_supported = nv50_screen_is_format_supported; + screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map; nv50_screen_init_miptree_functions(pscreen); nv50_transfer_init_screen_functions(pscreen); @@ -228,7 +251,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->m2mf, 1); /* 2D object */ ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); @@ -237,7 +259,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->eng2d, 2); /* 3D object */ switch 
(chipset & 0xf0) { @@ -273,7 +294,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->tesla, 3); /* Sync notifier */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); @@ -284,7 +304,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static M2MF init */ - so = so_new(32, 0); + so = so_new(1, 3, 0); so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -293,7 +313,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref (NULL, &so); /* Static 2D init */ - so = so_new(64, 0); + so = so_new(4, 7, 0); so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -309,7 +329,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(256, 20); + so = so_new(40, 84, 20); so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); so_data (so, NV50TCL_COND_MODE_ALWAYS); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 61e24a5b57..a038a4e3c2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -2,6 +2,7 @@ #define __NV50_SCREEN_H__ #include "nouveau/nouveau_screen.h" +#include "nv50_context.h" struct nv50_screen { struct nouveau_screen base; @@ -9,6 +10,7 @@ struct nv50_screen { struct nouveau_winsys *nvws; unsigned cur_pctx; + struct nv50_context *cur_ctx; struct nouveau_grobj *tesla; struct nouveau_grobj *eng2d; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 30b2b0f91b..1f67df814b 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -35,7 +35,7 @@ static void * nv50_blend_state_create(struct 
pipe_context *pipe, const struct pipe_blend_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(5, 24, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); unsigned cmask = 0, i; @@ -146,7 +146,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, (wrap_mode(cso->wrap_r) << 6)); switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; break; @@ -157,7 +156,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, } switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MINF_LINEAR; break; @@ -280,7 +278,7 @@ static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(15, 21, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_rasterizer_stateobj *rso = CALLOC_STRUCT(nv50_rasterizer_stateobj); @@ -425,7 +423,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(8, 22, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); so_data (so, cso->depth.writemask ? 
1 : 0); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index c8bdf9dc27..f83232f43c 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -33,7 +33,7 @@ static void nv50_state_validate_fb(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(128, 18); + struct nouveau_stateobj *so = so_new(32, 79, 18); struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; @@ -185,6 +185,9 @@ nv50_state_emit(struct nv50_context *nv50) struct nv50_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; + /* I don't want to copy headers from the winsys. */ + screen->cur_ctx = nv50; + if (nv50->pctx_id != screen->cur_pctx) { if (nv50->state.fb) nv50->state.dirty |= NV50_NEW_FRAMEBUFFER; @@ -296,7 +299,7 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(nv50->rasterizer->so, &nv50->state.rast); if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { - so = so_new(5, 0); + so = so_new(1, 4, 0); so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); so_data (so, fui(nv50->blend_colour.color[0])); so_data (so, fui(nv50->blend_colour.color[1])); @@ -307,7 +310,7 @@ nv50_state_validate(struct nv50_context *nv50) } if (nv50->dirty & NV50_NEW_STIPPLE) { - so = so_new(33, 0); + so = so_new(1, 32, 0); so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, util_bswap32(nv50->stipple.stipple[i])); @@ -324,7 +327,7 @@ nv50_state_validate(struct nv50_context *nv50) goto scissor_uptodate; nv50->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); if (nv50->state.scissor_enabled) { so_data(so, (s->maxx << 16) | s->minx); @@ -353,7 +356,7 @@ scissor_uptodate: goto viewport_uptodate; nv50->state.viewport_bypass = bypass; - so = so_new(14, 0); + so = 
so_new(5, 9, 0); if (!bypass) { so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); @@ -397,7 +400,8 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4); + so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES + + nr * 8, PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index c4ca096d6a..bef548b728 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -199,16 +199,18 @@ nv50_tex_validate(struct nv50_context *nv50) { struct nouveau_stateobj *so; struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned p, push, nrlc; + unsigned p, start, push, nrlc; - for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); nrlc += nv50->miptree_nr[p]; } - push = push * 11 + 23 * PIPE_SHADER_TYPES + 4; + start = start * 2 + 4 * PIPE_SHADER_TYPES + 2; + push = push * 9 + 19 * PIPE_SHADER_TYPES + 2; nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES; - so = so_new(push, nrlc); + so = so_new(start, push, nrlc); if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE || nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) { diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 602adfc50d..f2e510fba6 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -152,7 +152,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return (hw_type | hw_size); } -boolean +void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, 
unsigned start, unsigned count) { @@ -182,7 +182,9 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - return ret; + /* XXX: not sure what to do if ret != TRUE: flush and retry? + */ + assert(ret); } static INLINE boolean @@ -275,7 +277,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, return TRUE; } -boolean +void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -317,8 +319,10 @@ nv50_draw_elements(struct pipe_context *pipe, OUT_RING (chan, 0); pipe_buffer_unmap(pscreen, indexBuffer); - - return ret; + + /* XXX: what to do if ret != TRUE? Flush and retry? + */ + assert(ret); } static INLINE boolean @@ -350,7 +354,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so = *pso; if (!so) - *pso = so = so_new(nv50->vtxelt_nr * 5, 0); + *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); switch (ve->nr_components) { case 4: @@ -411,8 +415,8 @@ nv50_vbo_validate(struct nv50_context *nv50) n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; - vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4); - vtxfmt = so_new(n_ve + 1, 0); + vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4); + vtxfmt = so_new(1, n_ve, 0); so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); for (i = 0; i < nv50->vtxelt_nr; i++) { diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ffe066d536..c14414fff6 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -27,9 +27,9 @@ static void r300_blitter_save_states(struct r300_context* r300) { - util_blitter_save_blend(r300->blitter, r300->blend_state); - util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state); - util_blitter_save_rasterizer(r300->blitter, r300->rs_state); + 
util_blitter_save_blend(r300->blitter, r300->blend_state.state); + util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); + util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); util_blitter_save_vertex_shader(r300->blitter, r300->vs); } diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff3..92de297ef1 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: @@ -212,6 
+219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a7..2808486492 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d5c2d63d39..5e4f6552c3 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_blit.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_flush.h" #include 
"r300_query.h" #include "r300_render.h" @@ -69,11 +70,13 @@ static void r300_destroy_context(struct pipe_context* context) FREE(query); } - FREE(r300->blend_color_state); + FREE(r300->blend_color_state.state); + FREE(r300->clip_state.state); FREE(r300->rs_block); - FREE(r300->scissor_state); + FREE(r300->scissor_state.state); FREE(r300->vertex_info); - FREE(r300->viewport_state); + FREE(r300->viewport_state.state); + FREE(r300->ztop_state.state); FREE(r300); } @@ -107,6 +110,35 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); + +static void r300_setup_atoms(struct r300_context* r300) +{ + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. 
*/ + make_empty_list(&r300->atom_list); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(rs, 22); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys) { @@ -155,11 +187,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, r300_shader_key_compare); - r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); + r300_setup_atoms(r300); + + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 232530b7dc..682b9179c8 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,9 +30,28 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +struct r300_context; + struct r300_fragment_shader; struct r300_vertex_shader; +struct r300_atom { + /* List pointers. */ + struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ + void* state; + /* Emit the state to the context. */ + void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. 
*/ + unsigned size; + /* Whether this atom should be emitted. */ + boolean dirty; + /* Another dirty flag that is never automatically cleared. */ + boolean always_dirty; +}; + struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ @@ -62,11 +81,6 @@ struct r300_rs_state { /* Draw-specific rasterizer state */ struct pipe_rasterizer_state rs; - /* Whether or not to enable the VTE. This is referenced at the very - * last moment during emission of VTE state, to decide whether or not - * the VTE should be used for transformation. */ - boolean enable_vte; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ @@ -102,19 +116,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ - - /* Whether everything is culled by scissoring. 
*/ - boolean empty_area; -}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ @@ -135,24 +136,17 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_BLEND 0x00000001 -#define R300_NEW_BLEND_COLOR 0x00000002 -#define R300_NEW_CLIP 0x00000004 -#define R300_NEW_DSA 0x00000008 #define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_RASTERIZER 0x00000080 #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_SCISSOR 0x00020000 #define R300_NEW_TEXTURE 0x00040000 #define R300_ANY_NEW_TEXTURES 0x03fc0000 #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 -#define R300_NEW_VIEWPORT 0x20000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -194,6 +188,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -230,6 +230,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { @@ -273,38 +276,40 @@ struct r300_context { struct r300_vertex_info* vertex_info; /* Various CSO state objects. */ + /* Beginning of atom list. */ + struct r300_atom atom_list; /* Blend state. */ - struct r300_blend_state* blend_state; + struct r300_atom blend_state; /* Blend color state. 
*/ - struct r300_blend_color_state* blend_color_state; + struct r300_atom blend_color_state; /* User clip planes. */ - struct pipe_clip_state clip_state; + struct r300_atom clip_state; /* Shader constants. */ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ - struct r300_dsa_state* dsa_state; + struct r300_atom dsa_state; /* Fragment shader. */ struct r300_fragment_shader* fs; /* Framebuffer state. We currently don't need our own version of this. */ struct pipe_framebuffer_state framebuffer_state; /* Rasterizer state. */ - struct r300_rs_state* rs_state; + struct r300_atom rs_state; /* RS block state. */ struct r300_rs_block* rs_block; /* Sampler states. */ struct r300_sampler_state* sampler_states[8]; int sampler_count; /* Scissor state. */ - struct r300_scissor_state* scissor_state; + struct r300_atom scissor_state; /* Texture states. */ struct r300_texture* textures[8]; int texture_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ - struct r300_viewport_state* viewport_state; + struct r300_atom viewport_state; /* ZTOP state. */ - struct r300_ztop_state ztop_state; + struct r300_atom ztop_state; /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -317,6 +322,8 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + /* Whether the TCL engine should be in bypass mode. 
*/ + boolean tcl_bypass; /** Combination of DBG_xxx flags */ unsigned debug; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee050..151f72b0fe 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 1dc9216a7b..9f93327e59 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_simple_list.h" #include "r300_context.h" #include "r300_cs.h" @@ -36,11 +37,13 @@ #include "r300_texture.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend) +void r300_emit_blend_state(struct r300_context* r300, void* state) { + struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); + BEGIN_CS(8); + OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (r300->framebuffer_state.nr_cbufs) { OUT_CS(blend->blend_control); @@ -52,14 +55,13 @@ void r300_emit_blend_state(struct r300_context* r300, OUT_CS(0); /* XXX also disable fastfill here once it's supported */ } - OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc) +void r300_emit_blend_color_state(struct r300_context* r300, void* state) { + struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); 
@@ -76,9 +78,9 @@ void r300_emit_blend_color_state(struct r300_context* r300, } } -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip) +void r300_emit_clip_state(struct r300_context* r300, void* state) { + struct pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -106,13 +108,13 @@ void r300_emit_clip_state(struct r300_context* r300, } -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa) +void r300_emit_dsa_state(struct r300_context* r300, void* state) { + struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); /* not needed since we use the 8bit alpha ref */ @@ -121,10 +123,16 @@ void r300_emit_dsa_state(struct r300_context* r300, }*/ OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); + + if (r300->framebuffer_state.zsbuf) { + OUT_CS(dsa->z_buffer_control); + OUT_CS(dsa->z_stencil_control); + } else { + OUT_CS(0); + OUT_CS(0); + } + OUT_CS(dsa->stencil_ref_mask); - OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { @@ -138,6 +146,8 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -160,11 +170,31 @@ static const float * get_shader_constant( /* Texture compare-fail value. */ /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, - * this is always (0,0,0,0). 
*/ + * this is always (0,0,0,0), right? */ case RC_STATE_SHADOW_AMBIENT: vec[3] = 0; break; + case RC_STATE_R300_VIEWPORT_SCALE: + if (r300->tcl_bypass) { + vec[0] = 1; + vec[1] = 1; + vec[2] = 1; + } else { + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; + } + break; + + case RC_STATE_R300_VIEWPORT_OFFSET: + if (!r300->tcl_bypass) { + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; + } + break; + default: debug_printf("r300: Implementation error: " "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); @@ -283,6 +313,22 @@ void r300_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } +static void r300_emit_fragment_depth_config(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + CS_LOCALS(r300); + + BEGIN_CS(4); + if (r300_fragment_shader_writes_depth(fs)) { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SHADER); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W24 | R300_W_SRC_US); + } else { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SCAN); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0 | R300_W_SRC_US); + } + END_CS; +} + void r500_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code) { @@ -374,8 +420,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -398,8 +446,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - 
RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -531,8 +581,9 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) +void r300_emit_rs_state(struct r300_context* r300, void* state) { + struct r300_rs_state* rs = (struct r300_rs_state*)state; CS_LOCALS(r300); BEGIN_CS(22); @@ -595,26 +646,47 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = r300->framebuffer_state.width; + maxy = r300->framebuffer_state.height; -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) -{ - if (r300->rs_state->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 
chipsets. */ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -650,8 +722,10 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } @@ -717,32 +791,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) -{ - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. 
The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - void r300_emit_vertex_format_state(struct r300_context* r300) { int i; @@ -867,26 +915,27 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport) +void r300_emit_viewport_state(struct r300_context* r300, void* state) { + struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - - if (r300->rs_state->enable_vte) { - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - } else { + if (r300->tcl_bypass) { + BEGIN_CS(2); OUT_CS_REG(R300_VAP_VTE_CNTL, 0); + END_CS; + } else { + BEGIN_CS(9); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } - END_CS; } void r300_emit_texture_count(struct r300_context* r300) @@ -910,6 +959,16 @@ void r300_emit_texture_count(struct r300_context* r300) } +void r300_emit_ztop_state(struct r300_context* r300, void* state) +{ + struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); 
+ END_CS; +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); @@ -933,18 +992,24 @@ void r300_emit_dirty_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; - int i, dirty_tex = 0; + struct r300_atom* atom; + unsigned i, dwords = 1024; + int dirty_tex = 0; boolean invalid = FALSE; - if (!(r300->dirty_state)) { - return; + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } } - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ + /* Make sure we have at least 2*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + if (!r300->winsys->check_cs(r300->winsys, dwords)) { r300->context.flush(&r300->context, 0, NULL); } @@ -984,10 +1049,12 @@ validate: } } /* ...occlusion query buffer... */ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. 
*/ if (r300->vbo) { @@ -1015,27 +1082,15 @@ validate: r300->dirty_state &= ~R300_NEW_QUERY; } - if (r300->dirty_state & R300_NEW_BLEND) { - r300_emit_blend_state(r300, r300->blend_state); - r300->dirty_state &= ~R300_NEW_BLEND; - } - - if (r300->dirty_state & R300_NEW_BLEND_COLOR) { - r300_emit_blend_color_state(r300, r300->blend_color_state); - r300->dirty_state &= ~R300_NEW_BLEND_COLOR; - } - - if (r300->dirty_state & R300_NEW_CLIP) { - r300_emit_clip_state(r300, &r300->clip_state); - r300->dirty_state &= ~R300_NEW_CLIP; - } - - if (r300->dirty_state & R300_NEW_DSA) { - r300_emit_dsa_state(r300, r300->dsa_state); - r300->dirty_state &= ~R300_NEW_DSA; + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + atom->emit(r300, atom->state); + atom->dirty = FALSE; + } } if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { + r300_emit_fragment_depth_config(r300, r300->fs); if (r300screen->caps->is_r500) { r500_emit_fragment_program_code(r300, &r300->fs->shader->code); } else { @@ -1060,21 +1115,11 @@ validate: r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; } - if (r300->dirty_state & R300_NEW_RASTERIZER) { - r300_emit_rs_state(r300, r300->rs_state); - r300->dirty_state &= ~R300_NEW_RASTERIZER; - } - if (r300->dirty_state & R300_NEW_RS_BLOCK) { r300_emit_rs_block_state(r300, r300->rs_block); r300->dirty_state &= ~R300_NEW_RS_BLOCK; } - if (r300->dirty_state & R300_NEW_SCISSOR) { - r300_emit_scissor_state(r300, r300->scissor_state); - r300->dirty_state &= ~R300_NEW_SCISSOR; - } - /* Samplers and textures are tracked separately but emitted together. 
*/ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { @@ -1096,11 +1141,6 @@ validate: r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); } - if (r300->dirty_state & R300_NEW_VIEWPORT) { - r300_emit_viewport_state(r300, r300->viewport_state); - r300->dirty_state &= ~R300_NEW_VIEWPORT; - } - if (dirty_tex) { r300_flush_textures(r300); } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 3797d3d332..05a6bfeae8 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,17 +31,13 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend); +void r300_emit_blend_state(struct r300_context* r300, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc); +void r300_emit_blend_color_state(struct r300_context* r300, void* state); -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip); +void r300_emit_clip_state(struct r300_context* r300, void* state); -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa); +void r300_emit_dsa_state(struct r300_context* r300, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); @@ -63,13 +59,12 @@ void r300_emit_query_begin(struct r300_context* r300, void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); +void r300_emit_rs_state(struct r300_context* r300, void* state); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor); +void r300_emit_scissor_state(struct r300_context* r300, void* state); void 
r300_emit_texture(struct r300_context* r300, struct r300_sampler_state* sampler, @@ -89,11 +84,12 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport); +void r300_emit_viewport_state(struct r300_context* r300, void* state); void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, void* state); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc..c78a7673a3 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -37,6 +37,7 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); /* We probably need to flush Draw, but we may have been called from @@ -54,7 +55,15 @@ static void r300_flush(struct pipe_context* pipe, r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. 
*/ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4e1b61ca40..60ea9c171d 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -63,6 +63,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, fs_inputs->fog = i; break; + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + fs_inputs->wpos = i; + break; + default: assert(0); } @@ -114,6 +119,9 @@ static void allocate_hardware_inputs( if (inputs->fog != ATTR_UNUSED) { allocate(mydata, inputs->fog, reg++); } + if (inputs->wpos != ATTR_UNUSED) { + allocate(mydata, inputs->wpos, reg++); + } } static void get_compare_state( @@ -144,6 +152,7 @@ static void r300_translate_fragment_shader( struct r300_fragment_shader* fs = r300->fs; struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + int wpos = fs->inputs.wpos; /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); @@ -171,6 +180,18 @@ static void r300_translate_fragment_shader( fs->shadow_samplers = compiler.Base.Program.ShadowSamplers; + /** + * Transform the program to support WPOS. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. */ + if (wpos != ATTR_UNUSED) { + /* Moving the input to some other reg is not really necessary. 
*/ + rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); + } + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 0aa1da07f8..361813891f 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2186,6 +2188,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) +# define R500_SRC_ALPHA_0_NO_READ (1 << 30) +# define R500_SRC_ALPHA_1_NO_READ (1 << 31) /* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) @@ -2281,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2542,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index a89cb633e0..710d850163 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,6 +26,8 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "indices/u_indices.h" + #include "pipe/p_inlines.h" #include "util/u_memory.h" @@ -69,16 +71,11 @@ uint32_t r300_translate_primitive(unsigned prim) } } -static boolean r300_nothing_to_draw(struct r300_context *r300) -{ - return r300->rs_state->rs.scissor && - r300->scissor_state->scissor.empty_area; -} - static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, unsigned mode) { - uint32_t color_control = r300->rs_state->color_control; + struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state; + uint32_t color_control = rs->color_control; /* By default (see 
r300_state.c:r300_create_rs_state) color_control is * initialized to provoking the first vertex. @@ -98,7 +95,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, * ~ C. */ - if (r300->rs_state->rs.flatshade_first) { + if (rs->rs.flatshade_first) { switch (mode) { case PIPE_PRIM_TRIANGLE_FAN: color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; @@ -119,6 +116,44 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static void r300_emit_draw_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; + unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); + unsigned i; + uint32_t* map; + CS_LOCALS(r300); + + map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, + start * vertex_size, count * vertex_size, + PIPE_BUFFER_USAGE_CPU_READ); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); + for (i = 0; i < count * vertex_size; i++) { + if (i % vertex_size == 0) { + //debug_printf("r300: -- vert --\n"); + } + //debug_printf("r300: 0x%08x\n", *map); + OUT_CS(*map); + map++; + } + END_CS; + + pipe_buffer_unmap(r300->context.screen, vbo); +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -212,43 +247,84 @@ validate: return TRUE; } +static struct pipe_buffer* r300_translate_elts(struct r300_context* r300, + struct pipe_buffer* elts, + unsigned* size, + unsigned* mode, + unsigned* 
count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + void *in_map, *out_map; + unsigned out_prim, out_index_size, out_nr; + u_translate_func out_translate; + + (void)u_index_translator(~0, *mode, *size, *count, PV_LAST, PV_LAST, + &out_prim, &out_index_size, &out_nr, &out_translate); + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + out_index_size * out_nr); + + in_map = pipe_buffer_map(screen, elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + out_translate(in_map, *count, out_map); + + pipe_buffer_unmap(screen, elts); + pipe_buffer_unmap(screen, new_elts); + + *size = out_index_size; + *mode = out_prim; + *count = out_nr; + + return new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. */ -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: use aux/indices functions to split this into smaller + * primitives. 
+ */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; + } + + if (indexSize == 1) { + indexBuffer = r300_translate_elts(r300, indexBuffer, + &indexSize, &mode, &count); } if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return FALSE; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return FALSE; + goto cleanup; } r300_emit_dirty_state(r300); @@ -258,49 +334,52 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); - return TRUE; +cleanup: + if (indexBuffer != orgIndexBuffer) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. */ -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count) +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) { - return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); } -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: driver needs to handle this -- use the functions in + * aux/indices to split this into several smaller primitives. 
+ */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; } r300_emit_dirty_state(r300); - r300_emit_aos(r300, start); - - r300_emit_draw_arrays(r300, mode, count); - - return TRUE; + if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { + r300_emit_draw_immediate(r300, mode, start, count); + } else { + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** @@ -309,7 +388,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, ***************************************************************************/ /* SW TCL arrays, using Draw. */ -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, +void r300_swtcl_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) @@ -318,11 +397,7 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, int i; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -346,12 +421,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } - - return TRUE; } /* SW TCL elements, using Draw. 
*/ -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -365,11 +438,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, void* indices; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -400,8 +469,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(r300->draw, 0, start, start + count - 1, NULL); - - return TRUE; } /* Object for rendering using Draw. */ diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index da83069083..27b5e6a963 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -25,35 +25,35 @@ uint32_t r300_translate_primitive(unsigned prim); -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count); - -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count); - -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned 
minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count); + +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count); + +void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count); + +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); #endif /* R300_RENDER_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 2a8667d483..287664b1d2 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -83,6 +83,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: @@ -143,9 +144,11 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case PIPE_CAP_SM3: - return 1; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } default: debug_printf("r300: Implementation error: Bad param %d\n", param); diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h index 85184e2cfd..6796841b29 100644 --- a/src/gallium/drivers/r300/r300_shader_semantics.h +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -40,6 +40,7 @@ struct r300_shader_semantics { int bcolor[ATTR_COLOR_COUNT]; int generic[ATTR_GENERIC_COUNT]; int fog; + int wpos; }; static INLINE void r300_shader_semantics_reset( @@ 
-50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset( info->pos = ATTR_UNUSED; info->psize = ATTR_UNUSED; info->fog = ATTR_UNUSED; + info->wpos = ATTR_UNUSED; for (i = 0; i < ATTR_COLOR_COUNT; i++) { info->color[i] = ATTR_UNUSED; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 49072462ec..281ff68449 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,6 +42,120 @@ /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ +static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 0, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 1, and the following state is set, the colorbuffer + * will not be changed. 
+ * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. 
*/ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + /* Create a new blend state based on the CSO blend state. * * This encompasses alpha blending, logic/raster ops, and blend dithering. */ @@ -66,7 +181,11 @@ static void* r300_create_blend_state(struct pipe_context* pipe, ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); - /* optimization: some operations do not require the destination color */ + /* Optimization: some operations do not require the destination color. 
+ * + * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled, + * otherwise blending gives incorrect results. It seems to be + * a hardware bug. */ if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || dstRGB != PIPE_BLENDFACTOR_ZERO || @@ -78,11 +197,81 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcA == PIPE_BLENDFACTOR_DST_COLOR || srcA == PIPE_BLENDFACTOR_DST_ALPHA || srcA == PIPE_BLENDFACTOR_INV_DST_COLOR || - srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) + srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { + /* Enable reading from the colorbuffer. */ blend->blend_control |= R300_READ_ENABLE; - /* XXX implement the optimization with DISCARD_SRC_PIXELS*/ - /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */ + if (r300_screen(r300_context(pipe)->context.screen)->caps->is_r500) { + /* Optimization: Depending on incoming pixels, we can + * conditionally disable the reading in hardware... */ + if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN && + eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) { + /* Disable reading if SRC_ALPHA == 0. */ + if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_0_NO_READ; + } + + /* Disable reading if SRC_ALPHA == 1. */ + if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_1_NO_READ; + } + } + } + } + + /* Optimization: discard pixels which don't change the colorbuffer. + * + * The code below is non-trivial and some math is involved. + * + * Discarding pixels must be disabled when FP16 AA is enabled. 
+ * This is a hardware bug. Also, this implementation wouldn't work + * with FP blending enabled and equation clamping disabled. + * + * Equations other than ADD are rarely used and therefore won't be + * optimized. */ + if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) && + (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) { + /* ADD: X+Y + * REVERSE_SUBTRACT: Y-X + * + * The idea is: + * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1, + * then CB will not be changed. + * + * Given the srcFactor and dstFactor variables, we can derive + * what src and dst should be equal to and discard appropriate + * pixels. + */ + if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + } else if (blend_discard_if_src_alpha_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; + } else if (blend_discard_if_src_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; + } else if (blend_discard_if_src_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; + } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0; + } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1; + } + } /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { @@ -128,8 +317,8 @@ static void r300_bind_blend_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->blend_state = (struct r300_blend_state*)state; - r300->dirty_state |= R300_NEW_BLEND; + r300->blend_state.state = state; + r300->blend_state.dirty = TRUE; } /* Free blend state. 
*/ @@ -151,20 +340,24 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); + struct r300_blend_color_state* state = + (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); - r300->blend_color_state->blend_color = uc.ui; + state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ - r300->blend_color_state->blend_color_red_alpha = + state->blend_color_red_alpha = float_to_fixed10(color->color[0]) | (float_to_fixed10(color->color[3]) << 16); - r300->blend_color_state->blend_color_green_blue = + state->blend_color_green_blue = float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); - r300->dirty_state |= R300_NEW_BLEND_COLOR; + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; + r300->blend_color_state.dirty = TRUE; } static void r300_set_clip_state(struct pipe_context* pipe, @@ -173,12 +366,15 @@ static void r300_set_clip_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (r300_screen(pipe->screen)->caps->has_tcl) { - r300->clip_state = *state; - r300->dirty_state |= R300_NEW_CLIP; + memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. 
@@ -271,9 +467,11 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); - r300->dsa_state = (struct r300_dsa_state*)state; - r300->dirty_state |= R300_NEW_DSA; + r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; + r300->dsa_state.dirty = TRUE; } /* Free DSA state. */ @@ -283,37 +481,11 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } - - scissor->empty_area = state->minx >= state->maxx || - state->miny >= state->maxy; -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct pipe_scissor_state scissor; if (r300->draw) { draw_flush(r300->draw); @@ -321,18 +493,12 @@ static void r300->framebuffer_state = *state; - scissor.minx = scissor.miny = 0; - scissor.maxx = state->width; - scissor.maxy = state->height; - r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. 
*/ - if (!r300->rs_state || !r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - r300->dirty_state |= R300_NEW_BLEND; + + r300->blend_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; + r300->scissor_state.dirty = TRUE; } /* Create fragment shader state. */ @@ -367,6 +533,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; r300_pick_fragment_shader(r300); + if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + } + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -407,8 +577,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Copy rasterizer state for Draw. */ rs->rs = *state; - rs->enable_vte = !state->bypass_vs_clip_and_viewport; - #ifdef PIPE_ARCH_LITTLE_ENDIAN rs->vap_control_status = R300_VC_NO_SWAP; #else @@ -524,12 +692,23 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->rs_state = rs; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + } else { + r300->tcl_bypass = FALSE; + } + + r300->rs_state.state = rs; + r300->rs_state.dirty = TRUE; + /* XXX Why is this still needed, dammit!? */ + r300->scissor_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; + /* XXX Clean these up when we move to atom emits */ - r300->dirty_state |= R300_NEW_RASTERIZER; r300->dirty_state |= R300_NEW_RS_BLOCK; - r300->dirty_state |= R300_NEW_SCISSOR; - r300->dirty_state |= R300_NEW_VIEWPORT; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } /* Free rasterizer state. 
*/ @@ -556,7 +735,8 @@ static void* sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, - state->min_mip_filter); + state->min_mip_filter, + state->max_anisotropy > 1.0); /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ /* We must pass these to the emit function to clamp them properly. */ @@ -664,49 +844,51 @@ static void r300_set_scissor_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300_set_scissor_regs(state, &r300->scissor_state->scissor, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); - /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } + r300->scissor_state.dirty = TRUE; } static void r300_set_viewport_state(struct pipe_context* pipe, const struct pipe_viewport_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; /* Do the transform in HW. 
*/ - r300->viewport_state->vte_control = R300_VTX_W0_FMT; + viewport->vte_control = R300_VTX_W0_FMT; if (state->scale[0] != 1.0f) { - r300->viewport_state->xscale = state->scale[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; + viewport->xscale = state->scale[0]; + viewport->vte_control |= R300_VPORT_X_SCALE_ENA; } if (state->scale[1] != 1.0f) { - r300->viewport_state->yscale = state->scale[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; + viewport->yscale = state->scale[1]; + viewport->vte_control |= R300_VPORT_Y_SCALE_ENA; } if (state->scale[2] != 1.0f) { - r300->viewport_state->zscale = state->scale[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; + viewport->zscale = state->scale[2]; + viewport->vte_control |= R300_VPORT_Z_SCALE_ENA; } if (state->translate[0] != 0.0f) { - r300->viewport_state->xoffset = state->translate[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; + viewport->xoffset = state->translate[0]; + viewport->vte_control |= R300_VPORT_X_OFFSET_ENA; } if (state->translate[1] != 0.0f) { - r300->viewport_state->yoffset = state->translate[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; + viewport->yoffset = state->translate[1]; + viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA; } if (state->translate[2] != 0.0f) { - r300->viewport_state->zoffset = state->translate[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; + viewport->zoffset = state->translate[2]; + viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA; } - r300->dirty_state |= R300_NEW_VIEWPORT; + r300->viewport_state.dirty = TRUE; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } static void r300_set_vertex_buffers(struct pipe_context* pipe, @@ -778,7 +960,13 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs = vs; - r300->dirty_state |= R300_NEW_VERTEX_SHADER | 
R300_NEW_VERTEX_SHADER_CONSTANTS; + if (r300->fs) { + r300_vertex_shader_setup_wpos(r300); + } + + r300->dirty_state |= + R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS | + R300_NEW_VERTEX_FORMAT; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 727ae7ade6..192846411b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -139,10 +139,10 @@ static void r300_vertex_psc(struct r300_context* r300) /* If TCL is bypassed, map vertex streams to equivalent VS output * locations. */ - if (r300->rs_state->enable_vte) { - stream_tab = identity; - } else { + if (r300->tcl_bypass) { stream_tab = r300->vs->stream_loc_notcl; + } else { + stream_tab = identity; } /* Vertex shaders have no semantics on their inputs, @@ -333,6 +333,8 @@ static void r300_update_rs_block(struct r300_context* r300, void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; if (r300_screen(r300->context.screen)->caps->is_r500) { rX00_rs_col = r500_rs_col; @@ -348,7 +350,7 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ rX00_rs_col(rs, col_count, i, FALSE); @@ -410,6 +412,16 @@ static void r300_update_rs_block(struct r300_context* r300, } } + /* Rasterize WPOS. */ + /* If the FS doesn't need it, it's not written by the VS. 
*/ + if (fs_inputs->wpos != ATTR_UNUSED) { + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + rX00_rs_tex_write(rs, tex_count, fp_offset); + + fp_offset++; + tex_count++; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { rX00_rs_col(rs, 0, 0, TRUE); @@ -496,7 +508,8 @@ static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) static void r300_update_ztop(struct r300_context* r300) { - r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; /* This is important enough that I felt it warranted a comment. * @@ -518,31 +531,37 @@ static void r300_update_ztop(struct r300_context* r300) * 5) Depth writes in fragment shader * 6) Outstanding occlusion queries * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * * ~C. */ /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ - r300->fs->info.uses_kill)) { /* (2) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } + + r300->ztop_state.dirty = TRUE; } void r300_update_derived_state(struct 
r300_context* r300) { + /* XXX */ if (r300->dirty_state & (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | - R300_NEW_VERTEX_FORMAT)) { + R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) { r300_update_derived_shader_state(r300); } - if (r300->dirty_state & - (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { - r300_update_ztop(r300); - } + r300_update_ztop(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index dbe42edd91..35be00e1b0 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -257,38 +257,37 @@ static INLINE uint32_t r300_translate_wrap(int wrap) } } -static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) +static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip, + int is_anisotropic) { uint32_t retval = 0; - switch (min) { + if (is_anisotropic) + retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO; + else { + switch (min) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MIN_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MIN_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MIN_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", min); assert(0); break; - } - switch (mag) { + } + switch (mag) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MAG_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MAG_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MAG_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", mag); assert(0); break; + } } switch (mip) { case PIPE_TEX_MIPFILTER_NONE: diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index bcd4c030f9..b0f309695c 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ 
b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(20 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(16 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -66,8 +66,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); - OUT_CS_REG(R300_US_W_FMT, 0x0); /*** VAP ***/ /* Sign/normalize control */ @@ -117,10 +115,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); + + if (caps->family >= CHIP_FAMILY_RV350) { + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4d..a9bbdd56d8 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,6 +30,18 @@ #include "r300_texture.h" #include "r300_screen.h" +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 
1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; @@ -92,33 +104,67 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } /** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + +/** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. */ if (level > tex->tex.last_level) { debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(u_minify(tex->tex.width0, level), tile_width); + + /* Should already be aligned except for S3TC. 
*/ + return align(util_format_get_stride(tex->tex.format, width), 32); +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(u_minify(tex->tex.height0, level), tile_height); + + return util_format_get_nblocksy(tex->tex.format, height); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; - for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); + debug_printf("r300: Making miptree for texture, format %s\n", pf_name(base->format)); + for (i = 0; i <= base->last_level; i++) { stride = r300_texture_get_stride(tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -132,9 +178,9 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->pitch[i] = stride / util_format_get_blocksize(base->format); debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } @@ -163,7 +209,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index c4ed0d712f..68aef70872 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "r300_vs.h" +#include "r300_fs.h" #include "r300_context.h" #include "r300_screen.h" @@ -33,6 +34,8 @@ #include "radeon_compiler.h" +#include "util/u_math.h" + /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( struct tgsi_shader_info* info, @@ -88,11 +91,13 @@ static void r300_shader_read_vs_outputs( } } -static void r300_shader_vap_output_fmt( - struct r300_shader_semantics* vs_outputs, - uint* hwfmt) +static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) { + struct r300_shader_semantics* vs_outputs = &vs->outputs; + uint32_t* hwfmt = vs->hwfmt; int i, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Do the actual vertex_info setup. * @@ -119,13 +124,19 @@ static void r300_shader_vap_output_fmt( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + } + } /* Texture coordinates. */ gen_count = 0; @@ -146,6 +157,9 @@ static void r300_shader_vap_output_fmt( /* XXX magic */ assert(gen_count <= 8); + + /* WPOS. */ + vs->wpos_tex_output = gen_count; } /* Sets up stream mapping to equivalent VS outputs if TCL is bypassed @@ -155,6 +169,8 @@ static void r300_stream_locations_notcl( int* stream_loc) { int i, tabi = 0, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Position. */ stream_loc[tabi++] = 0; @@ -166,14 +182,14 @@ static void r300_stream_locations_notcl( /* Colors. 
*/ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { stream_loc[tabi++] = 2 + i; } } /* Back-face colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { stream_loc[tabi++] = 4 + i; } } @@ -181,7 +197,7 @@ static void r300_stream_locations_notcl( /* Texture coordinates. */ gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; @@ -195,8 +211,12 @@ static void r300_stream_locations_notcl( gen_count++; } - /* XXX magic */ - assert(gen_count <= 8); + /* WPOS. */ + if (vs_outputs->wpos != ATTR_UNUSED) { + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } for (; tabi < 16;) { stream_loc[tabi++] = -1; @@ -209,6 +229,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) struct r300_shader_semantics* outputs = &vs->outputs; struct tgsi_shader_info* info = &vs->info; int i, reg = 0; + boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED || + outputs->bcolor[1] != ATTR_UNUSED; /* Fill in the input mapping */ for (i = 0; i < info->num_inputs; i++) @@ -226,14 +248,30 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) c->code->outputs[outputs->psize] = reg++; } + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + /* Colors. 
*/ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; + } else if (any_bcolor_used) { + reg++; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->bcolor[i] != ATTR_UNUSED) { + c->code->outputs[outputs->bcolor[i]] = reg++; + } else if (any_bcolor_used) { + reg++; + } + } /* Texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT; i++) { @@ -246,6 +284,33 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) if (outputs->fog != ATTR_UNUSED) { c->code->outputs[outputs->fog] = reg++; } + + /* WPOS. */ + if (outputs->wpos != ATTR_UNUSED) { + c->code->outputs[outputs->wpos] = reg++; + } +} + +static void r300_insert_wpos(struct r300_vertex_program_compiler* c, + struct r300_shader_semantics* outputs) +{ + int i, lastOutput = 0; + + /* Find the max output index. */ + lastOutput = MAX2(lastOutput, outputs->psize); + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->color[i]); + lastOutput = MAX2(lastOutput, outputs->bcolor[i]); + } + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->generic[i]); + } + lastOutput = MAX2(lastOutput, outputs->fog); + + /* Set WPOS after the last output. */ + lastOutput++; + rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */ + outputs->wpos = lastOutput; } void r300_translate_vertex_shader(struct r300_context* r300, @@ -256,8 +321,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Initialize. 
*/ r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); - r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); /* Setup the compiler */ rc_init(&compiler.Base); @@ -277,9 +340,15 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); - compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs); + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; + /* Insert the WPOS output. */ + r300_insert_wpos(&compiler, &vs->outputs); + + r300_shader_vap_output_fmt(vs); + r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); + /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { @@ -292,3 +361,30 @@ void r300_translate_vertex_shader(struct r300_context* r300, rc_destroy(&compiler.Base); vs->translated = TRUE; } + +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) +{ + struct r300_vertex_shader* vs = r300->vs; + int tex_output = r300->vs->wpos_tex_output; + uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; + uint32_t* hwfmt = vs->hwfmt; + + if (r300->fs->inputs.wpos != ATTR_UNUSED) { + /* Enable WPOS in VAP. */ + if (!(hwfmt[1] & tex_fmt)) { + hwfmt[1] |= tex_fmt; + hwfmt[3] |= (4 << (3 * tex_output)); + + assert(tex_output < 8); + return TRUE; + } + } else { + /* Disable WPOS in VAP. */ + if (hwfmt[1] & tex_fmt) { + hwfmt[1] &= ~tex_fmt; + hwfmt[3] &= ~(4 << (3 * tex_output)); + return TRUE; + } + } + return FALSE; +} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 67e9db5366..18cfeee3cd 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -43,6 +43,9 @@ struct r300_vertex_shader { /* Stream locations for SWTCL or if TCL is bypassed. */ int stream_loc_notcl[16]; + /* Output stream location for WPOS. 
*/ + int wpos_tex_output; + /* Has this shader been translated yet? */ boolean translated; @@ -53,4 +56,7 @@ struct r300_vertex_shader { void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); +/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); + #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index b3ece9d8ed..2a27e5ce64 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -103,7 +103,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -static boolean +static void softpipe_draw_range_elements_instanced(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -116,24 +116,24 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, unsigned instanceCount); -boolean +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_range_elements_instanced(pipe, - NULL, - 0, - 0, - 0xffffffff, - mode, - start, - count, - 0, - 1); + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } -boolean +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -141,35 +141,35 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - min_index, - max_index, - mode, - start, - count, - 0, - 1); + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + min_index, + max_index, + mode, + start, + count, + 0, + 1); } -boolean 
+void softpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_range_elements_instanced(pipe, - indexBuffer, - indexSize, - 0, - 0xffffffff, - mode, - start, - count, - 0, - 1); + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } void @@ -214,7 +214,7 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, instanceCount); } -static boolean +static void softpipe_draw_range_elements_instanced(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -231,7 +231,7 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, unsigned i; if (!softpipe_check_render_cond(sp)) - return TRUE; + return; sp->reduced_api_prim = u_reduced_prim(mode); @@ -290,6 +290,4 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, softpipe_unmap_constant_buffers(sp); sp->dirty_render_cache = TRUE; - - return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 3153d6e6a4..0f9b1546df 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -184,14 +184,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *, void softpipe_update_derived( struct softpipe_context *softpipe ); -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); -boolean +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +void softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); +void 
softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index e26153b1d9..1ae8fecacf 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2,7 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. + * Copyright 2008-2010 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -514,21 +514,15 @@ static float compute_lambda_1d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float rho = MAX2(dsdx, dsdy) * texture->width0; - float lambda; - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - return lambda; + return util_fast_log2(rho); } @@ -536,8 +530,7 @@ static float compute_lambda_2d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -548,13 +541,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, float maxx = MAX2(dsdx, dsdy) * texture->width0; float maxy = MAX2(dtdx, dtdy) * texture->height0; float rho = MAX2(maxx, maxy); - float lambda; - lambda = util_fast_log2(rho); - lambda 
+= lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; + return util_fast_log2(rho); } @@ -562,8 +550,7 @@ static float compute_lambda_3d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -576,31 +563,26 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, float maxx = MAX2(dsdx, dsdy) * texture->width0; float maxy = MAX2(dtdx, dtdy) * texture->height0; float maxz = MAX2(dpdx, dpdy) * texture->depth0; - float rho, lambda; + float rho; rho = MAX2(maxx, maxy); rho = MAX2(rho, maxz); - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; + return util_fast_log2(rho); } /** * Compute lambda for a vertex texture sampler. - * Since there aren't derivatives to use, just return the LOD bias. + * Since there aren't derivatives to use, just return 0. 
*/ static float compute_lambda_vert(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { - return lodbias; + return 0.0f; } @@ -769,7 +751,8 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -827,7 +810,8 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -866,7 +850,8 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -914,7 +899,8 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -949,7 +935,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = 
sp_sampler_varient(tgsi_sampler); @@ -996,7 +983,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1035,7 +1023,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1076,7 +1065,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1115,7 +1105,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1161,7 +1152,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1209,7 +1201,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], 
+ enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1261,29 +1254,60 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, } +/* Calculate level of detail for every fragment. + * Note that lambda has already been biased by global LOD bias. + */ +static INLINE void +compute_lod(const struct pipe_sampler_state *sampler, + const float biased_lambda, + const float lodbias[QUAD_SIZE], + float lod[QUAD_SIZE]) +{ + uint i; + + for (i = 0; i < QUAD_SIZE; i++) { + lod[i] = biased_lambda + lodbias[i]; + lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod); + } +} + + static void mip_filter_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; int level0; float lambda; + float lod[QUAD_SIZE]; + + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); - lambda = samp->compute_lambda(samp, s, t, p, lodbias); + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. 
+ */ + lambda = lod[0]; level0 = (int)lambda; if (lambda < 0.0) { samp->level = 0; - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else if (level0 >= texture->last_level) { samp->level = texture->last_level; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { float levelBlend = lambda - level0; @@ -1292,10 +1316,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, int c,j; samp->level = level0; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0); samp->level = level0+1; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1); for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { @@ -1311,23 +1335,36 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; float lambda; + float lod[QUAD_SIZE]; - lambda = samp->compute_lambda(samp, s, t, p, lodbias); + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. 
+ */ + lambda = lod[0]; if (lambda < 0.0) { samp->level = 0; - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { samp->level = (int)(lambda + 0.5) ; samp->level = MIN2(samp->level, (int)texture->last_level); - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } #if 0 @@ -1345,17 +1382,32 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - float lambda = samp->compute_lambda(samp, s, t, p, lodbias); + float lambda; + float lod[QUAD_SIZE]; + + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. 
+ */ + lambda = lod[0]; if (lambda < 0.0) { - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } } @@ -1371,15 +1423,28 @@ mip_filter_linear_2d_linear_repeat_POT( const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; int level0; float lambda; + float lod[QUAD_SIZE]; - lambda = compute_lambda_2d(samp, s, t, p, lodbias); + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. 
+ */ + lambda = lod[0]; level0 = (int)lambda; /* Catches both negative and large values of level0: @@ -1390,7 +1455,7 @@ mip_filter_linear_2d_linear_repeat_POT( else samp->level = texture->last_level; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { float levelBlend = lambda - level0; @@ -1399,10 +1464,10 @@ mip_filter_linear_2d_linear_repeat_POT( int c,j; samp->level = level0; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0); samp->level = level0+1; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1); for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { @@ -1422,7 +1487,8 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1430,7 +1496,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, int j, k0, k1, k2, k3; float val; - samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba ); + samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba); /** * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' @@ -1508,7 +1574,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1589,7 +1656,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, * is not active, this will point somewhere 
deeper into the * pipeline, eg. to mip_filter or even img_filter. */ - samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba); + samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba); } @@ -1862,7 +1929,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, break; } - if (sampler->compare_mode != FALSE) { + if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { samp->compare = sample_compare; } else { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index b0797711d3..b6e66c998a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -2,6 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2010 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -46,14 +47,14 @@ typedef void (*wrap_linear_func)(const float s[4], typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias); + const float p[QUAD_SIZE]); typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index fa7f6cb3bb..66259fd010 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -266,8 +266,6 @@ struct svga_hw_draw_state unsigned ts[16][TS_MAX]; float cb[PIPE_SHADER_TYPES][CB_MAX][4]; - unsigned shader_id[PIPE_SHADER_TYPES]; - struct svga_shader_result *fs; struct svga_shader_result *vs; struct svga_hw_view_state 
views[PIPE_MAX_SAMPLERS]; diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 71a552862e..0f24ef4ee8 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -149,7 +149,7 @@ retry: -static boolean +static void svga_draw_range_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -162,7 +162,7 @@ svga_draw_range_elements( struct pipe_context *pipe, enum pipe_error ret = 0; if (!u_trim_pipe_prim( prim, &count )) - return TRUE; + return; /* * Mark currently bound target surfaces as dirty @@ -183,7 +183,7 @@ svga_draw_range_elements( struct pipe_context *pipe, #ifdef DEBUG if (svga->curr.vs->base.id == svga->debug.disable_shader || svga->curr.fs->base.id == svga->debug.disable_shader) - return 0; + return; #endif if (svga->state.sw.need_swtnl) @@ -225,31 +225,29 @@ svga_draw_range_elements( struct pipe_context *pipe, svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); } - - return ret == PIPE_OK; } -static boolean +static void svga_draw_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - prim, start, count ); + svga_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + prim, start, count ); } -static boolean +static void svga_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements(pipe, NULL, 0, - start, start + count - 1, - prim, - start, count); + svga_draw_range_elements(pipe, NULL, 0, + start, start + count - 1, + prim, + start, count); } diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index a461a86dd3..5f1213e46a 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ 
b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -111,6 +111,13 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader) util_bitmask_clear( svga->fs_bm, result->id ); svga_destroy_shader_result( result ); + + /* + * Remove stale references to this result to ensure a new result on the + * same address will be detected as a change. + */ + if(result == svga->state.hw_draw.fs) + svga->state.hw_draw.fs = NULL; } FREE((void *)fs->base.tokens); diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 78053e755e..460a101f8c 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -76,7 +76,6 @@ static INLINE unsigned translate_img_filter( unsigned filter ) switch (filter) { case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: return SVGA3D_TEX_FILTER_ANISOTROPIC; default: assert(0); return SVGA3D_TEX_FILTER_NEAREST; @@ -107,6 +106,8 @@ svga_create_sampler_state(struct pipe_context *pipe, cso->magfilter = translate_img_filter( sampler->mag_img_filter ); cso->minfilter = translate_img_filter( sampler->min_img_filter ); cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 ); + if(cso->aniso_level != 1) + cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC; cso->lod_bias = sampler->lod_bias; cso->addressu = translate_wrap_mode(sampler->wrap_s); cso->addressv = translate_wrap_mode(sampler->wrap_t); diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index e82d10c259..7e6ab576ad 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -176,6 +176,13 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader) util_bitmask_clear( svga->vs_bm, result->id ); svga_destroy_shader_result( result ); + + /* + * Remove stale references to this 
result to ensure a new result on the + * same address will be detected as a change. + */ + if(result == svga->state.hw_draw.vs) + svga->state.hw_draw.vs = NULL; } FREE((void *)vs->base.tokens); diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 1902b0106b..d29f3762d2 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -40,8 +40,13 @@ static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a, const struct svga_fs_compile_key *b ) { - unsigned keysize = svga_fs_key_size( a ); - return memcmp( a, b, keysize ); + unsigned keysize_a = svga_fs_key_size( a ); + unsigned keysize_b = svga_fs_key_size( b ); + + if (keysize_a != keysize_b) { + return (int)(keysize_a - keysize_b); + } + return memcmp( a, b, keysize_a ); } @@ -67,7 +72,7 @@ static enum pipe_error compile_fs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_fragment_program( fs, key ); if (result == NULL) { @@ -268,16 +273,13 @@ static int emit_hw_fs( struct svga_context *svga, assert(id != SVGA3D_INVALID_ID); if (result != svga->state.hw_draw.fs) { - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_PS, - id ); - if (ret) - return ret; - } + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_PS, + id ); + if (ret) + return ret; svga->dirty |= SVGA_NEW_FS_RESULT; - svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id; svga->state.hw_draw.fs = result; } diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 2313eafc37..fef652c0c0 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -150,16 +150,13 @@ static int emit_hw_vs( struct svga_context *svga, } if (result != svga->state.hw_draw.vs) 
{ - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_VS, - id ); - if (ret) - return ret; - } + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_VS, + id ); + if (ret) + return ret; svga->dirty |= SVGA_NEW_VS_RESULT; - svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id; svga->state.hw_draw.vs = result; } diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 896c90a89a..737a2213af 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -39,26 +39,24 @@ struct tgsi_token; struct svga_vs_compile_key { - ubyte need_prescale:1; - ubyte allow_psiz:1; unsigned zero_stride_vertex_elements; - ubyte num_zero_stride_vertex_elements:6; + unsigned need_prescale:1; + unsigned allow_psiz:1; + unsigned num_zero_stride_vertex_elements:6; }; struct svga_fs_compile_key { - boolean light_twoside:1; - boolean front_cw:1; - ubyte num_textures; - ubyte num_unnormalized_coords; + unsigned light_twoside:1; + unsigned front_cw:1; + unsigned num_textures:8; + unsigned num_unnormalized_coords:8; struct { - ubyte compare_mode : 1; - ubyte compare_func : 3; - ubyte unnormalized : 1; - - ubyte width_height_idx : 7; - - ubyte texture_target; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned unnormalized:1; + unsigned width_height_idx:7; + unsigned texture_target:8; } tex[PIPE_MAX_SAMPLERS]; }; @@ -121,8 +119,7 @@ static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key ) static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key ) { - return (const char *)&key->tex[key->num_textures].texture_target - - (const char *)key; + return (const char *)&key->tex[key->num_textures] - (const char *)key; } struct svga_shader_result * diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index ad47a56fba..075e4f9a0b 100644 --- 
a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -161,16 +161,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) pipe_mutex_unlock(tr_ctx->draw_mutex); } -static INLINE boolean +static INLINE void trace_context_draw_arrays(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -181,19 +180,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_arrays(pipe, mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_arrays(pipe, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -203,10 +198,9 @@ trace_context_draw_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -221,19 +215,15 @@ trace_context_draw_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer 
*_indexBuffer, unsigned indexSize, @@ -247,10 +237,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -267,18 +256,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, minIndex, maxIndex, - mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, minIndex, maxIndex, + mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 0102cc1876..86237e03bc 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -409,7 +409,7 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state) trace_dump_member(uint, state, min_img_filter); trace_dump_member(uint, state, min_mip_filter); trace_dump_member(uint, state, mag_img_filter); - trace_dump_member(bool, state, compare_mode); + trace_dump_member(uint, state, compare_mode); trace_dump_member(uint, state, compare_func); trace_dump_member(bool, state, normalized_coords); trace_dump_member(uint, state, prefilter); |