diff options
Diffstat (limited to 'src/gallium/drivers')
210 files changed, 5200 insertions, 4862 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d5f5c7bbba..aa29dcb394 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -358,6 +358,7 @@ struct cell_spu_function_info /** This is the object passed to spe_create_thread() */ +PIPE_ALIGN_TYPE(16, struct cell_init_info { unsigned id; @@ -370,7 +371,7 @@ struct cell_init_info uint *buffer_status; /**< points at cell_context->buffer_status */ struct cell_spu_function_info *spu_functions; -} ALIGN16_ATTRIB; +}); #endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 1498fb665a..e402ed2922 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - struct cell_fence fence ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_fence fence; struct cell_buffer_node *head; }; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; /** Associated with each command/batch buffer is a list of pipe_buffers diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 2016b21a40..0a4da8ecc8 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp) } } - draw_set_mapped_constant_buffer(sp->draw, + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], sp->constants[PIPE_SHADER_VERTEX]->size); } @@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp) * * XXX should the element buffer be specified/bound with a separate function? */ -static boolean +static void cell_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -145,29 +145,27 @@ cell_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ cell_unmap_constant_buffers(sp); - - return TRUE; } -static boolean +static void cell_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return cell_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } -static boolean +static void cell_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return cell_draw_elements(pipe, NULL, 0, mode, start, count); + cell_draw_elements(pipe, NULL, 0, mode, start, count); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index efc4f78364..b723e794e7 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell ) vinfo->num_attribs = 0; /* we always want to emit vertex pos */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); @@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell ) break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); #if 1 if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ src = 0; @@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index ab55a24bb6..f1e1dcb9eb 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -331,7 +331,7 @@ cell_emit_state(struct cell_context *cell) const struct draw_context *const draw = cell->draw; struct cell_shader_info info; - info.num_outputs = draw_num_vs_outputs(draw); + info.num_outputs = draw_num_shader_outputs(draw); info.declarations = (uintptr_t) draw->vs.machine.Declarations; info.num_declarations = draw->vs.machine.NumDeclarations; info.instructions = (uintptr_t) draw->vs.machine.Instructions; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 5c0179d954..55bd85bde2 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; @@ -405,8 +404,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; break; @@ -418,8 +415,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; break; @@ -547,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 5ed330aa6e..d2166a4901 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1681,7 +1681,7 @@ exec_instruction( } break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d; unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i; unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 8605679940..0ca92af248 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16) struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d25..98919c43ff 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51..b18f4c22ef 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ @@ -107,10 +108,11 @@ struct spu_framebuffer uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; @@ -123,20 +125,22 @@ struct spu_texture_level vector signed int mask_s, mask_t, mask_r; /** texcoord clamp limits */ vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); /** * All SPU global/context state will be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,8 +159,8 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) tile_t ctile; + PIPE_ALIGN_VAR(16) tile_t ztile; /** Read depth/stencil tiles? */ boolean read_depth_stencil; @@ -165,8 +169,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; + PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +179,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +191,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073ab..14987e3c3a 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a5..087963960d 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) static const qword +fetch_shuffle_data[5] = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d39..3e9804bf8e 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; + PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16) unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]; struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) unsigned char +immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 37184eac7b..46e4338d98 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe ) } +void failover_fail_over( struct failover_context *failover ) +{ + failover->dirty = TRUE; + failover->mode = FO_SW; +} + -static boolean failover_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) +static void failover_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, + unsigned start, + unsigned count) { struct failover_context *failover = failover_context( pipe ); @@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe, /* Try hardware: */ if (failover->mode == FO_HW) { - if (!failover->hw->draw_elements( failover->hw, - indexBuffer, - indexSize, - prim, - start, - count )) { - - failover->hw->flush( failover->hw, ~0, NULL ); - failover->mode = FO_SW; - } + failover->hw->draw_elements( failover->hw, + indexBuffer, + indexSize, + prim, + start, + count ); } /* Possibly try software: */ if (failover->mode == FO_SW) { - if (failover->dirty) + if (failover->dirty) { + failover->hw->flush( failover->hw, ~0, NULL ); failover_state_emit( failover ); + } failover->sw->draw_elements( failover->sw, indexBuffer, @@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe, */ failover->sw->flush( failover->sw, ~0, NULL ); } - - return TRUE; } -static boolean failover_draw_arrays( struct pipe_context *pipe, +static void failover_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return failover_draw_elements(pipe, NULL, 0, prim, start, count); + failover_draw_elements(pipe, NULL, 0, prim, start, count); } static unsigned int diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h index a8ce997a1f..533122b69d 100644 --- a/src/gallium/drivers/failover/fo_winsys.h +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -36,10 +36,13 @@ struct pipe_context; +struct failover_context; struct pipe_context *failover_create( struct pipe_context *hw, struct pipe_context *sw ); +void failover_fail_over( struct failover_context *failover ); + #endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c index effeba1297..669964770d 100644 --- a/src/gallium/drivers/i915/i915_buffer.c +++ b/src/gallium/drivers/i915/i915_buffer.c @@ -111,6 +111,7 @@ i915_buffer_unmap(struct pipe_screen *screen, { struct i915_buffer *buf = i915_buffer(buffer); assert(!buf->ibuf); + (void) buf; } static void diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 94c8aee30f..89feeade75 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -45,7 +45,7 @@ */ -static boolean +static void i915_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe, } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, i915->current.constants[PIPE_SHADER_VERTEX], (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 4 * sizeof(float))); @@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } - - return TRUE; } -static boolean +static void i915_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) { - return i915_draw_range_elements(pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - prim, start, count); + i915_draw_range_elements(pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count); } -static boolean +static void i915_draw_arrays(struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return i915_draw_elements(pipe, NULL, 0, prim, start, count); + i915_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index f7ebfadd59..0fab6e1bc3 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -58,10 +58,10 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; -/* + /* case PIPE_TEX_WRAP_MIRRORED_REPEAT: return TEXCOORDMODE_MIRROR; -*/ + */ default: return TEXCOORDMODE_WRAP; } @@ -74,8 +74,6 @@ static unsigned translate_img_filter( unsigned filter ) return FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return FILTER_ANISOTROPIC; default: assert(0); return FILTER_NEAREST; @@ -221,6 +219,9 @@ i915_create_sampler_state(struct pipe_context *pipe, minFilt = translate_img_filter( sampler->min_img_filter ); magFilt = translate_img_filter( sampler->mag_img_filter ); + if (sampler->max_anisotropy > 1.0) + minFilt = magFilt = FILTER_ANISOTROPIC; + if (sampler->max_anisotropy > 2.0) { cso->state[0] |= SS2_MAX_ANISO_4; } diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 178d4e8781..03dd5091a6 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* pos */ - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; @@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* primary color */ if (colors[0]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_COLOR; } /* secondary color */ if (colors[1]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; } /* fog coord, not fog blend factor */ if (fog) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; } @@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) uint hwtc; if (texCoords[i]) { hwtc = TEXCOORDFMT_4D; - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } else { diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 58d9e56df2..d67a1a6263 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -83,19 +83,19 @@ compile_clip_prog( struct brw_context *brw, c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; - if (c.key.output_color0) + if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT) c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE; - if (c.key.output_color1) + if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT) c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE; - if (c.key.output_bfc0) + if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT) c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE; - if (c.key.output_bfc1) + if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT) c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE; - if (c.key.output_edgeflag) + if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT) c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; if (BRW_IS_IGDNG(brw)) @@ -182,7 +182,6 @@ upload_clip_prog(struct brw_context *brw) */ /* CACHE_NEW_VS_PROG */ key.nr_attrs = brw->vs.prog_data->nr_outputs; - key.output_edgeflag = brw->vs.prog_data->output_edgeflag; /* PIPE_NEW_VS */ key.output_hpos = vs->output_hpos; @@ -190,6 +189,7 @@ upload_clip_prog(struct brw_context *brw) key.output_color1 = vs->output_color1; key.output_bfc0 = vs->output_bfc0; key.output_bfc1 = vs->output_bfc1; + key.output_edgeflag = vs->output_edgeflag; /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 56e7807400..8c006bb95b 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -120,6 +120,13 @@ #define BRW_MAX_CURBE (32*16) + +/* Need a value to say a particular vertex shader output isn't + * present. Limits us to 63 outputs currently. + */ +#define BRW_OUTPUT_NOT_PRESENT ((1<<6)-1) + + struct brw_context; struct brw_depth_stencil_state { @@ -335,8 +342,6 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ - GLuint output_edgeflag; - GLboolean writes_psiz; /* Used for calculating urb partitions: diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h index 77d402d35e..ba5b109c48 100644 --- a/src/gallium/drivers/i965/brw_disasm.h +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -23,6 +23,8 @@ #ifndef BRW_DISASM_H #define BRW_DISASM_H +#include <stdio.h> + struct brw_instruction; int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 852fd22982..ea8d39adaf 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -176,7 +176,7 @@ try_draw_range_elements(struct brw_context *brw, } -static boolean +static void brw_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -228,29 +228,27 @@ brw_draw_range_elements(struct pipe_context *pipe, ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); assert(ret == 0); } - - return TRUE; } -static boolean +static void brw_draw_elements(struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned mode, unsigned start, unsigned count) { - return brw_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - mode, - start, count ); + brw_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + mode, + start, count ); } -static boolean +static void brw_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return brw_draw_elements(pipe, NULL, 0, mode, start, count); + brw_draw_elements(pipe, NULL, 0, mode, start, count); } diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 4fe7b6acc1..00d8eaccbc 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -860,7 +860,7 @@ void brw_land_fwd_jump(struct brw_compile *p, jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index 211be88178..452e1e89f9 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -114,18 +114,18 @@ static void color_clear(struct brw_context *brw, const float *rgba ) { enum pipe_error ret; - unsigned value; + union util_color value; util_pack_color( rgba, bsurface->base.format, &value ); if (bsurface->cpp == 2) - value |= value << 16; + value.ui |= value.ui << 16; - ret = try_clear( brw, bsurface, value ); + ret = try_clear( brw, bsurface, value.ui ); if (ret != 0) { brw_context_flush( brw ); - ret = try_clear( brw, bsurface, value ); + ret = try_clear( brw, bsurface, value.ui ); assert( ret == 0 ); } } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 6b03094f50..5d4e5025f9 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -3,6 +3,7 @@ #include "pipe/p_state.h" #include "brw_context.h" +#include "brw_debug.h" /** * called from intelDrawBuffer() @@ -51,8 +52,14 @@ static void brw_set_viewport_state( struct pipe_context *pipe, struct brw_context *brw = brw_context(pipe); brw->curr.viewport = *viewport; - brw->curr.ccv.min_depth = 0.0; /* XXX: near */ - brw->curr.ccv.max_depth = 1.0; /* XXX: far */ + brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2]; + brw->curr.ccv.max_depth = viewport->scale[2] * 1.0 + viewport->translate[2]; + + if (0) + debug_printf("%s depth range %f .. %f\n", + __FUNCTION__, + brw->curr.ccv.min_depth, + brw->curr.ccv.max_depth); brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; } diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index 5ddc63f57e..81712798a5 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -48,8 +48,6 @@ static GLuint translate_img_filter( unsigned filter ) return BRW_MAPFILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return BRW_MAPFILTER_ANISOTROPIC; default: assert(0); return BRW_MAPFILTER_NEAREST; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 6c376e5e5d..e389587f3e 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -197,6 +197,13 @@ static void *brw_create_vs_state( struct pipe_context *pipe, vs->id = brw->program_id++; vs->has_flow_control = has_flow_control(&vs->info); + vs->output_hpos = BRW_OUTPUT_NOT_PRESENT; + vs->output_color0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_color1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT; + for (i = 0; i < vs->info.num_outputs; i++) { int index = vs->info.output_semantic_index[i]; switch (vs->info.output_semantic_name[i]) { diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 714def5046..8a16205d2f 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -79,18 +79,12 @@ static void release_tmps( struct brw_vs_compile *c ) static boolean is_position_output( struct brw_vs_compile *c, unsigned vs_output ) { - struct brw_vertex_shader *vs = c->vp; - - if (vs_output == c->prog_data.output_edgeflag) { - return FALSE; - } - else { - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; - return (semantic == TGSI_SEMANTIC_POSITION && - index == 0); - } + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); } @@ -98,23 +92,16 @@ static boolean find_output_slot( struct brw_vs_compile *c, unsigned vs_output, unsigned *fs_input_slot ) { - struct brw_vertex_shader *vs = c->vp; + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; - if (vs_output == c->prog_data.output_edgeflag) { - *fs_input_slot = c->key.fs_signature.nr_inputs; - return TRUE; - } - else { - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - unsigned i; - - for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { - if (c->key.fs_signature.input[i].semantic == semantic && + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && c->key.fs_signature.input[i].semantic_index == index) { - *fs_input_slot = i; - return TRUE; - } + *fs_input_slot = i; + return TRUE; } } diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 7e57d0306b..8f983a60ae 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -691,7 +691,7 @@ static void emit_xpd( struct brw_compile *p, { GLuint i; - assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X); + assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W); for (i = 0 ; i < 3; i++) { if (mask & (1<<i)) { diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index b975182e7b..f9063d90fb 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -45,7 +45,7 @@ identity_destroy(struct pipe_context *_pipe) free(id_pipe); } -static boolean +static void identity_draw_arrays(struct pipe_context *_pipe, unsigned prim, unsigned start, @@ -54,13 +54,13 @@ identity_draw_arrays(struct pipe_context *_pipe, struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - return pipe->draw_arrays(pipe, - prim, - start, - count); + pipe->draw_arrays(pipe, + prim, + start, + count); } -static boolean +static void identity_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -73,15 +73,15 @@ identity_draw_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_elements(pipe, - indexBuffer, - indexSize, - prim, - start, - count); + pipe->draw_elements(pipe, + indexBuffer, + indexSize, + prim, + start, + count); } -static boolean +static void identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -96,14 +96,14 @@ identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, - minIndex, - maxIndex, - mode, - start, - count); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, + minIndex, + maxIndex, + mode, + start, + count); } static struct pipe_query * diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index e038a5229e..7c6e46006b 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -50,7 +50,6 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 0c3f00fd58..72d9f39658 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -59,27 +59,16 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.5 or greater. LLVM 2.6 is preferred. + - LLVM 2.6. - On Debian based distributions do: + For Linux, on a recent Debian based distribution do: aptitude install llvm-dev - There is a typo in one of the llvm 2.5 headers, that may cause compilation - errors. To fix it apply the change: - - --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 - +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 - @@ -831,7 +831,7 @@ - template<typename T> - inline T **unwrap(LLVMValueRef *Vals, unsigned Length) { - #if DEBUG - - for (LLVMValueRef *I = Vals, E = Vals + Length; I != E; ++I) - + for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I) - cast<T>(*I); - #endif - return reinterpret_cast<T**>(Vals); - + For Windows download pre-built MSVC 9.0 or MinGW binaries from + http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment + variable to the extracted path. + - scons (optional) - udis86, http://udis86.sourceforge.net/ (optional): @@ -95,9 +84,9 @@ Requirements Building ======== -To build everything invoke scons as: +To build everything on Linux invoke scons as: - scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false -k + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false Alternatively, you can build it with GNU make, if you prefer, by invoking it as @@ -105,12 +94,15 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as but the rest of these instructions assume that scons is used. +For windows is everything the except except the winsys: + + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false Using ===== -Building will create a drop-in alternative for libGL.so. To use it set the -environment variables: +On Linux, building will create a drop-in alternative for libGL.so. To use it +set the environment variables: export LD_LIBRARY_PATH=$PWD/build/linux-x86_64-debug/lib:$LD_LIBRARY_PATH @@ -121,6 +113,11 @@ or For performance evaluation pass debug=no to scons, and use the corresponding lib directory without the "-debug" suffix. +On Windows, building will create a drop-in alternative for opengl32.dll. To use +it put it in the same directory as the application. It can also be used by +replacing the native ICD driver, but it's quite an advanced usage, so if you +need to ask, don't even try it. + Unit testing ============ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 3bd2e70013..6bb545a501 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'): env.Tool('udis86') +env.Append(CPPPATH = ['.']) + env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', @@ -64,7 +66,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', @@ -74,21 +75,19 @@ llvmpipe = env.ConvenienceLibrary( env = env.Clone() -env.Prepend(LIBS = [llvmpipe] + auxiliaries) +env.Prepend(LIBS = [llvmpipe] + gallium) -env.Program( - target = 'lp_test_format', - source = ['lp_test_format.c', 'lp_test_main.c'], -) +tests = [ + 'format', + 'blend', + 'conv', +] -env.Program( - target = 'lp_test_blend', - source = ['lp_test_blend.c', 'lp_test_main.c'], -) - -env.Program( - target = 'lp_test_conv', - source = ['lp_test_conv.c', 'lp_test_main.c'], -) +for test in tests: + target = env.Program( + target = 'lp_test_' + test, + source = ['lp_test_' + test + '.c', 'lp_test_main.c'], + ) + env.InstallProgram(target) Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index d14f468ba9..ced7b9c11d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, enum lp_build_blend_swizzle { LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index dcc25fbff8..25c10af29f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -47,7 +47,7 @@ */ enum lp_build_flow_construct_kind { lP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_SKIP }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index 5836e0173f..10e82f120b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -130,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); - // UIToFP can't be expressed in SSE2 + /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); if (normalized) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp index d3f78c06d9..6e79438ead 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -59,3 +59,17 @@ LLVMInitializeNativeTarget(void) #endif + + +/* + * Hack to allow the linking of release LLVM static libraries on a debug build. + * + * See also: + * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d + */ +#if defined(_MSC_VER) && defined(_DEBUG) +#include <crtdefs.h> +extern "C" { + _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} +} +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c index af70ddc6ab..9003e108c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -69,8 +69,8 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->min_img_filter = sampler->min_img_filter; state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; + state->compare_mode = sampler->compare_mode; + if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { state->compare_func = sampler->compare_func; } state->normalized_coords = sampler->normalized_coords; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 47b68b71e2..5ee8d556a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -488,7 +488,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef res; unsigned chan; - if(!bld->static_state->compare_mode) + if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) return; /* TODO: Compare before swizzling, to avoid redundant computations */ @@ -577,7 +577,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: if(lp_format_is_rgba8(bld.format_desc)) lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); else diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index a67c70ff25..fb1eda4423 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -321,7 +321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, { const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; - LLVMValueRef oow; + LLVMValueRef oow = NULL; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; @@ -361,6 +361,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, if (projected) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } + for (i = num_coords; i < 3; i++) { + coords[i] = bld->base.undef; + } bld->sampler->emit_fetch_texel(bld->sampler, bld->base.builder, @@ -446,7 +449,12 @@ emit_instruction( { unsigned chan_index; LLVMValueRef src0, src1, src2; - LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + LLVMValueRef tmp0, tmp1, tmp2; + LLVMValueRef tmp3 = NULL; + LLVMValueRef tmp4 = NULL; + LLVMValueRef tmp5 = NULL; + LLVMValueRef tmp6 = NULL; + LLVMValueRef tmp7 = NULL; LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; @@ -1310,7 +1318,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); return 0; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index f381156d00..aaa675aec7 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -140,6 +140,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); unsigned i; + /* check if any of the bound drawing surfaces are this texture */ if(llvmpipe->dirty_render_cache) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { if(llvmpipe->framebuffer.cbufs[i] && @@ -150,6 +151,13 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, llvmpipe->framebuffer.zsbuf->texture == texture) return PIPE_REFERENCED_FOR_WRITE; } + + /* check if any of the tex_cache textures are this texture */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (llvmpipe->tex_cache[i] && + llvmpipe->tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { if (llvmpipe->vertex_tex_cache[i] && llvmpipe->vertex_tex_cache[i]->texture == texture) @@ -248,22 +256,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen); - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i]; - llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - llvmpipe->tgsi.frag_samplers[i].cache = llvmpipe->tex_cache[i]; - llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i]; - } - /* * Create drawing context and plug our rendering stage into it. */ @@ -271,10 +263,7 @@ llvmpipe_create( struct pipe_screen *screen ) if (!llvmpipe->draw) goto fail; - draw_texture_samplers(llvmpipe->draw, - PIPE_MAX_VERTEX_SAMPLERS, - (struct tgsi_sampler **) - llvmpipe->tgsi.vert_samplers_list); + /* FIXME: devise alternative to draw_texture_samplers */ if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index ca50585896..426d6eb4a1 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -115,14 +115,6 @@ struct llvmpipe_context { unsigned line_stipple_counter; - /** TGSI exec things */ - struct { - struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; - } tgsi; - /** The primitive drawing context */ struct draw_context *draw; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 2299566c66..c152b4413f 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,11 +45,11 @@ -boolean +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); + llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); } @@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. */ -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_arrays(draw, mode, start, count); /* - * unmap vertex/index buffers - will cause draw module to flush + * unmap vertex/index buffers */ for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); @@ -112,24 +112,28 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); /* Note: leave drawing surfaces mapped */ lp->dirty_render_cache = TRUE; - - return TRUE; } -boolean +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + llvmpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index bce3baec16..4ef0783f3e 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -79,25 +79,22 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[5]; + LLVMTypeRef elem_types[4]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ - elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ + elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, screen->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers, - screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, - screen->target, context_type, 2); + screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, - screen->target, context_type, 3); + screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, LP_JIT_CONTEXT_TEXTURES_INDEX); @@ -109,24 +106,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->context_ptr_type = LLVMPointerType(context_type, 0); } - /* fetch_texel - */ - { - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[3]; - LLVMValueRef fetch_texel; - - ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - arg_types[1] = LLVMInt32Type(); /* unit */ - arg_types[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* store */ - - fetch_texel = lp_declare_intrinsic(screen->module, "fetch_texel", - ret_type, arg_types, Elements(arg_types)); - - LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_fetch_texel_soa); - } - #ifdef DEBUG LLVMDumpModule(screen->module); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 58f716ede2..277b690c02 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -41,7 +41,6 @@ #include "pipe/p_state.h" -struct tgsi_sampler; struct llvmpipe_screen; @@ -78,8 +77,6 @@ struct lp_jit_context { const float *constants; - struct tgsi_sampler **samplers; - float alpha_ref_value; /* FIXME: store (also?) in floats */ @@ -92,16 +89,13 @@ struct lp_jit_context #define lp_jit_context_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 0, "constants") -#define lp_jit_context_samplers(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "samplers") - #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "alpha_ref_value") + lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 3, "blend_color") + lp_build_struct_get(_builder, _ptr, 2, "blend_color") -#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 +#define LP_JIT_CONTEXT_TEXTURES_INDEX 3 #define lp_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") @@ -118,12 +112,6 @@ typedef void void *color, void *depth); -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ); - - void lp_jit_screen_cleanup(struct llvmpipe_screen *screen); diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 4abff4eccc..e8e2e2524a 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -128,6 +128,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr, { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + (void) cvbr; /* do nothing */ } diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index 7eb05de77a..c3a48700a4 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -31,6 +31,7 @@ #ifndef LP_QUAD_H #define LP_QUAD_H +#include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "tgsi/tgsi_exec.h" @@ -83,7 +84,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + PIPE_ALIGN_VAR(16) float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; }; @@ -92,9 +93,9 @@ struct quad_header_output */ struct quad_interp_coef { - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b18f17c0cd..0b2d3a2801 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -117,7 +117,7 @@ struct setup_context { /** * Execute fragment shader for the four fragments in the quad. */ -ALIGN_STACK +PIPE_ALIGN_STACK static void shade_quads(struct llvmpipe_context *llvmpipe, struct quad_header *quads[], @@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, uint8_t *tile; uint8_t *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS]; unsigned chan_index; unsigned q; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 8e0c773a63..e16793186b 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -56,7 +56,6 @@ #define LP_NEW_QUERY 0x4000 -struct tgsi_sampler; struct vertex_info; struct pipe_context; struct llvmpipe_context; @@ -197,14 +196,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp); void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); -boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); -boolean llvmpipe_draw_elements(struct pipe_context *pipe, +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index b2e75d3b14..a94cd05ef2 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_debug_dump.h" +#include "draw/draw_context.h" #include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" @@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->blend == blend) + return; + + draw_flush(llvmpipe->draw); + llvmpipe->blend = blend; llvmpipe->dirty |= LP_NEW_BLEND; @@ -69,6 +75,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned i, j; + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + + draw_flush(llvmpipe->draw); + memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); if(!llvmpipe->jit_context.blend_color) @@ -99,7 +110,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + if (llvmpipe->depth_stencil == depth_stencil) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->depth_stencil = depth_stencil; if(llvmpipe->depth_stencil) llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index e703964aaa..6c1ef6bc42 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -66,7 +66,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); + const uint num = draw_current_shader_outputs(llvmpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -116,13 +116,13 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, + src = draw_find_shader_output(llvmpipe->draw, lpfs->info.input_semantic_name[i], lpfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (llvmpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, @@ -192,36 +192,6 @@ compute_cliprect(struct llvmpipe_context *lp) } -static void -update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) -{ - unsigned i; - - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i]; - llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i]; - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - } - - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] ); - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.frag_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - } - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] ); - } - - llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; -} - /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. */ @@ -237,8 +207,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) } if (llvmpipe->dirty & (LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) - update_tgsi_samplers( llvmpipe ); + LP_NEW_TEXTURE)) { + /* TODO */ + } if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index d129918fe2..9f4bbef73f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -453,8 +453,8 @@ generate_fragment(struct llvmpipe_context *lp, debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); debug_printf(" .mag_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode) - debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); } @@ -550,13 +550,8 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); -#if 0 - /* C texture sampling */ - sampler = lp_c_sampler_soa_create(context_ptr); -#else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); -#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); @@ -673,7 +668,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->fs = (struct lp_fragment_shader *) fs; + if (llvmpipe->fs == fs) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; } @@ -688,6 +688,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) struct lp_fragment_shader_variant *variant; assert(fs != llvmpipe->fs); + (void) llvmpipe; variant = shader->variants; while(variant) { @@ -722,8 +723,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); - if(shader == PIPE_SHADER_VERTEX) - draw_flush(llvmpipe->draw); + draw_flush(llvmpipe->draw); /* note: reference counting */ pipe_buffer_reference(&llvmpipe->constants[shader], constants); @@ -733,7 +733,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, } if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, + data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 4561c6b845..aa3b5a3f91 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -41,14 +41,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, } void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(llvmpipe->draw, setup); + draw_set_rasterizer_state(llvmpipe->draw, rasterizer); - llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + llvmpipe->rasterizer = rasterizer; llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index ba970cac98..e37ff04f3d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -51,6 +51,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, struct llvmpipe_context *lp = llvmpipe_context(pipe); uint i; + draw_flush(lp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 8a761648e7..884e3878e6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -70,14 +70,18 @@ fail: void -llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs; - llvmpipe->vs = (const struct lp_vertex_shader *)vs; + if (llvmpipe->vs == vs) + return; - draw_bind_vertex_shader(llvmpipe->draw, - (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL)); + draw_bind_vertex_shader(llvmpipe->draw, + vs ? vs->draw_data : NULL); + + llvmpipe->vs = vs; llvmpipe->dirty |= LP_NEW_VS; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 29fff91981..6c29e8d8ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -462,7 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -531,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 968c7a2d4a..c1abee424c 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -142,7 +142,7 @@ add_conv_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_one(unsigned verbose, FILE *fp, @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; @@ -330,7 +330,7 @@ test_one(unsigned verbose, fprintf(stderr, "conv.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); firsttime = FALSE; - //abort(); + /* abort(); */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 23ea9ebbe7..2b258f1052 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -199,7 +199,7 @@ add_store_rgba_test(LLVMModuleRef module, } -ALIGN_STACK +PIPE_ALIGN_STACK static boolean test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test) { diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h index 9fa6c36812..05fded78e1 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.h @@ -115,7 +115,7 @@ extern const struct llvmpipe_cached_tex_tile * lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, union tex_tile_address addr ); -static INLINE const union tex_tile_address +static INLINE union tex_tile_address tex_tile_address( unsigned x, unsigned y, unsigned z, diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 9ad1bde956..cb59a94464 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -31,64 +31,11 @@ #include <llvm-c/Core.h> -#include "tgsi/tgsi_exec.h" - -struct llvmpipe_tex_tile_cache; struct lp_sampler_static_state; /** - * Subclass of tgsi_sampler - */ -struct lp_shader_sampler -{ - struct tgsi_sampler base; /**< base class */ - - unsigned processor; - - /* For lp_get_samples_2d_linear_POT: - */ - unsigned xpot; - unsigned ypot; - unsigned level; - - const struct pipe_texture *texture; - const struct pipe_sampler_state *sampler; - - struct llvmpipe_tex_tile_cache *cache; -}; - - - -static INLINE struct lp_shader_sampler * -lp_shader_sampler(const struct tgsi_sampler *sampler) -{ - return (struct lp_shader_sampler *) sampler; -} - - - -extern void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - - -/** - * Texture sampling code generator that just calls lp_get_samples C function - * for the actual sampling computation. - * - * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. - */ -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr); - - -/** * Pure-LLVM texture sampling code generator. * * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c deleted file mode 100644 index 0d01c07fb5..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ /dev/null @@ -1,1713 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture sampling - * - * Authors: - * Brian Paul - */ - -#include "lp_context.h" -#include "lp_quad.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_sample.h" -#include "lp_tex_cache.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -/* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. - * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). - */ -#define FRAC(f) ((f) - util_ifloor(f)) - - -/** - * Linear interpolation macro - */ -static INLINE float -lerp(float a, float v0, float v1) -{ - return v0 + a * (v1 - v0); -} - - -/** - * Do 2D/biliner interpolation of float values. - * v00, v10, v01 and v11 are typically four texture samples in a square/box. - * a and b are the horizontal and vertical interpolants. - * It's important that this function is inlined when compiled with - * optimization! If we find that's not true on some systems, convert - * to a macro. - */ -static INLINE float -lerp_2d(float a, float b, - float v00, float v10, float v01, float v11) -{ - const float temp0 = lerp(a, v00, v10); - const float temp1 = lerp(a, v01, v11); - return lerp(b, temp0, temp1); -} - - -/** - * As above, but 3D interpolation of 8 values. - */ -static INLINE float -lerp_3d(float a, float b, float c, - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111) -{ - const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); - const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); - return lerp(c, temp0, temp1); -} - - - -/** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. - */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) - - -/** - * Apply texture coord wrapping mode and return integer texture indexes - * for a vector of four texcoords (S or T or P). - * \param wrapMode PIPE_TEX_WRAP_x - * \param s the incoming texcoords - * \param size the texture image size - * \param icoord returns the integer texcoords - * \return integer texture index - */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); - } -} - - -/** - * Used to compute texel locations for linear sampling for four texcoords. - * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoords - * \param size the texture image size - * \param icoord0 returns first texture indexes - * \param icoord1 returns second texture indexes (usually icoord0 + 1) - * \param w returns blend factor/weight between texture indexes - * \param icoord returns the computed integer texture coords - */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords - * Only a subset of wrap modes supported. - */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords. - * Only a subset of wrap modes supported. - */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); - } -} - - -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - - return face; -} - - -/** - * Examine the quad's texture coordinates to compute the partial - * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. - */ -static float -compute_lambda(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - float rho, lambda; - - if (samp->processor == TGSI_PROCESSOR_VERTEX) - return lodbias; - - assert(sampler->normalized_coords); - - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width0; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height0; - rho = MAX2(rho, max); - } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth0; - rho = MAX2(rho, max); - } - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; -} - - -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ -static void -choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. - */ - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } - } - else { - *imgFilter = sampler->mag_img_filter; - } - } - else { - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; - } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ - } - } - } -} - - -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image - * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture - * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to - * - * XXX maybe move this into lp_tile_cache.c and merge with the - * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... - */ -static void -get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, - const uint8_t *out[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - out[0] = &tile->color[y ][x ][0]; - out[1] = &tile->color[y ][x+1][0]; - out[2] = &tile->color[y+1][x ][0]; - out[3] = &tile->color[y+1][x+1][0]; -} - -static INLINE const uint8_t * -get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - return &tile->color[y][x][0]; -} - - -static void -get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, - int x0, int y0, - int x1, int y1, - const uint8_t *out[4]) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - unsigned tx = (i & 1) ? x1 : x0; - unsigned ty = (i >> 1) ? y1 : y0; - - out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); - } -} - -static void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (x < 0 || x >= (int) u_minify(texture->width0, level) || - y < 0 || y >= (int) u_minify(texture->height0, level) || - z < 0 || z >= (int) u_minify(texture->depth0, level)) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; - } - else { - const unsigned tx = x % TEX_TILE_SIZE; - const unsigned ty = y % TEX_TILE_SIZE; - const struct llvmpipe_cached_tex_tile *tile; - - tile = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, z, face, level)); - - rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]); - rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]); - rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]); - rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]); - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } - } -} - - -/** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. - * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] - */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) -{ - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; - } - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; -} - - -/** - * As above, but do four z/texture comparisons. - */ -static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) -{ - int j, k0, k1, k2, k3; - float val; - - /* compare four texcoords vs. four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k0 = k1 = k2 = k3 = 1; - break; - case PIPE_FUNC_NEVER: - k0 = k1 = k2 = k3 = 0; - break; - default: - k0 = k1 = k2 = k3 = 0; - assert(0); - break; - } - - /* convert four pass/fail values to an intensity in [0,1] */ - val = 0.25F * (k0 + k1 + k2 + k3); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for (j = 0; j < 4; j++) { - rgba[0][j] = rgba[1][j] = rgba[2][j] = val; - rgba[3][j] = 1.0F; - } -} - - - -static void -lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ - unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - float xw = u - (float)uflr; - float yw = v - (float)vflr; - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *tx[4]; - - - /* Can we fetch all four at once: - */ - if (x0 < xmax && y0 < ymax) - { - get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); - } - else - { - unsigned x1 = (x0 + 1) & (xpot - 1); - unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level, - x0, y0, x1, y1, tx); - } - - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw, yw, - ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]), - ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c])); - } - } -} - - -static void -lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int x0, y0; - const uint8_t *out; - - x0 = util_ifloor(u); - if (x0 < 0) - x0 = 0; - else if (x0 > xpot - 1) - x0 = xpot - 1; - - y0 = util_ifloor(v); - if (y0 < 0) - y0 = 0; - else if (y0 > ypot - 1) - y0 = ypot - 1; - - out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - int level0; - float lambda; - - lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - level0 = (int)lambda; - - if (lambda < 0.0) { - samp->level = 0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else if (level0 >= texture->last_level) { - samp->level = texture->last_level; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else { - float levelBlend = lambda - level0; - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; - - samp->level = level0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba0 ); - - samp->level = level0+1; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba1 ); - - for (j = 0; j < QUAD_SIZE; j++) { - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } - } - } -} - -/** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... - */ -static void -lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - - /* XXX: This is incorrect -- will often end up with (x0 - * == x1 && y0 == y1), meaning that we fetch the same - * texel four times and linearly interpolate between - * identical values. The correct approach would be to - * call linear_texcoord again for the second level. - */ - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static INLINE void -lp_get_samples_1d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, tzero, NULL, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_2d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, t, p, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; - int width, height, depth; - float levelBlend; - const uint face = 0; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - depth = u_minify(texture->depth0, level0); - - assert(width > 0); - assert(height > 0); - assert(depth > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static void -lp_get_samples_cube(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; - for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); - } - lp_get_samples_2d_common(sampler, ssss, tttt, NULL, - lodbias, rgba, faces); -} - - -static void -lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - const uint face = 0; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - } - } - break; - default: - assert(0); - } -} - - -/** - * Error condition handler - */ -static INLINE void -lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - int i,j; - - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - rgba[i][j] = 1.0; -} - -/** - * Called via tgsi_sampler::get_samples() when using a sampler for the - * first time. Determine the actual sampler function, link it in and - * call it. - */ -void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - /* Default to the 'undefined' case: - */ - tgsi_sampler->get_samples = lp_get_samples_null; - - if (!texture) { - assert(0); /* is this legal?? */ - goto out; - } - - if (!sampler->normalized_coords) { - assert (texture->target == PIPE_TEXTURE_2D); - tgsi_sampler->get_samples = lp_get_samples_rect; - goto out; - } - - switch (texture->target) { - case PIPE_TEXTURE_1D: - tgsi_sampler->get_samples = lp_get_samples_1d; - break; - case PIPE_TEXTURE_2D: - tgsi_sampler->get_samples = lp_get_samples_2d; - break; - case PIPE_TEXTURE_3D: - tgsi_sampler->get_samples = lp_get_samples_3d; - break; - case PIPE_TEXTURE_CUBE: - tgsi_sampler->get_samples = lp_get_samples_cube; - break; - default: - assert(0); - break; - } - - /* Do this elsewhere: - */ - samp->xpot = util_unsigned_logbase2( samp->texture->width0 ); - samp->ypot = util_unsigned_logbase2( samp->texture->height0 ); - - /* Try to hook in a faster sampler. Ultimately we'll have to - * code-generate these. Luckily most of this looks like it is - * orthogonal state within the sampler. - */ - if (texture->target == PIPE_TEXTURE_2D && - sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && - sampler->compare_mode == FALSE && - sampler->normalized_coords) - { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - samp->level = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; - break; - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; - break; - default: - break; - } - } - else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; - break; - default: - break; - } - } - } - else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; - break; - default: - break; - } - } - } - } - else if (0) { - _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", - texture->target, PIPE_TEXTURE_2D, - sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, - sampler->min_img_filter, sampler->mag_img_filter, - sampler->wrap_s, sampler->wrap_t, - sampler->compare_mode, FALSE, - sampler->normalized_coords, TRUE); - } - -out: - tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); -} - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -#include "lp_bld_type.h" -#include "lp_bld_intr.h" -#include "lp_bld_tgsi.h" - - -struct lp_c_sampler_soa -{ - struct lp_build_sampler_soa base; - - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -static void -lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) -{ - FREE(sampler); -} - - -static void -lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, - LLVMBuilderRef builder, - struct lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - LLVMValueRef *texel) -{ - struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!sampler->samplers_ptr) - sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); - - if(!sampler->store_ptr) - sampler->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = sampler->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = sampler->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr) -{ - struct lp_c_sampler_soa *sampler; - - sampler = CALLOC_STRUCT(lp_c_sampler_soa); - if(!sampler) - return NULL; - - sampler->base.destroy = lp_c_sampler_soa_destroy; - sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; - sampler->context_ptr = context_ptr; - - return &sampler->base; -} - diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 040b01865d..19d00b58d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -29,7 +29,7 @@ #define LP_TILE_SOA_H #include "pipe/p_compiler.h" -#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS +#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ #ifdef __cplusplus diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h index 595481c2cb..74b472b653 100644 --- a/src/gallium/drivers/llvmpipe/lp_winsys.h +++ b/src/gallium/drivers/llvmpipe/lp_winsys.h @@ -35,7 +35,7 @@ #define LP_WINSYS_H -#include "pipe/p_compiler.h" // for boolean +#include "pipe/p_compiler.h" /* for boolean */ #include "pipe/p_format.h" diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h deleted file mode 100644 index 9c235080a5..0000000000 --- a/src/gallium/drivers/nouveau/nouveau_push.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef __NOUVEAU_PUSH_H__ -#define __NOUVEAU_PUSH_H__ - -#include "nouveau/nouveau_winsys.h" - -#ifndef NOUVEAU_PUSH_CONTEXT -#error undefined push context -#endif - -#define OUT_RING(data) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - (*pc->base.channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - memcpy(pc->base.channel->pushbuf->cur, (src), (size) * 4); \ - pc->base.channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING_NI(obj,mthd,size) do { \ - BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ -} while(0) - -static inline void -DO_FIRE_RING(struct nouveau_channel *chan, struct pipe_fence_handle **fence) -{ - nouveau_pushbuf_flush(chan, 0); - if (fence) - *fence = NULL; -} - -#define FIRE_RING(fence) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - DO_FIRE_RING(pc->base.channel, fence); \ -} while(0) - -#define OUT_RELOC(bo,data,flags,vor,tor) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, nouveau_bo(bo), \ - (data), 0, (flags), (vor), (tor)); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - pc->base.channel->vram->handle, \ - pc->base.channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ -#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ - (flags), 0, 0); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index e4cf91c005..7ebc94ed6c 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct pipe_screen *pscreen, struct nouveau_bo *bo, unsigned alignment, unsigned usage, unsigned size) { struct pipe_buffer *pb; - + pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *)); if (!pb) { nouveau_bo_ref(NULL, &bo); @@ -127,8 +127,18 @@ nouveau_screen_bo_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map(bo, nouveau_screen_map_flags(usage)); if (ret) { debug_printf("map failed: %d\n", ret); @@ -143,11 +153,22 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned offset, unsigned length, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); uint32_t flags = nouveau_screen_map_flags(usage); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map_range(bo, offset, length, flags); if (ret) { + nouveau_bo_unmap(bo); if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY) debug_printf("map_range failed: %d\n", ret); return NULL; @@ -239,5 +260,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) void nouveau_screen_fini(struct nouveau_screen *screen) { + nouveau_channel_free(&screen->channel); } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index ebfc67ad1c..a7927d88df 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -5,6 +5,9 @@ struct nouveau_screen { struct pipe_screen base; struct nouveau_device *device; struct nouveau_channel *channel; + + int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen, + struct pipe_buffer *pb, unsigned usage); }; static inline struct nouveau_screen * diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 9aee9e4956..e844f6abb3 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -3,41 +3,95 @@ #include "util/u_debug.h" +#ifdef DEBUG +#define DEBUG_NOUVEAU_STATEOBJ +#endif /* DEBUG */ + struct nouveau_stateobj_reloc { struct nouveau_bo *bo; - unsigned offset; - unsigned packet; + struct nouveau_grobj *gr; + uint32_t push_offset; + uint32_t mthd; - unsigned data; + uint32_t data; unsigned flags; unsigned vor; unsigned tor; }; +struct nouveau_stateobj_start { + struct nouveau_grobj *gr; + uint32_t mthd; + uint32_t size; + unsigned offset; +}; + struct nouveau_stateobj { struct pipe_reference reference; - unsigned *push; + struct nouveau_stateobj_start *start; struct nouveau_stateobj_reloc *reloc; - unsigned *cur; - unsigned cur_packet; + /* Common memory pool for data. */ + uint32_t *pool; + unsigned pool_cur; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + unsigned start_alloc; + unsigned reloc_alloc; + unsigned pool_alloc; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + unsigned total; /* includes begin_ring */ + unsigned cur; /* excludes begin_ring, offset from "cur_start" */ + unsigned cur_start; unsigned cur_reloc; }; +static INLINE void +so_dump(struct nouveau_stateobj *so) +{ + unsigned i, nr, total = 0; + + for (i = 0; i < so->cur_start; i++) { + if (so->start[i].gr->subc > -1) + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | (so->start[i].gr->subc << 13) + | so->start[i].mthd); + else + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | so->start[i].mthd); + for (nr = 0; nr < so->start[i].size; nr++, total++) + debug_printf("+0x%04x: 0x%08x\n", total, + so->pool[so->start[i].offset + nr]); + } +} + static INLINE struct nouveau_stateobj * -so_new(unsigned push, unsigned reloc) +so_new(unsigned start, unsigned push, unsigned reloc) { struct nouveau_stateobj *so; so = MALLOC(sizeof(struct nouveau_stateobj)); pipe_reference_init(&so->reference, 1); - so->push = MALLOC(sizeof(unsigned) * push); - so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); + so->total = so->cur = so->cur_start = so->cur_reloc = 0; - so->cur = so->push; - so->cur_reloc = so->cur_packet = 0; +#ifdef DEBUG_NOUVEAU_STATEOBJ + so->start_alloc = start; + so->reloc_alloc = reloc; + so->pool_alloc = push; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start)); + so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc)); + so->pool = MALLOC(push * sizeof(uint32_t)); + so->pool_cur = 0; + + if (!so->start || !so->reloc || !so->pool) { + debug_printf("malloc failed\n"); + assert(0); + } return so; } @@ -48,63 +102,128 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) struct nouveau_stateobj *so = *pso; int i; - if (pipe_reference(&(*pso)->reference, &ref->reference)) { - free(so->push); + if (pipe_reference(&(*pso)->reference, &ref->reference)) { + FREE(so->start); for (i = 0; i < so->cur_reloc; i++) nouveau_bo_ref(NULL, &so->reloc[i].bo); - free(so->reloc); - free(so); + FREE(so->reloc); + FREE(so->pool); + FREE(so); } *pso = ref; } static INLINE void -so_data(struct nouveau_stateobj *so, unsigned data) +so_data(struct nouveau_stateobj *so, uint32_t data) { - (*so->cur++) = (data); - so->cur_packet += 4; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur >= so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data; } static INLINE void -so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size) +so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size) { - so->cur_packet += (4 * size); +#ifdef DEBUG_NOUVEAU_STATEOBJ + if ((so->cur + size) > so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + while (size--) - (*so->cur++) = (*data++); + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = + *data++; } static INLINE void so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, unsigned mthd, unsigned size) { - so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); - so_data(so, (gr->subc << 13) | (size << 18) | mthd); + struct nouveau_stateobj_start *start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start_alloc <= so->cur_start) { + debug_printf("exceeding num_start size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + start = so->start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) { + debug_printf("previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = start; + start[so->cur_start].gr = gr; + start[so->cur_start].mthd = mthd; + start[so->cur_start].size = size; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->pool_alloc < (size + so->pool_cur)) { + debug_printf("exceeding num_pool size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + start[so->cur_start].offset = so->pool_cur; + so->pool_cur += size; + + so->cur_start++; + /* The 1 is for *this* begin_ring. */ + so->total += so->cur + 1; + so->cur = 0; } static INLINE void so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo, unsigned data, unsigned flags, unsigned vor, unsigned tor) { - struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; - - r->bo = NULL; - nouveau_bo_ref(bo, &r->bo); - r->offset = so->cur - so->push; - r->packet = so->cur_packet; - r->data = data; - r->flags = flags; - r->vor = vor; - r->tor = tor; + struct nouveau_stateobj_reloc *r; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->reloc_alloc <= so->cur_reloc) { + debug_printf("exceeding num_reloc size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + r = so->reloc; + + so->reloc = r; + r[so->cur_reloc].bo = NULL; + nouveau_bo_ref(bo, &(r[so->cur_reloc].bo)); + r[so->cur_reloc].gr = so->start[so->cur_start-1].gr; + r[so->cur_reloc].push_offset = so->total + so->cur; + r[so->cur_reloc].data = data; + r[so->cur_reloc].flags = flags; + r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd + + (so->cur << 2); + r[so->cur_reloc].vor = vor; + r[so->cur_reloc].tor = tor; + so_data(so, data); + so->cur_reloc++; } -static INLINE void -so_dump(struct nouveau_stateobj *so) +/* Determine if this buffer object is referenced by this state object. */ +static INLINE boolean +so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo) { - unsigned i, nr = so->cur - so->push; + int i; + + for (i = 0; i < so->cur_reloc; i++) + if (so->reloc[i].bo == bo) + return true; - for (i = 0; i < nr; i++) - debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]); + return false; } static INLINE void @@ -114,75 +233,93 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so) unsigned nr, i; int ret = 0; - nr = so->cur - so->push; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start[so->cur_start - 1].size > so->cur) { + debug_printf("emit: previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* We cannot update total in case we so_emit again. */ + nr = so->total + so->cur; + /* This will flush if we need space. * We don't actually need the marker. */ if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) { debug_printf("so_emit failed marker emit with error %d\n", ret); - return; + assert(0); + } + + /* Submit data. This will ensure proper binding of objects. */ + for (i = 0; i < so->cur_start; i++) { + BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size); + OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size); } - pb->remaining -= nr; - memcpy(pb->cur, so->push, nr * 4); for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset, - r->bo, r->data, 0, r->flags, - r->vor, r->tor))) { + if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur - nr + + r->push_offset, r->bo, r->data, + 0, r->flags, r->vor, r->tor))) { debug_printf("so_emit failed reloc with error %d\n", ret); - goto out; + assert(0); } } -out: - pb->cur += nr; } static INLINE void so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so) { struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *gr = NULL; unsigned i; int ret = 0; if (!so) return; - i = so->cur_reloc << 1; - /* This will flush if we need space. - * We don't actually need the marker. - */ - if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) { - debug_printf("so_emit_reloc_markers failed marker emit with" \ - "error %d\n", ret); - return; - } - pb->remaining -= i; - + /* If we need to flush in flush notify, then we have a problem anyway. */ for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, - r->packet, 0, - (r->flags & (NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | - NOUVEAU_BO_RDWR)) | - NOUVEAU_BO_DUMMY, 0, 0))) { - debug_printf("so_emit_reloc_markers failed reloc" \ - "with error %d\n", ret); - pb->remaining += ((so->cur_reloc - i) << 1); - return; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (r->mthd & 0x40000000) { + debug_printf("error: NI mthd 0x%08X\n", r->mthd); + continue; } - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, - r->data, 0, - r->flags | NOUVEAU_BO_DUMMY, - r->vor, r->tor))) { - debug_printf("so_emit_reloc_markers failed reloc" \ - "with error %d\n", ret); - pb->remaining += ((so->cur_reloc - i) << 1) - 1; - return; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* The object needs to be bound and the system must know the + * subchannel is being used. Otherwise it will discard it. + */ + if (gr != r->gr) { + BEGIN_RING(chan, r->gr, 0x100, 1); + OUT_RING(chan, 0); + gr = r->gr; + } + + /* Some relocs really don't like to be hammered, + * NOUVEAU_BO_DUMMY makes sure it only + * happens when needed. + */ + ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) | + r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART + | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); } + + ret = OUT_RELOC(chan, r->bo, r->data, r->flags | + NOUVEAU_BO_DUMMY, r->vor, r->tor); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); + } + + pb->remaining -= 2; } } diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 42c77e5e77..4c3e08a43f 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -23,6 +23,9 @@ #define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17) #define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18) +/* use along with GPU_WRITE for 2D-only writes */ +#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) + extern struct pipe_screen * nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *); diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 4b33636b2e..edd96859cf 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -10,10 +10,14 @@ nv04_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv04->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -30,32 +34,36 @@ nv04_destroy(struct pipe_context *pipe) static boolean nv04_init_hwctx(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + // requires a valid handle -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); // OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); - OUT_RING(0); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); + OUT_RING(chan, 0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(0x40182800); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, 0x40182800); // OUT_RING(1<<20/*no cull*/); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); // OUT_RING(0x24|(1<<6)|(1<<8)); - OUT_RING(0x120001a4); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); - OUT_RING(0x332213a1); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); - OUT_RING(0x11001010); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); - OUT_RING(0x0); -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); + OUT_RING(chan, 0x120001a4); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); + OUT_RING(chan, 0x332213a1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); + OUT_RING(chan, 0x11001010); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); + OUT_RING(chan, 0x0); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); // OUT_RING(SCREEN_OFFSET); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); - OUT_RING(0xff000000); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); + OUT_RING(chan, 0xff000000); - FIRE_RING (NULL); + FIRE_RING (chan); return TRUE; } diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h index 55326c787a..fe3b527423 100644 --- a/src/gallium/drivers/nv04/nv04_context.h +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv04_screen *ctx = nv04->screen -#include "nouveau/nouveau_push.h" - #include "nv04_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -141,9 +137,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04); extern void nv04_state_tex_update(struct nv04_context *nv04); /* nv04_vbo.c */ -extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv04_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv04_draw_elements( struct pipe_context *pipe, +extern void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c index 0cce71ad1d..c152b52119 100644 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -4,7 +4,7 @@ #define _(m,tf) \ { \ PIPE_FORMAT_##m, \ - NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ + NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ } struct nv04_texture_format { @@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit) return; } - nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER + nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER + | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER | nv04_fragtex_format(pt->format) - | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) - | ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) - | ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE + | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) + | ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE ; } diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index f6458232ae..0b795ea243 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -93,33 +93,45 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render, static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RINGp(buffer + VERTEX_SIZE * v4,8); - OUT_RINGp(buffer + VERTEX_SIZE * v5,8); - OUT_RING(0xFEDCBA); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v5,8); + OUT_RING(chan, 0xFEDCBA); } static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RING(0xFED); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RING(chan, 0xFED); } static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RING(0xFECEDC); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RING(chan, 0xFECEDC); } static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) @@ -156,7 +168,10 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con { const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; for(i = 0; i<nr_indices; i+=14) @@ -166,15 +181,15 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con if (numvert<3) break; - BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8); for(j = 0; j<numvert; j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 ); - BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); for(j = 0; j<numtri/2; j++ ) - OUT_RING(striptbl[j]); + OUT_RING(chan, striptbl[j]); if (numtri%2) - OUT_RING(striptbl[numtri/2]&0xFFF); + OUT_RING(chan, striptbl[numtri/2]&0xFFF); } } @@ -182,11 +197,14 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const { const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); - OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8); for(i = 1; i<nr_indices; i+=14) { @@ -195,16 +213,16 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const if (numvert < 3) break; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); for(j=0;j<numvert;j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); for(j = 0; j<numtri/2; j++) - OUT_RING(fantbl[j]); + OUT_RING(chan, fantbl[j]); if (numtri%2) - OUT_RING(fantbl[numtri/2]&0xFFF); + OUT_RING(chan, fantbl[numtri/2]&0xFFF); } } diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 170ce3eb7e..7c5b6e8229 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->fahrenheit); nv04_surface_2d_takedown(&screen->eng2d); + nouveau_screen_fini(&screen->base); + FREE(pscreen); } @@ -163,10 +165,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) fahrenheit_class = 0; sub3d_class = 0; } else if (dev->chipset >= 0x10) { - fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; + fahrenheit_class = NV10_TEXTURED_TRIANGLE; sub3d_class = NV10_CONTEXT_SURFACES_3D; } else { - fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; + fahrenheit_class=NV04_TEXTURED_TRIANGLE; sub3d_class = NV04_CONTEXT_SURFACES_3D; } diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index 871034ad0b..b67f1e16b1 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) { switch (wrap) { case PIPE_TEX_WRAP_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; break; case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; break; case PIPE_TEX_WRAP_CLAMP: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP: default: NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; } - return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; + return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; } static void * @@ -84,20 +84,20 @@ nv04_sampler_state_create(struct pipe_context *pipe, ss = MALLOC(sizeof(struct nv04_sampler_state)); - ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | - (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | + (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); if (cso->max_anisotropy > 1.0) { - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; } switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; break; } @@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, case PIPE_TEX_FILTER_LINEAR: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; break; } break; @@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, default: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; break; } break; @@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe, */ rs = MALLOC(sizeof(struct nv04_rasterizer_state)); - rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; return (void *)rs; } @@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); hw->control = float_to_ubyte(cso->alpha.ref_value); - hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); - hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; - hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; - hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; - hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); + hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN; + hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0; + hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; + hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0; + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format return (void *)hw; } @@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe, /* struct nv04_context *nv04 = nv04_context(pipe); // XXX - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); OUT_RING (((s->maxx - s->minx) << 16) | s->minx); OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ } diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index eb2c1c57c6..b8d6dc560f 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -57,13 +57,19 @@ static uint32_t nv04_blend_func(uint32_t f) static void nv04_emit_control(struct nv04_context* nv04) { uint32_t control = nv04->dsa->control; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, control); } static void nv04_emit_blend(struct nv04_context* nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; uint32_t blend; blend=0x4; // texture MODULATE_ALPHA @@ -75,19 +81,23 @@ static void nv04_emit_blend(struct nv04_context* nv04) blend|=(nv04_blend_func(nv04->blend->b_src)<<24); blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); - OUT_RING(blend); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING(chan, blend); } static void nv04_emit_sampler(struct nv04_context *nv04, int unit) { struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; struct pipe_texture *pt = &nv04mt->base; - - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING(nv04->sampler[unit]->filter); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING(chan, nv04->sampler[unit]->filter); } static void nv04_state_emit_framebuffer(struct nv04_context* nv04) @@ -97,6 +107,10 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv04_miptree *nv04mt = 0; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; + struct nouveau_bo *bo; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -128,24 +142,29 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) assert(0); } - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); - OUT_RING(rt_format); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + OUT_RING(chan, rt_format); nv04mt = (struct nv04_miptree *)rt->base.texture; + bo = nouveau_bo(nv04mt->buffer); /* FIXME pitches have to be aligned ! */ - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt->pitch|(zeta->pitch<<16)); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt->pitch|(zeta->pitch<<16)); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (fb->zsbuf) { nv04mt = (struct nv04_miptree *)zeta->base.texture; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } } void nv04_emit_hw_state(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; int i; if (nv04->dirty & NV04_NEW_VERTPROG) { @@ -163,8 +182,8 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (nv04->dirty & NV04_NEW_CONTROL) { nv04->dirty &= ~NV04_NEW_CONTROL; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(nv04->dsa->control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, nv04->dsa->control); } if (nv04->dirty & NV04_NEW_BLEND) { @@ -205,12 +224,12 @@ nv04_emit_hw_state(struct nv04_context *nv04) unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch; unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt_pitch|(zeta_pitch<<16)); - OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt_pitch|(zeta_pitch<<16)); + OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv04->zeta) { - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } /* Texture images */ @@ -218,9 +237,10 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (!(nv04->fp_samplers & (1 << i))) continue; struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); } } diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index 12df7fd199..b24a9cee5a 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -77,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format) } static INLINE unsigned -nv04_swizzle_bits(unsigned x, unsigned y) +nv04_swizzle_bits_square(unsigned x, unsigned y) { unsigned u = (x & 0x001) << 0 | (x & 0x002) << 1 | @@ -107,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y) return v | u; } +/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h) +{ + unsigned s = MIN2(w, h); + unsigned m = s - 1; + return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); +} + static int nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, struct pipe_surface *dst, int dx, int dy, @@ -158,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, for (x = 0; x < w; x += sub_w) { sub_w = MIN2(sub_w, w - x); - /* Must be 64-byte aligned */ - assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format)) & 63)); + assert(!(dst->offset & 63)); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format), + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); OUT_RING (chan, nv04_scaled_image_format(src->format)); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 1 << 20); OUT_RING (chan, 1 << 20); @@ -491,3 +499,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen) ctx->fill = nv04_surface_fill; return ctx; } + +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns) +{ + int temp_flags; + + // printf("creating temp, flags is %i!\n", flags); + + if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD; + } + else + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; + } + + struct nv40_screen* screen = (struct nv40_screen*)pscreen; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + + struct pipe_texture templ; + memset(&templ, 0, sizeof(templ)); + templ.format = ns->base.texture->format; + templ.target = PIPE_TEXTURE_2D; + templ.width0 = ns->base.width; + templ.height0 = ns->base.height; + templ.depth0 = 1; + templ.last_level = 0; + + // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented + templ.nr_samples = ns->base.texture->nr_samples; + + templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ); + struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); + temp_ns->backing = ns; + + if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ) + eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height); + + return temp_ns; +} + diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h index 02b3f56ba8..ce696a11a3 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.h +++ b/src/gallium/drivers/nv04/nv04_surface_2d.h @@ -4,6 +4,7 @@ struct nv04_surface { struct pipe_surface base; unsigned pitch; + struct nv04_surface* backing; }; struct nv04_surface_2d { @@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen); void nv04_surface_2d_takedown(struct nv04_surface_2d **); +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); + #endif diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 8446073ae8..2dd2e146a8 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -16,14 +16,14 @@ struct nv04_transfer { }; static void -nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv04_compatible_transfer_tex(pt, level, &tx_tex_template); + nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -130,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index e3167814f2..3484771814 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv04_draw_elements( struct pipe_context *pipe, +void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv04->constbuf[PIPE_SHADER_VERTEX], nv04->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv04_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv04_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { printf("coucou in draw arrays\n"); - return nv04_draw_elements(pipe, NULL, 0, prim, start, count); + nv04_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 0dadeb03dd..1ecb73d06e 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -10,10 +10,14 @@ nv10_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv10->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,225 +35,226 @@ static void nv10_init_hwctx(struct nv10_context *nv10) { struct nv10_screen *screen = nv10->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; float projectionmatrix[16]; - BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0x7ff<<16)|0x800); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); for (i=1;i<8;i++) { - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, 0x290, 1); - OUT_RING ((0x10<<16)|1); - BEGIN_RING(celsius, 0x3f4, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, 0x290, 1); + OUT_RING (chan, (0x10<<16)|1); + BEGIN_RING(chan, celsius, 0x3f4, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); if (nv10->screen->celsius->grclass != NV10TCL) { /* For nv11, nv17 */ - BEGIN_RING(celsius, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, celsius, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); /* Set state */ - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (0x207); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); - OUT_RING (0); - OUT_RING (0); - - BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12); - OUT_RING (0x30141010); - OUT_RING (0); - OUT_RING (0x20040000); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0x18000000); - OUT_RING (0x300e0300); - OUT_RING (0x0c091c80); - - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); - OUT_RING (1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); - OUT_RING (1); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x8006); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); - OUT_RING (0xff); - OUT_RING (0x207); - OUT_RING (0); - OUT_RING (0xff); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1d01); - BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (0x201); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (0x1b02); - OUT_RING (0x1b02); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (0x405); - OUT_RING (0x901); - BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + + BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12); + OUT_RING (chan, 0x30141010); + OUT_RING (chan, 0); + OUT_RING (chan, 0x20040000); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0x18000000); + OUT_RING (chan, 0x300e0300); + OUT_RING (chan, 0x0c091c80); + + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x8006); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1d01); + BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, 0x201); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, 0x1b02); + OUT_RING (chan, 0x1b02); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, 0x405); + OUT_RING (chan, 0x901); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8); for (i=0;i<8;i++) { - OUT_RING (0); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RING (0x3fc00000); /* -1.50 */ - OUT_RING (0xbdb8aa0a); /* -0.09 */ - OUT_RING (0); /* 0.00 */ + BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RING (chan, 0x3fc00000); /* -1.50 */ + OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */ + OUT_RING (chan, 0); /* 0.00 */ - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); - OUT_RING (0x802); - OUT_RING (2); + BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2); + OUT_RING (chan, 0x802); + OUT_RING (chan, 2); /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when * using texturing, except when using the texture matrix */ - BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); - OUT_RING (6); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (0x01010101); + BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); + OUT_RING (chan, 6); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x01010101); /* Set vertex component */ - BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); - OUT_RINGf (0.0); - BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX1_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1); + OUT_RINGf (chan, 0.0); + BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); memset(projectionmatrix, 0, sizeof(projectionmatrix)); - BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 1.0; projectionmatrix[3*4+3] = 1.0; for (i=0;i<16;i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); - OUT_RING (0.0); - OUT_RINGf (16777216.0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RING (chan, 0.0); + OUT_RINGf (chan, 16777216.0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (-2048.0); - OUT_RINGf (-2048.0); - OUT_RINGf (16777215.0 * 0.5); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RING (chan, 0); - FIRE_RING (NULL); + FIRE_RING (chan); } struct pipe_context * diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 36a6aa7a74..ab4b825487 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv10_screen *ctx = nv10->screen -#include "nouveau/nouveau_push.h" - #include "nv10_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -144,9 +140,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10); extern void nv10_state_tex_update(struct nv10_context *nv10); /* nv10_vbo.c */ -extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv10_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv10_draw_elements( struct pipe_context *pipe, +extern void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index 906fdfeeb9..c1f7ccb9ab 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -52,6 +52,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; struct pipe_texture *pt = &nv10mt->base; struct nv10_texture_format *tf; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; uint32_t txf, txs, txp; tf = nv10_fragtex_format(pt->format); @@ -82,15 +85,15 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) return; } - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width0 << 16) | pt->height0); - OUT_RING (ps->bcol); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv10_fragtex_bind(struct nv10_context *nv10) { #if 0 struct nv10_fragment_program *fp = nv10->fragprog.active; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; unsigned samplers, unit; samplers = nv10->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv10_fragtex_bind(struct nv10_context *nv10) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv10->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 7ba9777a22..c5dbe43dbc 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -67,12 +67,15 @@ struct nv10_vbuf_render { void nv10_vtxbuf_bind( struct nv10_context* nv10 ) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; for(i = 0; i < 8; i++) { - BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv10->vtxbuf*/); - BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv10->vtxbuf*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1); + OUT_RING(chan, 0/*XXX*/); } } @@ -163,19 +166,22 @@ nv10_vbuf_render_draw( struct vbuf_render *render, { struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); struct nv10_context *nv10 = nv10_render->nv10; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int push, i; nv10_emit_hw_state(nv10); - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv10_render->hwprim); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv10_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -183,16 +189,16 @@ nv10_vbuf_render_draw( struct vbuf_render *render, // XXX too big/small ? check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index ee5901e743..69a6dab866 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->celsius); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -177,7 +180,6 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->celsius, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 2577ab73b5..30a596ca60 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -4,25 +4,32 @@ static void nv10_state_emit_blend(struct nv10_context* nv10) { struct nv10_blend_state *b = nv10->blend; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (b->b_enable); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (chan, b->b_enable); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv10_state_emit_blend_color(struct nv10_context* nv10) { struct pipe_blend_color *c = nv10->blend_color; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -31,60 +38,66 @@ static void nv10_state_emit_blend_color(struct nv10_context* nv10) static void nv10_state_emit_rast(struct nv10_context* nv10) { struct nv10_rasterizer_state *r = nv10->rast; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv10_state_emit_dsa(struct nv10_context* nv10) { struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); #if 0 - BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (d->alpha.func); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv10_state_emit_viewport(struct nv10_context* nv10) @@ -108,6 +121,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) int colour_format = 0, zeta_format = 0; struct nv10_miptree *nv10mt = 0; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; colour_format = fb->cbufs[0]->format; @@ -144,11 +161,11 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) } if (zeta) { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv10mt = (struct nv10_miptree *)rt->base.texture; @@ -160,13 +177,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) nv10->zeta = nv10mt->buffer; } - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - OUT_RING (rt_format); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0 | 0x08000800); - OUT_RING (((h - 1) << 16) | 0 | 0x08000800); + BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); + OUT_RING (chan, rt_format); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0 | 0x08000800); + OUT_RING (chan, ((h - 1) << 16) | 0 | 0x08000800); } static void nv10_vertex_layout(struct nv10_context *nv10) @@ -201,6 +218,10 @@ static void nv10_vertex_layout(struct nv10_context *nv10) void nv10_emit_hw_state(struct nv10_context *nv10) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + struct nouveau_bo *rt_bo; int i; if (nv10->dirty & NV10_NEW_VERTPROG) { @@ -269,38 +290,41 @@ nv10_emit_hw_state(struct nv10_context *nv10) */ /* Render target */ + rt_bo = nouveau_bo(nv10->rt[0]); // XXX figre out who's who for NV10TCL_DMA_* and fill accordingly -// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); -// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1); +// OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv10->zeta) { + struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta); // XXX -// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1); -// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1); +// OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv10->zeta + lma_offset);*/ +/* BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv10->fp_samplers & (1 << i))) continue; - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv10->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, NV10TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index c664973e90..eb04af9782 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -16,14 +16,14 @@ struct nv10_transfer { }; static void -nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv10_compatible_transfer_tex(pt, level, &tx_tex_template); + nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -130,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 441a4f75f3..9180c72c9b 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv10_draw_elements( struct pipe_context *pipe, +void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, } draw_set_mapped_constant_buffer(draw, + PIPE_SHADER_VERTEX, nv10->constbuf[PIPE_SHADER_VERTEX], nv10->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -64,14 +65,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv10_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv10_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { - return nv10_draw_elements(pipe, NULL, 0, prim, start, count); + nv10_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index 6a147a4159..5b80af2d22 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -10,10 +10,14 @@ nv20_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv20->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,348 +35,352 @@ static void nv20_init_hwctx(struct nv20_context *nv20) { struct nv20_screen *screen = nv20->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; float projectionmatrix[16]; - const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL); + const boolean is_nv25tcl = (kelvin->grclass == NV25TCL); - BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); /* TEXTURE1 */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); /* ZETA */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); /* TEXTURE1 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); /* ZETA */ - BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1); - OUT_RING (0); /* renouveau: beef0351, unique */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1); + OUT_RING (chan, 0); /* renouveau: beef0351, unique */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0xfff << 16) | 0x0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x17e0, 3); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); + BEGIN_RING(chan, kelvin, 0x17e0, 3); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); } else { - BEGIN_RING(kelvin, 0x1e68, 1); - OUT_RING (0x4b800000); /* 16777216.000000 */ - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL); + BEGIN_RING(chan, kelvin, 0x1e68, 1); + OUT_RING (chan, 0x4b800000); /* 16777216.000000 */ + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL); } - BEGIN_RING(kelvin, 0x290, 1); - OUT_RING ((0x10 << 16) | 1); - BEGIN_RING(kelvin, 0x9fc, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x1d80, 1); - OUT_RING (1); - BEGIN_RING(kelvin, 0x9f8, 1); - OUT_RING (4); - BEGIN_RING(kelvin, 0x17ec, 3); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, 0x290, 1); + OUT_RING (chan, (0x10 << 16) | 1); + BEGIN_RING(chan, kelvin, 0x9fc, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x1d80, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, 0x9f8, 1); + OUT_RING (chan, 4); + BEGIN_RING(chan, kelvin, 0x17ec, 3); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 0.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d88, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d88, 1); + OUT_RING (chan, 3); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1); - OUT_RING (chan->vram->handle); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1); + OUT_RING (chan, chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1); + OUT_RING (chan, chan->vram->handle); } - BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1); - OUT_RING (0); /* renouveau: beef1e10 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1); + OUT_RING (chan, 0); /* renouveau: beef1e10 */ - BEGIN_RING(kelvin, 0x1e98, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1e98, 1); + OUT_RING (chan, 0); #if 0 if (is_nv25tcl) { - BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ - OUT_RING (NvDmaFB); /* renouveau: beef0201 */ + BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ + OUT_RING (chan, NvDmaFB); /* renouveau: beef0201 */ - BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ } #endif - BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, kelvin, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); /* error: ILLEGAL_MTHD, PROTECTION_FAULT - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); - OUT_RINGf (512.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 512.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x022c, 2); - OUT_RING (0x280); - OUT_RING (0x07d28000); + BEGIN_RING(chan, kelvin, 0x022c, 2); + OUT_RING (chan, 0x280); + OUT_RING (chan, 0x07d28000); } /* * illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c2c, 1); - OUT_RING (0); */ + BEGIN_RING(chan, NvSub3D, 0x1c2c, 1); + OUT_RING (chan, 0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1da4, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1da4, 1); + OUT_RING (chan, 0); } /* * crashes with illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c18, 1); - OUT_RING (0x200); */ + BEGIN_RING(chan, NvSub3D, 0x1c18, 1); + OUT_RING (chan, 0x200); */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING ((0 << 16) | 0); - OUT_RING ((0 << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, (0 << 16) | 0); + OUT_RING (chan, (0 << 16) | 0); /* *** Set state *** */ - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */ + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_ALPHA_FUNC_REF */ for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4); - OUT_RING (0x30d410d0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1); - OUT_RING (0x00011101); - BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2); - OUT_RING (0x130e0300); - OUT_RING (0x0c091c80); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4); - OUT_RING (0x20c400c0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); - OUT_RING (0x035125a0); - OUT_RING (0); - OUT_RING (0x40002000); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); - OUT_RING (0xffff0000); - - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4); - OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE); - OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO); - OUT_RING (0); /* NV20TCL_BLEND_COLOR */ - OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RING (0xff); - OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */ - OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */ - OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP); - - BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (0); - OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY); - BEGIN_RING(kelvin, 0x17cc, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4); + OUT_RING (chan, 0x30d410d0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1); + OUT_RING (chan, 0x00011101); + BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2); + OUT_RING (chan, 0x130e0300); + OUT_RING (chan, 0x0c091c80); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4); + OUT_RING (chan, 0x20c400c0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); + OUT_RING (chan, 0x035125a0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x40002000); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); + OUT_RING (chan, 0xffff0000); + + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, NV20TCL_BLEND_FUNC_SRC_ONE); + OUT_RING (chan, NV20TCL_BLEND_FUNC_DST_ZERO); + OUT_RING (chan, 0); /* NV20TCL_BLEND_COLOR */ + OUT_RING (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RING (chan, 0xff); + OUT_RING (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_STENCIL_FUNC_REF */ + OUT_RING (chan, 0xff); /* NV20TCL_STENCIL_FUNC_MASK */ + OUT_RING (chan, NV20TCL_STENCIL_OP_FAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP); + + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY); + BEGIN_RING(chan, kelvin, 0x17cc, 1); + OUT_RING (chan, 0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 1); } - BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1); - OUT_RING (0x00020000); - BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), + BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1); + OUT_RING (chan, 0x00020000); + BEGIN_RING(chan, kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { - OUT_RING(0xffffffff); + OUT_RING(chan, 0xffffffff); } - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (NV20TCL_DEPTH_FUNC_LESS); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RINGf (0.0); - OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, NV20TCL_DEPTH_FUNC_LESS); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); if (!is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 3); } - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); if (!is_nv25tcl) { - OUT_RING (8); + OUT_RING (chan, 8); } else { - OUT_RINGf (1.0); + OUT_RINGf (chan, 1.0); } if (!is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POINT_SMOOTH_ENABLE */ } else { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x0a1c, 1); - OUT_RING (0x800); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x0a1c, 1); + OUT_RING (chan, 0x800); } - BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL); - OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (NV20TCL_CULL_FACE_BACK); - OUT_RING (NV20TCL_FRONT_FACE_CCW); - BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1); - OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, NV20TCL_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV20TCL_POLYGON_MODE_BACK_FILL); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, NV20TCL_CULL_FACE_BACK); + OUT_RING (chan, NV20TCL_FRONT_FACE_CCW); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1); + OUT_RING (chan, NV20TCL_SHADE_MODEL_SMOOTH); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { - OUT_RING(0); + OUT_RING(chan, 0); } - BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RINGf (1.5); - OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ - OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ - BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2); - OUT_RING (NV20TCL_FOG_MODE_EXP_2); - OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG); - BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.FOG_COLOR */ - BEGIN_RING(kelvin, NV20TCL_ENGINE, 1); - OUT_RING (NV20TCL_ENGINE_FIXED); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RINGf (chan, 1.5); + OUT_RINGf (chan, -0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ + OUT_RINGf (chan, 0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ + BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2); + OUT_RING (chan, NV20TCL_FOG_MODE_EXP_SIGNED); + OUT_RING (chan, NV20TCL_FOG_COORD_FOG); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.FOG_COLOR */ + BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1); + OUT_RING (chan, NV20TCL_ENGINE_FIXED); for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); - OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0); - OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); + BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); for (i = 4; i < 16; ++i) { - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 1.0); } - BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (0x00010101); - BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x00010101); + BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1); + OUT_RING (chan, 0); memset(projectionmatrix, 0, sizeof(projectionmatrix)); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 16777215.0; projectionmatrix[3*4+3] = 1.0; - BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); for (i = 0; i < 16; i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); - OUT_RINGf (0.0); - OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ - OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ - OUT_RINGf (0.0); - OUT_RINGf (16777215.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */ + OUT_RINGf (chan, 0.0); /* y-offset, h/2 + 0.030762 */ + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777215.0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); - OUT_RINGf (0.0); /* no effect?, w/2 */ - OUT_RINGf (0.0); /* no effect?, h/2 */ - OUT_RINGf (16777215.0 * 0.5); - OUT_RINGf (65535.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (chan, 0.0); /* no effect?, w/2 */ + OUT_RINGf (chan, 0.0); /* no effect?, h/2 */ + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RINGf (chan, 65535.0); - FIRE_RING (NULL); + FIRE_RING (chan); } struct pipe_context * diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h index a4eaa95660..c7dfadaa31 100644 --- a/src/gallium/drivers/nv20/nv20_context.h +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv20_screen *ctx = nv20->screen -#include "nouveau/nouveau_push.h" - #include "nv20_state.h" #define NOUVEAU_ERR(fmt, args...) \ @@ -143,9 +139,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20); extern void nv20_state_tex_update(struct nv20_context *nv20); /* nv20_vbo.c */ -extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv20_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv20_draw_elements( struct pipe_context *pipe, +extern void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c index 2db4a4015a..dedbec73f3 100644 --- a/src/gallium/drivers/nv20/nv20_fragtex.c +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -52,6 +52,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; struct pipe_texture *pt = &nv20mt->base; struct nv20_texture_format *tf; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t txf, txs, txp; tf = nv20_fragtex_format(pt->format); @@ -82,15 +85,15 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) return; } - BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width0 << 16) | pt->height0); - OUT_RING (ps->bcol); + BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv20_fragtex_bind(struct nv20_context *nv20) { #if 0 struct nv20_fragment_program *fp = nv20->fragprog.active; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; unsigned samplers, unit; samplers = nv20->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv20_fragtex_bind(struct nv20_context *nv20) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv20->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index d1291a92e0..8f7538e7f5 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -6,6 +6,7 @@ #include "nv20_context.h" #include "nv20_screen.h" +#include "../nv04/nv04_surface_2d.h" static void nv20_miptree_layout(struct nv20_miptree *nv20mt) @@ -127,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv20_miptree_layout(mt); mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size); @@ -183,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, ns->base.offset = nv20mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base; + return &ns->base; } static void nv20_miptree_surface_destroy(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv20_miptree_surface_destroy(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c index ddfcdb8057..2e145672da 100644 --- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -81,12 +81,15 @@ nv20_vbuf_render(struct vbuf_render *render) void nv20_vtxbuf_bind( struct nv20_context* nv20 ) { #if 0 + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv20->vtxbuf*/); - BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv20->vtxbuf*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1); + OUT_RING(chan, 0/*XXX*/); } #endif } @@ -202,6 +205,9 @@ nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) static unsigned nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) { + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t hwfmt = 0; unsigned fields; @@ -231,8 +237,8 @@ nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) return 0; } - BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1); - OUT_RING(hwfmt); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1); + OUT_RING(chan, hwfmt); return fields; } @@ -262,6 +268,9 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; struct vertex_info *vinfo = &nv20->vertex_info; unsigned nr_fields; int max_push; @@ -270,29 +279,29 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, nr_fields = nv20__emit_vertex_array_format(nv20); - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); max_push = 1200 / nr_fields; while (nr_indices) { int i; int push = MIN2(nr_indices, max_push); - BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); + BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); for (i = 0; i < push; i++) { /* XXX: fixme to handle other than floats? */ int f = nr_fields; float *attrv = (float*)&data[indices[i] * vsz]; while (f-- > 0) - OUT_RINGf(*attrv++); + OUT_RINGf(chan, *attrv++); } nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP); } static void @@ -301,20 +310,23 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int push, i; NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); - BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv20_render->pbuffer, 0, + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -322,16 +334,16 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, // XXX too big/small ? check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } static void diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index 4eeacd1afd..d091335063 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->kelvin); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -173,7 +176,6 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->kelvin, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 0122b1c2cd..6bbd1fdae9 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -5,27 +5,34 @@ static void nv20_state_emit_blend(struct nv20_context* nv20) { struct nv20_blend_state *b = nv20->blend; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (b->b_enable); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, b->b_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 2); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv20_state_emit_blend_color(struct nv20_context* nv20) { struct pipe_blend_color *c = nv20->blend_color; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -34,63 +41,69 @@ static void nv20_state_emit_blend_color(struct nv20_context* nv20) static void nv20_state_emit_rast(struct nv20_context* nv20) { struct nv20_rasterizer_state *r = nv20->rast; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); - BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv20_state_emit_dsa(struct nv20_context* nv20) { struct nv20_depth_stencil_alpha_state *d = nv20->dsa; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); #if 0 - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (d->alpha.func); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv20_state_emit_viewport(struct nv20_context* nv20) @@ -101,9 +114,13 @@ static void nv20_state_emit_scissor(struct nv20_context* nv20) { /* NV20TCL_SCISSOR_* is probably a software method */ /* struct pipe_scissor_state *s = nv20->scissor; - BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + + BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2); + OUT_RING (chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (chan, ((s->maxy - s->miny) << 16) | s->miny);*/ } static void nv20_state_emit_framebuffer(struct nv20_context* nv20) @@ -113,6 +130,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv20_miptree *nv20mt = 0; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -150,11 +170,11 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) } if (zeta) { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv20mt = (struct nv20_miptree *)rt->base.texture; @@ -166,13 +186,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) nv20->zeta = nv20mt->buffer; } - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); /*NV20TCL_RT_VERT */ - OUT_RING (rt_format); /* NV20TCL_RT_FORMAT */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */ + OUT_RING (chan, rt_format); /* NV20TCL_RT_FORMAT */ + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0); + OUT_RING (chan, ((h - 1) << 16) | 0); } static void nv20_vertex_layout(struct nv20_context *nv20) @@ -228,7 +248,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) } /* always do position */ { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->hwfmt[0] |= (1 << 0); } @@ -237,19 +257,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 4; i < 6; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 3)); } if (colors[0]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 3); } if (colors[1]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 4); } @@ -258,7 +278,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 6; i < 10; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 1)); } @@ -267,7 +287,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 0; i < 4; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 9)); } @@ -276,13 +296,13 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 10; i < 12; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 3)); } if (fog) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << 15); } @@ -293,6 +313,10 @@ static void nv20_vertex_layout(struct nv20_context *nv20) void nv20_emit_hw_state(struct nv20_context *nv20) { + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + struct nouveau_bo *rt_bo; int i; if (nv20->dirty & NV20_NEW_VERTPROG) { @@ -361,36 +385,39 @@ nv20_emit_hw_state(struct nv20_context *nv20) */ /* Render target */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + rt_bo = nouveau_bo(nv20->rt[0]); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv20->zeta) { - BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1); - OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + struct nouveau_bo *zeta_bo = nouveau_bo(nv20->zeta); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1); + OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv20->zeta + lma_offset);*/ +/* BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv20->fp_samplers & (1 << i))) continue; - BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer); + BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format, + BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv20->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, NV20TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 69b79c809f..699773e8e6 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -16,14 +16,14 @@ struct nv20_transfer { }; static void -nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv20_compatible_transfer_tex(pt, level, &tx_tex_template); + nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv20_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index 84d7db6c5e..52991a0d85 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv20_draw_elements( struct pipe_context *pipe, +void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv20->constbuf[PIPE_SHADER_VERTEX], nv20->constbuf_nr[PIPE_SHADER_VERTEX]); @@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe, } draw_flush(nv20->draw); - return TRUE; } -boolean nv20_draw_arrays( struct pipe_context *pipe, +void nv20_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return nv20_draw_elements(pipe, NULL, 0, prim, start, count); + nv20_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 46a821a48b..54572e9ab3 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -10,21 +10,32 @@ nv30_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void nv30_destroy(struct pipe_context *pipe) { struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (nv30->state.hw[i]) + so_ref(NULL, &nv30->state.hw[i]); + } if (nv30->draw) draw_destroy(nv30->draw); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 864ddaeb59..e59449287b 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv30_screen *ctx = nv30->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include "nv30_state.h" @@ -198,9 +194,9 @@ extern struct nv30_state_entry nv30_state_fragtex; extern struct nv30_state_entry nv30_state_vbo; /* nv30_vbo.c */ -extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv30_draw_elements(struct pipe_context *pipe, +extern void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 40965a9772..2d565cb631 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -435,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv30_sr(NV30SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV30_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV30_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -517,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SIN: @@ -821,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); - so = so_new(8, 1); + so = so_new(4, 4, 1); so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | @@ -870,6 +886,12 @@ void nv30_fragprog_destroy(struct nv30_context *nv30, struct nv30_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index b3293ee700..9893567891 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -106,7 +106,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(1, 8, 2); so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -135,7 +135,7 @@ nv30_fragtex_validate(struct nv30_context *nv30) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index ce95d9700f..8fbba38e78 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -5,6 +5,7 @@ #include "util/u_math.h" #include "nv30_context.h" +#include "../nv04/nv04_surface_2d.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) @@ -108,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv30_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, @@ -196,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = nv30mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv30_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv30_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index 1d1c8a484e..e27e9ccbf6 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -41,6 +41,9 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_query *q = nv30_query(pq); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(0); nouveau_notifier_reset(nv30->screen->query, q->object->start); - BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -69,12 +72,15 @@ static void nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_query *q = nv30_query(pq); - BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 7cd36902eb..9ed48178dc 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -156,6 +156,12 @@ static void nv30_screen_destroy(struct pipe_screen *pscreen) { struct nv30_screen *screen = nv30_screen(pscreen); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -163,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->rankine); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } @@ -224,7 +233,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->rankine, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -261,7 +269,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static rankine initialisation */ - so = so_new(128, 0); + so = so_new(36, 60, 0); so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 268d3a8ab8..065c927a10 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -14,7 +14,7 @@ nv30_blend_state_create(struct pipe_context *pipe, struct nv30_context *nv30 = nv30_context(pipe); struct nouveau_grobj *rankine = nv30->screen->rankine; struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); @@ -300,7 +300,7 @@ nv30_rasterizer_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(9, 19, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; /*XXX: ignored: @@ -435,7 +435,7 @@ nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(5, 21, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c index 64cf9ae93a..c36d58c040 100644 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -18,7 +18,7 @@ struct nv30_state_entry nv30_state_blend = { static boolean nv30_state_blend_colour_validate(struct nv30_context *nv30) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv30->blend_colour; so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 6f6d1740d6..2ed2ea55e8 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -10,7 +10,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nv04_surface *rt[2], *zeta = NULL; uint32_t rt_enable = 0, rt_format = 0; int i, colour_format = 0, zeta_format = 0, depth_only = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(12, 18, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c index 3ac7a8471e..ba61a9e24a 100644 --- a/src/gallium/drivers/nv30/nv30_state_scissor.c +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -12,7 +12,7 @@ nv30_state_scissor_validate(struct nv30_context *nv30) return FALSE; nv30->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); if (nv30->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c index d0c791ac08..ed520a4f43 100644 --- a/src/gallium/drivers/nv30/nv30_state_stipple.c +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -14,14 +14,14 @@ nv30_state_stipple_validate(struct nv30_context *nv30) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv30->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c index c3eb413dac..2d7781292b 100644 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -19,7 +19,7 @@ nv30_state_viewport_validate(struct nv30_context *nv30) return FALSE; nv30->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(3, 10, 0); if (!bypass) { so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 2255a02cae..65598991c6 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -16,14 +16,14 @@ struct nv30_transfer { }; static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv30_compatible_transfer_tex(pt, level, &tx_tex_template); + nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv30_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index e32b8141af..1c5db03ea2 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -163,19 +163,21 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; nv30_vbo_set_idxbuf(nv30, NULL, 0); if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } while (count) { @@ -186,17 +188,17 @@ nv30_draw_arrays(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,15 +208,15 @@ nv30_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; @@ -228,7 +230,9 @@ static INLINE void nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv30_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv30_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void nv30_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; while (count) { @@ -412,17 +421,17 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -461,7 +468,7 @@ nv30_draw_elements(struct pipe_context *pipe, if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } if (idxbuf) { @@ -472,7 +479,6 @@ nv30_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -485,9 +491,9 @@ nv30_vbo_validate(struct nv30_context *nv30) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); for (hw = 0; hw < nv30->vtxelt_nr; hw++) { @@ -500,7 +506,7 @@ nv30_vbo_validate(struct nv30_context *nv30) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 5d60984622..e77a5be3f2 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -650,7 +650,9 @@ static boolean nv30_vertprog_validate(struct nv30_context *nv30) { struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -684,7 +686,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) assert(0); } - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_ref(so, &vp->so); @@ -770,9 +772,9 @@ nv30_vertprog_validate(struct nv30_context *nv30) 4 * sizeof(float)); } - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -788,11 +790,11 @@ nv30_vertprog_validate(struct nv30_context *nv30) vp->insns[i].data[2], vp->insns[i].data[3]); } #endif - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index eb9cce4c78..f79ae4db84 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -10,21 +10,32 @@ nv40_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void nv40_destroy(struct pipe_context *pipe) { struct nv40_context *nv40 = nv40_context(pipe); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (nv40->state.hw[i]) + so_ref(NULL, &nv40->state.hw[i]); + } if (nv40->draw) draw_destroy(nv40->draw); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 83fcf1785d..e219bb537a 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv40_screen *ctx = nv40->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include "nv40_state.h" @@ -183,7 +179,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); /* nv40_draw.c */ extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); -extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, +extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned ib_size, unsigned mode, unsigned start, unsigned count); @@ -219,9 +215,9 @@ extern struct nv40_state_entry nv40_state_vbo; extern struct nv40_state_entry nv40_state_vtxfmt; /* nv40_vbo.c */ -extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv40_draw_elements(struct pipe_context *pipe, +extern void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index b2f19ecb69..d826f8c2f5 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -31,6 +31,9 @@ nv40_render_stage(struct draw_stage *stage) static INLINE void nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) { + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; for (i = 0; i < nv40->swtnl.nr_attribs; i++) { @@ -41,30 +44,30 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) case EMIT_OMIT: break; case EMIT_1F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (fui(v->data[idx][0])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); break; case EMIT_2F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); break; case EMIT_3F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); break; case EMIT_4F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); - OUT_RING (fui(v->data[idx][3])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); break; case EMIT_4UB: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), float_to_ubyte(v->data[idx][1]), float_to_ubyte(v->data[idx][2]), float_to_ubyte(v->data[idx][3]))); @@ -82,7 +85,11 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, { struct nv40_render_stage *rs = nv40_render_stage(stage); struct nv40_context *nv40 = rs->nv40; - struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf; + + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *curie = screen->curie; unsigned i; /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ @@ -91,19 +98,19 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, NOUVEAU_ERR("AIII, missed flush\n"); assert(0); } - FIRE_RING(NULL); + FIRE_RING(chan); nv40_state_emit(nv40); } /* Switch primitive modes if necessary */ if (rs->prim != mode) { if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (mode); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, mode); rs->prim = mode; } @@ -115,8 +122,8 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, * off the primitive now. */ if (pb->remaining < ((count * 20) + 6)) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -144,10 +151,13 @@ nv40_render_flush(struct draw_stage *draw, unsigned flags) { struct nv40_render_stage *rs = nv40_render_stage(draw); struct nv40_context *nv40 = rs->nv40; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -226,7 +236,7 @@ nv40_draw_render_stage(struct nv40_context *nv40) return &render->stage; } -boolean +void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned idxbuf_size, unsigned mode, unsigned start, unsigned count) @@ -237,7 +247,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, void *map; if (!nv40_state_validate_swtnl(nv40)) - return FALSE; + return; nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); @@ -261,7 +271,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, map, nr); + draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, + map, nr); } draw_arrays(nv40->draw, mode, start, count); @@ -277,15 +288,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, draw_flush(nv40->draw); pipe->flush(pipe, 0, NULL); - - return TRUE; } static INLINE void emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, unsigned semantic, unsigned index) { - unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index); unsigned a = nv40->swtnl.nr_attribs++; nv40->swtnl.hw[a] = hw; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 1bf16726d1..1237066c39 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); break; case NV40SR_NONE: sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); @@ -445,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv40_sr(NV40SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV40_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -573,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, neg(swz(tmp, X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SEQ: @@ -752,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) { struct tgsi_full_immediate *imm; float vals[4]; - + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); @@ -836,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->insn[fpc->inst_offset + 1] = 0x00000000; fp->insn[fpc->inst_offset + 2] = 0x00000000; fp->insn[fpc->inst_offset + 3] = 0x00000000; - + fp->translated = TRUE; out_err: tgsi_parse_free(&parse); @@ -903,7 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv40_fragprog_upload(nv40, fp); - so = so_new(4, 1); + so = so_new(2, 2, 1); so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | @@ -917,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) update_constants: if (fp->nr_consts) { float *map; - + map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { @@ -948,6 +964,12 @@ void nv40_fragprog_destroy(struct nv40_context *nv40, struct nv40_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 44abc84596..aad9198210 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -108,7 +108,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(2, 9, 2); so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -139,7 +139,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index b974e68a07..89bd155ff4 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -5,6 +5,7 @@ #include "util/u_math.h" #include "nv40_context.h" +#include "../nv04/nv04_surface_2d.h" @@ -105,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv40_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); @@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv40_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv40_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 7874aedd42..8ed4a67dd0 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -41,6 +41,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(0); nouveau_notifier_reset(nv40->screen->query, q->object->start); - BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -70,11 +73,14 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; - BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index bd13dfddd1..9e55e5a089 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -140,6 +140,12 @@ static void nv40_screen_destroy(struct pipe_screen *pscreen) { struct nv40_screen *screen = nv40_screen(pscreen); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -147,6 +153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->curie); + nv04_surface_2d_takedown(&screen->eng2d); nouveau_screen_fini(&screen->base); @@ -208,7 +215,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->curie, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -245,7 +251,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static curie initialisation */ - so = so_new(128, 0); + so = so_new(16, 25, 0); so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index f6e5dee814..7d990f7d56 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -16,7 +16,7 @@ nv40_blend_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nouveau_grobj *curie = nv40->screen->curie; struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); @@ -310,7 +310,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(8, 18, 0); struct nouveau_grobj *curie = nv40->screen->curie; /*XXX: ignored: @@ -445,7 +445,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(4, 21, 0); struct nouveau_grobj *curie = nv40->screen->curie; so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c index 8cd05ce66e..3ff00a37f6 100644 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -18,7 +18,7 @@ struct nv40_state_entry nv40_state_blend = { static boolean nv40_state_blend_colour_validate(struct nv40_context *nv40) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv40->blend_colour; so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 789ed16126..13fe854915 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -54,9 +54,10 @@ nv40_state_do_validate(struct nv40_context *nv40, void nv40_state_emit(struct nv40_context *nv40) { - struct nouveau_channel *chan = nv40->screen->base.channel; struct nv40_state *state = &nv40->state; struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; uint64_t states; @@ -80,10 +81,10 @@ nv40_state_emit(struct nv40_context *nv40) if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | (1ULL << NV40_STATE_FRAGTEX0))) { - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 1); } state->dirty = 0; diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 1c7a7cd64f..a58fe9ddb1 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -19,7 +19,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) struct nv04_surface *rt[4], *zeta; uint32_t rt_enable, rt_format; int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(18, 24, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index cf58d33906..753a505e93 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40) return FALSE; nv40->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); if (nv40->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index b51024ad9b..2b371ebfec 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -14,14 +14,14 @@ nv40_state_stipple_validate(struct nv40_context *nv40) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv40->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 665d2d5fca..9919ba1d0b 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -19,7 +19,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40) return FALSE; nv40->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(2, 9, 0); if (!bypass) { so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index b084a38b48..791ee6823d 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -16,14 +16,14 @@ struct nv40_transfer { }; static void -nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv40_compatible_transfer_tex(pt, level, &tx_tex_template); + nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv40_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index af3fcf6a34..a777898f68 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -164,18 +164,21 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; nv40_vbo_set_idxbuf(nv40, NULL, 0); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } while (count) { @@ -186,17 +189,17 @@ nv40_draw_arrays(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,29 +209,30 @@ nv40_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } pipe->flush(pipe, 0, NULL); - return TRUE; } static INLINE void nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv40_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void nv40_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; while (count) { @@ -412,17 +421,17 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -459,8 +466,9 @@ nv40_draw_elements(struct pipe_context *pipe, idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } if (idxbuf) { @@ -471,7 +479,6 @@ nv40_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -484,9 +491,9 @@ nv40_vbo_validate(struct nv40_context *nv40) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); for (hw = 0; hw < nv40->vtxelt_nr; hw++) { @@ -499,7 +506,7 @@ nv40_vbo_validate(struct nv40_context *nv40) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index d9fc31006f..8d80fcad38 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -834,7 +834,9 @@ static boolean nv40_vertprog_validate(struct nv40_context *nv40) { struct pipe_screen *pscreen = nv40->pipe.screen; - struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; struct nv40_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -884,7 +886,7 @@ check_gpu_resources: assert(0); } - so = so_new(7, 0); + so = so_new(3, 4, 0); so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); @@ -974,9 +976,9 @@ check_gpu_resources: 4 * sizeof(float)); } - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -993,11 +995,11 @@ check_gpu_resources: NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); } #endif - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index d21b80eab8..5997456e4c 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -43,6 +43,39 @@ nv50_destroy(struct pipe_context *pipe) { struct nv50_context *nv50 = nv50_context(pipe); + if (nv50->state.fb) + so_ref(NULL, &nv50->state.fb); + if (nv50->state.blend) + so_ref(NULL, &nv50->state.blend); + if (nv50->state.blend_colour) + so_ref(NULL, &nv50->state.blend_colour); + if (nv50->state.zsa) + so_ref(NULL, &nv50->state.zsa); + if (nv50->state.rast) + so_ref(NULL, &nv50->state.rast); + if (nv50->state.stipple) + so_ref(NULL, &nv50->state.stipple); + if (nv50->state.scissor) + so_ref(NULL, &nv50->state.scissor); + if (nv50->state.viewport) + so_ref(NULL, &nv50->state.viewport); + if (nv50->state.tsc_upload) + so_ref(NULL, &nv50->state.tsc_upload); + if (nv50->state.tic_upload) + so_ref(NULL, &nv50->state.tic_upload); + if (nv50->state.vertprog) + so_ref(NULL, &nv50->state.vertprog); + if (nv50->state.fragprog) + so_ref(NULL, &nv50->state.fragprog); + if (nv50->state.programs) + so_ref(NULL, &nv50->state.programs); + if (nv50->state.vtxfmt) + so_ref(NULL, &nv50->state.vtxfmt); + if (nv50->state.vtxbuf) + so_ref(NULL, &nv50->state.vtxbuf); + if (nv50->state.vtxattr) + so_ref(NULL, &nv50->state.vtxattr); + draw_destroy(nv50->draw); FREE(nv50); } diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 5578a5838f..cbd4c3ff86 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -191,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ -extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv50_draw_elements(struct pipe_context *pipe, +extern void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3f1edf0a13..cecb1efc90 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -145,7 +145,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); FREE(mt); return NULL; @@ -188,7 +188,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); unsigned l; - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 679c28ce4b..069f815938 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -96,7 +96,11 @@ struct nv50_reg { #define NV50_MOD_NEG 1 #define NV50_MOD_ABS 2 +#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS) #define NV50_MOD_SAT 4 +#define NV50_MOD_I32 8 + +/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */ /* STACK: Conditionals and loops have to use the (per warp) stack. * Stack entries consist of an entry type (divergent path, join at), @@ -134,6 +138,7 @@ struct nv50_pc { uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ struct nv50_reg *temp_temp[16]; + struct nv50_program_exec *temp_temp_exec[16]; unsigned temp_temp_nr; /* broadcast and destination replacement regs */ @@ -154,26 +159,17 @@ struct nv50_pc { int if_lvl, loop_lvl; unsigned loop_pos[NV50_MAX_LOOP_NESTING]; + unsigned *insn_pos; /* actual program offset of each TGSI insn */ + boolean in_subroutine; + /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; boolean allow32; -}; - -static INLINE struct nv50_reg * -reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) -{ - struct nv50_reg *ri; - assert(pc->reg_instance_nr < 16); - ri = &pc->reg_instances[pc->reg_instance_nr++]; - if (reg) { - *ri = *reg; - reg->mod = 0; - } - return ri; -} + uint8_t edgeflag_out; +}; static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) @@ -250,7 +246,23 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); +} + +static INLINE struct nv50_reg * +reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *ri; + + assert(pc->reg_instance_nr < 16); + ri = &pc->reg_instances[pc->reg_instance_nr++]; + if (reg) { + alloc_reg(pc, reg); + *ri = *reg; + reg->mod = 0; + } + return ri; } /* XXX: For shaders that aren't executed linearly (e.g. shaders that @@ -275,26 +287,11 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); return NULL; } -/* Assign the hw of the discarded temporary register src - * to the tgsi register dst and free src. - */ -static void -assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - assert(src->index == -1 && src->hw != -1); - - if (dst->hw != -1) - pc->r_temp[dst->hw] = NULL; - pc->r_temp[src->hw] = dst; - dst->hw = src->hw; - - FREE(src); -} - /* release the hardware resource held by r */ static void release_hw(struct nv50_pc *pc, struct nv50_reg *r) @@ -353,23 +350,29 @@ free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) } static struct nv50_reg * -temp_temp(struct nv50_pc *pc) +temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { if (pc->temp_temp_nr >= 16) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + pc->temp_temp_exec[pc->temp_temp_nr] = e; return pc->temp_temp[pc->temp_temp_nr++]; } +/* This *must* be called for all nv50_program_exec that have been + * given as argument to temp_temp, or the temps will be leaked ! + */ static void -kill_temp_temp(struct nv50_pc *pc) +kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { int i; for (i = 0; i < pc->temp_temp_nr; i++) - free_temp(pc, pc->temp_temp[i]); - pc->temp_temp_nr = 0; + if (pc->temp_temp_exec[i] == e) + free_temp(pc, pc->temp_temp[i]); + if (!e) + pc->temp_temp_nr = 0; } static int @@ -431,6 +434,8 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e) p->exec_head = e; p->exec_tail = e; p->exec_size += (e->inst[0] & 1) ? 2 : 1; + + kill_temp_temp(pc, e); } static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); @@ -451,10 +456,19 @@ is_immd(struct nv50_program_exec *e) return FALSE; } +static boolean +is_join(struct nv50_program_exec *e) +{ + if (is_long(e) && (e->inst[1] & 3) == 2) + return TRUE; + return FALSE; +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) { + assert(!is_immd(e)); set_long(pc, e); e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); e->inst[1] |= (pred << 7) | (idx << 12); @@ -497,15 +511,6 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { - union { - float f; - uint32_t ui; - } u; - u.ui = pc->immd_buf[imm->hw]; - - u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f; - u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f; - set_long(pc, e); /* XXX: can't be predicated - bits overlap; cases where both * are required should be avoided by using pc->allow32 */ @@ -513,8 +518,8 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) set_pred_wr(pc, 0, 0, e); e->inst[1] |= 0x00000002 | 0x00000001; - e->inst[0] |= (u.ui & 0x3f) << 16; - e->inst[1] |= (u.ui >> 6) << 2; + e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16; + e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2; } static INLINE void @@ -663,6 +668,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } +/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */ static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -715,6 +721,34 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) FREE(imm); } +/* Assign the hw of the discarded temporary register src + * to the tgsi register dst and free src. + */ +static void +assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + assert(src->index == -1 && src->hw != -1); + + if (pc->if_lvl || pc->loop_lvl || + (dst->type != P_TEMP) || + (src->hw < pc->result_nr * 4 && + pc->p->type == PIPE_SHADER_FRAGMENT) || + pc->p->info.opcode_count[TGSI_OPCODE_CAL] || + pc->p->info.opcode_count[TGSI_OPCODE_BRA]) { + + emit_mov(pc, dst, src); + free_temp(pc, src); + return; + } + + if (dst->hw != -1) + pc->r_temp[dst->hw] = NULL; + pc->r_temp[src->hw] = dst; + dst->hw = src->hw; + + FREE(src); +} + static void emit_nop(struct nv50_pc *pc) { @@ -757,7 +791,7 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_reg *temp; if (src->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } @@ -776,7 +810,7 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) e->inst[1] |= 0x00200000; } else if (src->type == P_CONST || src->type == P_IMMD) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -792,7 +826,7 @@ static void set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -800,7 +834,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x00800000)); if (e->inst[0] & 0x01000000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -822,7 +856,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) set_long(pc, e); if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -830,7 +864,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x01000000)); if (e->inst[0] & 0x00800000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -845,6 +879,26 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } static void +set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh, + struct nv50_program_exec *e, int pos) +{ + struct nv50_reg *r = src; + + alloc_reg(pc, r); + if (r->type != P_TEMP) { + r = temp_temp(pc, e); + emit_mov(pc, r, src); + } + + if (r->hw > (NV50_SU_MAX_TEMP / 2)) { + NOUVEAU_ERR("out of low GPRs\n"); + abort(); + } + + e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32); +} + +static void emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred) { struct nv50_program_exec *e = exec(pc); @@ -886,7 +940,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_dst(pc, dst, e); set_src_0(pc, src0, e); if (src1->type == P_IMMD && !is_long(e)) { - if (src0->mod & NV50_MOD_NEG) + if (src0->mod ^ src1->mod) e->inst[0] |= 0x00008000; set_immd(pc, src1, e); } else { @@ -948,6 +1002,13 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, emit(pc, e); } +#define NV50_MAX_F32 0x880 +#define NV50_MAX_S32 0x08c +#define NV50_MAX_U32 0x084 +#define NV50_MIN_F32 0x8a0 +#define NV50_MIN_S32 0x0ac +#define NV50_MIN_U32 0x0a4 + static void emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -955,8 +1016,8 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_program_exec *e = exec(pc); set_long(pc, e); - e->inst[0] |= 0xb0000000; - e->inst[1] |= (sub << 29); + e->inst[0] |= 0x30000000 | ((sub & 0x800) << 20); + e->inst[1] |= (sub << 24); check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, e); @@ -997,6 +1058,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, op != TGSI_OPCODE_XOR) assert(!"invalid bit op"); + assert(!(src0->mod | src1->mod)); + if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) { set_immd(pc, src1, e); if (op == TGSI_OPCODE_OR) @@ -1018,6 +1081,69 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, } static void +emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000; + e->inst[1] = 0x0402c000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_1(pc, src, e); + + emit(pc, e); +} + +static void +emit_shift(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4000000; + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (src1->type == P_IMMD) { + e->inst[1] |= (1 << 20); + e->inst[0] |= (pc->immd_buf[src1->hw] & 0x7f) << 16; + } else + set_src_1(pc, src1, e); + + if (dir != TGSI_OPCODE_SHL) + e->inst[1] |= (1 << 29); + + if (dir == TGSI_OPCODE_ISHR) + e->inst[1] |= (1 << 27); + + emit(pc, e); +} + +static void +emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, int s) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4100000; + if (s < 0) { + e->inst[1] |= 1 << 29; + s = -s; + } + e->inst[1] |= ((s & 0x7f) << 16); + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { @@ -1048,6 +1174,14 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, src2->mod ^= NV50_MOD_NEG; } +#define NV50_FLOP_RCP 0 +#define NV50_FLOP_RSQ 2 +#define NV50_FLOP_LG2 3 +#define NV50_FLOP_SIN 4 +#define NV50_FLOP_COS 5 +#define NV50_FLOP_EX2 6 + +/* rcp, rsqrt, lg2 support neg and abs */ static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) @@ -1055,17 +1189,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x90000000; - if (sub) { + if (sub || src->mod) { set_long(pc, e); e->inst[1] |= (sub << 29); } set_dst(pc, dst, e); + set_src_0_restricted(pc, src, e); - if (sub == 0 || sub == 2) - set_src_0_restricted(pc, src, e); - else - set_src_0(pc, src, e); + assert(!src->mod || sub < 4); + + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; emit(pc, e); } @@ -1082,6 +1219,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29) | 0x00004000; + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1097,39 +1239,49 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29); + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } -#define CVTOP_RN 0x01 -#define CVTOP_FLOOR 0x03 -#define CVTOP_CEIL 0x05 -#define CVTOP_TRUNC 0x07 -#define CVTOP_SAT 0x08 -#define CVTOP_ABS 0x10 - -/* 0x04 == 32 bit dst */ -/* 0x40 == dst is float */ -/* 0x80 == src is float */ -#define CVT_F32_F32 0xc4 -#define CVT_F32_S32 0x44 -#define CVT_S32_F32 0x8c -#define CVT_S32_S32 0x0c -#define CVT_NEG 0x20 -#define CVT_RI 0x08 +#define CVT_RN (0x00 << 16) +#define CVT_FLOOR (0x02 << 16) +#define CVT_CEIL (0x04 << 16) +#define CVT_TRUNC (0x06 << 16) +#define CVT_SAT (0x08 << 16) +#define CVT_ABS (0x10 << 16) + +#define CVT_X32_X32 0x04004000 +#define CVT_X32_S32 0x04014000 +#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32) +#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32) +#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32) +#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32) +#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32) +#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32) +#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32) +#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32) + +#define CVT_NEG 0x20000000 +#define CVT_RI 0x08000000 static void emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, - int wp, unsigned cvn, unsigned fmt) + int wp, uint32_t cvn) { struct nv50_program_exec *e; e = exec(pc); - set_long(pc, e); - e->inst[0] |= 0xa0000000; - e->inst[1] |= 0x00004000; /* 32 bit src */ - e->inst[1] |= (cvn << 16); - e->inst[1] |= (fmt << 24); + if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG; + if (src->mod & NV50_MOD_ABS) cvn |= CVT_ABS; + + e->inst[0] = 0xa0000000; + e->inst[1] = cvn; + set_long(pc, e); set_src_0(pc, src, e); if (wp >= 0) @@ -1154,10 +1306,12 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, * 0x6 = GE * 0x7 = set condition code ? (used before bra.lt/le/gt/ge) * 0x8 = unordered bit (allows NaN) + * + * mode = 0x04 (u32), 0x0c (s32), 0x80 (f32) */ static void emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, - struct nv50_reg *src0, struct nv50_reg *src1) + struct nv50_reg *src0, struct nv50_reg *src1, uint8_t mode) { static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; @@ -1172,16 +1326,10 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, if (dst && dst->type != P_TEMP) dst = alloc_temp(pc, NULL); - /* set.u32 */ set_long(pc, e); - e->inst[0] |= 0xb0000000; + e->inst[0] |= 0x30000000 | (mode << 24); e->inst[1] |= 0x60000000 | (ccode << 14); - /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but - * that doesn't seem to match what the hw actually does - e->inst[1] |= 0x04000000; << breaks things, u32 by default ? - */ - if (wp >= 0) set_pred_wr(pc, 1, wp, e); if (dst) @@ -1196,33 +1344,146 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, emit(pc, e); - /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ - if (rdst) - emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); + if (rdst && mode == 0x80) /* convert to float ? */ + emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32); if (rdst && rdst != dst) free_temp(pc, dst); } -static INLINE unsigned -map_tgsi_setop_cc(unsigned op) +static INLINE void +map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty) { switch (op) { - case TGSI_OPCODE_SLT: return 0x1; - case TGSI_OPCODE_SGE: return 0x6; - case TGSI_OPCODE_SEQ: return 0x2; - case TGSI_OPCODE_SGT: return 0x4; - case TGSI_OPCODE_SLE: return 0x3; - case TGSI_OPCODE_SNE: return 0xd; + case TGSI_OPCODE_SLT: *cc = 0x1; *ty = 0x80; break; + case TGSI_OPCODE_SGE: *cc = 0x6; *ty = 0x80; break; + case TGSI_OPCODE_SEQ: *cc = 0x2; *ty = 0x80; break; + case TGSI_OPCODE_SGT: *cc = 0x4; *ty = 0x80; break; + case TGSI_OPCODE_SLE: *cc = 0x3; *ty = 0x80; break; + case TGSI_OPCODE_SNE: *cc = 0xd; *ty = 0x80; break; + + case TGSI_OPCODE_ISLT: *cc = 0x1; *ty = 0x0c; break; + case TGSI_OPCODE_ISGE: *cc = 0x6; *ty = 0x0c; break; + case TGSI_OPCODE_USEQ: *cc = 0x2; *ty = 0x04; break; + case TGSI_OPCODE_USGE: *cc = 0x6; *ty = 0x04; break; + case TGSI_OPCODE_USLT: *cc = 0x1; *ty = 0x04; break; + case TGSI_OPCODE_USNE: *cc = 0x5; *ty = 0x04; break; default: assert(0); - return 0; + return; } } +static void +emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *rsrc1) +{ + struct nv50_program_exec *e = exec(pc); + struct nv50_reg *src1; + + e->inst[0] = 0x20000000; + + alloc_reg(pc, rsrc1); + check_swap_src_0_1(pc, &src0, &rsrc1); + + src1 = rsrc1; + if (src0->mod & rsrc1->mod & NV50_MOD_NEG) { + src1 = temp_temp(pc, e); + emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32); + } + + if (!pc->allow32 || src1->hw > 63 || + (src1->type != P_TEMP && src1->type != P_IMMD)) + set_long(pc, e); + + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (is_long(e)) { + e->inst[1] |= 1 << 26; + set_src_2(pc, src1, e); + } else { + e->inst[0] |= 0x8000; + if (src1->type == P_IMMD) + set_immd(pc, src1, e); + else + set_src_1(pc, src1, e); + } + + if (src0->mod & NV50_MOD_NEG) + e->inst[0] |= 1 << 28; + else + if (src1->mod & NV50_MOD_NEG) + e->inst[0] |= 1 << 22; + + emit(pc, e); +} + +static void +emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1, + struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x60000000; + if (!pc->allow32) + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + + emit(pc, e); +} + +static void +emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x50000000; + if (!pc->allow32) + set_long(pc, e); + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != dst->type) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + if (is_long(e)) + e->inst[1] |= 0x0c << 24; + else + e->inst[0] |= 0x81 << 8; + + emit(pc, e); +} + static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI); + emit_cvt(pc, dst, src, -1, CVT_FLOOR | CVT_F32_F32 | CVT_RI); } static void @@ -1231,24 +1492,18 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_reg *temp = alloc_temp(pc, NULL); - emit_flop(pc, 3, temp, v); + emit_flop(pc, NV50_FLOP_LG2, temp, v); emit_mul(pc, temp, temp, e); emit_preex2(pc, temp, temp); - emit_flop(pc, 6, dst, temp); + emit_flop(pc, NV50_FLOP_EX2, dst, temp); free_temp(pc, temp); } static INLINE void -emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); -} - -static INLINE void emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); + emit_cvt(pc, dst, src, -1, CVT_SAT | CVT_F32_F32); } static void @@ -1266,18 +1521,18 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, if (mask & (3 << 1)) { tmp[0] = alloc_temp(pc, NULL); - emit_minmax(pc, 4, tmp[0], src[0], zero); + emit_minmax(pc, NV50_MAX_F32, tmp[0], src[0], zero); } if (mask & (1 << 2)) { set_pred_wr(pc, 1, 0, pc->p->exec_tail); - tmp[1] = temp_temp(pc); - emit_minmax(pc, 4, tmp[1], src[1], zero); + tmp[1] = temp_temp(pc, NULL); + emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero); - tmp[3] = temp_temp(pc); - emit_minmax(pc, 4, tmp[3], src[3], neg128); - emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + tmp[3] = temp_temp(pc, NULL); + emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128); + emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128); emit_pow(pc, dst[2], tmp[1], tmp[3]); emit_mov(pc, dst[2], zero); @@ -1305,12 +1560,6 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, FREE(one); } -static INLINE void -emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG); -} - static void emit_kil(struct nv50_pc *pc, struct nv50_reg *src) { @@ -1322,80 +1571,62 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) set_long(pc, e); /* sets cond code to ALWAYS */ if (src) { - unsigned cvn = CVT_F32_F32; - set_pred(pc, 0x1 /* cc = LT */, r_pred, e); - - if (src->mod & NV50_MOD_NEG) - cvn |= CVT_NEG; - /* write predicate reg */ - emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); + /* write to predicate reg */ + emit_cvt(pc, NULL, src, r_pred, CVT_F32_F32); } emit(pc, e); } static struct nv50_program_exec * -emit_breakaddr(struct nv50_pc *pc) +emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc) { struct nv50_program_exec *e = exec(pc); - e->inst[0] = 0x40000002; + e->inst[0] = (op << 28) | 2; set_long(pc, e); + if (pred >= 0) + set_pred(pc, cc, pred, e); emit(pc, e); return e; } -static void -emit_break(struct nv50_pc *pc, int pred, unsigned cc) +static INLINE struct nv50_program_exec * +emit_breakaddr(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x50000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); + return emit_control_flow(pc, 0x4, -1, 0); +} - emit(pc, e); +static INLINE void +emit_break(struct nv50_pc *pc, int pred, unsigned cc) +{ + emit_control_flow(pc, 0x5, pred, cc); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_joinat(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000002; - set_long(pc, e); - - emit(pc, e); - return e; + return emit_control_flow(pc, 0xa, -1, 0); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_branch(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); + return emit_control_flow(pc, 0x1, pred, cc); +} - e->inst[0] = 0x10000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - emit(pc, e); - return pc->p->exec_tail; +static INLINE struct nv50_program_exec * +emit_call(struct nv50_pc *pc, int pred, unsigned cc) +{ + return emit_control_flow(pc, 0x2, pred, cc); } -static void +static INLINE void emit_ret(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x30000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - - emit(pc, e); + emit_control_flow(pc, 0x3, pred, cc); } #define QOP_ADD 0 @@ -1445,8 +1676,8 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], src[1]->mod |= NV50_MOD_ABS; src[2]->mod |= NV50_MOD_ABS; - emit_minmax(pc, 4, t[2], src[0], src[1]); - emit_minmax(pc, 4, t[2], src[2], t[2]); + emit_minmax(pc, NV50_MAX_F32, t[2], src[0], src[1]); + emit_minmax(pc, NV50_MAX_F32, t[2], src[2], t[2]); src[0]->mod = mod[0]; src[1]->mod = mod[1]; @@ -1458,7 +1689,7 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], if (arg == 4) /* there is no textureProj(samplerCubeShadow) */ emit_mov(pc, t[3], src[3]); - emit_flop(pc, 0, t[2], t[2]); + emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]); emit_mul(pc, t[0], src[0], t[2]); emit_mul(pc, t[1], src[1], t[2]); @@ -1476,7 +1707,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], t[3]->rhw = src[3]->rhw; emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); - emit_flop(pc, 0, t[3], t[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]); for (c = 0; c < dim; ++c) { t[c]->rhw = src[c]->rhw; @@ -1490,7 +1721,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], /* XXX: for some reason the blob sometimes uses MAD * (mad f32 $rX $rY $rZ neg $r63) */ - emit_flop(pc, 0, t[3], src[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]); for (c = 0; c < dim; ++c) emit_mul(pc, t[c], src[c], t[3]); if (arg != dim) /* depth reference value */ @@ -1537,7 +1768,13 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod, struct nv50_reg *src, struct nv50_program_exec *tex) { struct nv50_program_exec *join_at; - unsigned i, target = pc->p->exec_size + 7 * 2; + unsigned i, target = pc->p->exec_size + 9 * 2; + + if (pc->p->type != PIPE_SHADER_FRAGMENT) { + emit(pc, tex); + return; + } + pc->allow32 = FALSE; /* Subtract lod of each pixel from lod of top left pixel, jump * texlod insn if result is 0, then repeat for 2 other pixels. @@ -1663,6 +1900,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit(pc, e); } else if (bias_lod < 0) { + assert(pc->p->type == PIPE_SHADER_FRAGMENT); e->inst[0] |= arg << 22; e->inst[1] |= 0x20000000; /* texbias */ emit_mov(pc, t[arg], src[3]); @@ -1742,6 +1980,21 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) q = 0x0403c000; m = 0xffff7fff; break; + case 0x2: + case 0x3: + /* ADD, SUB, SUBR b32 */ + m = ~(0x8000 | (127 << 16)); + q = ((e->inst[0] & (~m)) >> 2) | (1 << 26); + break; + case 0x5: + /* SAD */ + m = ~(0x81 << 8); + q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12); + break; + case 0x6: + /* MAD u16 */ + q = (e->inst[0] & (0x7f << 2)) << 12; + break; case 0x8: /* INTERP (move centroid, perspective and flat bits) */ m = ~0x03000100; @@ -1778,26 +2031,57 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) } /* Some operations support an optional negation flag. */ -static boolean -negate_supported(const struct tgsi_full_instruction *insn, int i) +static int +get_supported_mods(const struct tgsi_full_instruction *insn, int i) { switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_COS: case TGSI_OPCODE_DDX: case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: - case TGSI_OPCODE_MUL: + case TGSI_OPCODE_EX2: case TGSI_OPCODE_KIL: - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_MAD: - return TRUE; + case TGSI_OPCODE_MUL: case TGSI_OPCODE_POW: - if (i == 1) - return TRUE; - return FALSE; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */ + case TGSI_OPCODE_SCS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SUB: + return NV50_MOD_NEG; + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_INEG: /* tgsi src sign toggle/set would be stupid */ + return NV50_MOD_ABS; + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: + return NV50_MOD_NEG | NV50_MOD_ABS; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: + return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32; + case TGSI_OPCODE_UADD: + return NV50_MOD_NEG | NV50_MOD_I32; + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_UMUL: + case TGSI_OPCODE_USHR: + return NV50_MOD_I32; default: - return FALSE; + return 0; } } @@ -1820,7 +2104,9 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) case TGSI_OPCODE_DST: return mask & (c ? 0xa : 0x6); case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: @@ -1902,11 +2188,11 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) static struct nv50_reg * tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, - boolean neg) + int mod) { struct nv50_reg *r = NULL; - struct nv50_reg *temp; - unsigned sgn, c, swz; + struct nv50_reg *temp = NULL; + unsigned sgn, c, swz, cvn; if (src->Register.File != TGSI_FILE_CONSTANT) assert(!src->Register.Indirect); @@ -1946,7 +2232,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = &pc->immd[src->Register.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: - break; + return NULL; case TGSI_FILE_ADDRESS: r = pc->addr[src->Register.Index * 4 + c]; assert(r); @@ -1961,35 +2247,34 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; } + cvn = (mod & NV50_MOD_I32) ? CVT_S32_S32 : CVT_F32_F32; + switch (sgn) { - case TGSI_UTIL_SIGN_KEEP: - break; case TGSI_UTIL_SIGN_CLEAR: - temp = temp_temp(pc); - emit_abs(pc, temp, r); - r = temp; - break; - case TGSI_UTIL_SIGN_TOGGLE: - if (neg) - r->mod = NV50_MOD_NEG; - else { - temp = temp_temp(pc); - emit_neg(pc, temp, r); - r = temp; - } + r->mod = NV50_MOD_ABS; break; case TGSI_UTIL_SIGN_SET: - temp = temp_temp(pc); - emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG); - r = temp; + r->mod = NV50_MOD_NEG_ABS; + break; + case TGSI_UTIL_SIGN_TOGGLE: + r->mod = NV50_MOD_NEG; break; default: - assert(0); + assert(!r->mod && sgn == TGSI_UTIL_SIGN_KEEP); break; } - if (r && r->acc >= 0 && r != temp) - return reg_instance(pc, r); + if ((r->mod & mod) != r->mod) { + temp = temp_temp(pc, NULL); + emit_cvt(pc, temp, r, -1, cvn); + r->mod = 0; + r = temp; + } else + r->mod |= mod & NV50_MOD_I32; + + assert(r); + if (r->acc >= 0 && r != temp) + return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2042,6 +2327,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c) assert(0); return 0x0; } + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_LIT: case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: @@ -2082,6 +2369,8 @@ nv50_kill_branch(struct nv50_pc *pc) if (pc->if_insn[lvl]->next != pc->p->exec_tail) return FALSE; + if (is_immd(pc->p->exec_tail)) + return FALSE; /* if ccode == 'true', the BRA is from an ELSE and the predicate * reg may no longer be valid, since we currently always use $p0 @@ -2149,22 +2438,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fs = &inst->Src[i]; unsigned src_mask; - boolean neg_supp; + int mod_supp; src_mask = nv50_tgsi_src_mask(inst, i); - neg_supp = negate_supported(inst, i); + mod_supp = get_supported_mods(inst, i); if (fs->Register.File == TGSI_FILE_SAMPLER) unit = fs->Register.Index; for (c = 0; c < 4; c++) if (src_mask & (1 << c)) - src[i][c] = tgsi_src(pc, c, fs, neg_supp); + src[i][c] = tgsi_src(pc, c, fs, mod_supp); } brdc = temp = pc->r_brdc; if (brdc && brdc->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (sat) brdc = temp; } else @@ -2173,7 +2462,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; /* rdst[c] = dst[c]; */ /* done above */ - dst[c] = temp_temp(pc); + dst[c] = temp_temp(pc, NULL); } } @@ -2184,7 +2473,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_abs(pc, dst[c], src[0][c]); + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_ABS | CVT_F32_F32); } break; case TGSI_OPCODE_ADD: @@ -2206,8 +2496,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_ARL: assert(src[0][0]); - temp = temp_temp(pc); - emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32); + temp = temp_temp(pc, NULL); + emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); emit_arl(pc, dst[0], temp, 4); break; case TGSI_OPCODE_BGNLOOP: @@ -2215,16 +2505,28 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_BGNSUB: + assert(!pc->in_subroutine); + pc->in_subroutine = TRUE; + /* probably not necessary, but align to 8 byte boundary */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + break; case TGSI_OPCODE_BRK: assert(pc->loop_lvl > 0); emit_break(pc, -1, 0); break; + case TGSI_OPCODE_CAL: + assert(inst->Label.Label < pc->insn_nr); + emit_call(pc, -1, 0)->param.index = inst->Label.Label; + /* replaced by actual offset in nv50_program_fixup_insns */ + break; case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_CEIL, CVT_F32_F32 | CVT_RI); + CVT_CEIL | CVT_F32_F32 | CVT_RI); } break; case TGSI_OPCODE_CMP: @@ -2232,24 +2534,29 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32); + emit_cvt(pc, NULL, src[0][c], 1, CVT_F32_F32); emit_mov(pc, dst[c], src[1][c]); set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */ emit_mov(pc, dst[c], src[2][c]); set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ } break; + case TGSI_OPCODE_CONT: + assert(pc->loop_lvl > 0); + emit_branch(pc, -1, 0)->param.index = + pc->loop_pos[pc->loop_lvl - 1]; + break; case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 5, dst[3], temp); + emit_flop(pc, NV50_FLOP_COS, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, brdc, temp); + emit_flop(pc, NV50_FLOP_COS, brdc, temp); break; case TGSI_OPCODE_DDX: for (c = 0; c < 4; c++) { @@ -2321,9 +2628,56 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDSUB: + assert(pc->in_subroutine); + pc->in_subroutine = FALSE; + break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, brdc, temp); + emit_flop(pc, NV50_FLOP_EX2, brdc, temp); + break; + case TGSI_OPCODE_EXP: + { + struct nv50_reg *t[2]; + + assert(!temp); + t[0] = temp_temp(pc, NULL); + t[1] = temp_temp(pc, NULL); + + if (mask & 0x6) + emit_mov(pc, t[0], src[0][0]); + if (mask & 0x3) + emit_flr(pc, t[1], src[0][0]); + + if (mask & (1 << 1)) + emit_sub(pc, dst[1], t[0], t[1]); + if (mask & (1 << 0)) { + emit_preex2(pc, t[1], t[1]); + emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]); + } + if (mask & (1 << 2)) { + emit_preex2(pc, t[0], t[0]); + emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } + break; + case TGSI_OPCODE_F2I: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_TRUNC | CVT_S32_F32); + } + break; + case TGSI_OPCODE_F2U: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_TRUNC | CVT_U32_F32); + } break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -2333,7 +2687,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_FRC: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2341,14 +2695,42 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_sub(pc, dst[c], src[0][c], temp); } break; + case TGSI_OPCODE_I2F: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_S32); + } + break; case TGSI_OPCODE_IF: assert(pc->if_lvl < NV50_MAX_COND_NESTING); - emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN, - CVT_F32_F32); + emit_cvt(pc, NULL, src[0][0], 0, CVT_ABS | CVT_F32_F32); pc->if_join[pc->if_lvl] = emit_joinat(pc); pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);; terminate_mbb(pc); break; + case TGSI_OPCODE_IMAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x08c, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_IMIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x0ac, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_INEG: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_S32_S32 | CVT_NEG); + } + break; case TGSI_OPCODE_KIL: assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]); emit_kil(pc, src[0][0]); @@ -2363,10 +2745,38 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - emit_flop(pc, 3, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]); + break; + case TGSI_OPCODE_LOG: + { + struct nv50_reg *t[2]; + + t[0] = temp_temp(pc, NULL); + if (mask & (1 << 1)) + t[1] = temp_temp(pc, NULL); + else + t[1] = t[0]; + + emit_cvt(pc, t[0], src[0][0], -1, CVT_ABS | CVT_F32_F32); + emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], t[1]); + emit_flr(pc, t[1], t[1]); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], t[1]); + if (mask & (1 << 1)) { + t[1]->mod = NV50_MOD_NEG; + emit_preex2(pc, t[1], t[1]); + t[1]->mod = 0; + emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]); + emit_mul(pc, dst[1], t[0], t[1]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } break; case TGSI_OPCODE_LRP: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2385,14 +2795,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + emit_minmax(pc, 0x880, dst[c], src[0][c], src[1][c]); } break; case TGSI_OPCODE_MIN: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + emit_minmax(pc, 0x8a0, dst[c], src[0][c], src[1][c]); } break; case TGSI_OPCODE_MOV: @@ -2409,44 +2819,73 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mul(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_NOT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_not(pc, dst[c], src[0][c]); + } + break; case TGSI_OPCODE_POW: emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - emit_flop(pc, 0, brdc, src[0][0]); + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; + emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: - if (pc->p->type == PIPE_SHADER_FRAGMENT) + if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine) nv50_fp_move_results(pc); emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: - emit_flop(pc, 2, brdc, src[0][0]); + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; + src[0][0]->mod |= NV50_MOD_ABS; + emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); + break; + case TGSI_OPCODE_SAD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_sad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } break; case TGSI_OPCODE_SCS: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) - emit_flop(pc, 5, dst[0], temp); + emit_flop(pc, NV50_FLOP_COS, dst[0], temp); if (mask & (1 << 1)) - emit_flop(pc, 4, dst[1], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[1], temp); if (mask & (1 << 2)) emit_mov_immdval(pc, dst[2], 0.0); if (mask & (1 << 3)) emit_mov_immdval(pc, dst[3], 1.0); break; + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_USHR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_shift(pc, dst[c], src[0][c], src[1][c], + inst->Instruction.Opcode); + } + break; case TGSI_OPCODE_SIN: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 4, dst[3], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, brdc, temp); + emit_flop(pc, NV50_FLOP_SIN, brdc, temp); break; case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: @@ -2454,12 +2893,23 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_SGT: case TGSI_OPCODE_SLE: case TGSI_OPCODE_SNE: - i = map_tgsi_setop_cc(inst->Instruction.Opcode); + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + { + uint8_t cc, ty; + + map_tgsi_setop_hw(inst->Instruction.Opcode, &cc, &ty); + for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); + emit_set(pc, cc, dst[c], -1, src[0][c], src[1][c], ty); } + } break; case TGSI_OPCODE_SUB: for (c = 0; c < 4; c++) { @@ -2489,11 +2939,72 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_TRUNC, CVT_F32_F32 | CVT_RI); + CVT_TRUNC | CVT_F32_F32 | CVT_RI); + } + break; + case TGSI_OPCODE_U2F: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32); + } + break; + case TGSI_OPCODE_UADD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_add_b32(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_UMAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x084, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_UMIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_UMAD: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0, + temp); + emit_add_b32(pc, dst[c], temp, src[2][c]); + } + } + break; + case TGSI_OPCODE_UMUL: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0, + temp); + } + } + break; case TGSI_OPCODE_XPD: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & (1 << 0)) { emit_mul(pc, temp, src[0][2], src[1][1]); emit_msb(pc, dst[0], src[0][1], src[1][2], temp); @@ -2510,6 +3021,17 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[3], 1.0); break; case TGSI_OPCODE_END: + if (pc->p->type == PIPE_SHADER_FRAGMENT) + nv50_fp_move_results(pc); + + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + else + if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + emit_nop(pc); + + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -2536,7 +3058,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } } - kill_temp_temp(pc); + kill_temp_temp(pc, NULL); pc->reg_instance_nr = 0; return TRUE; @@ -2545,7 +3067,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, static void prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) { - struct nv50_reg *reg = NULL; + struct nv50_reg *r, *reg = NULL; const struct tgsi_full_src_register *src; const struct tgsi_dst_register *dst; unsigned i, c, k, mask; @@ -2554,10 +3076,16 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) - reg = pc->temp; + reg = pc->temp; else - if (dst->File == TGSI_FILE_OUTPUT) - reg = pc->result; + if (dst->File == TGSI_FILE_OUTPUT) { + reg = pc->result; + + if (insn->Instruction.Opcode == TGSI_OPCODE_MOV && + dst->Index == pc->edgeflag_out && + insn->Src[0].Register.File == TGSI_FILE_INPUT) + pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index; + } if (reg) { for (c = 0; c < 4; c++) { @@ -2585,7 +3113,15 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) continue; k = tgsi_util_get_full_src_register_swizzle(src, c); - reg[src->Register.Index * 4 + k].acc = pc->insn_nr; + r = ®[src->Register.Index * 4 + k]; + + /* If used before written, pre-allocate the reg, + * lest we overwrite results from a subroutine. + */ + if (!r->acc && r->type == P_TEMP) + alloc_reg(pc, r); + + r->acc = pc->insn_nr; } } } @@ -2674,7 +3210,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { unsigned chn, mask = nv50_tgsi_src_mask(insn, i); - boolean neg_supp = negate_supported(insn, i); + int ms = get_supported_mods(insn, i); fs = &insn->Src[i]; if (fs->Register.File != fd->Register.File || @@ -2692,10 +3228,12 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, if (!(fd->Register.WriteMask & (1 << c))) continue; - /* no danger if src is copied to TEMP first */ - if ((s != TGSI_UTIL_SIGN_KEEP) && - (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) - continue; + if (s == TGSI_UTIL_SIGN_TOGGLE && !(ms & NV50_MOD_NEG)) + continue; + if (s == TGSI_UTIL_SIGN_CLEAR && !(ms & NV50_MOD_ABS)) + continue; + if ((s == TGSI_UTIL_SIGN_SET) && ((ms & 3) != 3)) + continue; rdep[c] |= nv50_tgsi_dst_revdep( insn->Instruction.Opcode, i, chn); @@ -2719,12 +3257,12 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (is_scalar_op(insn.Instruction.Opcode)) { pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); if (!pc->r_brdc) - pc->r_brdc = temp_temp(pc); + pc->r_brdc = temp_temp(pc, NULL); return nv50_program_tx_insn(pc, &insn); } pc->r_brdc = NULL; - if (!deqs) + if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3])) return nv50_program_tx_insn(pc, &insn); deqs = nv50_revdep_reorder(m, rdep); @@ -2775,7 +3313,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); - emit_flop(pc, 0, iv, iv); + emit_flop(pc, NV50_FLOP_RCP, iv, iv); /* XXX: when loading interpolants dynamically, move these * to the program head, or make sure it can't be skipped. @@ -2856,6 +3394,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (p->cfg.io_nr > first) p->cfg.io_nr = first; break; + case TGSI_SEMANTIC_EDGEFLAG: + pc->edgeflag_out = first; + break; /* case TGSI_SEMANTIC_CLIP_DISTANCE: p->cfg.clpd = MIN2(p->cfg.clpd, first); @@ -3081,6 +3622,8 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->insn_pos) + FREE(pc->insn_pos); FREE(pc); } @@ -3104,6 +3647,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; @@ -3192,16 +3737,6 @@ nv50_program_fixup_insns(struct nv50_pc *pc) if (e->param.index >= 0 && !e->param.mask) bra_list[n++] = e; - /* last instruction must be long so it can have the exit bit set */ - if (!is_long(pc->p->exec_tail)) - convert_to_long(pc, pc->p->exec_tail); - /* set exit bit */ - pc->p->exec_tail->inst[1] |= 1; - - /* !immd on exit insn simultaneously means !join */ - assert(!is_immd(pc->p->exec_head)); - assert(!is_immd(pc->p->exec_tail)); - /* Make sure we don't have any single 32 bit instructions. */ for (e = pc->p->exec_head, pos = 0; e; e = e->next) { pos += is_long(e) ? 2 : 1; @@ -3210,12 +3745,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc) for (i = 0; i < n; ++i) if (bra_list[i]->param.index >= pos) bra_list[i]->param.index += 1; + for (i = 0; i < pc->insn_nr; ++i) + if (pc->insn_pos[i] >= pos) + pc->insn_pos[i] += 1; convert_to_long(pc, e); ++pos; } } FREE(bra_list); + + if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL]) + return; + + /* fill in CALL offsets */ + for (e = pc->p->exec_head; e; e = e->next) { + if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2) + e->param.index = pc->insn_pos[e->param.index]; + } } static boolean @@ -3237,19 +3784,20 @@ nv50_program_tx(struct nv50_program *p) if (ret == FALSE) goto out_cleanup; + pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned)); + tgsi_parse_init(&parse, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { const union tgsi_full_token *tok = &parse.FullToken; - /* don't allow half insn/immd on first and last instruction */ + /* previously allow32 was FALSE for first & last instruction */ pc->allow32 = TRUE; - if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) - pc->allow32 = FALSE; tgsi_parse_token(&parse); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: + pc->insn_pos[pc->insn_cur] = pc->p->exec_size; ++pc->insn_cur; ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) @@ -3260,9 +3808,6 @@ nv50_program_tx(struct nv50_program *p) } } - if (pc->p->type == PIPE_SHADER_FRAGMENT) - nv50_fp_move_results(pc); - nv50_program_fixup_insns(pc); p->param_nr = pc->param_nr * 4; @@ -3434,7 +3979,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(13, 2); + so = so_new(5, 8, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3470,7 +4015,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(64, 2); + so = so_new(6, 7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3480,7 +4025,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); + so_method(so, tesla, NV50TCL_FP_CONTROL, 1); so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); so_data (so, p->cfg.regs[3]); @@ -3490,12 +4035,13 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } -static void +static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { struct nv50_program *fp = nv50->fragprog; struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; + uint32_t origin = 0x00000010; /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in @@ -3529,7 +4075,9 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) if (mode == PIPE_SPRITE_COORD_NONE) { m += n; continue; - } + } else + if (mode == PIPE_SPRITE_COORD_LOWER_LEFT) + origin = 0; } /* this is either PointCoord or replaced by sprite coords */ @@ -3540,6 +4088,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) ++m; } } + return origin; } static int @@ -3638,7 +4187,7 @@ nv50_linkage_validate(struct nv50_context *nv50) } /* now fill the stateobj */ - so = so_new(64, 0); + so = so_new(7, 57, 0); n = (m + 3) / 4; so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); @@ -3652,11 +4201,13 @@ nv50_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); so_data (so, reg[4]); - so_method(so, tesla, 0x1540, 4); + so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4); so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { - nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1); + so_data (so, + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff)); so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); so_datap (so, pcrd, 8); diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 4a90c372ce..461fec1d89 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -58,6 +58,7 @@ struct nv50_program { /* VP only */ uint8_t clpd, clpd_nr; uint8_t psiz; + uint8_t edgeflag_in; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 268c9823f7..5a4ab3508b 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - BEGIN_RING(chan, tesla, 0x1530, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1); OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, 0x1514, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); OUT_RING (chan, 1); q->ready = FALSE; @@ -111,7 +111,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD | - wait ? 0 : NOUVEAU_BO_NOWAIT); + (wait ? 0 : NOUVEAU_BO_NOWAIT)); if (ret) return false; q->result = ((uint32_t *)q->bo->map)[1]; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index d443ca3ad0..28e2b35dea 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -128,7 +128,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; + return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case NOUVEAU_CAP_HW_VTXBUF: @@ -165,6 +165,21 @@ static void nv50_screen_destroy(struct pipe_screen *pscreen) { struct nv50_screen *screen = nv50_screen(pscreen); + unsigned i; + + for (i = 0; i < 2; i++) { + if (screen->constbuf_parm[i]) + nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); + } + + if (screen->constbuf_misc[0]) + nouveau_bo_ref(NULL, &screen->constbuf_misc[0]); + if (screen->tic) + nouveau_bo_ref(NULL, &screen->tic); + if (screen->tsc) + nouveau_bo_ref(NULL, &screen->tsc); + if (screen->static_init) + so_ref(NULL, &screen->static_init); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->tesla); @@ -174,6 +189,28 @@ nv50_screen_destroy(struct pipe_screen *pscreen) FREE(screen); } +static int +nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, + unsigned usage) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *ctx = screen->cur_ctx; + + if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX)) + return 0; + + /* Our vtxbuf got mapped, it can no longer be considered part of current + * state, remove it to avoid emitting reloc markers. + */ + if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf, + nouveau_bo(pb))) { + so_ref(NULL, &ctx->state.vtxbuf); + ctx->dirty |= NV50_NEW_ARRAYS; + } + + return 0; +} + struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { @@ -201,6 +238,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->is_format_supported = nv50_screen_is_format_supported; + screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map; nv50_screen_init_miptree_functions(pscreen); nv50_transfer_init_screen_functions(pscreen); @@ -213,7 +251,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->m2mf, 1); /* 2D object */ ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); @@ -222,7 +259,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->eng2d, 2); /* 3D object */ switch (chipset & 0xf0) { @@ -231,8 +267,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) break; case 0x80: case 0x90: - /* this stupid name should be corrected. */ - tesla_class = NV54TCL; + tesla_class = NV84TCL; break; case 0xa0: switch (chipset) { @@ -242,7 +277,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) tesla_class = NVA0TCL; break; default: - tesla_class = 0x8597; + tesla_class = NVA8TCL; break; } break; @@ -259,7 +294,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->tesla, 3); /* Sync notifier */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); @@ -270,7 +304,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static M2MF init */ - so = so_new(32, 0); + so = so_new(1, 3, 0); so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -279,7 +313,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref (NULL, &so); /* Static 2D init */ - so = so_new(64, 0); + so = so_new(4, 7, 0); so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -287,7 +321,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, chan->vram->handle); so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); so_data (so, NV50_2D_OPERATION_SRCCOPY); - so_method(so, screen->eng2d, 0x0290, 1); + so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1); so_data (so, 0); so_method(so, screen->eng2d, 0x0888, 1); so_data (so, 1); @@ -295,36 +329,35 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(256, 20); + so = so_new(40, 84, 20); - so_method(so, screen->tesla, 0x1558, 1); - so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); + so_data (so, NV50TCL_COND_MODE_ALWAYS); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), - NV50TCL_DMA_UNK0__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11); + for (i = 0; i < 11; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), - NV50TCL_DMA_UNK1__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0), + NV50TCL_DMA_COLOR__SIZE); + for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, 0x121c, 1); + so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1); so_data (so, 1); /* activate all 32 lanes (threads) in a warp */ - so_method(so, screen->tesla, 0x19a0, 1); + so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1); so_data (so, 0x2); so_method(so, screen->tesla, 0x1400, 1); so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, 0x13b4, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); so_data (so, 0x54); - so_method(so, screen->tesla, 0x13bc, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ - so_method(so, screen->tesla, 0x13ac, 1); + so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, 8); @@ -360,7 +393,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) // B = buffer ID (maybe more than 1 byte) // N = CB index used in shader instruction // P = program type (0 = VP, 2 = GP, 3 = FP) - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x000BBNP1); */ @@ -424,23 +457,26 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { - so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2); + so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_data (so, 0x000000ff); so_data (so, 0xffffffff); } - so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2); so_data (so, fui(0.0)); so_data (so, fui(1.0)); /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ - so_method(so, screen->tesla, 0x1234, 1); + so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1); so_data (so, 1); /* activate first scissor rectangle */ - so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1); + so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1); so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, 1); /* default edgeflag to TRUE */ + so_emit(chan, so); so_ref (so, &screen->static_init); so_ref (NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 61e24a5b57..a038a4e3c2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -2,6 +2,7 @@ #define __NV50_SCREEN_H__ #include "nouveau/nouveau_screen.h" +#include "nv50_context.h" struct nv50_screen { struct nouveau_screen base; @@ -9,6 +10,7 @@ struct nv50_screen { struct nouveau_winsys *nvws; unsigned cur_pctx; + struct nv50_context *cur_ctx; struct nouveau_grobj *tesla; struct nouveau_grobj *eng2d; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index d609b4cbc6..1bbbbdd5f0 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -35,7 +35,7 @@ static void * nv50_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(5, 24, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); unsigned cmask = 0, i; @@ -146,7 +146,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, (wrap_mode(cso->wrap_r) << 6)); switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; break; @@ -157,7 +156,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, } switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MINF_LINEAR; break; @@ -280,7 +278,7 @@ static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(15, 21, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_rasterizer_stateobj *rso = CALLOC_STRUCT(nv50_rasterizer_stateobj); @@ -295,7 +293,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : NV50TCL_SHADE_MODEL_SMOOTH); - so_method(so, tesla, 0x1684, 1); + so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1); so_data (so, cso->flatshade_first ? 0 : 1); so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); @@ -392,7 +390,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); so_data (so, fui(cso->offset_scale)); so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); - so_data (so, fui(cso->offset_units)); + so_data (so, fui(cso->offset_units * 2.0f)); } rso->pipe = *cso; @@ -425,7 +423,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(8, 22, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); so_data (so, cso->depth.writemask ? 1 : 0); @@ -439,9 +437,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - /* XXX: keep hex values until header is updated (names reversed) */ if (cso->stencil[0].enabled) { - so_method(so, tesla, 0x1380, 8); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); @@ -451,23 +448,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, cso->stencil[0].writemask); so_data (so, cso->stencil[0].valuemask); } else { - so_method(so, tesla, 0x1380, 1); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, tesla, 0x1594, 5); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, tesla, 0x0f54, 3); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); so_data (so, cso->stencil[1].ref_value); so_data (so, cso->stencil[1].writemask); so_data (so, cso->stencil[1].valuemask); } else { - so_method(so, tesla, 0x1594, 1); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 871e8097b6..f83232f43c 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -33,7 +33,7 @@ static void nv50_state_validate_fb(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(128, 18); + struct nouveau_stateobj *so = so_new(32, 79, 18); struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; @@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. * Ambiguous assignment results in no rendering (no DATA_ERROR). */ - so_method(so, tesla, 0x121c, 1); + so_method(so, tesla, NV50TCL_RT_CONTROL, 1); so_data (so, fb->nr_cbufs | (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); @@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1224, 1); + so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1); so_data (so, 1); } @@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3); so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); } else { - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 0); } - so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); so_data (so, w << 16); so_data (so, h << 16); /* set window lower left corner */ - so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2); + so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2); so_data (so, 0); so_data (so, 0); /* set screen scissor rectangle */ @@ -185,6 +185,9 @@ nv50_state_emit(struct nv50_context *nv50) struct nv50_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; + /* I don't want to copy headers from the winsys. */ + screen->cur_ctx = nv50; + if (nv50->pctx_id != screen->cur_pctx) { if (nv50->state.fb) nv50->state.dirty |= NV50_NEW_FRAMEBUFFER; @@ -296,7 +299,7 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(nv50->rasterizer->so, &nv50->state.rast); if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { - so = so_new(5, 0); + so = so_new(1, 4, 0); so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); so_data (so, fui(nv50->blend_colour.color[0])); so_data (so, fui(nv50->blend_colour.color[1])); @@ -307,7 +310,7 @@ nv50_state_validate(struct nv50_context *nv50) } if (nv50->dirty & NV50_NEW_STIPPLE) { - so = so_new(33, 0); + so = so_new(1, 32, 0); so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, util_bswap32(nv50->stipple.stipple[i])); @@ -324,8 +327,8 @@ nv50_state_validate(struct nv50_context *nv50) goto scissor_uptodate; nv50->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); + so = so_new(1, 2, 0); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); if (nv50->state.scissor_enabled) { so_data(so, (s->maxx << 16) | s->minx); so_data(so, (s->maxy << 16) | s->miny); @@ -353,13 +356,13 @@ scissor_uptodate: goto viewport_uptodate; nv50->state.viewport_bypass = bypass; - so = so_new(14, 0); + so = so_new(5, 9, 0); if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); so_data (so, fui(nv50->viewport.translate[1])); so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); @@ -397,7 +400,8 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4); + so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES + + nr * 8, PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); @@ -440,7 +444,7 @@ void nv50_so_init_sifc(struct nv50_context *nv50, so_data (so, 1); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2); + so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); so_data (so, 0); so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 79655fc08d..6378132979 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -176,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, if (ret) return; - BEGIN_RING(chan, eng2d, 0x0580, 3); - OUT_RING (chan, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3); + OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); OUT_RING (chan, format); OUT_RING (chan, value); - BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4); OUT_RING (chan, destx); OUT_RING (chan, desty); OUT_RING (chan, width); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index c4ca096d6a..bef548b728 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -199,16 +199,18 @@ nv50_tex_validate(struct nv50_context *nv50) { struct nouveau_stateobj *so; struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned p, push, nrlc; + unsigned p, start, push, nrlc; - for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); nrlc += nv50->miptree_nr[p]; } - push = push * 11 + 23 * PIPE_SHADER_TYPES + 4; + start = start * 2 + 4 * PIPE_SHADER_TYPES + 2; + push = push * 9 + 19 * PIPE_SHADER_TYPES + 2; nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES; - so = so_new(push, nrlc); + so = so_new(start, push, nrlc); if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE || nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) { diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 4d9afa6fed..a2f1db2914 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -47,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); OUT_RING (chan, src_pitch); src_offset += (sy * src_pitch) + (sx * cpp); } else { @@ -66,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); OUT_RING (chan, dst_pitch); dst_offset += (dy * dst_pitch) + (dx * cpp); } else { @@ -89,7 +89,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RELOCh(chan, src_bo, src_offset, src_reloc); OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); + NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); if (src_bo->tile_flags) { @@ -107,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, dst_offset += (line_count * dst_pitch); } BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); + NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); OUT_RING (chan, width * cpp); OUT_RING (chan, line_count); OUT_RING (chan, 0x00000101); @@ -291,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50, /* NV50_2D_OPERATION_SRCCOPY assumed already set */ - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); OUT_RING (chan, 0); OUT_RING (chan, src_format); BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); @@ -334,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50, src += src_pitch; } - BEGIN_RING(chan, tesla, 0x1440, 1); + BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f7fa0659e8..f2e510fba6 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -99,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c) { static const uint32_t hw_values[] = { 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16, 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 }; + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 }; /* we'd also have R11G11B10 and R10G10B10A2 */ @@ -152,7 +152,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return (hw_type | hw_size); } -boolean +void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { @@ -182,7 +182,9 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - return ret; + /* XXX: not sure what to do if ret != TRUE: flush and retry? + */ + assert(ret); } static INLINE boolean @@ -198,7 +200,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, return nv50_push_elements_u08(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -208,7 +210,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -231,7 +233,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, return nv50_push_elements_u16(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -241,7 +243,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -266,7 +268,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, 0x400015e8, nr); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -275,7 +277,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, return TRUE; } -boolean +void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -317,8 +319,10 @@ nv50_draw_elements(struct pipe_context *pipe, OUT_RING (chan, 0); pipe_buffer_unmap(pscreen, indexBuffer); - - return ret; + + /* XXX: what to do if ret != TRUE? Flush and retry? + */ + assert(ret); } static INLINE boolean @@ -350,7 +354,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so = *pso; if (!so) - *pso = so = so_new(nv50->vtxelt_nr * 5, 0); + *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); switch (ve->nr_components) { case 4: @@ -372,6 +376,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so_data (so, fui(v[1])); break; case 1: + if (attrib == nv50->vertprog->cfg.edgeflag_in) { + so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, v[0] ? 1 : 0); + } so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; @@ -401,11 +409,14 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; + if (nv50->vertprog->cfg.edgeflag_in < 16) + nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ + n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; - vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4); - vtxfmt = so_new(n_ve + 1, 0); + vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4); + vtxfmt = so_new(1, n_ve, 0); so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); for (i = 0; i < nv50->vtxelt_nr; i++) { @@ -445,7 +456,7 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); /* vertex array limits */ - so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2); + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_reloc (vtxbuf, bo, vb->buffer->size - 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -479,6 +490,9 @@ struct nv50_vbo_emitctx unsigned nr_ve; unsigned vtx_dwords; unsigned vtx_max; + + float edgeflag; + unsigned ve_edgeflag; }; static INLINE void @@ -622,6 +636,9 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, if (nv50_map_vbufs(nv50) == FALSE) return FALSE; + emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in; + + emit->edgeflag = 0.5f; emit->nr_ve = 0; emit->vtx_dwords = 0; @@ -644,7 +661,8 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, desc = util_format_description(ve->src_format); assert(desc); - size = util_format_get_component_bits(ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); + size = util_format_get_component_bits( + ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); assert(ve->nr_components > 0 && ve->nr_components <= 4); @@ -686,10 +704,31 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, } emit->vtx_max = 512 / emit->vtx_dwords; + if (emit->ve_edgeflag < 16) + emit->vtx_max = 1; return TRUE; } +static INLINE void +set_edgeflag(struct nouveau_channel *chan, + struct nouveau_grobj *tesla, + struct nv50_vbo_emitctx *emit, uint32_t index) +{ + unsigned i = emit->ve_edgeflag; + + if (i < 16) { + float f = *((float *)(emit->map[i] + index * emit->stride[i])); + + if (emit->edgeflag != f) { + emit->edgeflag = f; + + BEGIN_RING(chan, tesla, 0x15e4, 1); + OUT_RING (chan, f ? 1 : 0); + } + } +} + static boolean nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) { @@ -704,6 +743,8 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); @@ -729,6 +770,8 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -754,6 +797,8 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -779,6 +824,8 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 0d2de17be9..183aa17f9b 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -4,7 +4,12 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') env = env.Clone() # add the paths for r300compiler -env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa']) +env.Append(CPPPATH = [ + '#/src/mesa/drivers/dri/r300/compiler', + '#/src/gallium/winsys/drm/radeon/core', + '#/include', + '#/src/mesa', +]) r300 = env.ConvenienceLibrary( target = 'r300', diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ffe066d536..c14414fff6 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -27,9 +27,9 @@ static void r300_blitter_save_states(struct r300_context* r300) { - util_blitter_save_blend(r300->blitter, r300->blend_state); - util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state); - util_blitter_save_rasterizer(r300->blitter, r300->rs_state); + util_blitter_save_blend(r300->blitter, r300->blend_state.state); + util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); + util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); util_blitter_save_vertex_shader(r300->blitter, r300->vs); } diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff3..92de297ef1 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: @@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a7..2808486492 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d5c2d63d39..5e4f6552c3 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_blit.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_flush.h" #include "r300_query.h" #include "r300_render.h" @@ -69,11 +70,13 @@ static void r300_destroy_context(struct pipe_context* context) FREE(query); } - FREE(r300->blend_color_state); + FREE(r300->blend_color_state.state); + FREE(r300->clip_state.state); FREE(r300->rs_block); - FREE(r300->scissor_state); + FREE(r300->scissor_state.state); FREE(r300->vertex_info); - FREE(r300->viewport_state); + FREE(r300->viewport_state.state); + FREE(r300->ztop_state.state); FREE(r300); } @@ -107,6 +110,35 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); + +static void r300_setup_atoms(struct r300_context* r300) +{ + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. */ + make_empty_list(&r300->atom_list); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(rs, 22); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys) { @@ -155,11 +187,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, r300_shader_key_compare); - r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); + r300_setup_atoms(r300); + + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 232530b7dc..682b9179c8 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,9 +30,28 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +struct r300_context; + struct r300_fragment_shader; struct r300_vertex_shader; +struct r300_atom { + /* List pointers. */ + struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ + void* state; + /* Emit the state to the context. */ + void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. */ + unsigned size; + /* Whether this atom should be emitted. */ + boolean dirty; + /* Another dirty flag that is never automatically cleared. */ + boolean always_dirty; +}; + struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ @@ -62,11 +81,6 @@ struct r300_rs_state { /* Draw-specific rasterizer state */ struct pipe_rasterizer_state rs; - /* Whether or not to enable the VTE. This is referenced at the very - * last moment during emission of VTE state, to decide whether or not - * the VTE should be used for transformation. */ - boolean enable_vte; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ @@ -102,19 +116,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ - - /* Whether everything is culled by scissoring. */ - boolean empty_area; -}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ @@ -135,24 +136,17 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_BLEND 0x00000001 -#define R300_NEW_BLEND_COLOR 0x00000002 -#define R300_NEW_CLIP 0x00000004 -#define R300_NEW_DSA 0x00000008 #define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_RASTERIZER 0x00000080 #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_SCISSOR 0x00020000 #define R300_NEW_TEXTURE 0x00040000 #define R300_ANY_NEW_TEXTURES 0x03fc0000 #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 -#define R300_NEW_VIEWPORT 0x20000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -194,6 +188,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -230,6 +230,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { @@ -273,38 +276,40 @@ struct r300_context { struct r300_vertex_info* vertex_info; /* Various CSO state objects. */ + /* Beginning of atom list. */ + struct r300_atom atom_list; /* Blend state. */ - struct r300_blend_state* blend_state; + struct r300_atom blend_state; /* Blend color state. */ - struct r300_blend_color_state* blend_color_state; + struct r300_atom blend_color_state; /* User clip planes. */ - struct pipe_clip_state clip_state; + struct r300_atom clip_state; /* Shader constants. */ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ - struct r300_dsa_state* dsa_state; + struct r300_atom dsa_state; /* Fragment shader. */ struct r300_fragment_shader* fs; /* Framebuffer state. We currently don't need our own version of this. */ struct pipe_framebuffer_state framebuffer_state; /* Rasterizer state. */ - struct r300_rs_state* rs_state; + struct r300_atom rs_state; /* RS block state. */ struct r300_rs_block* rs_block; /* Sampler states. */ struct r300_sampler_state* sampler_states[8]; int sampler_count; /* Scissor state. */ - struct r300_scissor_state* scissor_state; + struct r300_atom scissor_state; /* Texture states. */ struct r300_texture* textures[8]; int texture_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ - struct r300_viewport_state* viewport_state; + struct r300_atom viewport_state; /* ZTOP state. */ - struct r300_ztop_state ztop_state; + struct r300_atom ztop_state; /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -317,6 +322,8 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + /* Whether the TCL engine should be in bypass mode. */ + boolean tcl_bypass; /** Combination of DBG_xxx flags */ unsigned debug; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee050..151f72b0fe 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 199ce3a945..9f93327e59 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_simple_list.h" #include "r300_context.h" #include "r300_cs.h" @@ -36,11 +37,13 @@ #include "r300_texture.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend) +void r300_emit_blend_state(struct r300_context* r300, void* state) { + struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); + BEGIN_CS(8); + OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (r300->framebuffer_state.nr_cbufs) { OUT_CS(blend->blend_control); @@ -52,14 +55,13 @@ void r300_emit_blend_state(struct r300_context* r300, OUT_CS(0); /* XXX also disable fastfill here once it's supported */ } - OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc) +void r300_emit_blend_color_state(struct r300_context* r300, void* state) { + struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -76,9 +78,9 @@ void r300_emit_blend_color_state(struct r300_context* r300, } } -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip) +void r300_emit_clip_state(struct r300_context* r300, void* state) { + struct pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -106,13 +108,13 @@ void r300_emit_clip_state(struct r300_context* r300, } -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa) +void r300_emit_dsa_state(struct r300_context* r300, void* state) { + struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); /* not needed since we use the 8bit alpha ref */ @@ -121,10 +123,16 @@ void r300_emit_dsa_state(struct r300_context* r300, }*/ OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); + + if (r300->framebuffer_state.zsbuf) { + OUT_CS(dsa->z_buffer_control); + OUT_CS(dsa->z_stencil_control); + } else { + OUT_CS(0); + OUT_CS(0); + } + OUT_CS(dsa->stencil_ref_mask); - OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { @@ -138,6 +146,8 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -160,11 +170,31 @@ static const float * get_shader_constant( /* Texture compare-fail value. */ /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, - * this is always (0,0,0,0). */ + * this is always (0,0,0,0), right? */ case RC_STATE_SHADOW_AMBIENT: vec[3] = 0; break; + case RC_STATE_R300_VIEWPORT_SCALE: + if (r300->tcl_bypass) { + vec[0] = 1; + vec[1] = 1; + vec[2] = 1; + } else { + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; + } + break; + + case RC_STATE_R300_VIEWPORT_OFFSET: + if (!r300->tcl_bypass) { + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; + } + break; + default: debug_printf("r300: Implementation error: " "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); @@ -283,6 +313,22 @@ void r300_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } +static void r300_emit_fragment_depth_config(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + CS_LOCALS(r300); + + BEGIN_CS(4); + if (r300_fragment_shader_writes_depth(fs)) { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SHADER); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W24 | R300_W_SRC_US); + } else { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SCAN); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0 | R300_W_SRC_US); + } + END_CS; +} + void r500_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code) { @@ -374,8 +420,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -398,8 +446,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -531,8 +581,9 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) +void r300_emit_rs_state(struct r300_context* r300, void* state) { + struct r300_rs_state* rs = (struct r300_rs_state*)state; CS_LOCALS(r300); BEGIN_CS(22); @@ -595,26 +646,47 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = r300->framebuffer_state.width; + maxy = r300->framebuffer_state.height; -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) -{ - if (r300->rs_state->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 chipsets. */ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -650,8 +722,10 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } @@ -717,32 +791,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) -{ - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - void r300_emit_vertex_format_state(struct r300_context* r300) { int i; @@ -867,26 +915,27 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport) +void r300_emit_viewport_state(struct r300_context* r300, void* state) { + struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - - if (r300->rs_state->enable_vte) { - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - } else { + if (r300->tcl_bypass) { + BEGIN_CS(2); OUT_CS_REG(R300_VAP_VTE_CNTL, 0); + END_CS; + } else { + BEGIN_CS(9); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } - END_CS; } void r300_emit_texture_count(struct r300_context* r300) @@ -910,6 +959,16 @@ void r300_emit_texture_count(struct r300_context* r300) } +void r300_emit_ztop_state(struct r300_context* r300, void* state) +{ + struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); + END_CS; +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); @@ -933,18 +992,24 @@ void r300_emit_dirty_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; - int i, dirty_tex = 0; + struct r300_atom* atom; + unsigned i, dwords = 1024; + int dirty_tex = 0; boolean invalid = FALSE; - if (!(r300->dirty_state)) { - return; + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } } - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ + /* Make sure we have at least 2*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + if (!r300->winsys->check_cs(r300->winsys, dwords)) { r300->context.flush(&r300->context, 0, NULL); } @@ -984,10 +1049,12 @@ validate: } } /* ...occlusion query buffer... */ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. */ if (r300->vbo) { @@ -997,7 +1064,7 @@ validate: goto validate; } } else { - // debug_printf("No VBO while emitting dirty state!\n"); + /* debug_printf("No VBO while emitting dirty state!\n"); */ } if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); @@ -1015,27 +1082,15 @@ validate: r300->dirty_state &= ~R300_NEW_QUERY; } - if (r300->dirty_state & R300_NEW_BLEND) { - r300_emit_blend_state(r300, r300->blend_state); - r300->dirty_state &= ~R300_NEW_BLEND; - } - - if (r300->dirty_state & R300_NEW_BLEND_COLOR) { - r300_emit_blend_color_state(r300, r300->blend_color_state); - r300->dirty_state &= ~R300_NEW_BLEND_COLOR; - } - - if (r300->dirty_state & R300_NEW_CLIP) { - r300_emit_clip_state(r300, &r300->clip_state); - r300->dirty_state &= ~R300_NEW_CLIP; - } - - if (r300->dirty_state & R300_NEW_DSA) { - r300_emit_dsa_state(r300, r300->dsa_state); - r300->dirty_state &= ~R300_NEW_DSA; + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + atom->emit(r300, atom->state); + atom->dirty = FALSE; + } } if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { + r300_emit_fragment_depth_config(r300, r300->fs); if (r300screen->caps->is_r500) { r500_emit_fragment_program_code(r300, &r300->fs->shader->code); } else { @@ -1060,21 +1115,11 @@ validate: r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; } - if (r300->dirty_state & R300_NEW_RASTERIZER) { - r300_emit_rs_state(r300, r300->rs_state); - r300->dirty_state &= ~R300_NEW_RASTERIZER; - } - if (r300->dirty_state & R300_NEW_RS_BLOCK) { r300_emit_rs_block_state(r300, r300->rs_block); r300->dirty_state &= ~R300_NEW_RS_BLOCK; } - if (r300->dirty_state & R300_NEW_SCISSOR) { - r300_emit_scissor_state(r300, r300->scissor_state); - r300->dirty_state &= ~R300_NEW_SCISSOR; - } - /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { @@ -1096,11 +1141,6 @@ validate: r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); } - if (r300->dirty_state & R300_NEW_VIEWPORT) { - r300_emit_viewport_state(r300, r300->viewport_state); - r300->dirty_state &= ~R300_NEW_VIEWPORT; - } - if (dirty_tex) { r300_flush_textures(r300); } @@ -1129,7 +1169,7 @@ validate: */ /* Finally, emit the VBO. */ - //r300_emit_vertex_buffer(r300); + /* r300_emit_vertex_buffer(r300); */ r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 3797d3d332..05a6bfeae8 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,17 +31,13 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend); +void r300_emit_blend_state(struct r300_context* r300, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc); +void r300_emit_blend_color_state(struct r300_context* r300, void* state); -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip); +void r300_emit_clip_state(struct r300_context* r300, void* state); -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa); +void r300_emit_dsa_state(struct r300_context* r300, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); @@ -63,13 +59,12 @@ void r300_emit_query_begin(struct r300_context* r300, void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); +void r300_emit_rs_state(struct r300_context* r300, void* state); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor); +void r300_emit_scissor_state(struct r300_context* r300, void* state); void r300_emit_texture(struct r300_context* r300, struct r300_sampler_state* sampler, @@ -89,11 +84,12 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport); +void r300_emit_viewport_state(struct r300_context* r300, void* state); void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, void* state); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc..59819cb106 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -37,8 +37,10 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); + (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -54,7 +56,15 @@ static void r300_flush(struct pipe_context* pipe, r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. */ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4e1b61ca40..60ea9c171d 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -63,6 +63,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, fs_inputs->fog = i; break; + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + fs_inputs->wpos = i; + break; + default: assert(0); } @@ -114,6 +119,9 @@ static void allocate_hardware_inputs( if (inputs->fog != ATTR_UNUSED) { allocate(mydata, inputs->fog, reg++); } + if (inputs->wpos != ATTR_UNUSED) { + allocate(mydata, inputs->wpos, reg++); + } } static void get_compare_state( @@ -144,6 +152,7 @@ static void r300_translate_fragment_shader( struct r300_fragment_shader* fs = r300->fs; struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + int wpos = fs->inputs.wpos; /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); @@ -171,6 +180,18 @@ static void r300_translate_fragment_shader( fs->shadow_samplers = compiler.Base.Program.ShadowSamplers; + /** + * Transform the program to support WPOS. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. */ + if (wpos != ATTR_UNUSED) { + /* Moving the input to some other reg is not really necessary. */ + rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); + } + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index d8d08fbe26..361813891f 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2186,6 +2188,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) +# define R500_SRC_ALPHA_0_NO_READ (1 << 30) +# define R500_SRC_ALPHA_1_NO_READ (1 << 31) /* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) @@ -2281,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2542,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) @@ -2638,7 +2646,7 @@ enum { VE_COND_MUX_GTE = 25, VE_SET_GREATER_THAN = 26, VE_SET_EQUAL = 27, - VE_SET_NOT_EQUAL = 28, + VE_SET_NOT_EQUAL = 28 }; enum { @@ -2672,20 +2680,20 @@ enum { ME_PRED_SET_CLR = 25, ME_PRED_SET_INV = 26, ME_PRED_SET_POP = 27, - ME_PRED_SET_RESTORE = 28, + ME_PRED_SET_RESTORE = 28 }; enum { /* R3XX */ PVS_MACRO_OP_2CLK_MADD = 0, - PVS_MACRO_OP_2CLK_M2X_ADD = 1, + PVS_MACRO_OP_2CLK_M2X_ADD = 1 }; enum { PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ - PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */ + PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */ }; enum { @@ -2694,7 +2702,7 @@ enum { PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ - PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */ + PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */ }; enum { @@ -2703,7 +2711,7 @@ enum { PVS_SRC_SELECT_Z = 2, /* Select Z Component */ PVS_SRC_SELECT_W = 3, /* Select W Component */ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ - PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */ + PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */ }; /* PVS Opcode & Destination Operand Description */ @@ -2742,7 +2750,7 @@ enum { PVS_DST_ADDR_SEL_MASK = 0x3, PVS_DST_ADDR_SEL_SHIFT = 29, PVS_DST_ADDR_MODE_0_MASK = 0x1, - PVS_DST_ADDR_MODE_0_SHIFT = 31, + PVS_DST_ADDR_MODE_0_SHIFT = 31 }; /* PVS Source Operand Description */ @@ -2777,7 +2785,7 @@ enum { PVS_SRC_ADDR_SEL_MASK = 0x3, PVS_SRC_ADDR_SEL_SHIFT = 29, PVS_SRC_ADDR_MODE_1_MASK = 0x0, - PVS_SRC_ADDR_MODE_1_SHIFT = 32, + PVS_SRC_ADDR_MODE_1_SHIFT = 32 }; /*\}*/ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2d70ec2ac9..710d850163 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,6 +26,8 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "indices/u_indices.h" + #include "pipe/p_inlines.h" #include "util/u_memory.h" @@ -69,16 +71,11 @@ uint32_t r300_translate_primitive(unsigned prim) } } -static boolean r300_nothing_to_draw(struct r300_context *r300) -{ - return r300->rs_state->rs.scissor && - r300->scissor_state->scissor.empty_area; -} - static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, unsigned mode) { - uint32_t color_control = r300->rs_state->color_control; + struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state; + uint32_t color_control = rs->color_control; /* By default (see r300_state.c:r300_create_rs_state) color_control is * initialized to provoking the first vertex. @@ -98,7 +95,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, * ~ C. */ - if (r300->rs_state->rs.flatshade_first) { + if (rs->rs.flatshade_first) { switch (mode) { case PIPE_PRIM_TRIANGLE_FAN: color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; @@ -119,6 +116,44 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static void r300_emit_draw_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; + unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); + unsigned i; + uint32_t* map; + CS_LOCALS(r300); + + map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, + start * vertex_size, count * vertex_size, + PIPE_BUFFER_USAGE_CPU_READ); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); + for (i = 0; i < count * vertex_size; i++) { + if (i % vertex_size == 0) { + //debug_printf("r300: -- vert --\n"); + } + //debug_printf("r300: 0x%08x\n", *map); + OUT_CS(*map); + map++; + } + END_CS; + + pipe_buffer_unmap(r300->context.screen, vbo); +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -212,43 +247,84 @@ validate: return TRUE; } +static struct pipe_buffer* r300_translate_elts(struct r300_context* r300, + struct pipe_buffer* elts, + unsigned* size, + unsigned* mode, + unsigned* count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + void *in_map, *out_map; + unsigned out_prim, out_index_size, out_nr; + u_translate_func out_translate; + + (void)u_index_translator(~0, *mode, *size, *count, PV_LAST, PV_LAST, + &out_prim, &out_index_size, &out_nr, &out_translate); + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + out_index_size * out_nr); + + in_map = pipe_buffer_map(screen, elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + out_translate(in_map, *count, out_map); + + pipe_buffer_unmap(screen, elts); + pipe_buffer_unmap(screen, new_elts); + + *size = out_index_size; + *mode = out_prim; + *count = out_nr; + + return new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. */ -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: use aux/indices functions to split this into smaller + * primitives. + */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; + } + + if (indexSize == 1) { + indexBuffer = r300_translate_elts(r300, indexBuffer, + &indexSize, &mode, &count); } if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return FALSE; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return FALSE; + goto cleanup; } r300_emit_dirty_state(r300); @@ -258,49 +334,52 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); - return TRUE; +cleanup: + if (indexBuffer != orgIndexBuffer) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. */ -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count) +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) { - return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); } -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: driver needs to handle this -- use the functions in + * aux/indices to split this into several smaller primitives. + */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; } r300_emit_dirty_state(r300); - r300_emit_aos(r300, start); - - r300_emit_draw_arrays(r300, mode, count); - - return TRUE; + if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { + r300_emit_draw_immediate(r300, mode, start, count); + } else { + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** @@ -309,7 +388,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, ***************************************************************************/ /* SW TCL arrays, using Draw. */ -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, +void r300_swtcl_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) @@ -318,11 +397,7 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, int i; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -335,8 +410,9 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, draw_set_mapped_element_buffer(r300->draw, 0, NULL); draw_set_mapped_constant_buffer(r300->draw, - r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].count * + PIPE_SHADER_VERTEX, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); draw_arrays(r300->draw, mode, start, count); @@ -345,12 +421,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } - - return TRUE; } /* SW TCL elements, using Draw. */ -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -361,13 +435,10 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); int i; + void* indices; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -377,12 +448,13 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(r300->draw, indexSize, minIndex, maxIndex, indices); draw_set_mapped_constant_buffer(r300->draw, + PIPE_SHADER_VERTEX, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -397,8 +469,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(r300->draw, 0, start, start + count - 1, NULL); - - return TRUE; } /* Object for rendering using Draw. */ @@ -474,7 +544,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, PIPE_BUFFER_USAGE_CPU_WRITE); - return (r300render->vbo_ptr + r300render->vbo_offset); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index da83069083..27b5e6a963 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -25,35 +25,35 @@ uint32_t r300_translate_primitive(unsigned prim); -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count); - -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count); - -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count); + +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count); + +void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count); + +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); #endif /* R300_RENDER_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 2a8667d483..287664b1d2 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -83,6 +83,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: @@ -143,9 +144,11 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case PIPE_CAP_SM3: - return 1; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } default: debug_printf("r300: Implementation error: Bad param %d\n", param); diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h index 85184e2cfd..6796841b29 100644 --- a/src/gallium/drivers/r300/r300_shader_semantics.h +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -40,6 +40,7 @@ struct r300_shader_semantics { int bcolor[ATTR_COLOR_COUNT]; int generic[ATTR_GENERIC_COUNT]; int fog; + int wpos; }; static INLINE void r300_shader_semantics_reset( @@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset( info->pos = ATTR_UNUSED; info->psize = ATTR_UNUSED; info->fog = ATTR_UNUSED; + info->wpos = ATTR_UNUSED; for (i = 0; i < ATTR_COLOR_COUNT; i++) { info->color[i] = ATTR_UNUSED; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 541b0abc9c..60ad763cf4 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,6 +42,120 @@ /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ +static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 0, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 1, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + /* Create a new blend state based on the CSO blend state. * * This encompasses alpha blending, logic/raster ops, and blend dithering. */ @@ -66,7 +181,11 @@ static void* r300_create_blend_state(struct pipe_context* pipe, ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); - /* optimization: some operations do not require the destination color */ + /* Optimization: some operations do not require the destination color. + * + * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled, + * otherwise blending gives incorrect results. It seems to be + * a hardware bug. */ if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || dstRGB != PIPE_BLENDFACTOR_ZERO || @@ -78,11 +197,81 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcA == PIPE_BLENDFACTOR_DST_COLOR || srcA == PIPE_BLENDFACTOR_DST_ALPHA || srcA == PIPE_BLENDFACTOR_INV_DST_COLOR || - srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) + srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { + /* Enable reading from the colorbuffer. */ blend->blend_control |= R300_READ_ENABLE; - /* XXX implement the optimization with DISCARD_SRC_PIXELS*/ - /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */ + if (r300_screen(r300_context(pipe)->context.screen)->caps->is_r500) { + /* Optimization: Depending on incoming pixels, we can + * conditionally disable the reading in hardware... */ + if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN && + eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) { + /* Disable reading if SRC_ALPHA == 0. */ + if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_0_NO_READ; + } + + /* Disable reading if SRC_ALPHA == 1. */ + if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_1_NO_READ; + } + } + } + } + + /* Optimization: discard pixels which don't change the colorbuffer. + * + * The code below is non-trivial and some math is involved. + * + * Discarding pixels must be disabled when FP16 AA is enabled. + * This is a hardware bug. Also, this implementation wouldn't work + * with FP blending enabled and equation clamping disabled. + * + * Equations other than ADD are rarely used and therefore won't be + * optimized. */ + if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) && + (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) { + /* ADD: X+Y + * REVERSE_SUBTRACT: Y-X + * + * The idea is: + * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1, + * then CB will not be changed. + * + * Given the srcFactor and dstFactor variables, we can derive + * what src and dst should be equal to and discard appropriate + * pixels. + */ + if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + } else if (blend_discard_if_src_alpha_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; + } else if (blend_discard_if_src_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; + } else if (blend_discard_if_src_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; + } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0; + } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1; + } + } /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { @@ -128,8 +317,8 @@ static void r300_bind_blend_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->blend_state = (struct r300_blend_state*)state; - r300->dirty_state |= R300_NEW_BLEND; + r300->blend_state.state = state; + r300->blend_state.dirty = TRUE; } /* Free blend state. */ @@ -151,20 +340,24 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); + struct r300_blend_color_state* state = + (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); - r300->blend_color_state->blend_color = uc.ui; + state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ - r300->blend_color_state->blend_color_red_alpha = + state->blend_color_red_alpha = float_to_fixed10(color->color[0]) | (float_to_fixed10(color->color[3]) << 16); - r300->blend_color_state->blend_color_green_blue = + state->blend_color_green_blue = float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); - r300->dirty_state |= R300_NEW_BLEND_COLOR; + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; + r300->blend_color_state.dirty = TRUE; } static void r300_set_clip_state(struct pipe_context* pipe, @@ -173,12 +366,15 @@ static void r300_set_clip_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (r300_screen(pipe->screen)->caps->has_tcl) { - r300->clip_state = *state; - r300->dirty_state |= R300_NEW_CLIP; + memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -271,9 +467,11 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); - r300->dsa_state = (struct r300_dsa_state*)state; - r300->dirty_state |= R300_NEW_DSA; + r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; + r300->dsa_state.dirty = TRUE; } /* Free DSA state. */ @@ -283,37 +481,11 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } - - scissor->empty_area = state->minx >= state->maxx || - state->miny >= state->maxy; -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct pipe_scissor_state scissor; if (r300->draw) { draw_flush(r300->draw); @@ -321,18 +493,12 @@ static void r300->framebuffer_state = *state; - scissor.minx = scissor.miny = 0; - scissor.maxx = state->width; - scissor.maxy = state->height; - r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || !r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - r300->dirty_state |= R300_NEW_BLEND; + + r300->blend_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; + r300->scissor_state.dirty = TRUE; } /* Create fragment shader state. */ @@ -367,6 +533,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; r300_pick_fragment_shader(r300); + if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + } + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -407,8 +577,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Copy rasterizer state for Draw. */ rs->rs = *state; - rs->enable_vte = !state->bypass_vs_clip_and_viewport; - #ifdef PIPE_ARCH_LITTLE_ENDIAN rs->vap_control_status = R300_VC_NO_SWAP; #else @@ -524,12 +692,23 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->rs_state = rs; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + } else { + r300->tcl_bypass = FALSE; + } + + r300->rs_state.state = rs; + r300->rs_state.dirty = TRUE; + /* XXX Why is this still needed, dammit!? */ + r300->scissor_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; + /* XXX Clean these up when we move to atom emits */ - r300->dirty_state |= R300_NEW_RASTERIZER; r300->dirty_state |= R300_NEW_RS_BLOCK; - r300->dirty_state |= R300_NEW_SCISSOR; - r300->dirty_state |= R300_NEW_VIEWPORT; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } /* Free rasterizer state. */ @@ -556,7 +735,8 @@ static void* sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, - state->min_mip_filter); + state->min_mip_filter, + state->max_anisotropy > 1.0); /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ /* We must pass these to the emit function to clamp them properly. */ @@ -664,49 +844,51 @@ static void r300_set_scissor_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300_set_scissor_regs(state, &r300->scissor_state->scissor, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); - /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } + r300->scissor_state.dirty = TRUE; } static void r300_set_viewport_state(struct pipe_context* pipe, const struct pipe_viewport_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; /* Do the transform in HW. */ - r300->viewport_state->vte_control = R300_VTX_W0_FMT; + viewport->vte_control = R300_VTX_W0_FMT; if (state->scale[0] != 1.0f) { - r300->viewport_state->xscale = state->scale[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; + viewport->xscale = state->scale[0]; + viewport->vte_control |= R300_VPORT_X_SCALE_ENA; } if (state->scale[1] != 1.0f) { - r300->viewport_state->yscale = state->scale[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; + viewport->yscale = state->scale[1]; + viewport->vte_control |= R300_VPORT_Y_SCALE_ENA; } if (state->scale[2] != 1.0f) { - r300->viewport_state->zscale = state->scale[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; + viewport->zscale = state->scale[2]; + viewport->vte_control |= R300_VPORT_Z_SCALE_ENA; } if (state->translate[0] != 0.0f) { - r300->viewport_state->xoffset = state->translate[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; + viewport->xoffset = state->translate[0]; + viewport->vte_control |= R300_VPORT_X_OFFSET_ENA; } if (state->translate[1] != 0.0f) { - r300->viewport_state->yoffset = state->translate[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; + viewport->yoffset = state->translate[1]; + viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA; } if (state->translate[2] != 0.0f) { - r300->viewport_state->zoffset = state->translate[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; + viewport->zoffset = state->translate[2]; + viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA; } - r300->dirty_state |= R300_NEW_VIEWPORT; + r300->viewport_state.dirty = TRUE; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } static void r300_set_vertex_buffers(struct pipe_context* pipe, @@ -778,7 +960,13 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs = vs; - r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; + if (r300->fs) { + r300_vertex_shader_setup_wpos(r300); + } + + r300->dirty_state |= + R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS | + R300_NEW_VERTEX_FORMAT; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 29bc701a86..192846411b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -71,9 +71,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300, struct tgsi_shader_info* info = &r300->vs->info; int output; - output = draw_find_vs_output(r300->draw, - info->output_semantic_name[index], - info->output_semantic_index[index]); + output = draw_find_shader_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); } @@ -139,10 +139,10 @@ static void r300_vertex_psc(struct r300_context* r300) /* If TCL is bypassed, map vertex streams to equivalent VS output * locations. */ - if (r300->rs_state->enable_vte) { - stream_tab = identity; - } else { + if (r300->tcl_bypass) { stream_tab = r300->vs->stream_loc_notcl; + } else { + stream_tab = identity; } /* Vertex shaders have no semantics on their inputs, @@ -333,6 +333,8 @@ static void r300_update_rs_block(struct r300_context* r300, void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; if (r300_screen(r300->context.screen)->caps->is_r500) { rX00_rs_col = r500_rs_col; @@ -348,7 +350,7 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ rX00_rs_col(rs, col_count, i, FALSE); @@ -410,6 +412,16 @@ static void r300_update_rs_block(struct r300_context* r300, } } + /* Rasterize WPOS. */ + /* If the FS doesn't need it, it's not written by the VS. */ + if (fs_inputs->wpos != ATTR_UNUSED) { + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + rX00_rs_tex_write(rs, tex_count, fp_offset); + + fp_offset++; + tex_count++; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { rX00_rs_col(rs, 0, 0, TRUE); @@ -496,7 +508,8 @@ static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) static void r300_update_ztop(struct r300_context* r300) { - r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; /* This is important enough that I felt it warranted a comment. * @@ -518,31 +531,37 @@ static void r300_update_ztop(struct r300_context* r300) * 5) Depth writes in fragment shader * 6) Outstanding occlusion queries * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * * ~C. */ /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ - r300->fs->info.uses_kill)) { /* (2) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } + + r300->ztop_state.dirty = TRUE; } void r300_update_derived_state(struct r300_context* r300) { + /* XXX */ if (r300->dirty_state & (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | - R300_NEW_VERTEX_FORMAT)) { + R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) { r300_update_derived_shader_state(r300); } - if (r300->dirty_state & - (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { - r300_update_ztop(r300); - } + r300_update_ztop(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index dbe42edd91..35be00e1b0 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -257,38 +257,37 @@ static INLINE uint32_t r300_translate_wrap(int wrap) } } -static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) +static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip, + int is_anisotropic) { uint32_t retval = 0; - switch (min) { + if (is_anisotropic) + retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO; + else { + switch (min) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MIN_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MIN_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MIN_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", min); assert(0); break; - } - switch (mag) { + } + switch (mag) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MAG_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MAG_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MAG_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", mag); assert(0); break; + } } switch (mip) { case PIPE_TEX_MIPFILTER_NONE: diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index bcd4c030f9..b0f309695c 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(20 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(16 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -66,8 +66,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); - OUT_CS_REG(R300_US_W_FMT, 0x0); /*** VAP ***/ /* Sign/normalize control */ @@ -117,10 +115,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); + + if (caps->family >= CHIP_FAMILY_RV350) { + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4d..a9bbdd56d8 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,6 +30,18 @@ #include "r300_texture.h" #include "r300_screen.h" +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; @@ -92,33 +104,67 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } /** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + +/** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. */ if (level > tex->tex.last_level) { debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(u_minify(tex->tex.width0, level), tile_width); + + /* Should already be aligned except for S3TC. */ + return align(util_format_get_stride(tex->tex.format, width), 32); +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(u_minify(tex->tex.height0, level), tile_height); + + return util_format_get_nblocksy(tex->tex.format, height); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; - for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); + debug_printf("r300: Making miptree for texture, format %s\n", pf_name(base->format)); + for (i = 0; i <= base->last_level; i++) { stride = r300_texture_get_stride(tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -132,9 +178,9 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->pitch[i] = stride / util_format_get_blocksize(base->format); debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } @@ -163,7 +209,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 096cdb20bb..a792c2cf98 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */ /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ - /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */ + /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */ /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index c4ed0d712f..68aef70872 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_vs.h" +#include "r300_fs.h" #include "r300_context.h" #include "r300_screen.h" @@ -33,6 +34,8 @@ #include "radeon_compiler.h" +#include "util/u_math.h" + /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( struct tgsi_shader_info* info, @@ -88,11 +91,13 @@ static void r300_shader_read_vs_outputs( } } -static void r300_shader_vap_output_fmt( - struct r300_shader_semantics* vs_outputs, - uint* hwfmt) +static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) { + struct r300_shader_semantics* vs_outputs = &vs->outputs; + uint32_t* hwfmt = vs->hwfmt; int i, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Do the actual vertex_info setup. * @@ -119,13 +124,19 @@ static void r300_shader_vap_output_fmt( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + } + } /* Texture coordinates. */ gen_count = 0; @@ -146,6 +157,9 @@ static void r300_shader_vap_output_fmt( /* XXX magic */ assert(gen_count <= 8); + + /* WPOS. */ + vs->wpos_tex_output = gen_count; } /* Sets up stream mapping to equivalent VS outputs if TCL is bypassed @@ -155,6 +169,8 @@ static void r300_stream_locations_notcl( int* stream_loc) { int i, tabi = 0, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Position. */ stream_loc[tabi++] = 0; @@ -166,14 +182,14 @@ static void r300_stream_locations_notcl( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { stream_loc[tabi++] = 2 + i; } } /* Back-face colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { stream_loc[tabi++] = 4 + i; } } @@ -181,7 +197,7 @@ static void r300_stream_locations_notcl( /* Texture coordinates. */ gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; @@ -195,8 +211,12 @@ static void r300_stream_locations_notcl( gen_count++; } - /* XXX magic */ - assert(gen_count <= 8); + /* WPOS. */ + if (vs_outputs->wpos != ATTR_UNUSED) { + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } for (; tabi < 16;) { stream_loc[tabi++] = -1; @@ -209,6 +229,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) struct r300_shader_semantics* outputs = &vs->outputs; struct tgsi_shader_info* info = &vs->info; int i, reg = 0; + boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED || + outputs->bcolor[1] != ATTR_UNUSED; /* Fill in the input mapping */ for (i = 0; i < info->num_inputs; i++) @@ -226,14 +248,30 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) c->code->outputs[outputs->psize] = reg++; } + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; + } else if (any_bcolor_used) { + reg++; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->bcolor[i] != ATTR_UNUSED) { + c->code->outputs[outputs->bcolor[i]] = reg++; + } else if (any_bcolor_used) { + reg++; + } + } /* Texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT; i++) { @@ -246,6 +284,33 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) if (outputs->fog != ATTR_UNUSED) { c->code->outputs[outputs->fog] = reg++; } + + /* WPOS. */ + if (outputs->wpos != ATTR_UNUSED) { + c->code->outputs[outputs->wpos] = reg++; + } +} + +static void r300_insert_wpos(struct r300_vertex_program_compiler* c, + struct r300_shader_semantics* outputs) +{ + int i, lastOutput = 0; + + /* Find the max output index. */ + lastOutput = MAX2(lastOutput, outputs->psize); + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->color[i]); + lastOutput = MAX2(lastOutput, outputs->bcolor[i]); + } + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->generic[i]); + } + lastOutput = MAX2(lastOutput, outputs->fog); + + /* Set WPOS after the last output. */ + lastOutput++; + rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */ + outputs->wpos = lastOutput; } void r300_translate_vertex_shader(struct r300_context* r300, @@ -256,8 +321,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Initialize. */ r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); - r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); /* Setup the compiler */ rc_init(&compiler.Base); @@ -277,9 +340,15 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); - compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs); + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; + /* Insert the WPOS output. */ + r300_insert_wpos(&compiler, &vs->outputs); + + r300_shader_vap_output_fmt(vs); + r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); + /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { @@ -292,3 +361,30 @@ void r300_translate_vertex_shader(struct r300_context* r300, rc_destroy(&compiler.Base); vs->translated = TRUE; } + +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) +{ + struct r300_vertex_shader* vs = r300->vs; + int tex_output = r300->vs->wpos_tex_output; + uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; + uint32_t* hwfmt = vs->hwfmt; + + if (r300->fs->inputs.wpos != ATTR_UNUSED) { + /* Enable WPOS in VAP. */ + if (!(hwfmt[1] & tex_fmt)) { + hwfmt[1] |= tex_fmt; + hwfmt[3] |= (4 << (3 * tex_output)); + + assert(tex_output < 8); + return TRUE; + } + } else { + /* Disable WPOS in VAP. */ + if (hwfmt[1] & tex_fmt) { + hwfmt[1] &= ~tex_fmt; + hwfmt[3] &= ~(4 << (3 * tex_output)); + return TRUE; + } + } + return FALSE; +} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 67e9db5366..18cfeee3cd 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -43,6 +43,9 @@ struct r300_vertex_shader { /* Stream locations for SWTCL or if TCL is bypassed. */ int stream_loc_notcl[16]; + /* Output stream location for WPOS. */ + int wpos_tex_output; + /* Has this shader been translated yet? */ boolean translated; @@ -53,4 +56,7 @@ struct r300_vertex_shader { void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); +/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); + #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index f98087deb8..5f130453c3 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,6 +36,7 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_tile_cache.h" @@ -55,6 +56,9 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, if (softpipe->no_rast) return; + if (!softpipe_check_render_cond(softpipe)) + return; + #if 0 softpipe_update_derived(softpipe); /* not needed?? */ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 0862e9f24b..8e01793940 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -176,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, } +static void +softpipe_render_condition( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + + softpipe->render_cond_query = query; + softpipe->render_cond_mode = mode; +} + + + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -191,6 +204,7 @@ softpipe_create( struct pipe_screen *screen ) #endif softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); softpipe->pipe.winsys = screen->winsys; softpipe->pipe.screen = screen; @@ -222,6 +236,10 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + softpipe->pipe.create_gs_state = softpipe_create_gs_state; + softpipe->pipe.bind_gs_state = softpipe_bind_gs_state; + softpipe->pipe.delete_gs_state = softpipe_delete_gs_state; + softpipe->pipe.set_blend_color = softpipe_set_blend_color; softpipe->pipe.set_clip_state = softpipe_set_clip_state; softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; @@ -238,6 +256,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; + softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; @@ -247,6 +267,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe_init_query_funcs( softpipe ); + softpipe->pipe.render_condition = softpipe_render_condition; + /* * Alloc caches for accessing drawing surfaces and textures. * Must be before quad stage setup! diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index ac24fccb4c..da673c57ad 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -58,6 +58,7 @@ struct softpipe_context { struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; struct sp_vertex_shader *vs; + struct sp_geometry_shader *gs; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -115,6 +116,10 @@ struct softpipe_context { unsigned line_stipple_counter; + /** Conditional query object and mode */ + struct pipe_query *render_cond_query; + uint render_cond_mode; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; @@ -147,6 +152,7 @@ struct softpipe_context { unsigned use_sse : 1; unsigned dump_fs : 1; + unsigned dump_gs : 1; unsigned no_rast : 1; }; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 96e1c5d815..03b58d2fb7 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,6 +38,7 @@ #include "util/u_prim.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_state.h" #include "draw/draw_context.h" @@ -48,7 +49,7 @@ static void softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; - uint i, size; + uint i, vssize, gssize; for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i] && sp->constants[i]->size) @@ -57,13 +58,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) } if (sp->constants[PIPE_SHADER_VERTEX]) - size = sp->constants[PIPE_SHADER_VERTEX]->size; + vssize = sp->constants[PIPE_SHADER_VERTEX]->size; else - size = 0; + vssize = 0; - draw_set_mapped_constant_buffer(sp->draw, + if (sp->constants[PIPE_SHADER_GEOMETRY]) + gssize = sp->constants[PIPE_SHADER_GEOMETRY]->size; + else + gssize = 0; + + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], - size); + vssize); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, + sp->mapped_constants[PIPE_SHADER_GEOMETRY], + gssize); } @@ -78,9 +87,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0); - for (i = 0; i < 2; i++) { + for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i] && sp->constants[i]->size) ws->buffer_unmap(ws, sp->constants[i]); sp->mapped_constants[i] = NULL; @@ -88,20 +98,42 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) } -boolean +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + */ +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + + +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); } -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - */ -boolean +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -109,51 +141,142 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + min_index, + max_index, + mode, + start, + count, + 0, + 1); +} + + +void +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + 0, + 1); +} + +void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + NULL, + 0, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ + softpipe_draw_range_elements_instanced(pipe, + indexBuffer, + indexSize, + 0, + 0xffffffff, + mode, + start, + count, + startInstance, + instanceCount); +} + +static void +softpipe_draw_range_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount) +{ struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; unsigned i; + if (!softpipe_check_render_cond(sp)) + return; + sp->reduced_api_prim = u_reduced_prim(mode); - if (sp->dirty) - softpipe_update_derived( sp ); + if (sp->dirty) { + softpipe_update_derived(sp); + } softpipe_map_transfers(sp); softpipe_map_constant_buffers(sp); - /* - * Map vertex buffers - */ + /* Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf - = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + void *buf; + + buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes - = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer_range(draw, indexSize, - min_index, - max_index, + void *mapped_indexes; + + mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, + indexSize, + minIndex, + maxIndex, mapped_indexes); - } - else { + } else { /* no index/element buffer */ - draw_set_mapped_element_buffer_range(draw, 0, start, - start + count - 1, NULL); + draw_set_mapped_element_buffer_range(draw, + 0, + start, + start + count - 1, + NULL); } /* draw! */ - draw_arrays(draw, mode, start, count); + draw_arrays_instanced(draw, mode, start, count, startInstance, instanceCount); - /* - * unmap vertex/index buffers - will cause draw module to flush - */ + /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); @@ -163,24 +286,8 @@ softpipe_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); } - /* Note: leave drawing surfaces mapped */ softpipe_unmap_constant_buffers(sp); sp->dirty_render_cache = TRUE; - - return TRUE; -} - - -boolean -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - return softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 5fbac06a53..7f573aef3c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -128,6 +128,7 @@ sp_vbuf_unmap_vertices(struct vbuf_render *vbr, { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + (void) cvbr; /* do nothing */ } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index fe6b6cec35..d9babe81da 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -229,7 +229,7 @@ blend_quad(struct quad_stage *qs, static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; struct softpipe_context *softpipe = qs->softpipe; - float source[4][QUAD_SIZE]; + float source[4][QUAD_SIZE] = { { 0 } }; /* * Compute src/first term RGB diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 379cf4ad06..4ef5d9f7b1 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe, } +/** + * Called by rendering function to check rendering is conditional. + * \return TRUE if we should render, FALSE if we should skip rendering + */ +boolean +softpipe_check_render_cond(struct softpipe_context *sp) +{ + struct pipe_context *pipe = &sp->pipe; + boolean b, wait; + uint64_t result; + + if (!sp->render_cond_query) { + return TRUE; /* no query predicate, draw normally */ + } + + wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT || + sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT); + + b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result); + if (b) + return result > 0; + else + return TRUE; +} + + void softpipe_init_query_funcs(struct softpipe_context *softpipe ) { softpipe->pipe.create_query = softpipe_create_query; diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h index 05060a4575..736c033897 100644 --- a/src/gallium/drivers/softpipe/sp_query.h +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -32,6 +32,10 @@ #ifndef SP_QUERY_H #define SP_QUERY_H +extern boolean +softpipe_check_render_cond(struct softpipe_context *sp); + + struct softpipe_context; extern void softpipe_init_query_funcs(struct softpipe_context * ); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 615581b95f..3da75364c5 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1268,7 +1268,7 @@ void sp_setup_prepare( struct setup_context *setup ) } /* Note: nr_attrs is only used for debugging (vertex printing) */ - setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw); + setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw); sp->quad.first->begin( sp->quad.first ); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index c41e718488..7f244c4fd4 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -50,6 +50,7 @@ #define SP_NEW_VERTEX 0x1000 #define SP_NEW_VS 0x2000 #define SP_NEW_QUERY 0x4000 +#define SP_NEW_GS 0x8000 struct tgsi_sampler; @@ -90,6 +91,11 @@ struct sp_vertex_shader { int max_sampler; /* -1 if no samplers */ }; +/** Subclass of pipe_shader_state */ +struct sp_geometry_shader { + struct pipe_shader_state shader; + struct draw_geometry_shader *draw_data; +}; void * @@ -143,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *, const struct pipe_shader_state *); void softpipe_bind_vs_state(struct pipe_context *, void *); void softpipe_delete_vs_state(struct pipe_context *, void *); +void *softpipe_create_gs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_gs_state(struct pipe_context *, void *); +void softpipe_delete_gs_state(struct pipe_context *, void *); void softpipe_set_polygon_stipple( struct pipe_context *, const struct pipe_poly_stipple * ); @@ -174,14 +184,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *, void softpipe_update_derived( struct softpipe_context *softpipe ); -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); -boolean +void softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -190,6 +200,24 @@ softpipe_draw_range_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); void +softpipe_draw_arrays_instanced(struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void +softpipe_draw_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count, + unsigned startInstance, + unsigned instanceCount); + +void softpipe_map_transfers(struct softpipe_context *sp); void diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index efed082f82..95ab323433 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -29,6 +29,7 @@ */ #include "util/u_memory.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_state.h" @@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend = (struct pipe_blend_state *)blend; softpipe->dirty |= SP_NEW_BLEND; @@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend_color = *blend_color; softpipe->dirty |= SP_NEW_BLEND; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index c24a737d07..f6856a5f69 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -67,7 +67,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(softpipe->draw); + const uint num = draw_current_shader_outputs(softpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -117,13 +117,13 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + src = draw_find_shader_output(softpipe->draw, + spfs->info.input_semantic_name[i], + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + softpipe->psize_slot = draw_find_shader_output(softpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index eba0563c62..b7ed4441b4 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->fs = (struct sp_fragment_shader *) fs; + draw_flush(softpipe->draw); + + if (softpipe->fs == fs) + return; + + draw_flush(softpipe->draw); + + softpipe->fs = fs; softpipe->dirty |= SP_NEW_FS; } @@ -159,8 +166,74 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + draw_flush(softpipe->draw); + /* note: reference counting */ pipe_buffer_reference(&softpipe->constants[shader], buf); softpipe->dirty |= SP_NEW_CONSTANTS; } + +void * +softpipe_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_geometry_shader *state; + + state = CALLOC_STRUCT(sp_geometry_shader); + if (state == NULL ) + goto fail; + + /* debug */ + if (softpipe->dump_gs) + tgsi_dump(templ->tokens, 0); + + /* copy shader tokens, the ones passed in will go away. + */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (state->shader.tokens == NULL) + goto fail; + + state->draw_data = draw_create_geometry_shader(softpipe->draw, templ); + if (state->draw_data == NULL) + goto fail; + + return state; + +fail: + if (state) { + FREE( (void *)state->shader.tokens ); + FREE( state->draw_data ); + FREE( state ); + } + return NULL; +} + + +void +softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->gs = (struct sp_geometry_shader *)gs; + + draw_bind_geometry_shader(softpipe->draw, + (softpipe->gs ? softpipe->gs->draw_data : NULL)); + + softpipe->dirty |= SP_NEW_GS; +} + + +void +softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_geometry_shader *state = + (struct sp_geometry_shader *)gs; + + draw_delete_geometry_shader(softpipe->draw, + (state) ? state->draw_data : 0); + FREE(state); +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 87b7219683..a5b00336d4 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe, } void softpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct softpipe_context *softpipe = softpipe_context(pipe); + if (softpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(softpipe->draw, setup); + draw_set_rasterizer_state(softpipe->draw, rasterizer); - softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + softpipe->rasterizer = rasterizer; softpipe->dirty |= SP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index a518248bb1..f6154109ea 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -51,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, struct softpipe_context *sp = softpipe_context(pipe); uint i; + draw_flush(sp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index e26153b1d9..1ae8fecacf 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2,7 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. + * Copyright 2008-2010 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -514,21 +514,15 @@ static float compute_lambda_1d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float rho = MAX2(dsdx, dsdy) * texture->width0; - float lambda; - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - return lambda; + return util_fast_log2(rho); } @@ -536,8 +530,7 @@ static float compute_lambda_2d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -548,13 +541,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, float maxx = MAX2(dsdx, dsdy) * texture->width0; float maxy = MAX2(dtdx, dtdy) * texture->height0; float rho = MAX2(maxx, maxy); - float lambda; - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; + return util_fast_log2(rho); } @@ -562,8 +550,7 @@ static float compute_lambda_3d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -576,31 +563,26 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, float maxx = MAX2(dsdx, dsdy) * texture->width0; float maxy = MAX2(dtdx, dtdy) * texture->height0; float maxz = MAX2(dpdx, dpdy) * texture->depth0; - float rho, lambda; + float rho; rho = MAX2(maxx, maxy); rho = MAX2(rho, maxz); - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; + return util_fast_log2(rho); } /** * Compute lambda for a vertex texture sampler. - * Since there aren't derivatives to use, just return the LOD bias. + * Since there aren't derivatives to use, just return 0. */ static float compute_lambda_vert(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { - return lodbias; + return 0.0f; } @@ -769,7 +751,8 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -827,7 +810,8 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -866,7 +850,8 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -914,7 +899,8 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -949,7 +935,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -996,7 +983,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1035,7 +1023,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1076,7 +1065,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1115,7 +1105,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1161,7 +1152,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1209,7 +1201,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1261,29 +1254,60 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, } +/* Calculate level of detail for every fragment. + * Note that lambda has already been biased by global LOD bias. + */ +static INLINE void +compute_lod(const struct pipe_sampler_state *sampler, + const float biased_lambda, + const float lodbias[QUAD_SIZE], + float lod[QUAD_SIZE]) +{ + uint i; + + for (i = 0; i < QUAD_SIZE; i++) { + lod[i] = biased_lambda + lodbias[i]; + lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod); + } +} + + static void mip_filter_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; int level0; float lambda; + float lod[QUAD_SIZE]; + + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); - lambda = samp->compute_lambda(samp, s, t, p, lodbias); + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; level0 = (int)lambda; if (lambda < 0.0) { samp->level = 0; - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else if (level0 >= texture->last_level) { samp->level = texture->last_level; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { float levelBlend = lambda - level0; @@ -1292,10 +1316,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, int c,j; samp->level = level0; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0); samp->level = level0+1; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1); for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { @@ -1311,23 +1335,36 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; float lambda; + float lod[QUAD_SIZE]; - lambda = samp->compute_lambda(samp, s, t, p, lodbias); + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; if (lambda < 0.0) { samp->level = 0; - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { samp->level = (int)(lambda + 0.5) ; samp->level = MIN2(samp->level, (int)texture->last_level); - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } #if 0 @@ -1345,17 +1382,32 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - float lambda = samp->compute_lambda(samp, s, t, p, lodbias); + float lambda; + float lod[QUAD_SIZE]; + + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; if (lambda < 0.0) { - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } } @@ -1371,15 +1423,28 @@ mip_filter_linear_2d_linear_repeat_POT( const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; int level0; float lambda; + float lod[QUAD_SIZE]; - lambda = compute_lambda_2d(samp, s, t, p, lodbias); + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; level0 = (int)lambda; /* Catches both negative and large values of level0: @@ -1390,7 +1455,7 @@ mip_filter_linear_2d_linear_repeat_POT( else samp->level = texture->last_level; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { float levelBlend = lambda - level0; @@ -1399,10 +1464,10 @@ mip_filter_linear_2d_linear_repeat_POT( int c,j; samp->level = level0; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0); samp->level = level0+1; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1); for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { @@ -1422,7 +1487,8 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1430,7 +1496,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, int j, k0, k1, k2, k3; float val; - samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba ); + samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba); /** * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' @@ -1508,7 +1574,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1589,7 +1656,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, * is not active, this will point somewhere deeper into the * pipeline, eg. to mip_filter or even img_filter. */ - samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba); + samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba); } @@ -1862,7 +1929,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, break; } - if (sampler->compare_mode != FALSE) { + if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { samp->compare = sample_compare; } else { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index b0797711d3..b6e66c998a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -2,6 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2010 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -46,14 +47,14 @@ typedef void (*wrap_linear_func)(const float s[4], typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias); + const float p[QUAD_SIZE]); typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index c3de12b4a3..af99c9de37 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -29,6 +29,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_screen.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "util/u_upload_mgr.h" #include "svga_context.h" @@ -61,6 +62,9 @@ static void svga_destroy( struct pipe_context *pipe ) u_upload_destroy( svga->upload_vb ); u_upload_destroy( svga->upload_ib ); + util_bitmask_destroy( svga->vs_bm ); + util_bitmask_destroy( svga->fs_bm ); + for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader) pipe_buffer_reference( &svga->curr.cb[shader], NULL ); @@ -130,7 +134,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga = CALLOC_STRUCT(svga_context); if (svga == NULL) - goto error1; + goto no_svga; svga->pipe.winsys = screen->winsys; svga->pipe.screen = screen; @@ -142,7 +146,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->swc = svgascreen->sws->context_create(svgascreen->sws); if(!svga->swc) - goto error2; + goto no_swc; svga_init_blend_functions(svga); svga_init_blit_functions(svga); @@ -165,32 +169,40 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); if (!svga_init_swtnl(svga)) - goto error3; + goto no_swtnl; + + svga->fs_bm = util_bitmask_create(); + if (svga->fs_bm == NULL) + goto no_fs_bm; + + svga->vs_bm = util_bitmask_create(); + if (svga->vs_bm == NULL) + goto no_vs_bm; svga->upload_ib = u_upload_create( svga->pipe.screen, 32 * 1024, 16, PIPE_BUFFER_USAGE_INDEX ); if (svga->upload_ib == NULL) - goto error4; + goto no_upload_ib; svga->upload_vb = u_upload_create( svga->pipe.screen, 128 * 1024, 16, PIPE_BUFFER_USAGE_VERTEX ); if (svga->upload_vb == NULL) - goto error5; + goto no_upload_vb; svga->hwtnl = svga_hwtnl_create( svga, svga->upload_ib, svga->swc ); if (svga->hwtnl == NULL) - goto error6; + goto no_hwtnl; ret = svga_emit_initial_state( svga ); if (ret) - goto error7; + goto no_state; /* Avoid shortcircuiting state with initial value of zero. */ @@ -209,19 +221,23 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) return &svga->pipe; -error7: +no_state: svga_hwtnl_destroy( svga->hwtnl ); -error6: +no_hwtnl: u_upload_destroy( svga->upload_vb ); -error5: +no_upload_vb: u_upload_destroy( svga->upload_ib ); -error4: +no_upload_ib: + util_bitmask_destroy( svga->vs_bm ); +no_vs_bm: + util_bitmask_destroy( svga->fs_bm ); +no_fs_bm: svga_destroy_swtnl(svga); -error3: +no_swtnl: svga->swc->destroy(svga->swc); -error2: +no_swc: FREE(svga); -error1: +no_svga: return NULL; } diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 0885d9ca74..66259fd010 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -41,6 +41,7 @@ struct draw_vertex_shader; struct svga_shader_result; struct SVGACmdMemory; +struct util_bitmask; struct u_upload_mgr; @@ -265,8 +266,6 @@ struct svga_hw_draw_state unsigned ts[16][TS_MAX]; float cb[PIPE_SHADER_TYPES][CB_MAX][4]; - unsigned shader_id[PIPE_SHADER_TYPES]; - struct svga_shader_result *fs; struct svga_shader_result *vs; struct svga_hw_view_state views[PIPE_MAX_SAMPLERS]; @@ -319,12 +318,14 @@ struct svga_context boolean new_vdecl; } swtnl; + /* Bitmask of used shader IDs */ + struct util_bitmask *fs_bm; + struct util_bitmask *vs_bm; + struct { unsigned dirty[4]; unsigned texture_timestamp; - unsigned next_fs_id; - unsigned next_vs_id; /* Internally generated shaders: */ diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 8db40d0fd5..ca73cf9d5a 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -164,7 +164,8 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", - svga_surface(svga->curr.framebuffer.cbufs[0])->handle, + svga->curr.framebuffer.cbufs[0] ? + svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, hwtnl->cmd.prim_count); ret = SVGA3D_BeginDrawPrimitives(swc, diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 71a552862e..0f24ef4ee8 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -149,7 +149,7 @@ retry: -static boolean +static void svga_draw_range_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -162,7 +162,7 @@ svga_draw_range_elements( struct pipe_context *pipe, enum pipe_error ret = 0; if (!u_trim_pipe_prim( prim, &count )) - return TRUE; + return; /* * Mark currently bound target surfaces as dirty @@ -183,7 +183,7 @@ svga_draw_range_elements( struct pipe_context *pipe, #ifdef DEBUG if (svga->curr.vs->base.id == svga->debug.disable_shader || svga->curr.fs->base.id == svga->debug.disable_shader) - return 0; + return; #endif if (svga->state.sw.need_swtnl) @@ -225,31 +225,29 @@ svga_draw_range_elements( struct pipe_context *pipe, svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); } - - return ret == PIPE_OK; } -static boolean +static void svga_draw_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - prim, start, count ); + svga_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + prim, start, count ); } -static boolean +static void svga_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements(pipe, NULL, 0, - start, start + count - 1, - prim, - start, count); + svga_draw_range_elements(pipe, NULL, 0, + start, start + count - 1, + prim, + start, count); } diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index e3be840d92..5f1213e46a 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -107,7 +108,16 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->fs_bm, result->id ); + svga_destroy_shader_result( result ); + + /* + * Remove stale references to this result to ensure a new result on the + * same address will be detected as a change. + */ + if(result == svga->state.hw_draw.fs) + svga->state.hw_draw.fs = NULL; } FREE((void *)fs->base.tokens); diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 78053e755e..460a101f8c 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -76,7 +76,6 @@ static INLINE unsigned translate_img_filter( unsigned filter ) switch (filter) { case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: return SVGA3D_TEX_FILTER_ANISOTROPIC; default: assert(0); return SVGA3D_TEX_FILTER_NEAREST; @@ -107,6 +106,8 @@ svga_create_sampler_state(struct pipe_context *pipe, cso->magfilter = translate_img_filter( sampler->mag_img_filter ); cso->minfilter = translate_img_filter( sampler->min_img_filter ); cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 ); + if(cso->aniso_level != 1) + cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC; cso->lod_bias = sampler->lod_bias; cso->addressu = translate_wrap_mode(sampler->wrap_s); cso->addressv = translate_wrap_mode(sampler->wrap_t); diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index c104c41f5f..7e6ab576ad 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -172,7 +173,16 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->vs_bm, result->id ); + svga_destroy_shader_result( result ); + + /* + * Remove stale references to this result to ensure a new result on the + * same address will be detected as a change. + */ + if(result == svga->state.hw_draw.vs) + svga->state.hw_draw.vs = NULL; } FREE((void *)vs->base.tokens); diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 6ec38ed3e4..ec2886348b 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "svga_context.h" #include "svga_state.h" @@ -39,8 +40,13 @@ static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a, const struct svga_fs_compile_key *b ) { - unsigned keysize = svga_fs_key_size( a ); - return memcmp( a, b, keysize ); + unsigned keysize_a = svga_fs_key_size( a ); + unsigned keysize_b = svga_fs_key_size( b ); + + if (keysize_a != keysize_b) { + return (int)(keysize_a - keysize_b); + } + return memcmp( a, b, keysize_a ); } @@ -66,7 +72,7 @@ static enum pipe_error compile_fs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_fragment_program( fs, key ); if (result == NULL) { @@ -74,9 +80,14 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->fs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + result->id, SVGA3D_SHADERTYPE_PS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -84,14 +95,16 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_fs_id++; result->next = fs->base.results; fs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->fs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -116,7 +129,7 @@ fail: */ static int emit_white_fs( struct svga_context *svga ) { - int ret; + int ret = PIPE_ERROR; /* ps_3_0 * def c0, 1.000000, 0.000000, 0.000000, 1.000000 @@ -137,16 +150,26 @@ static int emit_white_fs( struct svga_context *svga ) 0x0000ffff, }; + assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX); + svga->state.white_fs_id = util_bitmask_add(svga->fs_bm); + if(svga->state.white_fs_id == SVGA3D_INVALID_ID) + goto no_fs_id; + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + svga->state.white_fs_id, SVGA3D_SHADERTYPE_PS, white_tokens, sizeof(white_tokens)); if (ret) - return ret; + goto no_definition; - svga->state.white_fs_id = svga->state.next_fs_id++; return 0; + +no_definition: + util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id); + svga->state.white_fs_id = SVGA3D_INVALID_ID; +no_fs_id: + return ret; } @@ -251,15 +274,14 @@ static int emit_hw_fs( struct svga_context *svga, assert(id != SVGA3D_INVALID_ID); - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_PS, + if (result != svga->state.hw_draw.fs) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_PS, id ); if (ret) return ret; svga->dirty |= SVGA_NEW_FS_RESULT; - svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id; svga->state.hw_draw.fs = result; } diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 44b7ceb4fa..e7e6c08432 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "translate/translate.h" #include "svga_context.h" @@ -70,7 +71,7 @@ static enum pipe_error compile_vs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret = PIPE_OK; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_vertex_program( vs, key ); if (result == NULL) { @@ -78,8 +79,14 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->vs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto fail; + } + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_vs_id, + result->id, SVGA3D_SHADERTYPE_VS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -87,14 +94,16 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_vs_id++; result->next = vs->base.results; vs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->vs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -142,15 +151,14 @@ static int emit_hw_vs( struct svga_context *svga, id = result->id; } - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_VS, + if (result != svga->state.hw_draw.vs) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_VS, id ); if (ret) return ret; svga->dirty |= SVGA_NEW_VS_RESULT; - svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id; svga->state.hw_draw.vs = result; } @@ -194,10 +202,12 @@ static int update_zero_stride( struct svga_context *svga, key.output_stride = 4 * sizeof(float); key.nr_elements = 1; + key.element[0].type = TRANSLATE_ELEMENT_NORMAL; key.element[0].input_format = vel->src_format; key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[0].input_buffer = vel->vertex_buffer_index; key.element[0].input_offset = vel->src_offset; + key.element[0].instance_divisor = vel->instance_divisor; key.element[0].output_offset = const_idx * 4 * sizeof(float); translate_key_sanitize(&key); @@ -216,7 +226,7 @@ static int update_zero_stride( struct svga_context *svga, translate->set_buffer(translate, vel->vertex_buffer_index, mapped_buffer, vbuffer->stride); - translate->run(translate, 0, 1, + translate->run(translate, 0, 1, 0, svga->curr.zero_stride_constants); pipe_buffer_unmap(svga->pipe.screen, diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 8b14c913f7..7655121bec 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -90,7 +90,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga, PIPE_BUFFER_USAGE_CPU_READ); assert(map); draw_set_mapped_constant_buffer( - draw, + draw, PIPE_SHADER_VERTEX, map, svga->curr.cb[PIPE_SHADER_VERTEX]->size); } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 25b8c2af3a..94b6ccc62d 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -156,7 +156,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) memset(vdecl, 0, sizeof(vdecl)); /* always add position */ - src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->attrib[0].emit = EMIT_4F; vdecl[0].array.offset = offset; @@ -169,7 +169,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) for (i = 0; i < fs->base.info.num_inputs; i++) { unsigned name = fs->base.info.input_semantic_name[i]; unsigned index = fs->base.info.input_semantic_index[i]; - src = draw_find_vs_output(draw, name, index); + src = draw_find_shader_output(draw, name, index); vdecl[nr_decls].array.offset = offset; vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i]; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index b8ef137c01..0cd620189b 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -31,6 +31,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "svgadump/svga_shader_dump.h" @@ -221,6 +222,7 @@ svga_tgsi_translate( const struct svga_shader *shader, result->tokens = (const unsigned *)emit.buf; result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned); memcpy(&result->key, &key, sizeof key); + result->id = UTIL_BITMASK_INVALID_INDEX; if (SVGA_DEBUG & DEBUG_TGSI) { diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 896c90a89a..737a2213af 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -39,26 +39,24 @@ struct tgsi_token; struct svga_vs_compile_key { - ubyte need_prescale:1; - ubyte allow_psiz:1; unsigned zero_stride_vertex_elements; - ubyte num_zero_stride_vertex_elements:6; + unsigned need_prescale:1; + unsigned allow_psiz:1; + unsigned num_zero_stride_vertex_elements:6; }; struct svga_fs_compile_key { - boolean light_twoside:1; - boolean front_cw:1; - ubyte num_textures; - ubyte num_unnormalized_coords; + unsigned light_twoside:1; + unsigned front_cw:1; + unsigned num_textures:8; + unsigned num_unnormalized_coords:8; struct { - ubyte compare_mode : 1; - ubyte compare_func : 3; - ubyte unnormalized : 1; - - ubyte width_height_idx : 7; - - ubyte texture_target; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned unnormalized:1; + unsigned width_height_idx:7; + unsigned texture_target:8; } tex[PIPE_MAX_SAMPLERS]; }; @@ -121,8 +119,7 @@ static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key ) static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key ) { - return (const char *)&key->tex[key->num_textures].texture_target - - (const char *)key; + return (const char *)&key->tex[key->num_textures] - (const char *)key; } struct svga_shader_result * diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 1670da8bfa..dc5eb8fc60 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_I2F: case TGSI_OPCODE_NOT: case TGSI_OPCODE_SHL: - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: case TGSI_OPCODE_XOR: return FALSE; diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index e6d4a74e86..d59fb89a58 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1444,6 +1444,312 @@ dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd) void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; + + switch(cmd_id) { + case SVGA_3D_CMD_SURFACE_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); + { + const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; + dump_SVGA3dCmdDefineSurface(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dSize) <= next) { + dump_SVGA3dSize((const SVGA3dSize *)body); + body += sizeof(SVGA3dSize); + } + } + break; + case SVGA_3D_CMD_SURFACE_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); + { + const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; + dump_SVGA3dCmdDestroySurface(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_COPY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); + { + const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; + dump_SVGA3dCmdSurfaceCopy(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + } + break; + case SVGA_3D_CMD_SURFACE_STRETCHBLT: + _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); + { + const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; + dump_SVGA3dCmdSurfaceStretchBlt(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_DMA: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); + { + const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; + dump_SVGA3dCmdSurfaceDMA(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { + dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); + body += sizeof(SVGA3dCmdSurfaceDMASuffix); + } + } + break; + case SVGA_3D_CMD_CONTEXT_DEFINE: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); + { + const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; + dump_SVGA3dCmdDefineContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CONTEXT_DESTROY: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); + { + const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; + dump_SVGA3dCmdDestroyContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTRANSFORM: + _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); + { + const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; + dump_SVGA3dCmdSetTransform(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETZRANGE: + _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); + { + const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; + dump_SVGA3dCmdSetZRange(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETRENDERSTATE: + _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); + { + const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; + dump_SVGA3dCmdSetRenderState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRenderState) <= next) { + dump_SVGA3dRenderState((const SVGA3dRenderState *)body); + body += sizeof(SVGA3dRenderState); + } + } + break; + case SVGA_3D_CMD_SETRENDERTARGET: + _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); + { + const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; + dump_SVGA3dCmdSetRenderTarget(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTEXTURESTATE: + _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); + { + const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; + dump_SVGA3dCmdSetTextureState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dTextureState) <= next) { + dump_SVGA3dTextureState((const SVGA3dTextureState *)body); + body += sizeof(SVGA3dTextureState); + } + } + break; + case SVGA_3D_CMD_SETMATERIAL: + _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); + { + const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; + dump_SVGA3dCmdSetMaterial(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTDATA: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); + { + const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; + dump_SVGA3dCmdSetLightData(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTENABLED: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); + { + const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; + dump_SVGA3dCmdSetLightEnabled(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETVIEWPORT: + _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); + { + const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; + dump_SVGA3dCmdSetViewport(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETCLIPPLANE: + _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); + { + const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; + dump_SVGA3dCmdSetClipPlane(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CLEAR: + _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); + { + const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; + dump_SVGA3dCmdClear(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRect) <= next) { + dump_SVGA3dRect((const SVGA3dRect *)body); + body += sizeof(SVGA3dRect); + } + } + break; + case SVGA_3D_CMD_PRESENT: + _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); + { + const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; + dump_SVGA3dCmdPresent(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyRect) <= next) { + dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); + body += sizeof(SVGA3dCopyRect); + } + } + break; + case SVGA_3D_CMD_SHADER_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); + { + const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; + dump_SVGA3dCmdDefineShader(cmd); + body = (const uint8_t *)&cmd[1]; + svga_shader_dump((const uint32_t *)body, + (unsigned)(next - body)/sizeof(uint32_t), + FALSE ); + body = next; + } + break; + case SVGA_3D_CMD_SHADER_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); + { + const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; + dump_SVGA3dCmdDestroyShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); + { + const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; + dump_SVGA3dCmdSetShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER_CONST: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); + { + const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; + dump_SVGA3dCmdSetShaderConst(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_DRAW_PRIMITIVES: + _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); + { + const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; + unsigned i, j; + dump_SVGA3dCmdDrawPrimitives(cmd); + body = (const uint8_t *)&cmd[1]; + for(i = 0; i < cmd->numVertexDecls; ++i) { + dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); + body += sizeof(SVGA3dVertexDecl); + } + for(j = 0; j < cmd->numRanges; ++j) { + dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); + body += sizeof(SVGA3dPrimitiveRange); + } + while(body + sizeof(SVGA3dVertexDivisor) <= next) { + dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); + body += sizeof(SVGA3dVertexDivisor); + } + } + break; + case SVGA_3D_CMD_SETSCISSORRECT: + _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); + { + const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; + dump_SVGA3dCmdSetScissorRect(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BEGIN_QUERY: + _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); + { + const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; + dump_SVGA3dCmdBeginQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_END_QUERY: + _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); + { + const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; + dump_SVGA3dCmdEndQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_WAIT_FOR_QUERY: + _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); + { + const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; + dump_SVGA3dCmdWaitForQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: + _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); + { + const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; + dump_SVGA3dCmdBlitSurfaceToScreen(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGASignedRect) <= next) { + dump_SVGASignedRect((const SVGASignedRect *)body); + body += sizeof(SVGASignedRect); + } + } + break; + default: + _debug_printf("\t0x%08x\n", cmd_id); + break; + } + + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} + + +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -1458,307 +1764,11 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; - switch(cmd_id) { - case SVGA_3D_CMD_SURFACE_DEFINE: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); - { - const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; - dump_SVGA3dCmdDefineSurface(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dSize) <= next) { - dump_SVGA3dSize((const SVGA3dSize *)body); - body += sizeof(SVGA3dSize); - } - } - break; - case SVGA_3D_CMD_SURFACE_DESTROY: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); - { - const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; - dump_SVGA3dCmdDestroySurface(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_COPY: - _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); - { - const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; - dump_SVGA3dCmdSurfaceCopy(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - } - break; - case SVGA_3D_CMD_SURFACE_STRETCHBLT: - _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); - { - const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; - dump_SVGA3dCmdSurfaceStretchBlt(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_DMA: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); - { - const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; - dump_SVGA3dCmdSurfaceDMA(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { - dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); - body += sizeof(SVGA3dCmdSurfaceDMASuffix); - } - } - break; - case SVGA_3D_CMD_CONTEXT_DEFINE: - _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); - { - const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; - dump_SVGA3dCmdDefineContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CONTEXT_DESTROY: - _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); - { - const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; - dump_SVGA3dCmdDestroyContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTRANSFORM: - _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); - { - const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; - dump_SVGA3dCmdSetTransform(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETZRANGE: - _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); - { - const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; - dump_SVGA3dCmdSetZRange(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETRENDERSTATE: - _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); - { - const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; - dump_SVGA3dCmdSetRenderState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRenderState) <= next) { - dump_SVGA3dRenderState((const SVGA3dRenderState *)body); - body += sizeof(SVGA3dRenderState); - } - } - break; - case SVGA_3D_CMD_SETRENDERTARGET: - _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); - { - const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; - dump_SVGA3dCmdSetRenderTarget(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTEXTURESTATE: - _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); - { - const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; - dump_SVGA3dCmdSetTextureState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dTextureState) <= next) { - dump_SVGA3dTextureState((const SVGA3dTextureState *)body); - body += sizeof(SVGA3dTextureState); - } - } - break; - case SVGA_3D_CMD_SETMATERIAL: - _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); - { - const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; - dump_SVGA3dCmdSetMaterial(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTDATA: - _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); - { - const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; - dump_SVGA3dCmdSetLightData(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTENABLED: - _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); - { - const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; - dump_SVGA3dCmdSetLightEnabled(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETVIEWPORT: - _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); - { - const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; - dump_SVGA3dCmdSetViewport(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETCLIPPLANE: - _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); - { - const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; - dump_SVGA3dCmdSetClipPlane(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CLEAR: - _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); - { - const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; - dump_SVGA3dCmdClear(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRect) <= next) { - dump_SVGA3dRect((const SVGA3dRect *)body); - body += sizeof(SVGA3dRect); - } - } - break; - case SVGA_3D_CMD_PRESENT: - _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); - { - const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; - dump_SVGA3dCmdPresent(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyRect) <= next) { - dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); - body += sizeof(SVGA3dCopyRect); - } - } - break; - case SVGA_3D_CMD_SHADER_DEFINE: - _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); - { - const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; - dump_SVGA3dCmdDefineShader(cmd); - body = (const uint8_t *)&cmd[1]; - svga_shader_dump((const uint32_t *)body, - (unsigned)(next - body)/sizeof(uint32_t), - FALSE ); - body = next; - } - break; - case SVGA_3D_CMD_SHADER_DESTROY: - _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); - { - const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; - dump_SVGA3dCmdDestroyShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER: - _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); - { - const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; - dump_SVGA3dCmdSetShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER_CONST: - _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); - { - const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; - dump_SVGA3dCmdSetShaderConst(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_DRAW_PRIMITIVES: - _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); - { - const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; - unsigned i, j; - dump_SVGA3dCmdDrawPrimitives(cmd); - body = (const uint8_t *)&cmd[1]; - for(i = 0; i < cmd->numVertexDecls; ++i) { - dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); - body += sizeof(SVGA3dVertexDecl); - } - for(j = 0; j < cmd->numRanges; ++j) { - dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); - body += sizeof(SVGA3dPrimitiveRange); - } - while(body + sizeof(SVGA3dVertexDivisor) <= next) { - dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); - body += sizeof(SVGA3dVertexDivisor); - } - } - break; - case SVGA_3D_CMD_SETSCISSORRECT: - _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); - { - const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; - dump_SVGA3dCmdSetScissorRect(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BEGIN_QUERY: - _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); - { - const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; - dump_SVGA3dCmdBeginQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_END_QUERY: - _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); - { - const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; - dump_SVGA3dCmdEndQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_WAIT_FOR_QUERY: - _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); - { - const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; - dump_SVGA3dCmdWaitForQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: - _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); - { - const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; - dump_SVGA3dCmdBlitSurfaceToScreen(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGASignedRect) <= next) { - dump_SVGASignedRect((const SVGASignedRect *)body); - body += sizeof(SVGASignedRect); - } - } - break; - default: - _debug_printf("\t0x%08x\n", cmd_id); - break; - } - - while(body + sizeof(uint32_t) <= next) { - _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - _debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { _debug_printf("\tSVGA_CMD_FENCE\n"); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h index 69a8702087..ca0154361c 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.h +++ b/src/gallium/drivers/svga/svgadump/svga_dump.h @@ -28,6 +28,9 @@ #include "pipe/p_compiler.h" +void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size); + void svga_dump_commands(const void *commands, uint32_t size); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index a1ada29ef8..0bc0b3ae31 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -208,6 +208,56 @@ cmds = [ def dump_cmds(): print r''' void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; +''' + print ' switch(cmd_id) {' + indexes = 'ijklmn' + for id, header, body, footer in cmds: + print ' case %s:' % id + print ' _debug_printf("\\t%s\\n");' % id + print ' {' + print ' const %s *cmd = (const %s *)body;' % (header, header) + if len(body): + print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' + print ' dump_%s(cmd);' % header + print ' body = (const uint8_t *)&cmd[1];' + for i in range(len(body)): + struct, count = body[i] + idx = indexes[i] + print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) + print ' dump_%s((const %s *)body);' % (struct, struct) + print ' body += sizeof(%s);' % struct + print ' }' + if footer is not None: + print ' while(body + sizeof(%s) <= next) {' % footer + print ' dump_%s((const %s *)body);' % (footer, footer) + print ' body += sizeof(%s);' % footer + print ' }' + if id == 'SVGA_3D_CMD_SHADER_DEFINE': + print ' svga_shader_dump((const uint32_t *)body,' + print ' (unsigned)(next - body)/sizeof(uint32_t),' + print ' FALSE);' + print ' body = next;' + print ' }' + print ' break;' + print ' default:' + print ' _debug_printf("\\t0x%08x\\n", cmd_id);' + print ' break;' + print ' }' + print r''' + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} +''' + print r''' +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -222,51 +272,11 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; -''' - print ' switch(cmd_id) {' - indexes = 'ijklmn' - for id, header, body, footer in cmds: - print ' case %s:' % id - print ' _debug_printf("\\t%s\\n");' % id - print ' {' - print ' const %s *cmd = (const %s *)body;' % (header, header) - if len(body): - print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' - print ' dump_%s(cmd);' % header - print ' body = (const uint8_t *)&cmd[1];' - for i in range(len(body)): - struct, count = body[i] - idx = indexes[i] - print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) - print ' dump_%s((const %s *)body);' % (struct, struct) - print ' body += sizeof(%s);' % struct - print ' }' - if footer is not None: - print ' while(body + sizeof(%s) <= next) {' % footer - print ' dump_%s((const %s *)body);' % (footer, footer) - print ' body += sizeof(%s);' % footer - print ' }' - if id == 'SVGA_3D_CMD_SHADER_DEFINE': - print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));' - print ' body = next;' - print ' }' - print ' break;' - print ' default:' - print ' _debug_printf("\\t0x%08x\\n", cmd_id);' - print ' break;' - print ' }' - - print r''' - while(body + sizeof(uint32_t) <= next) { - _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - _debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { _debug_printf("\tSVGA_CMD_FENCE\n"); diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 1000c31e49..203c3851bc 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing ldd progs/trivial/tri -== Traceing == +== Tracing == -For traceing then do +For tracing then do - export XMESA_TRACE=y GALLIUM_TRACE=tri.trace progs/trivial/tri which should create a tri.trace file, which is an XML file. You can view copying diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 8008b596c3..5a9f0fc690 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -52,6 +52,7 @@ trace_buffer_unwrap(struct trace_context *tr_ctx, assert(tr_buf->buffer); assert(tr_buf->buffer->screen == tr_scr->screen); + (void) tr_scr; return tr_buf->buffer; } @@ -90,6 +91,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx, assert(tr_surf->surface); assert(tr_surf->surface->texture->screen == tr_scr->screen); + (void) tr_scr; return tr_surf->surface; } @@ -159,16 +161,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) pipe_mutex_unlock(tr_ctx->draw_mutex); } -static INLINE boolean +static INLINE void trace_context_draw_arrays(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -179,19 +180,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_arrays(pipe, mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_arrays(pipe, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -201,10 +198,9 @@ trace_context_draw_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -219,19 +215,15 @@ trace_context_draw_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -245,10 +237,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -265,18 +256,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, minIndex, maxIndex, - mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, minIndex, maxIndex, + mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 7e2ccbcfdc..0f45e211a3 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -40,7 +40,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) #include <stdlib.h> #endif @@ -258,7 +258,7 @@ boolean trace_dump_trace_begin() trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); trace_dump_writes("<trace version='0.1'>\n"); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) /* Linux applications rarely cleanup GL / Gallium resources so catch * application exit here */ atexit(trace_dump_trace_close); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 5794c90298..32f61f8c94 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -409,7 +409,7 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state) trace_dump_member(uint, state, min_img_filter); trace_dump_member(uint, state, min_mip_filter); trace_dump_member(uint, state, mag_img_filter); - trace_dump_member(bool, state, compare_mode); + trace_dump_member(uint, state, compare_mode); trace_dump_member(uint, state, compare_func); trace_dump_member(bool, state, normalized_coords); trace_dump_member(uint, state, prefilter); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index c31b1d8698..0546aad9b5 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -45,7 +45,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define sleep Sleep -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE) void usleep(int); # define sleep usleep #else @@ -180,7 +180,7 @@ static int trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, uint32_t serial) { struct trace_screen *tr_scr = tr_rbug->tr_scr; - struct trace_texture *tr_tex; + struct trace_texture *tr_tex = NULL; struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header; struct tr_list *ptr; struct pipe_texture *t; @@ -223,7 +223,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, struct rbug_proto_texture_read *gptr = (struct rbug_proto_texture_read *)header; struct trace_screen *tr_scr = tr_rbug->tr_scr; - struct trace_texture *tr_tex; + struct trace_texture *tr_tex = NULL; struct tr_list *ptr; struct pipe_screen *screen = tr_scr->screen; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index ac20a47af1..117503aaff 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -426,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen, struct pipe_transfer *transfer = tr_trans->transfer; if(tr_trans->map) { - size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride; + size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; trace_dump_call_begin("pipe_screen", "transfer_write"); diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h index 1c16042ee5..e2f981d051 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -32,7 +32,7 @@ struct tgsi_token; enum trace_shader_type { TRACE_SHADER_FRAGMENT = 0, TRACE_SHADER_VERTEX = 1, - TRACE_SHADER_GEOMETRY = 2, + TRACE_SHADER_GEOMETRY = 2 }; struct trace_shader |