diff options
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pipe_clip.c | 38 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_ppc.c | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 34 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 342 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.h | 11 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ppc.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sanity.c | 15 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 64 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_blitter.c | 195 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_debug_dump.c | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_format.csv | 22 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_rect.c | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_rect.h | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_simple_shaders.c | 22 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_tile.c | 27 |
15 files changed, 470 insertions, 327 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 205cda5eab..51a6115ebf 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -55,7 +55,7 @@ -struct clipper { +struct clip_stage { struct draw_stage stage; /**< base class */ /* Basically duplicate some of the flatshading logic here: @@ -70,9 +70,9 @@ struct clipper { /* This is a bit confusing: */ -static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) +static INLINE struct clip_stage *clip_stage( struct draw_stage *stage ) { - return (struct clipper *)stage; + return (struct clip_stage *)stage; } @@ -92,11 +92,12 @@ static void interp_attr( float *fdst, fdst[3] = LINTERP( t, fout[3], fin[3] ); } + static void copy_colors( struct draw_stage *stage, struct vertex_header *dst, const struct vertex_header *src ) { - const struct clipper *clipper = clipper_stage(stage); + const struct clip_stage *clipper = clip_stage(stage); uint i; for (i = 0; i < clipper->num_color_attribs; i++) { const uint attr = clipper->color_attribs[i]; @@ -108,7 +109,7 @@ static void copy_colors( struct draw_stage *stage, /* Interpolate between two vertices to produce a third. 
*/ -static void interp( const struct clipper *clip, +static void interp( const struct clip_stage *clip, struct vertex_header *dst, float t, const struct vertex_header *out, @@ -179,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ if (i == n-1) - header.flags |= edge_last; + header.flags |= edge_last; if (0) { const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; @@ -200,13 +201,14 @@ static void emit_poly( struct draw_stage *stage, } } + static INLINE float dot4(const float *a, const float *b) { - return (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); + return (a[0] * b[0] + + a[1] * b[1] + + a[2] * b[2] + + a[3] * b[3]); } @@ -217,7 +219,7 @@ do_clip_tri( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *a[MAX_CLIPPED_VERTICES]; struct vertex_header *b[MAX_CLIPPED_VERTICES]; struct vertex_header **inlist = a; @@ -280,6 +282,7 @@ do_clip_tri( struct draw_stage *stage, dp_prev = dp; } + /* swap in/out lists */ { struct vertex_header **tmp = inlist; inlist = outlist; @@ -291,15 +294,11 @@ do_clip_tri( struct draw_stage *stage, /* If flat-shading, copy color to new provoking vertex. 
*/ if (clipper->flat && inlist[0] != header->v[2]) { - if (1) { - inlist[0] = dup_vert(stage, inlist[0], tmpnr++); - } + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); copy_colors(stage, inlist[0], header->v[2]); } - - /* Emit the polygon as triangles to the setup stage: */ if (n >= 3) @@ -314,7 +313,7 @@ do_clip_line( struct draw_stage *stage, struct prim_header *header, unsigned clipmask ) { - const struct clipper *clipper = clipper_stage( stage ); + const struct clip_stage *clipper = clip_stage( stage ); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; const float *pos0 = v0->clip; @@ -416,13 +415,14 @@ clip_tri( struct draw_stage *stage, } } + /* Update state. Could further delay this until we hit the first * primitive that really requires clipping. */ static void clip_init_state( struct draw_stage *stage ) { - struct clipper *clipper = clipper_stage( stage ); + struct clip_stage *clipper = clip_stage( stage ); clipper->flat = stage->draw->rasterizer->flatshade ? 
TRUE : FALSE; @@ -488,7 +488,7 @@ static void clip_destroy( struct draw_stage *stage ) */ struct draw_stage *draw_clip_stage( struct draw_context *draw ) { - struct clipper *clipper = CALLOC_STRUCT(clipper); + struct clip_stage *clipper = CALLOC_STRUCT(clip_stage); if (clipper == NULL) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ad184bd696..da9f3e3d35 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* loop over verts */ for (i = 0; i < count; i += MAX_VERTICES) { const uint max_vertices = MIN2(MAX_VERTICES, count - i); - float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; - float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; - float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]; + PIPE_ALIGN_VAR(16) float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]; + PIPE_ALIGN_VAR(16) float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]; uint attr; /* convert (up to) four input verts to SoA format */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index a9375abd21..ba6f7b15f9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -80,7 +80,7 @@ struct fenced_buffer_list */ struct fenced_buffer { - /* + /* * Immutable members. */ @@ -126,8 +126,8 @@ fenced_buffer(struct pb_buffer *buf) /** * Add the buffer to the fenced list. * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order before calling this function. + * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this + * order, before calling this function. * * Reference count should be incremented before calling this function. 
*/ @@ -191,7 +191,7 @@ fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, * Wait for the fence to expire, and remove it from the fenced list. * * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be - * held -- it will + * held -- it will be acquired internally. */ static INLINE enum pipe_error fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, @@ -207,7 +207,10 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); - /* Acquire the global lock */ + /* + * Acquire the global lock. Must release buffer mutex first to preserve + * lock order. + */ pipe_mutex_unlock(fenced_buf->mutex); pipe_mutex_lock(fenced_list->mutex); pipe_mutex_lock(fenced_buf->mutex); @@ -217,7 +220,7 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, /* Remove from the fenced list */ /* TODO: remove consequents */ fenced_buffer_remove_locked(fenced_list, fenced_buf); - + p_atomic_dec(&fenced_buf->base.base.reference.count); assert(pipe_is_referenced(&fenced_buf->base.base.reference)); @@ -238,7 +241,7 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, */ static void fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, - int wait) + int wait) { struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; @@ -274,7 +277,6 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, pb_buf = &fenced_buf->base; pb_reference(&pb_buf, NULL); - curr = next; next = curr->next; @@ -329,7 +331,7 @@ fenced_buffer_map(struct pb_buffer *buf, if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { /* Don't wait for the GPU to finish writing */ - goto finish; + goto done; } /* Wait for the GPU to finish writing */ @@ -350,7 +352,7 @@ fenced_buffer_map(struct pb_buffer *buf, fenced_buf->flags |= flags & 
PIPE_BUFFER_USAGE_CPU_READ_WRITE; } -finish: +done: pipe_mutex_unlock(fenced_buf->mutex); return map; @@ -391,7 +393,7 @@ fenced_buffer_validate(struct pb_buffer *buf, fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; ret = PIPE_OK; - goto finish; + goto done; } assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); @@ -401,7 +403,7 @@ fenced_buffer_validate(struct pb_buffer *buf, /* Buffer cannot be validated in two different lists */ if(fenced_buf->vl && fenced_buf->vl != vl) { ret = PIPE_ERROR_RETRY; - goto finish; + goto done; } #if 0 @@ -409,7 +411,7 @@ fenced_buffer_validate(struct pb_buffer *buf, if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { /* TODO: wait for the thread that mapped the buffer to unmap it */ ret = PIPE_ERROR_RETRY; - goto finish; + goto done; } /* Final sanity checking */ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); @@ -420,17 +422,17 @@ fenced_buffer_validate(struct pb_buffer *buf, (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ ret = PIPE_OK; - goto finish; + goto done; } ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - goto finish; + goto done; fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; -finish: +done: pipe_mutex_unlock(fenced_buf->mutex); return ret; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index f43233bdb4..2bcb33392a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1509,7 +1509,8 @@ fetch_texel( struct tgsi_sampler *sampler, const union tgsi_exec_channel *s, const union tgsi_exec_channel *t, const union tgsi_exec_channel *p, - float lodbias, /* XXX should be float[4] */ + const union tgsi_exec_channel *c0, + enum tgsi_sampler_control control, union tgsi_exec_channel *r, union tgsi_exec_channel *g, union tgsi_exec_channel *b, @@ -1518,7 +1519,7 @@ fetch_texel( struct tgsi_sampler *sampler, uint j; float 
rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); for (j = 0; j < 4; j++) { r->f[j] = rgba[0][j]; @@ -1529,102 +1530,95 @@ fetch_texel( struct tgsi_sampler *sampler, } +#define TEX_MODIFIER_NONE 0 +#define TEX_MODIFIER_PROJECTED 1 +#define TEX_MODIFIER_LOD_BIAS 2 +#define TEX_MODIFIER_EXPLICIT_LOD 3 + + static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod, - boolean projected) + uint modifier) { const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; + const union tgsi_exec_channel *lod = &ZeroVec; + enum tgsi_sampler_control control; uint chan_index; - float lodBias; - /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + if (modifier != TEX_MODIFIER_NONE) { + FETCH(&r[3], 0, CHAN_W); + if (modifier != TEX_MODIFIER_PROJECTED) { + lod = &r[3]; + } + } + + if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + control = tgsi_sampler_lod_explicit; + } else { + control = tgsi_sampler_lod_bias; + } switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: - FETCH(&r[0], 0, CHAN_X); - if (projected) { - FETCH(&r[1], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[1] ); + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); } - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; - fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: - FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); 
- micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], lod, + control, &r[0], &r[1], &r[2], &r[3]); break; default: - assert (0); + assert(0); } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); } } @@ -1647,8 +1641,9 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: @@ -1661,8 +1656,9 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[2], 0, CHAN_Z); fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ + &r[0], &r[1], 
&r[2], &ZeroVec, /* inputs */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: @@ -1673,7 +1669,8 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[2], 0, CHAN_Z); fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &ZeroVec, + tgsi_sampler_lod_bias, &r[0], &r[1], &r[2], &r[3]); break; @@ -1918,6 +1915,130 @@ exec_vector_trinary(struct tgsi_exec_machine *mach, } static void +exec_dp3(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp4(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_W; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } 
+} + +static void +exec_dp2a(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], arg); + + fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dph(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], arg); + + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp2(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int 
chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void exec_break(struct tgsi_exec_machine *mach) { if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { @@ -2397,54 +2518,11 @@ exec_instruction( break; case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp3(mach, inst); break; - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + case TGSI_OPCODE_DP4: + exec_dp4(mach, inst); break; case TGSI_OPCODE_DST: @@ 
-2540,21 +2618,7 @@ exec_instruction( break; case TGSI_OPCODE_DP2A: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[2], 2, CHAN_X ); - micro_add( &r[0], &r[0], &r[2] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2a(mach, inst); break; case TGSI_OPCODE_FRC: @@ -2654,30 +2718,7 @@ exec_instruction( break; case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 1, CHAN_W); - - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dph(mach, inst); break; case TGSI_OPCODE_COS: @@ -2801,14 +2842,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_NONE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); break; case TGSI_OPCODE_TXD: @@ -2824,14 +2865,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); break; case TGSI_OPCODE_TXP: /* Texture lookup with projection */ /* src[0] = texcoord (src[0].w = projection) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, 
TRUE); + exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); break; case TGSI_OPCODE_UP2H: @@ -3091,18 +3132,7 @@ exec_instruction( break; case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2(mach, inst); break; case TGSI_OPCODE_IF: diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index aa3a98d7f1..59e3b445cc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -35,11 +36,13 @@ extern "C" { #endif + #define MAX_LABELS (4 * 1024) /**< basically, max instructions */ #define NUM_CHANNELS 4 /* R,G,B,A */ #define QUAD_SIZE 4 /* 4 pixel/quad */ + /** * Registers may be treated as float, signed int or unsigned int. */ @@ -69,6 +72,11 @@ struct tgsi_interp_coef float dady[NUM_CHANNELS]; }; +enum tgsi_sampler_control { + tgsi_sampler_lod_bias, + tgsi_sampler_lod_explicit +}; + /** * Information for sampling textures, which must be implemented * by code outside the TGSI executor. 
@@ -80,7 +88,8 @@ struct tgsi_sampler const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 138d2d095b..ad553c71a5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,8 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -const float ppc_builtin_constants[] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) const float +ppc_builtin_constants[] = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 9b0644465a..7f1c8e5dd6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -265,13 +265,15 @@ check_register_usage( } else { if (!is_register_declared( ctx, reg )) { - if (reg->dimensions == 2) + if (reg->dimensions == 2) { report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file], reg->indices[0], reg->indices[1], name ); - else + } + else { report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file], reg->indices[0], name ); } + } if (!is_register_used( ctx, reg )) cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); else @@ -333,15 +335,15 @@ iter_instruction( fill_scan_register1d(ind_reg, inst->Src[i].Indirect.File, inst->Src[i].Indirect.Index); + if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || + reg->indices[0] != 0) { + report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); + } check_register_usage( ctx, reg, "indirect", FALSE ); - if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || - reg->indices[0] != 0) { - report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); - } } } @@ -513,6 
+515,7 @@ regs_hash_destroy(struct cso_hash *hash) while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register *)cso_hash_iter_data(iter); iter = cso_hash_erase(hash, iter); + assert(reg->file < TGSI_FILE_COUNT); FREE(reg); } cso_hash_delete(hash); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 118059ace9..a85cc4659e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -1418,13 +1419,13 @@ fetch_texel( struct tgsi_sampler **sampler, sampler, *sampler, store ); - debug_printf("lodbias %f\n", store[12]); - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", + debug_printf("sample %d texcoord %f %f %f lodbias %f\n", j, store[0+j], - store[4+j]); + store[4+j], + store[8 + j], + store[12 + j]); #endif { @@ -1433,7 +1434,8 @@ fetch_texel( struct tgsi_sampler **sampler, &store[0], /* s */ &store[4], /* t */ &store[8], /* r */ - store[12], /* lodbias */ + &store[12], /* lodbias */ + tgsi_sampler_lod_bias, rgba); /* results */ memcpy( store, rgba, 16 * sizeof(float)); @@ -2144,40 +2146,50 @@ emit_instruction( break; case TGSI_OPCODE_XPD: + /* Note: we do all stores after all operands have been fetched + * to avoid src/dst register aliasing issues for an instruction + * such as: XPD TEMP[2].xyz, TEMP[0], TEMP[2]; + */ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 1, 1, CHAN_Z ); - FETCH( func, *inst, 3, 0, CHAN_Z ); + FETCH( func, *inst, 1, 1, CHAN_Z ); /* xmm[1] = src[1].z */ + FETCH( func, *inst, 3, 0, CHAN_Z ); /* xmm[3] = src[0].z */ } if( IS_DST0_CHANNEL_ENABLED( *inst, 
CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 4, 1, CHAN_Y ); + FETCH( func, *inst, 0, 0, CHAN_Y ); /* xmm[0] = src[0].y */ + FETCH( func, *inst, 4, 1, CHAN_Y ); /* xmm[4] = src[1].y */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_MOV( func, 2, 0 ); - emit_mul( func, 2, 1 ); - emit_MOV( func, 5, 3 ); - emit_mul( func, 5, 4 ); - emit_sub( func, 2, 5 ); - STORE( func, *inst, 2, 0, CHAN_X ); + emit_MOV( func, 7, 0 ); /* xmm[7] = xmm[0] */ + emit_mul( func, 7, 1 ); /* xmm[7] = xmm[7] * xmm[1] */ + emit_MOV( func, 5, 3 ); /* xmm[5] = xmm[3] */ + emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */ + emit_sub( func, 7, 5 ); /* xmm[7] = xmm[7] - xmm[5] */ + /* store xmm[7] in dst.x below */ } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 2, 1, CHAN_X ); - FETCH( func, *inst, 5, 0, CHAN_X ); + FETCH( func, *inst, 2, 1, CHAN_X ); /* xmm[2] = src[1].x */ + FETCH( func, *inst, 5, 0, CHAN_X ); /* xmm[5] = src[0].x */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - emit_mul( func, 3, 2 ); - emit_mul( func, 1, 5 ); - emit_sub( func, 3, 1 ); - STORE( func, *inst, 3, 0, CHAN_Y ); + emit_mul( func, 3, 2 ); /* xmm[3] = xmm[3] * xmm[2] */ + emit_mul( func, 1, 5 ); /* xmm[1] = xmm[1] * xmm[5] */ + emit_sub( func, 3, 1 ); /* xmm[3] = xmm[3] - xmm[1] */ + /* store xmm[3] in dst.y below */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_mul( func, 5, 4 ); - emit_mul( func, 0, 2 ); - emit_sub( func, 5, 0 ); - STORE( func, *inst, 5, 0, CHAN_Z ); + emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */ + emit_mul( func, 0, 2 ); /* xmm[0] = xmm[0] * xmm[2] */ + emit_sub( func, 5, 0 ); /* xmm[5] = xmm[5] - xmm[0] */ + STORE( func, *inst, 5, 0, CHAN_Z ); /* dst.z = xmm[5] */ + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + STORE( func, *inst, 7, 0, CHAN_X ); /* dst.x = xmm[7] */ + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { 
+ STORE( func, *inst, 3, 0, CHAN_Y ); /* dst.y = xmm[3] */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { emit_tempf( @@ -2506,7 +2518,7 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - emit_tex( func, inst, TRUE, FALSE ); + return 0; break; case TGSI_OPCODE_TXP: diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 1f794d39a1..249a0375fc 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -48,6 +48,8 @@ #include "util/u_simple_shaders.h" #include "util/u_texture.h" +#define INVALID_PTR ((void*)~0) + struct blitter_context_priv { struct blitter_context blitter; @@ -110,6 +112,11 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->pipe = pipe; /* init state objects for them to be considered invalid */ + ctx->blitter.saved_blend_state = INVALID_PTR; + ctx->blitter.saved_dsa_state = INVALID_PTR; + ctx->blitter.saved_rs_state = INVALID_PTR; + ctx->blitter.saved_fs = INVALID_PTR; + ctx->blitter.saved_vs = INVALID_PTR; ctx->blitter.saved_fb_state.nr_cbufs = ~0; ctx->blitter.saved_num_textures = ~0; ctx->blitter.saved_num_sampler_states = ~0; @@ -156,6 +163,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) rs_state.cull_mode = PIPE_WINDING_NONE; rs_state.bypass_vs_clip_and_viewport = 1; rs_state.gl_rasterization_rules = 1; + rs_state.flatshade = 1; ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state); /* fragment shaders are created on-demand */ @@ -234,11 +242,11 @@ void util_blitter_destroy(struct blitter_context *blitter) static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) { /* make sure these CSOs have been saved */ - assert(ctx->blitter.saved_blend_state && - ctx->blitter.saved_dsa_state && - ctx->blitter.saved_rs_state && - ctx->blitter.saved_fs && - ctx->blitter.saved_vs); + assert(ctx->blitter.saved_blend_state != INVALID_PTR && + ctx->blitter.saved_dsa_state != INVALID_PTR && + 
ctx->blitter.saved_rs_state != INVALID_PTR && + ctx->blitter.saved_fs != INVALID_PTR && + ctx->blitter.saved_vs != INVALID_PTR); } static void blitter_restore_CSOs(struct blitter_context_priv *ctx) @@ -252,11 +260,11 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); - ctx->blitter.saved_blend_state = 0; - ctx->blitter.saved_dsa_state = 0; - ctx->blitter.saved_rs_state = 0; - ctx->blitter.saved_fs = 0; - ctx->blitter.saved_vs = 0; + ctx->blitter.saved_blend_state = INVALID_PTR; + ctx->blitter.saved_dsa_state = INVALID_PTR; + ctx->blitter.saved_rs_state = INVALID_PTR; + ctx->blitter.saved_fs = INVALID_PTR; + ctx->blitter.saved_vs = INVALID_PTR; /* restore the state objects which are required to be saved before copy/fill */ @@ -371,9 +379,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx, float t1 = y1 / (float)surf->height; float s2 = x2 / (float)surf->width; float t2 = y2 / (float)surf->height; - const float st[4][2] = { - {s1, t1}, {s2, t1}, {s2, t2}, {s1, t2} - }; + float st[4][2]; + + st[0][0] = s1; + st[0][1] = t1; + st[1][0] = s2; + st[1][1] = t1; + st[2][0] = s2; + st[2][1] = t2; + st[3][0] = s1; + st[3][1] = t2; util_map_texcoords2d_onto_cubemap(surf->face, /* pointer, stride in floats */ @@ -560,45 +575,29 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_restore_CSOs(ctx); } -void util_blitter_copy(struct blitter_context *blitter, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, - unsigned width, unsigned height, - boolean ignore_stencil) +static boolean +is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2, + unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2) +{ + if (sx1 >= dx2 || sx2 <= dx1 || sy1 >= dy2 || sy2 <= dy1) { + return FALSE; + } else { + return TRUE; + } +} + +static void 
util_blitter_do_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height, + boolean is_depth) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb_state; - boolean is_stencil, is_depth; - unsigned dst_tex_usage; - - /* give up if textures are not set */ - assert(dst->texture && src->texture); - if (!dst->texture || !src->texture) - return; - - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0; - dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL : - PIPE_TEXTURE_USAGE_RENDER_TARGET; - /* check if we can sample from and render to the surfaces */ - /* (assuming copying a stencil buffer is not possible) */ - if ((!ignore_stencil && is_stencil) || - !screen->is_format_supported(screen, dst->format, dst->texture->target, - dst_tex_usage, 0) || - !screen->is_format_supported(screen, src->format, src->texture->target, - PIPE_TEXTURE_USAGE_SAMPLER, 0)) { - util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, - width, height); - return; - } - - /* check whether the states are properly saved */ - blitter_check_saved_CSOs(ctx); assert(blitter->saved_fb_state.nr_cbufs != ~0); assert(blitter->saved_num_textures != ~0); assert(blitter->saved_num_sampler_states != ~0); @@ -656,6 +655,108 @@ void util_blitter_copy(struct blitter_context *blitter, blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); blitter_draw_quad(ctx); + +} + +static void util_blitter_overlap_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned 
width, unsigned height) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + + struct pipe_texture texTemp; + struct pipe_texture *texture; + struct pipe_surface *tex_surf; + + /* check whether the states are properly saved */ + blitter_check_saved_CSOs(ctx); + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = dst->texture->format; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width0 = width; + texTemp.height0 = height; + texTemp.depth0 = 1; + + texture = screen->texture_create(screen, &texTemp); + if (!texture) + return; + + tex_surf = screen->get_tex_surface(screen, texture, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + + /* blit from the src to the temp */ + util_blitter_do_copy(blitter, tex_surf, 0, 0, + src, srcx, srcy, + width, height, + FALSE); + util_blitter_do_copy(blitter, dst, dstx, dsty, + tex_surf, 0, 0, + width, height, + FALSE); + pipe_surface_reference(&tex_surf, NULL); + pipe_texture_reference(&texture, NULL); + blitter_restore_CSOs(ctx); +} + +void util_blitter_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height, + boolean ignore_stencil) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + boolean is_stencil, is_depth; + unsigned dst_tex_usage; + + /* give up if textures are not set */ + assert(dst->texture && src->texture); + if (!dst->texture || !src->texture) + return; + + if (dst->texture == src->texture) { + if (is_overlap(srcx, srcx + width, srcy, srcy + height, + dstx, dstx + width, dsty, dsty + height)) { + util_blitter_overlap_copy(blitter, dst, dstx, dsty, src, srcx, srcy, + 
width, height); + return; + } + } + + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0; + dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL : + PIPE_TEXTURE_USAGE_RENDER_TARGET; + + /* check if we can sample from and render to the surfaces */ + /* (assuming copying a stencil buffer is not possible) */ + if ((!ignore_stencil && is_stencil) || + !screen->is_format_supported(screen, dst->format, dst->texture->target, + dst_tex_usage, 0) || + !screen->is_format_supported(screen, src->format, src->texture->target, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, + width, height); + return; + } + + /* check whether the states are properly saved */ + blitter_check_saved_CSOs(ctx); + util_blitter_do_copy(blitter, + dst, dstx, dsty, + src, srcx, srcy, + width, height, is_depth); blitter_restore_CSOs(ctx); } diff --git a/src/gallium/auxiliary/util/u_debug_dump.c b/src/gallium/auxiliary/util/u_debug_dump.c index 09866880ae..61624d05c0 100644 --- a/src/gallium/auxiliary/util/u_debug_dump.c +++ b/src/gallium/auxiliary/util/u_debug_dump.c @@ -255,15 +255,13 @@ DEFINE_DEBUG_DUMP_CONTINUOUS(tex_mipfilter) static const char * debug_dump_tex_filter_names[] = { "PIPE_TEX_FILTER_NEAREST", - "PIPE_TEX_FILTER_LINEAR", - "PIPE_TEX_FILTER_ANISO" + "PIPE_TEX_FILTER_LINEAR" }; static const char * debug_dump_tex_filter_short_names[] = { "nearest", - "linear", - "aniso" + "linear" }; DEFINE_DEBUG_DUMP_CONTINUOUS(tex_filter) diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 866b18ff16..9f16b42944 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -76,9 +76,9 @@ PIPE_FORMAT_R8G8_SNORM , array , 1, 1, sn8 , sn8 , , , xy01, PIPE_FORMAT_R8G8B8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , , 
xyz1, rgb PIPE_FORMAT_R8G8B8A8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb PIPE_FORMAT_R8G8B8X8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyz1, rgb -PIPE_FORMAT_B6G5R5_SNORM , arith , 1, 1, sn5 , sn5 , sn6 , , zyx1, rgb -PIPE_FORMAT_A8B8G8R8_SNORM , arith , 1, 1, sn8 , sn8 , sn8 , sn8 , zyxw, rgb -PIPE_FORMAT_X8B8G8R8_SNORM , arith , 1, 1, sn8 , sn8 , sn8 , sn8 , zyx1, rgb +PIPE_FORMAT_B6G5R5_SNORM , arith , 1, 1, sn5 , sn5 , sn6 , , xyz1, rgb +PIPE_FORMAT_A8B8G8R8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzyx, rgb +PIPE_FORMAT_X8B8G8R8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzy1, rgb PIPE_FORMAT_R8_SSCALED , array , 1, 1, s8 , , , , x001, rgb PIPE_FORMAT_R8G8_SSCALED , array , 1, 1, s8 , s8 , , , xy01, rgb PIPE_FORMAT_R8G8B8_SSCALED , array , 1, 1, s8 , s8 , s8 , , xyz1, rgb @@ -90,14 +90,14 @@ PIPE_FORMAT_R32G32B32_FIXED , array , 1, 1, h32 , h32 , h32 , , xyz1, PIPE_FORMAT_R32G32B32A32_FIXED , array , 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb PIPE_FORMAT_L8_SRGB , arith , 1, 1, u8 , , , , xxx1, srgb PIPE_FORMAT_A8L8_SRGB , arith , 1, 1, u8 , u8 , , , xxxy, srgb -PIPE_FORMAT_R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , , xyz1, srgb -PIPE_FORMAT_R8G8B8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , xyzw, srgb -PIPE_FORMAT_R8G8B8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , xyz1, srgb -PIPE_FORMAT_A8R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , wxyz, srgb -PIPE_FORMAT_X8R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , 1xyz, srgb -PIPE_FORMAT_B8G8R8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyxw, srgb -PIPE_FORMAT_B8G8R8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb -PIPE_FORMAT_X8UB8UG8SR8S_NORM , arith , 1, 1, sn8 , sn8 , un8 , x8 , 1zyx, rgb +PIPE_FORMAT_R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , , xyz1, srgb +PIPE_FORMAT_R8G8B8A8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , xyzw, srgb +PIPE_FORMAT_R8G8B8X8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , xyz1, srgb +PIPE_FORMAT_A8R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , yzwx, srgb 
+PIPE_FORMAT_X8R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , yzw1, srgb +PIPE_FORMAT_B8G8R8A8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , zyxw, srgb +PIPE_FORMAT_B8G8R8X8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb +PIPE_FORMAT_X8UB8UG8SR8S_NORM , array , 1, 1, sn8 , sn8 , un8 , x8 , wzy1, rgb PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 298fbacecb..8479161c74 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -41,7 +41,7 @@ /** * Copy 2D rect from one place to another. * Position and sizes are in pixels. - * src_pitch may be negative to do vertical flip of pixels from source. + * src_stride may be negative to do vertical flip of pixels from source. */ void util_copy_rect(ubyte * dst, @@ -54,7 +54,7 @@ util_copy_rect(ubyte * dst, const ubyte * src, int src_stride, unsigned src_x, - int src_y) + unsigned src_y) { unsigned i; int src_stride_pos = src_stride < 0 ? 
-src_stride : src_stride; @@ -65,10 +65,6 @@ util_copy_rect(ubyte * dst, assert(blocksize > 0); assert(blockwidth > 0); assert(blockheight > 0); - assert(src_x >= 0); - assert(src_y >= 0); - assert(dst_x >= 0); - assert(dst_y >= 0); dst_x /= blockwidth; dst_y /= blockheight; @@ -113,8 +109,6 @@ util_fill_rect(ubyte * dst, assert(blocksize > 0); assert(blockwidth > 0); assert(blockheight > 0); - assert(dst_x >= 0); - assert(dst_y >= 0); dst_x /= blockwidth; dst_y /= blockheight; diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h index 5e444ffae2..b44d821904 100644 --- a/src/gallium/auxiliary/util/u_rect.h +++ b/src/gallium/auxiliary/util/u_rect.h @@ -45,7 +45,7 @@ extern void util_copy_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, const ubyte * src, - int src_stride, unsigned src_x, int src_y); + int src_stride, unsigned src_x, unsigned src_y); extern void util_fill_rect(ubyte * dst, enum pipe_format format, diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 8172ead020..b751e29ab6 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -44,13 +44,15 @@ /** * Make simple vertex pass-through shader. 
+ * \param num_attribs number of attributes to pass through + * \param semantic_names array of semantic names for each attribute + * \param semantic_indexes array of semantic indexes for each attribute */ void * util_make_vertex_passthrough_shader(struct pipe_context *pipe, uint num_attribs, const uint *semantic_names, const uint *semantic_indexes) - { struct ureg_program *ureg; uint i; @@ -78,8 +80,6 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, } - - /** * Make simple fragment texture shader: * IMM {0,0,0,1} // (if writemask != 0xf) @@ -125,6 +125,12 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, return ureg_create_shader_and_destroy( ureg, pipe ); } + +/** + * Make a simple fragment shader that sets the output color to a color + * taken from a texture. + * \param tex_target one of PIPE_TEXTURE_x + */ void * util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target ) { @@ -133,6 +139,7 @@ util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target ) TGSI_WRITEMASK_XYZW ); } + /** * Make a simple fragment texture shader which reads an X component from * a texture and writes it as depth. @@ -177,6 +184,7 @@ util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe, return ureg_create_shader_and_destroy( ureg, pipe ); } + /** * Make simple fragment color pass-through shader. */ @@ -186,15 +194,19 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) return util_make_fragment_clonecolor_shader(pipe, 1); } + +/** + * Make a fragment shader that copies the input color to N output colors. 
+ */ void * util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs) { struct ureg_program *ureg; struct ureg_src src; - struct ureg_dst dst[8]; + struct ureg_dst dst[PIPE_MAX_COLOR_BUFS]; int i; - assert(num_cbufs <= 8); + assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 5b8dd1abb9..1ba82bb21f 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -1155,27 +1155,6 @@ ycbcr_get_tile_rgba(const ushort *src, } -static void -fake_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = (i ^ j) & 1 ? 1.0f : 0.0f; - } - p += dst_stride; - } -} - - void pipe_tile_raw_to_rgba(enum pipe_format format, void *src, @@ -1258,8 +1237,10 @@ pipe_tile_raw_to_rgba(enum pipe_format format, ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); - fake_get_tile_rgba(src, w, h, dst, dst_stride); + util_format_read_4f(format, + dst, dst_stride * sizeof(float), + src, util_format_get_stride(format, w), + 0, 0, w, h); } } |