From 2abc1b3641e435e0b68490fa6b0a7ffa7c030c76 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Thu, 1 May 2008 12:38:51 -0400
Subject: abstract fetching elts
---
src/gallium/auxiliary/draw/draw_pt_varray.c | 17 +++++++++++------
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 6 ++----
2 files changed, 13 insertions(+), 10 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index 355093f945..b0bd2b983e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -43,6 +43,8 @@ struct varray_frontend {
unsigned draw_count;
unsigned fetch_count;
+ unsigned fetch_start;
+
struct draw_pt_middle_end *middle;
unsigned input_prim;
@@ -68,15 +70,18 @@ static void varray_flush(struct varray_frontend *varray)
varray->draw_count = 0;
}
-#if 0
-static void varray_check_flush(struct varray_frontend *varray)
+static INLINE void fetch_init(struct varray_frontend *varray,
+ unsigned current_count,
+ unsigned count)
{
- if (varray->draw_count + 6 >= DRAW_MAX/* ||
- varray->fetch_count + 4 >= FETCH_MAX*/) {
- varray_flush(varray);
+ unsigned idx;
+ const unsigned end = MIN2(FETCH_MAX, count - current_count);
+ for (idx = 0; idx < end; ++idx) {
+ varray->fetch_elts[idx] = varray->fetch_start + idx;
}
+ varray->fetch_start += idx;
+ varray->fetch_count = idx;
}
-#endif
static INLINE void add_draw_el(struct varray_frontend *varray,
int idx, ushort flags)
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index b9a319b253..a3509613f5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -20,10 +20,8 @@ static void FUNC(struct draw_pt_front_end *frontend,
start, count);
#endif
- for (i = 0; i < count; ++i) {
- varray->fetch_elts[i] = start + i;
- }
- varray->fetch_count = count;
+ varray->fetch_start = start;
+ fetch_init(varray, 0, count);
switch (varray->input_prim) {
case PIPE_PRIM_POINTS:
--
cgit v1.2.3
From 90a46ed277cc887d49c8d8c627174c3bd693ecf7 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Thu, 1 May 2008 23:54:39 -0400
Subject: split larger primitives in the simple varray pt
---
src/gallium/auxiliary/draw/draw_pt.c | 3 +-
src/gallium/auxiliary/draw/draw_pt_varray.c | 62 ++++++++-
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 169 +++++++++++++++++-------
3 files changed, 180 insertions(+), 54 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index c9c5d18313..bccde6c5fd 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -80,8 +80,7 @@ draw_pt_arrays(struct draw_context *draw,
/* Pick the right frontend
*/
- if (draw->pt.user.elts ||
- count >= 256) {
+ if (draw->pt.user.elts) {
frontend = draw->pt.front.vcache;
} else {
frontend = draw->pt.front.varray;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index b0bd2b983e..c9843bded0 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -58,6 +58,11 @@ static void varray_flush(struct varray_frontend *varray)
debug_printf("FLUSH fc = %d, dc = %d\n",
varray->fetch_count,
varray->draw_count);
+ debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n",
+ varray->fetch_elts[0],
+ varray->fetch_elts[varray->fetch_count-1],
+ varray->draw_elts[0],
+ varray->draw_elts[varray->draw_count-1]);
#endif
varray->middle->run(varray->middle,
varray->fetch_elts,
@@ -71,18 +76,69 @@ static void varray_flush(struct varray_frontend *varray)
}
static INLINE void fetch_init(struct varray_frontend *varray,
- unsigned current_count,
unsigned count)
{
unsigned idx;
- const unsigned end = MIN2(FETCH_MAX, count - current_count);
- for (idx = 0; idx < end; ++idx) {
+#if 0
+ debug_printf("FETCH INIT c = %d, fs = %d\n",
+ count,
+ varray->fetch_start);
+#endif
+ for (idx = 0; idx < count; ++idx) {
varray->fetch_elts[idx] = varray->fetch_start + idx;
}
varray->fetch_start += idx;
varray->fetch_count = idx;
}
+
+static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr)
+{
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ *first = 1;
+ *incr = 1;
+ return TRUE;
+ case PIPE_PRIM_LINES:
+ *first = 2;
+ *incr = 2;
+ return TRUE;
+ case PIPE_PRIM_LINE_STRIP:
+ *first = 2;
+ *incr = 1;
+ return TRUE;
+ case PIPE_PRIM_TRIANGLES:
+ *first = 3;
+ *incr = 3;
+ return TRUE;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ *first = 3;
+ *incr = 1;
+ return TRUE;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ *first = 3;
+ *incr = 1;
+ return TRUE;
+ case PIPE_PRIM_QUADS:
+ *first = 4;
+ *incr = 4;
+ return TRUE;
+ case PIPE_PRIM_QUAD_STRIP:
+ *first = 4;
+ *incr = 2;
+ return TRUE;
+ case PIPE_PRIM_POLYGON:
+ *first = 3;
+ *incr = 1;
+ return TRUE;
+ default:
+ *first = 0;
+ *incr = 1; /* set to one so that count % incr works */
+ return FALSE;
+ }
+}
+
+
static INLINE void add_draw_el(struct varray_frontend *varray,
int idx, ushort flags)
{
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index a3509613f5..073c1aadbf 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -10,30 +10,42 @@ static void FUNC(struct draw_pt_front_end *frontend,
boolean flatfirst = (draw->rasterizer->flatshade &&
draw->rasterizer->flatshade_first);
- unsigned i, flags;
+ unsigned i, j, flags;
+ unsigned first, incr;
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count);
-#endif
-#if 0
- debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim,
+ varray->fetch_start = start;
+
+ split_prim_inplace(varray->input_prim, &first, &incr);
+
+#if 1
+ debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
+ varray->input_prim,
start, count);
#endif
- varray->fetch_start = start;
- fetch_init(varray, 0, count);
-
switch (varray->input_prim) {
case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- POINT(varray, i + 0);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i < count; i ++) {
+ POINT(varray, i + 0);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- LINE(varray, DRAW_PIPE_RESET_STIPPLE,
- i + 0, i + 1);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+1 < end; i += 2) {
+ LINE(varray, DRAW_PIPE_RESET_STIPPLE,
+ i + 0, i + 1);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
@@ -41,38 +53,68 @@ static void FUNC(struct draw_pt_front_end *frontend,
if (count >= 2) {
flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 1; i < end; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ LINE(varray, flags, i - 1, 0);
+ fetch_init(varray, end);
+ varray_flush(varray);
}
- LINE(varray, flags, i - 1, 0);
}
break;
case PIPE_PRIM_LINE_STRIP:
flags = DRAW_PIPE_RESET_STIPPLE;
- for (i = 1; i < count; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 1; i < end; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1, i + 2);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i += 3) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1, i + 2);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_TRIANGLE_STRIP:
if (flatfirst) {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1 + (i&1), i + 2 - (i&1));
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1 + (i&1), i + 2 - (i&1));
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
}
else {
- for (i = 0; i+2 < count; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0 + (i&1), i + 1 - (i&1), i + 2);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0 + (i&1), i + 1 - (i&1), i + 2);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
}
break;
@@ -81,51 +123,80 @@ static void FUNC(struct draw_pt_front_end *frontend,
if (count >= 3) {
if (flatfirst) {
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (i = 0; i+2 < count; i++) {
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
}
else {
flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (i = 0; i+2 < count; i++) {
- TRIANGLE(varray, flags, 0, i + 1, i + 2);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, flags, 0, i + 1, i + 2);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
}
}
break;
case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- QUAD(varray, i + 0, i + 1, i + 2, i + 3);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+3 < end; i += 4) {
+ QUAD(varray, i + 0, i + 1, i + 2, i + 3);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- QUAD(varray, i + 2, i + 0, i + 1, i + 3);
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+3 < end; i += 2) {
+ QUAD(varray, i + 2, i + 0, i + 1, i + 3);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
case PIPE_PRIM_POLYGON:
{
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
-
- for (i = 0; i+2 < count; i++, flags = edge_middle) {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++, flags = edge_middle) {
if (i + 3 == count)
flags |= edge_last;
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
}
- break;
+ }
+ break;
default:
assert(0);
--
cgit v1.2.3
From abb08e9335b5d7cb004dc9e6cec390ab6968abe5 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Sat, 3 May 2008 22:32:17 -0400
Subject: implement linear emission and fetching and plug it in the varray paths
---
src/gallium/auxiliary/draw/draw_pt.h | 18 +++++
src/gallium/auxiliary/draw/draw_pt_emit.c | 45 +++++++++++
src/gallium/auxiliary/draw/draw_pt_fetch.c | 36 +++++++++
.../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 88 ++++++++++++++++++++--
src/gallium/auxiliary/draw/draw_pt_varray.c | 26 ++++++-
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 4 +-
6 files changed, 208 insertions(+), 9 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 2dec376cee..2f96ceaf00 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -92,6 +92,12 @@ struct draw_pt_middle_end {
const ushort *draw_elts,
unsigned draw_count );
+ void (*run_linear)(struct draw_pt_middle_end *,
+ unsigned fetch_start,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count);
+
void (*finish)( struct draw_pt_middle_end * );
void (*destroy)( struct draw_pt_middle_end * );
};
@@ -152,6 +158,13 @@ void draw_pt_emit( struct pt_emit *emit,
const ushort *elts,
unsigned count );
+void draw_pt_emit_linear( struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned vertex_count,
+ unsigned stride,
+ unsigned start,
+ unsigned count );
+
void draw_pt_emit_destroy( struct pt_emit *emit );
struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
@@ -170,6 +183,11 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
unsigned count,
char *verts );
+void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
+ unsigned start,
+ unsigned count,
+ char *verts );
+
void draw_pt_fetch_destroy( struct pt_fetch *fetch );
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index f9ac16786e..2a961b7088 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -179,6 +179,51 @@ void draw_pt_emit( struct pt_emit *emit,
}
+void draw_pt_emit_linear(struct pt_emit *emit,
+ const float (*vertex_data)[4],
+ unsigned vertex_count,
+ unsigned stride,
+ unsigned start,
+ unsigned count)
+{
+ struct draw_context *draw = emit->draw;
+ struct translate *translate = emit->translate;
+ struct vbuf_render *render = draw->render;
+ void *hw_verts;
+
+ debug_printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n");
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ hw_verts = render->allocate_vertices(render,
+ (ushort)translate->key.output_stride,
+ (ushort)count);
+ if (!hw_verts) {
+ assert(0);
+ return;
+ }
+
+ translate->set_buffer(translate, 0,
+ vertex_data, stride);
+
+ translate->set_buffer(translate, 1,
+ &draw->rasterizer->point_size,
+ 0);
+
+ translate->run(translate,
+ 0,
+ vertex_count,
+ hw_verts);
+
+ render->draw_arrays(render, start, count);
+
+ render->release_vertices(render,
+ hw_verts,
+ translate->key.output_stride,
+ vertex_count);
+}
+
struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
{
struct pt_emit *emit = CALLOC_STRUCT(pt_emit);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 1f765b73ad..d7cc1807d7 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -167,6 +167,42 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
}
+void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
+ unsigned start,
+ unsigned count,
+ char *verts )
+{
+ struct draw_context *draw = fetch->draw;
+ struct translate *translate = fetch->translate;
+ unsigned i;
+
+ for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+ translate->set_buffer(translate,
+ i,
+ ((char *)draw->pt.user.vbuffer[i] +
+ draw->pt.vertex_buffer[i].buffer_offset),
+ draw->pt.vertex_buffer[i].pitch );
+ }
+
+ translate->run( translate,
+ start,
+ count,
+ verts );
+
+ /* Edgeflags are hard to fit into a translate program, populate
+ * them separately if required. In the setup above they are
+ * defaulted to one, so only need this if there is reason to change
+ * that default:
+ */
+ if (fetch->need_edgeflags) {
+ for (i = 0; i < count; i++) {
+ struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size);
+ vh->edgeflag = draw_pt_get_edgeflag( draw, start + i );
+ }
+ }
+}
+
+
struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
{
struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 4ec20493c4..b1e08a8f40 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -162,7 +162,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
fpme->vertex_size,
draw_elts,
draw_count );
- }
+ }
else {
draw_pt_emit( fpme->emit,
(const float (*)[4])pipeline_verts->data,
@@ -177,6 +177,83 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
}
+static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
+ unsigned fetch_start,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *shader = draw->vertex_shader;
+ unsigned opt = fpme->opt;
+ unsigned alloc_count = align_int( fetch_count, 4 );
+
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
+
+ if (!pipeline_verts) {
+ /* Not much we can do here - just skip the rendering.
+ */
+ assert(0);
+ return;
+ }
+
+ /* Fetch into our vertex buffer
+ */
+ draw_pt_fetch_run_linear( fpme->fetch,
+ fetch_start,
+ fetch_count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, ie a bypass shader,
+ * then the inputs == outputs, and are already in the correct
+ * place.
+ */
+ if (opt & PT_SHADE)
+ {
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->pt.user.constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
+ }
+
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
+
+ /* Do we need to run the pipeline?
+ */
+ if (opt & PT_PIPELINE) {
+ draw_pipeline_run( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size,
+ draw_elts,
+ draw_count );
+ }
+ else {
+ draw_pt_emit_linear( fpme->emit,
+ (const float (*)[4])pipeline_verts->data,
+ fetch_count,
+ fpme->vertex_size,
+ 0, /*start*/
+ draw_count );
+ }
+
+ FREE(pipeline_verts);
+}
+
+
static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
{
@@ -206,10 +283,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
if (!fpme)
goto fail;
- fpme->base.prepare = fetch_pipeline_prepare;
- fpme->base.run = fetch_pipeline_run;
- fpme->base.finish = fetch_pipeline_finish;
- fpme->base.destroy = fetch_pipeline_destroy;
+ fpme->base.prepare = fetch_pipeline_prepare;
+ fpme->base.run = fetch_pipeline_run;
+ fpme->base.run_linear = fetch_pipeline_linear_run;
+ fpme->base.finish = fetch_pipeline_finish;
+ fpme->base.destroy = fetch_pipeline_destroy;
fpme->draw = draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index c9843bded0..916373acc8 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -75,6 +75,28 @@ static void varray_flush(struct varray_frontend *varray)
varray->draw_count = 0;
}
+static void varray_flush_linear(struct varray_frontend *varray)
+{
+ if (varray->draw_count) {
+ debug_printf("FLUSH LINEAR fc = %d, dc = %d\n",
+ varray->fetch_count,
+ varray->draw_count);
+ debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n",
+ varray->fetch_elts[0],
+ varray->fetch_elts[varray->fetch_count-1],
+ varray->draw_elts[0],
+ varray->draw_elts[varray->draw_count-1]);
+ varray->middle->run_linear(varray->middle,
+ varray->fetch_elts[0],
+ varray->fetch_count,
+ varray->draw_elts,
+ varray->draw_count);
+ }
+
+ varray->fetch_count = 0;
+ varray->draw_count = 0;
+}
+
static INLINE void fetch_init(struct varray_frontend *varray,
unsigned count)
{
@@ -265,8 +287,8 @@ static void varray_prepare(struct draw_pt_front_end *frontend,
if (opt & PT_PIPELINE)
{
varray->base.run = varray_run_extras;
- }
- else
+ }
+ else
{
varray->base.run = varray_run;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index 073c1aadbf..67baafa3be 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -17,7 +17,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
split_prim_inplace(varray->input_prim, &first, &incr);
-#if 1
+#if 0
debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
varray->input_prim,
start, count);
@@ -88,7 +88,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
i + 0, i + 1, i + 2);
}
fetch_init(varray, end);
- varray_flush(varray);
+ varray_flush_linear(varray);
}
break;
--
cgit v1.2.3
From ff1fee2cae9fabb47d6a2eb1f9f8094fec3c377f Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Sun, 4 May 2008 00:44:27 -0400
Subject: don't fill in linear fetch_elts
---
src/gallium/auxiliary/draw/draw_pt_varray.c | 4 +++-
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 3 ++-
2 files changed, 5 insertions(+), 2 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index 916373acc8..fb1b59d53e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -78,6 +78,7 @@ static void varray_flush(struct varray_frontend *varray)
static void varray_flush_linear(struct varray_frontend *varray)
{
if (varray->draw_count) {
+#if 0
debug_printf("FLUSH LINEAR fc = %d, dc = %d\n",
varray->fetch_count,
varray->draw_count);
@@ -86,8 +87,9 @@ static void varray_flush_linear(struct varray_frontend *varray)
varray->fetch_elts[varray->fetch_count-1],
varray->draw_elts[0],
varray->draw_elts[varray->draw_count-1]);
+#endif
varray->middle->run_linear(varray->middle,
- varray->fetch_elts[0],
+ varray->fetch_start,
varray->fetch_count,
varray->draw_elts,
varray->draw_count);
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index 67baafa3be..a9f844a357 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -87,8 +87,9 @@ static void FUNC(struct draw_pt_front_end *frontend,
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1, i + 2);
}
- fetch_init(varray, end);
+ varray->fetch_count = end;
varray_flush_linear(varray);
+ varray->fetch_start += end;
}
break;
--
cgit v1.2.3
From a24cb269e1ba5434acf8c94abd03517c149b9c51 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Sun, 4 May 2008 01:23:01 -0400
Subject: implement linear path for fetch_emit pipeline
---
src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 64 +++++++++++++++++++++++--
src/gallium/auxiliary/draw/draw_pt_varray.c | 1 +
2 files changed, 61 insertions(+), 4 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index a4de341df8..6d5a54cf0e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -257,6 +257,61 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
}
+static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
+ unsigned fetch_start,
+ unsigned fetch_count,
+ const ushort *draw_elts,
+ unsigned draw_count )
+{
+ struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
+ struct draw_context *draw = feme->draw;
+ void *hw_verts;
+
+ /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+ */
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+ hw_verts = draw->render->allocate_vertices( draw->render,
+ (ushort)feme->translate->key.output_stride,
+ (ushort)fetch_count );
+ if (!hw_verts) {
+ assert(0);
+ return;
+ }
+
+ /* Single routine to fetch vertices and emit HW verts.
+ */
+ feme->translate->run( feme->translate,
+ fetch_start,
+ fetch_count,
+ hw_verts );
+
+ if (0) {
+ unsigned i;
+ for (i = 0; i < fetch_count; i++) {
+ debug_printf("\n\nvertex %d:\n", i);
+ draw_dump_emitted_vertex( feme->vinfo,
+ (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i );
+ }
+ }
+
+ /* XXX: Draw arrays path to avoid re-emitting index list again and
+ * again.
+ */
+ draw->render->draw_arrays( draw->render,
+ 0, /*start*/
+ draw_count );
+
+ /* Done -- that was easy, wasn't it:
+ */
+ draw->render->release_vertices( draw->render,
+ hw_verts,
+ feme->translate->key.output_stride,
+ fetch_count );
+
+}
+
+
static void fetch_emit_finish( struct draw_pt_middle_end *middle )
{
@@ -285,10 +340,11 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
return NULL;
}
- fetch_emit->base.prepare = fetch_emit_prepare;
- fetch_emit->base.run = fetch_emit_run;
- fetch_emit->base.finish = fetch_emit_finish;
- fetch_emit->base.destroy = fetch_emit_destroy;
+ fetch_emit->base.prepare = fetch_emit_prepare;
+ fetch_emit->base.run = fetch_emit_run;
+ fetch_emit->base.run_linear = fetch_emit_run_linear;
+ fetch_emit->base.finish = fetch_emit_finish;
+ fetch_emit->base.destroy = fetch_emit_destroy;
fetch_emit->draw = draw;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index fb1b59d53e..e7e21e4bf6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -88,6 +88,7 @@ static void varray_flush_linear(struct varray_frontend *varray)
varray->draw_elts[0],
varray->draw_elts[varray->draw_count-1]);
#endif
+ assert(varray->middle->run_linear);
varray->middle->run_linear(varray->middle,
varray->fetch_start,
varray->fetch_count,
--
cgit v1.2.3
From 66d72f176de2568f053c6dc54e93d423723ae8aa Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Sun, 4 May 2008 01:37:32 -0400
Subject: silence debugging output
---
src/gallium/auxiliary/draw/draw_pt_emit.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 2a961b7088..776ca32cfa 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -191,7 +191,9 @@ void draw_pt_emit_linear(struct pt_emit *emit,
struct vbuf_render *render = draw->render;
void *hw_verts;
- debug_printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n");
+#if 0
+ debug_printf("Linear emit\n");
+#endif
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
--
cgit v1.2.3
From e897fd6cd35c6b9e398e1903d2e79678fe85708a Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Mon, 5 May 2008 12:49:40 -0400
Subject: fix the regressions
---
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index a9f844a357..10ac08ea30 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -28,9 +28,10 @@ static void FUNC(struct draw_pt_front_end *frontend,
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
- for (i = 0; i < count; i ++) {
+ for (i = 0; i < count; i++) {
POINT(varray, i + 0);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -44,6 +45,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
LINE(varray, DRAW_PIPE_RESET_STIPPLE,
i + 0, i + 1);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -60,6 +62,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
LINE(varray, flags, i - 1, i);
}
LINE(varray, flags, i - 1, 0);
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -74,6 +77,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
for (i = 1; i < end; i++, flags = 0) {
LINE(varray, flags, i - 1, i);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -87,6 +91,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1, i + 2);
}
+ i = end;
varray->fetch_count = end;
varray_flush_linear(varray);
varray->fetch_start += end;
@@ -102,6 +107,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0, i + 1 + (i&1), i + 2 - (i&1));
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -114,6 +120,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0 + (i&1), i + 1 - (i&1), i + 2);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -130,6 +137,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, flags, i + 1, i + 2, 0);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -142,6 +150,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, flags, 0, i + 1, i + 2);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -156,6 +165,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
for (i = 0; i+3 < end; i += 4) {
QUAD(varray, i + 0, i + 1, i + 2, i + 3);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -168,6 +178,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
for (i = 0; i+3 < end; i += 2) {
QUAD(varray, i + 2, i + 0, i + 1, i + 3);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
@@ -193,6 +204,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
TRIANGLE(varray, flags, i + 1, i + 2, 0);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
}
--
cgit v1.2.3
From fe586f8612dd517b9a1f0d87fbaf3a75e3caf588 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Tue, 6 May 2008 18:59:45 -0400
Subject: redo the linear paths
---
src/gallium/auxiliary/draw/draw_pipe.c | 39 ++++
src/gallium/auxiliary/draw/draw_private.h | 6 +
src/gallium/auxiliary/draw/draw_pt.h | 6 +-
src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 18 +-
.../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 36 ++--
src/gallium/auxiliary/draw/draw_pt_varray.c | 27 +--
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 5 +-
.../auxiliary/draw/draw_pt_varray_tmp_linear.h | 198 +++++++++++++++++++++
8 files changed, 279 insertions(+), 56 deletions(-)
create mode 100644 src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 46afb0f41f..cb97f955b2 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -212,6 +212,45 @@ void draw_pipeline_run( struct draw_context *draw,
draw->pipeline.vertex_count = 0;
}
+void draw_pipeline_run_linear( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ char *verts = (char *)vertices;
+ unsigned i;
+
+ draw->pipeline.verts = verts;
+ draw->pipeline.vertex_stride = stride;
+ draw->pipeline.vertex_count = count;
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i++)
+ do_point( draw,
+ verts + stride * i );
+ break;
+ case PIPE_PRIM_LINES:
+ for (i = 0; i+1 < count; i += 2)
+ do_line( draw,
+ i+0, /* flags */
+ verts + stride * ((i+0) & ~DRAW_PIPE_FLAG_MASK),
+ verts + stride * (i+1));
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 0; i+2 < count; i += 3)
+ do_triangle( draw,
+ (i+0), /* flags */
+ verts + stride * ((i+0) & ~DRAW_PIPE_FLAG_MASK),
+ verts + stride * (i+1),
+ verts + stride * (i+2));
+ break;
+ }
+
+ draw->pipeline.verts = NULL;
+ draw->pipeline.vertex_count = 0;
+}
void draw_pipeline_flush( struct draw_context *draw,
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index cee58bbf73..e036d498b8 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -247,6 +247,12 @@ void draw_pipeline_run( struct draw_context *draw,
const ushort *elts,
unsigned count );
+void draw_pipeline_run_linear( struct draw_context *draw,
+ unsigned prim,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride );
+
void draw_pipeline_flush( struct draw_context *draw,
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 2f96ceaf00..312fdbe4f4 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -93,10 +93,8 @@ struct draw_pt_middle_end {
unsigned draw_count );
void (*run_linear)(struct draw_pt_middle_end *,
- unsigned fetch_start,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count);
+ unsigned start,
+ unsigned count);
void (*finish)( struct draw_pt_middle_end * );
void (*destroy)( struct draw_pt_middle_end * );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 6d5a54cf0e..8df4241b82 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -258,10 +258,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
- unsigned fetch_start,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count )
+ unsigned start,
+ unsigned count )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -273,7 +271,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
hw_verts = draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
- (ushort)fetch_count );
+ (ushort)count );
if (!hw_verts) {
assert(0);
return;
@@ -282,13 +280,13 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
/* Single routine to fetch vertices and emit HW verts.
*/
feme->translate->run( feme->translate,
- fetch_start,
- fetch_count,
+ start,
+ count,
hw_verts );
if (0) {
unsigned i;
- for (i = 0; i < fetch_count; i++) {
+ for (i = 0; i < count; i++) {
debug_printf("\n\nvertex %d:\n", i);
draw_dump_emitted_vertex( feme->vinfo,
(const uint8_t *)hw_verts + feme->vinfo->size * 4 * i );
@@ -300,14 +298,14 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw->render->draw_arrays( draw->render,
0, /*start*/
- draw_count );
+ count );
/* Done -- that was easy, wasn't it:
*/
draw->render->release_vertices( draw->render,
hw_verts,
feme->translate->key.output_stride,
- fetch_count );
+ count );
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index b1e08a8f40..dad54690a5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -178,18 +178,16 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
- unsigned fetch_start,
- unsigned fetch_count,
- const ushort *draw_elts,
- unsigned draw_count )
+ unsigned start,
+ unsigned count)
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vertex_shader;
unsigned opt = fpme->opt;
- unsigned alloc_count = align_int( fetch_count, 4 );
+ unsigned alloc_count = align_int( count, 4 );
- struct vertex_header *pipeline_verts =
+ struct vertex_header *pipeline_verts =
(struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
if (!pipeline_verts) {
@@ -202,8 +200,8 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
/* Fetch into our vertex buffer
*/
draw_pt_fetch_run_linear( fpme->fetch,
- fetch_start,
- fetch_count,
+ start,
+ count,
(char *)pipeline_verts );
/* Run the shader, note that this overwrites the data[] parts of
@@ -213,18 +211,18 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
*/
if (opt & PT_SHADE)
{
- shader->run_linear(shader,
+ shader->run_linear(shader,
(const float (*)[4])pipeline_verts->data,
( float (*)[4])pipeline_verts->data,
(const float (*)[4])draw->pt.user.constants,
- fetch_count,
+ count,
fpme->vertex_size,
fpme->vertex_size);
}
if (draw_pt_post_vs_run( fpme->post_vs,
pipeline_verts,
- fetch_count,
+ count,
fpme->vertex_size ))
{
opt |= PT_PIPELINE;
@@ -233,21 +231,19 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
/* Do we need to run the pipeline?
*/
if (opt & PT_PIPELINE) {
- draw_pipeline_run( fpme->draw,
- fpme->prim,
- pipeline_verts,
- fetch_count,
- fpme->vertex_size,
- draw_elts,
- draw_count );
+ draw_pipeline_run_linear( fpme->draw,
+ fpme->prim,
+ pipeline_verts,
+ count,
+ fpme->vertex_size);
}
else {
draw_pt_emit_linear( fpme->emit,
(const float (*)[4])pipeline_verts->data,
- fetch_count,
+ count,
fpme->vertex_size,
0, /*start*/
- draw_count );
+ count );
}
FREE(pipeline_verts);
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index e7e21e4bf6..59a9569270 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -75,29 +75,18 @@ static void varray_flush(struct varray_frontend *varray)
varray->draw_count = 0;
}
-static void varray_flush_linear(struct varray_frontend *varray)
+static void varray_flush_linear(struct varray_frontend *varray,
+ unsigned start, unsigned count)
{
- if (varray->draw_count) {
+ if (count) {
#if 0
- debug_printf("FLUSH LINEAR fc = %d, dc = %d\n",
- varray->fetch_count,
- varray->draw_count);
- debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n",
- varray->fetch_elts[0],
- varray->fetch_elts[varray->fetch_count-1],
- varray->draw_elts[0],
- varray->draw_elts[varray->draw_count-1]);
+ debug_printf("FLUSH LINEAR start = %d, count = %d\n",
+ start,
+ count);
#endif
assert(varray->middle->run_linear);
- varray->middle->run_linear(varray->middle,
- varray->fetch_start,
- varray->fetch_count,
- varray->draw_elts,
- varray->draw_count);
+ varray->middle->run_linear(varray->middle, start, count);
}
-
- varray->fetch_count = 0;
- varray->draw_count = 0;
}
static INLINE void fetch_init(struct varray_frontend *varray,
@@ -261,7 +250,7 @@ static INLINE void varray_ef_quad( struct varray_frontend *varray,
#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1)
#define POINT(vc,i0) varray_point(vc,i0)
#define FUNC varray_run
-#include "draw_pt_varray_tmp.h"
+#include "draw_pt_varray_tmp_linear.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index 10ac08ea30..335c4c89ca 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -92,9 +92,8 @@ static void FUNC(struct draw_pt_front_end *frontend,
i + 0, i + 1, i + 2);
}
i = end;
- varray->fetch_count = end;
- varray_flush_linear(varray);
- varray->fetch_start += end;
+ fetch_init(varray, end);
+ varray_flush(varray);
}
break;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
new file mode 100644
index 0000000000..dfa4338407
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -0,0 +1,198 @@
+
+static void FUNC(struct draw_pt_front_end *frontend,
+ pt_elt_func get_elt,
+ const void *elts,
+ unsigned count)
+{
+ struct varray_frontend *varray = (struct varray_frontend *)frontend;
+ struct draw_context *draw = varray->draw;
+ unsigned start = (unsigned)elts;
+
+ boolean flatfirst = (draw->rasterizer->flatshade &&
+ draw->rasterizer->flatshade_first);
+ unsigned i, j, flags;
+ unsigned first, incr;
+
+ varray->fetch_start = start;
+
+ split_prim_inplace(varray->input_prim, &first, &incr);
+
+#if 0
+ debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
+ varray->input_prim,
+ start, count);
+#endif
+
+ switch (varray->input_prim) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_TRIANGLES:
+ j = 0;
+ while (j + first <= count) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ varray_flush_linear(varray, start + j, end);
+ j += end;
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ flags = DRAW_PIPE_RESET_STIPPLE;
+
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 1; i < end; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ LINE(varray, flags, i - 1, 0);
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 1; i < end; i++, flags = 0) {
+ LINE(varray, flags, i - 1, i);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (flatfirst) {
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0, i + 1 + (i&1), i + 2 - (i&1));
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ else {
+ for (j = 0; j + first <= count;) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ //end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ i + 0 + (i&1), i + 1 - (i&1), i + 2);
+ }
+ fetch_init(varray, end);
+ varray_flush(varray);
+ j += end;
+ if (j <= count)
+ j -= incr;
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ if (flatfirst) {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ else {
+ flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++) {
+ TRIANGLE(varray, flags, 0, i + 1, i + 2);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+3 < end; i += 4) {
+ QUAD(varray, i + 0, i + 1, i + 2, i + 3);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+3 < end; i += 2) {
+ QUAD(varray, i + 2, i + 0, i + 1, i + 3);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ for (j = 0; j + first <= count; j += i) {
+ unsigned end = MIN2(FETCH_MAX, count - j);
+ end -= (end % incr);
+ for (i = 0; i+2 < end; i++, flags = edge_middle) {
+
+ if (i + 3 == count)
+ flags |= edge_last;
+
+ TRIANGLE(varray, flags, i + 1, i + 2, 0);
+ }
+ i = end;
+ fetch_init(varray, end);
+ varray_flush(varray);
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ varray_flush(varray);
+}
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
--
cgit v1.2.3
From 22323af525d00022a1fa06fab7ee84df5ef2d1f0 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Wed, 7 May 2008 19:34:12 -0400
Subject: fix silly mistakes
---
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 2 +-
src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 4 +---
2 files changed, 2 insertions(+), 4 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index 335c4c89ca..fb49452d8b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -28,7 +28,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
- for (i = 0; i < count; i++) {
+ for (i = 0; i < end; i++) {
POINT(varray, i + 0);
}
i = end;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index dfa4338407..ab28859c35 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -85,7 +85,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
else {
for (j = 0; j + first <= count;) {
unsigned end = MIN2(FETCH_MAX, count - j);
- //end -= (end % incr);
+ end -= (end % incr);
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0 + (i&1), i + 1 - (i&1), i + 2);
@@ -93,8 +93,6 @@ static void FUNC(struct draw_pt_front_end *frontend,
fetch_init(varray, end);
varray_flush(varray);
j += end;
- if (j <= count)
- j -= incr;
}
}
break;
--
cgit v1.2.3
From 8d709ae1595047b45a81f2fbd22850887fdbfea0 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Thu, 8 May 2008 12:10:24 -0400
Subject: fix triangle strips
---
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 12 ++++++++++--
src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 12 ++++++++++--
2 files changed, 20 insertions(+), 4 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index fb49452d8b..d137a758e2 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -109,19 +109,27 @@ static void FUNC(struct draw_pt_front_end *frontend,
i = end;
fetch_init(varray, end);
varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
}
else {
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
+ end -= (end % incr);
+ for (i = 0; i + 2 < end; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0 + (i&1), i + 1 - (i&1), i + 2);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
}
break;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index ab28859c35..4bf04fa62b 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -80,19 +80,27 @@ static void FUNC(struct draw_pt_front_end *frontend,
i = end;
fetch_init(varray, end);
varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
}
else {
- for (j = 0; j + first <= count;) {
+ for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
for (i = 0; i+2 < end; i++) {
TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
i + 0 + (i&1), i + 1 - (i&1), i + 2);
}
+ i = end;
fetch_init(varray, end);
varray_flush(varray);
- j += end;
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
}
break;
--
cgit v1.2.3
From 8ea6106f01f38853e9c0f1029da55eb449109aea Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Thu, 8 May 2008 15:11:16 -0400
Subject: fix quad strips
---
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 4 ++++
src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 4 ++++
2 files changed, 8 insertions(+)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index d137a758e2..1395275897 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -188,6 +188,10 @@ static void FUNC(struct draw_pt_front_end *frontend,
i = end;
fetch_init(varray, end);
varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
break;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index 4bf04fa62b..6e2b16d9be 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -159,6 +159,10 @@ static void FUNC(struct draw_pt_front_end *frontend,
i = end;
fetch_init(varray, end);
varray_flush(varray);
+ if (j + first + i <= count) {
+ varray->fetch_start -= 2;
+ i -= 2;
+ }
}
break;
--
cgit v1.2.3
From 501be9c7dd0cc5f985c708fa0e5f35d7fd20deb4 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 9 May 2008 15:02:59 +0100
Subject: draw: fix translate double-free, minor cleanups
---
src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 ---
src/gallium/auxiliary/draw/draw_pt_fetch.c | 3 ++-
src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 ++-
3 files changed, 4 insertions(+), 5 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 2a19e6916a..64a9f9084f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -427,9 +427,6 @@ static void vbuf_destroy( struct draw_stage *stage )
if(vbuf->indices)
align_free( vbuf->indices );
- if(vbuf->translate)
- vbuf->translate->release( vbuf->translate );
-
if (vbuf->render)
vbuf->render->destroy( vbuf->render );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index d62cd9358b..034e0eccb2 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -223,7 +223,8 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
void draw_pt_fetch_destroy( struct pt_fetch *fetch )
{
- translate_cache_destroy(fetch->cache);
+ if (fetch->cache)
+ translate_cache_destroy(fetch->cache);
FREE(fetch);
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 8df4241b82..bdbb039f9e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -320,7 +320,8 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
- translate_cache_destroy(feme->cache);
+ if (feme->cache)
+ translate_cache_destroy(feme->cache);
FREE(middle);
}
--
cgit v1.2.3
From c0a6040f568e0c9be07797b2dc2fdd8a3624ec34 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 9 May 2008 13:09:58 +0100
Subject: translate: helper functions for minimizing cost of key compares
---
src/gallium/auxiliary/translate/translate.h | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
(limited to 'src')
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index de6f09d18a..b8210af50c 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -95,6 +95,27 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx,
struct translate *translate_create( const struct translate_key *key );
+static INLINE int translate_keysize( const struct translate_key *key )
+{
+ return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
+}
+
+static INLINE int translate_key_compare( const struct translate_key *a,
+ const struct translate_key *b )
+{
+ int keysize = translate_keysize(a);
+ return memcmp(a, b, keysize);
+}
+
+
+static INLINE void translate_key_sanitize( struct translate_key *a )
+{
+ int keysize = translate_keysize(a);
+ char *ptr = (char *)a;
+ memset(ptr + keysize, 0, sizeof(*a) - keysize);
+}
+
+
/*******************************************************************************
* Private:
*/
--
cgit v1.2.3
From 7ddb925b8bc6c18eba953e34d2b630a3a6593f05 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 9 May 2008 13:10:15 +0100
Subject: draw: minimize cost of translate key compares, use cache universally
---
src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 20 ++++++++++++++------
src/gallium/auxiliary/draw/draw_pt_emit.c | 10 +++++-----
src/gallium/auxiliary/draw/draw_pt_fetch.c | 7 ++-----
src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 ++-
4 files changed, 23 insertions(+), 17 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 64a9f9084f..67b9a9503d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -42,6 +42,7 @@
#include "draw_vertex.h"
#include "draw_pipe.h"
#include "translate/translate.h"
+#include "translate/translate_cache.h"
/**
@@ -75,6 +76,8 @@ struct vbuf_stage {
/* Cache point size somewhere it's address won't change:
*/
float point_size;
+
+ struct translate_cache *cache;
};
@@ -220,7 +223,6 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
/* Translate from pipeline vertices to hw vertices.
*/
dst_offset = 0;
- memset(&hw_key, 0, sizeof(hw_key));
for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
unsigned emit_sz = 0;
@@ -277,12 +279,10 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
/* Don't bother with caching at this stage:
*/
if (!vbuf->translate ||
- memcmp(&vbuf->translate->key, &hw_key, sizeof(hw_key)) != 0)
+ translate_key_compare(&vbuf->translate->key, &hw_key) != 0)
{
- if (vbuf->translate)
- vbuf->translate->release(vbuf->translate);
-
- vbuf->translate = translate_create( &hw_key );
+ translate_key_sanitize(&hw_key);
+ vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);
vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0);
}
@@ -430,6 +430,9 @@ static void vbuf_destroy( struct draw_stage *stage )
if (vbuf->render)
vbuf->render->destroy( vbuf->render );
+ if (vbuf->cache)
+ translate_cache_destroy(vbuf->cache);
+
FREE( stage );
}
@@ -460,6 +463,11 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
16 );
if (!vbuf->indices)
goto fail;
+
+ vbuf->cache = translate_cache_create();
+ if (!vbuf->cache)
+ goto fail;
+
vbuf->vertices = NULL;
vbuf->vertex_ptr = vbuf->vertices;
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 999b2007a2..4a854f4362 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -49,7 +49,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
const struct vertex_info *vinfo;
unsigned dst_offset;
struct translate_key hw_key;
- unsigned keysize;
unsigned i;
boolean ok;
@@ -62,7 +61,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
/* Must do this after set_primitive() above:
*/
vinfo = draw->render->get_vertex_info(draw->render);
- keysize = 2*4 + vinfo->num_attribs * sizeof(hw_key.element[0]);
+
/* Translate from pipeline vertices to hw vertices.
*/
@@ -121,9 +120,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
hw_key.output_stride = vinfo->size * 4;
if (!emit->translate ||
- memcmp(&emit->translate->key, &hw_key, keysize) != 0)
+ translate_key_compare(&emit->translate->key, &hw_key) != 0)
{
- memset((char *)&hw_key + keysize, 0, sizeof(hw_key) - keysize);
+ translate_key_sanitize(&hw_key);
emit->translate = translate_cache_find(emit->cache, &hw_key);
}
}
@@ -244,7 +243,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw )
void draw_pt_emit_destroy( struct pt_emit *emit )
{
- translate_cache_destroy(emit->cache);
+ if (emit->cache)
+ translate_cache_destroy(emit->cache);
FREE(emit);
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index 034e0eccb2..07f4c99164 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -62,11 +62,8 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
unsigned i, nr = 0;
unsigned dst_offset = 0;
struct translate_key key;
- unsigned keysize;
fetch->vertex_size = vertex_size;
- keysize = (2*4 +
- (draw->pt.nr_vertex_elements + 1) * sizeof(key.element[0]));
/* Always emit/leave space for a vertex header.
*
@@ -111,9 +108,9 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
if (!fetch->translate ||
- memcmp(&fetch->translate->key, &key, keysize) != 0)
+ translate_key_compare(&fetch->translate->key, &key) != 0)
{
- memset((char *)&key + keysize, 0, sizeof(key) - keysize);
+ translate_key_sanitize(&key);
fetch->translate = translate_cache_find(fetch->cache, &key);
{
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index bdbb039f9e..a1d041a74f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -174,8 +174,9 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
/* Don't bother with caching at this stage:
*/
if (!feme->translate ||
- memcmp(&feme->translate->key, &key, sizeof(key)) != 0)
+ translate_key_compare(&feme->translate->key, &key) != 0)
{
+ translate_key_sanitize(&key);
feme->translate = translate_cache_find(feme->cache,
&key);
--
cgit v1.2.3
From 2258f6b437705860912be300e728efbde60e2140 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 12 May 2008 14:09:50 +0100
Subject: xlib: add failure paths for context creation
---
src/gallium/winsys/xlib/xm_api.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
(limited to 'src')
diff --git a/src/gallium/winsys/xlib/xm_api.c b/src/gallium/winsys/xlib/xm_api.c
index 26b722f343..8a32c54349 100644
--- a/src/gallium/winsys/xlib/xm_api.c
+++ b/src/gallium/winsys/xlib/xm_api.c
@@ -797,8 +797,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
pipe = xmesa_create_i965simple(xmesa_get_pipe_winsys_aub(v));
}
+ if (pipe == NULL)
+ goto fail;
+
c->st = st_create_context(pipe, &v->mesa_visual,
share_list ? share_list->st : NULL);
+ if (c->st == NULL)
+ goto fail;
+
mesaCtx = c->st->ctx;
c->st->ctx->DriverCtx = c;
@@ -818,6 +824,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
#endif
return c;
+
+ fail:
+ if (c->st)
+ st_destroy_context(c->st);
+ if (pipe)
+ pipe->destroy(pipe);
+ FREE(c);
+ return NULL;
}
--
cgit v1.2.3
From de818835de70961602bb9ceca86b98e9bbc63fc1 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 12 May 2008 14:10:03 +0100
Subject: softpipe: add failure paths for context creation
---
src/gallium/drivers/softpipe/sp_context.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
(limited to 'src')
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index fe9cd8375e..a48e546139 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -88,7 +88,8 @@ static void softpipe_destroy( struct pipe_context *pipe )
struct pipe_winsys *ws = pipe->winsys;
uint i;
- draw_destroy( softpipe->draw );
+ if (softpipe->draw)
+ draw_destroy( softpipe->draw );
softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple );
softpipe->quad.earlyz->destroy( softpipe->quad.earlyz );
@@ -216,8 +217,12 @@ softpipe_create( struct pipe_screen *screen,
* Create drawing context and plug our rendering stage into it.
*/
softpipe->draw = draw_create();
- assert(softpipe->draw);
+ if (!softpipe->draw)
+ goto fail;
+
softpipe->setup = sp_draw_render_stage(softpipe);
+ if (!softpipe->setup)
+ goto fail;
if (GETENV( "SP_NO_RAST" ) != NULL)
softpipe->no_rast = TRUE;
@@ -241,4 +246,8 @@ softpipe_create( struct pipe_screen *screen,
sp_init_surface_functions(softpipe);
return &softpipe->pipe;
+
+ fail:
+ softpipe_destroy(&softpipe->pipe);
+ return NULL;
}
--
cgit v1.2.3
From b5e5369da5fc50d63a6ece931fac44b555eb0314 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 12 May 2008 15:20:38 +0100
Subject: draw: add fetch-shade-emit path
Enable with DRAW_FSE=t. Performs fetch from API-provided vertex buffers,
transformation with one of three (two working) hard-coded shaders, and
final emit to hardware vertices all in a single pass.
Currently only really useful for profiling in conjunction with SP_NO_RAST=t.
---
src/gallium/auxiliary/draw/Makefile | 1 +
src/gallium/auxiliary/draw/draw_private.h | 3 +
src/gallium/auxiliary/draw/draw_pt.c | 25 +-
src/gallium/auxiliary/draw/draw_pt.h | 3 +
.../auxiliary/draw/draw_pt_fetch_shade_emit.c | 714 +++++++++++++++++++++
5 files changed, 742 insertions(+), 4 deletions(-)
create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index da7eded21f..68e7744cc5 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -26,6 +26,7 @@ C_SOURCES = \
draw_pt_emit.c \
draw_pt_fetch.c \
draw_pt_fetch_emit.c \
+ draw_pt_fetch_shade_emit.c \
draw_pt_fetch_shade_pipeline.c \
draw_pt_post_vs.c \
draw_pt_varray.c \
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index e036d498b8..cbe64cd290 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -124,12 +124,14 @@ struct draw_context
struct {
struct {
struct draw_pt_middle_end *fetch_emit;
+ /*struct draw_pt_middle_end *fetch_shade_emit;*/
struct draw_pt_middle_end *general;
} middle;
struct {
struct draw_pt_front_end *vcache;
struct draw_pt_front_end *varray;
+ struct draw_pt_front_end *fetch_shade_emit; /* temp hack */
} front;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -154,6 +156,7 @@ struct draw_context
const void *constants;
} user;
+ boolean test_fse;
} pt;
struct {
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index bccde6c5fd..448deef98c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -64,7 +64,7 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_PIPELINE;
}
- if (!draw->bypass_clipping) {
+ if (!draw->bypass_clipping && !draw->pt.test_fse) {
opt |= PT_CLIPTEST;
}
@@ -72,16 +72,20 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_SHADE;
}
- if (opt)
- middle = draw->pt.middle.general;
- else
+
+ if (opt == 0)
middle = draw->pt.middle.fetch_emit;
+ else
+ middle = draw->pt.middle.general;
/* Pick the right frontend
*/
if (draw->pt.user.elts) {
frontend = draw->pt.front.vcache;
+ } else if (opt == PT_SHADE && draw->pt.test_fse) {
+ /* should be a middle end.. */
+ frontend = draw->pt.front.fetch_shade_emit;
} else {
frontend = draw->pt.front.varray;
}
@@ -113,6 +117,14 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_emit)
return FALSE;
+ draw->pt.test_fse = GETENV("DRAW_FSE") != NULL;
+ if (draw->pt.test_fse) {
+ draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw );
+ if (!draw->pt.front.fetch_shade_emit)
+ return FALSE;
+ }
+
+
draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)
return FALSE;
@@ -133,6 +145,11 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_emit = NULL;
}
+ if (draw->pt.front.fetch_shade_emit) {
+ draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit );
+ draw->pt.front.fetch_shade_emit = NULL;
+ }
+
if (draw->pt.front.vcache) {
draw->pt.front.vcache->destroy( draw->pt.front.vcache );
draw->pt.front.vcache = NULL;
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 312fdbe4f4..bcd89f6bd6 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -121,6 +121,8 @@ const void *draw_pt_elt_ptr( struct draw_context *draw,
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw );
+
/* Middle-ends:
*
* Currently one general-purpose case which can do all possibilities,
@@ -132,6 +134,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
* vertex_elements.
*/
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
+//struct draw_pt_middle_end *draw_pt_fetch_shade_emit( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
new file mode 100644
index 0000000000..9e1d1add36
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -0,0 +1,714 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell
+ */
+
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+
+#include "translate/translate.h"
+
+struct fetch_shade_emit;
+
+/* One hard-wired fetch/shade/emit routine together with the translate key
+ * it is selected by (fse_prepare() compares keys to pick a shader).
+ */
+struct fse_shader {
+ /* Key this routine is registered under. */
+ struct translate_key key;
+
+ /* Processes 'count' vertices beginning at vertex 'start' and writes the
+ * results into 'buffer'.
+ */
+ void (*run_linear)( const struct fetch_shade_emit *fse,
+ unsigned start,
+ unsigned count,
+ char *buffer );
+};
+
+/* Prototype fetch, shade, emit-hw-verts all in one go.
+ *
+ * Implemented as a draw_pt_front_end: 'base' must stay the first member
+ * because the callbacks cast the front-end pointer back to this struct.
+ */
+struct fetch_shade_emit {
+ struct draw_pt_front_end base;
+
+ /* Owning draw context, set once at creation. */
+ struct draw_context *draw;
+
+ /* Key built by fse_prepare() and compared against shader[].key. */
+ struct translate_key key;
+
+ /* Temporaries, refreshed by fse_prepare():
+ * constants - draw->pt.user.constants, read as a 16-float matrix
+ * pitch/src - per vertex-shader-input stride and base address
+ * prim - primitive type as passed to prepare
+ */
+ const float *constants;
+ unsigned pitch[PIPE_MAX_ATTRIBS];
+ const ubyte *src[PIPE_MAX_ATTRIBS];
+ unsigned prim;
+
+ /* Points to one of the three hardwired example shaders, below:
+ */
+ struct fse_shader *active;
+
+ /* Temporary: A list of hard-wired shaders. Of course the plan
+ * would be to generate these for a given (vertex-shader,
+ * translate-key) pair...
+ */
+ struct fse_shader shader[10];
+ /* Number of entries of shader[] that are filled in. */
+ int nr_shaders;
+};
+
+
+
+/* Not a pure passthrough: the "shader" is inlined into the fetch loop.
+ *
+ * For each of 'count' vertices starting at 'start':
+ *   - position (input 0, three floats) is transformed by the 16-float
+ *     matrix in fse->constants and written as four floats,
+ *   - the colour is written as constant (1, 1, 1, 1),
+ *   - the texcoord (input 2, two floats) is written as (s, t, 0, 1).
+ * Twelve floats are emitted per vertex into 'buffer'.
+ */
+static void fetch_xyz_rgb_st( const struct fetch_shade_emit *fse,
+                              unsigned start,
+                              unsigned count,
+                              char *buffer )
+{
+   const unsigned pos_pitch = fse->pitch[0];
+   const unsigned tex_pitch = fse->pitch[2];
+   const ubyte *pos_ptr = fse->src[0] + start * pos_pitch;
+   const ubyte *tex_ptr = fse->src[2] + start * tex_pitch;
+   float *dst = (float *)buffer;
+   float mat[16];
+   unsigned v, k;
+
+   /* Take a private copy of the matrix before anything is written. */
+   for (k = 0; k < 16; k++)
+      mat[k] = fse->constants[k];
+
+   assert(fse->pitch[1] == 0);
+
+   for (v = 0; v < count; v++) {
+      const float *pos = (const float *)pos_ptr;
+      const float *tex = (const float *)tex_ptr;
+      const float px = pos[0], py = pos[1], pz = pos[2];
+
+      /* dst[k] = m[k]*x + m[4+k]*y + m[8+k]*z + m[12+k] */
+      for (k = 0; k < 4; k++)
+         dst[k] = mat[k] * px + mat[4 + k] * py + mat[8 + k] * pz + mat[12 + k];
+
+      /* constant white colour */
+      for (k = 4; k < 8; k++)
+         dst[k] = 1.0f;
+
+      /* texcoord padded out to four components */
+      dst[8] = tex[0];
+      dst[9] = tex[1];
+      dst[10] = 0.0f;
+      dst[11] = 1.0f;
+
+      pos_ptr += pos_pitch;
+      tex_ptr += tex_pitch;
+      dst += 12;
+   }
+}
+
+
+
+/* Fetch/shade/emit for a position + colour vertex.
+ *
+ * For each of 'count' vertices starting at 'start', the position (input 0,
+ * three floats) is transformed by the 16-float matrix in fse->constants and
+ * the colour (input 1, three floats) is copied with alpha forced to 1.
+ * Eight floats are emitted per vertex into 'buffer'.
+ */
+static void fetch_xyz_rgb( const struct fetch_shade_emit *fse,
+                           unsigned start,
+                           unsigned count,
+                           char *buffer )
+{
+   const unsigned pos_pitch = fse->pitch[0];
+   const unsigned col_pitch = fse->pitch[1];
+   const ubyte *pos_ptr = fse->src[0] + start * pos_pitch;
+   const ubyte *col_ptr = fse->src[1] + start * col_pitch;
+   float *dst = (float *)buffer;
+   float mat[16];
+   unsigned v, k;
+
+   /* Take a private copy of the matrix before anything is written. */
+   for (k = 0; k < 16; k++)
+      mat[k] = ((const float *)fse->constants)[k];
+
+   for (v = 0; v < count; v++) {
+      const float *pos = (const float *)pos_ptr;
+      const float *col = (const float *)col_ptr;
+      const float px = pos[0], py = pos[1], pz = pos[2];
+
+      /* dst[k] = m[k]*x + m[4+k]*y + m[8+k]*z + m[12+k] */
+      for (k = 0; k < 4; k++)
+         dst[k] = mat[k] * px + mat[4 + k] * py + mat[8 + k] * pz + mat[12 + k];
+
+      /* rgb from the vertex, alpha fixed at one */
+      dst[4] = col[0];
+      dst[5] = col[1];
+      dst[6] = col[2];
+      dst[7] = 1.0f;
+
+      pos_ptr += pos_pitch;
+      col_ptr += col_pitch;
+      dst += 8;
+   }
+}
+
+
+
+
+/* Fetch/shade/emit for a position + constant colour + point size vertex.
+ *
+ * The colour is read from a single location for every vertex (the routine
+ * asserts that input 1 has a pitch of zero) and the point size is the
+ * constant 1.  Nine floats are emitted per vertex into 'buffer':
+ * transformed position (4), colour with alpha 1 (4), point size (1).
+ */
+static void fetch_xyz_rgb_psiz( const struct fetch_shade_emit *fse,
+                                unsigned start,
+                                unsigned count,
+                                char *buffer )
+{
+   const unsigned pos_pitch = fse->pitch[0];
+   const ubyte *pos_ptr = fse->src[0] + start * pos_pitch;
+   const float *colour = (const float *)(fse->src[1] + start * fse->pitch[1]);
+   const float point_size = 1.0;
+   float *dst = (float *)buffer;
+   float mat[16];
+   unsigned v, k;
+
+   /* Take a private copy of the matrix before anything is written. */
+   for (k = 0; k < 16; k++)
+      mat[k] = ((const float *)fse->constants)[k];
+
+   assert(fse->pitch[1] == 0);
+
+   for (v = 0; v < count; v++) {
+      const float *pos = (const float *)pos_ptr;
+      const float px = pos[0], py = pos[1], pz = pos[2];
+
+      /* dst[k] = m[k]*x + m[4+k]*y + m[8+k]*z + m[12+k] */
+      for (k = 0; k < 4; k++)
+         dst[k] = mat[k] * px + mat[4 + k] * py + mat[8 + k] * pz + mat[12 + k];
+
+      /* shared colour, alpha fixed at one */
+      dst[4] = colour[0];
+      dst[5] = colour[1];
+      dst[6] = colour[2];
+      dst[7] = 1.0f;
+
+      dst[8] = point_size;
+
+      pos_ptr += pos_pitch;
+      dst += 9;
+   }
+}
+
+
+
+
+/* Remember the API primitive in fse->prim and tell the renderer which
+ * primitive it will actually receive:
+ *   - line loops are sent as line strips,
+ *   - quads and quad strips are sent as triangles,
+ *   - everything else is sent unchanged.
+ * Line loops, triangle fans and polygons are refused (FALSE, renderer not
+ * touched) when 'count' exceeds 1024.
+ */
+static boolean set_prim( struct fetch_shade_emit *fse,
+                         unsigned prim,
+                         unsigned count )
+{
+   struct vbuf_render *render = fse->draw->render;
+   unsigned hw_prim = prim;
+   boolean size_limited = FALSE;
+
+   fse->prim = prim;
+
+   if (prim == PIPE_PRIM_LINE_LOOP) {
+      hw_prim = PIPE_PRIM_LINE_STRIP;
+      size_limited = TRUE;
+   }
+   else if (prim == PIPE_PRIM_TRIANGLE_FAN ||
+            prim == PIPE_PRIM_POLYGON) {
+      size_limited = TRUE;
+   }
+   else if (prim == PIPE_PRIM_QUADS ||
+            prim == PIPE_PRIM_QUAD_STRIP) {
+      hw_prim = PIPE_PRIM_TRIANGLES;
+   }
+
+   if (size_limited && count > 1024)
+      return FALSE;
+
+   render->set_primitive( render, hw_prim );
+   return TRUE;
+}
+
+
+
+
+
+
+/* Front-end 'prepare' hook.
+ *
+ * Records the primitive via set_prim(), queries the renderer's vertex
+ * layout, builds fse->key from the vertex element input formats and the
+ * hardware vertex layout, selects the hard-wired shader whose key compares
+ * equal, and caches the constant buffer plus per-input source pointers and
+ * pitches for the run_linear routines.
+ *
+ * 'unused' (the middle end) and 'opt' are not referenced.
+ *
+ * Every failure path is assert(0) followed by a plain return. If no shader
+ * matches, fse->active is left NULL, and do_draw() calls
+ * fse->active->run_linear without checking it.
+ */
+static void fse_prepare( struct draw_pt_front_end *fe,
+ unsigned prim,
+ struct draw_pt_middle_end *unused,
+ unsigned opt )
+{
+ struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe;
+ struct draw_context *draw = fse->draw;
+ unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs;
+ unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs;
+ const struct vertex_info *vinfo;
+ unsigned i;
+ boolean need_psize = 0;
+
+
+ /* Indexed drawing is not handled by this path. */
+ if (draw->pt.user.elts) {
+ assert(0);
+ return ;
+ }
+
+ /* The real vertex count is not known here; 1022 is a placeholder that
+ * stays below the 1024 limit checked inside set_prim().
+ */
+ if (!set_prim(fse, prim, /*count*/1022 )) {
+ assert(0);
+ return ;
+ }
+
+ /* Must do this after set_primitive() above:
+ */
+ vinfo = draw->render->get_vertex_info(draw->render);
+
+
+
+ fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */
+ num_vs_inputs); /* inputs - fetch from api format */
+
+ /* vinfo->size is multiplied by 4 to get a byte stride; presumably it is
+ * counted in 4-byte units -- TODO confirm.
+ */
+ fse->key.output_stride = vinfo->size * 4;
+ memset(fse->key.element, 0,
+ fse->key.nr_elements * sizeof(fse->key.element[0]));
+
+ for (i = 0; i < num_vs_inputs; i++) {
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+ fse->key.element[i].input_format = src->src_format;
+
+ /* Consider ignoring these at this point, ie make generated
+ * programs independent of this state:
+ */
+ fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index;
+ fse->key.element[i].input_offset = 0; //src->src_offset;
+ }
+
+
+ {
+ /* Running byte offset of the next attribute in the hw vertex. */
+ unsigned dst_offset = 0;
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+ unsigned output_format = PIPE_FORMAT_NONE;
+ unsigned vs_output = vinfo->src_index[i];
+
+ switch (vinfo->emit[i]) {
+ case EMIT_4F:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ need_psize = 1;
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ /* NOTE(review): index num_vs_outputs + 1 lies outside the
+ * nr_elements entries cleared by the memset above unless
+ * num_vs_inputs > num_vs_outputs + 1, and nr_elements is not
+ * grown here (the code that would do so is under #if 0
+ * below) -- confirm this is intended.
+ */
+ vs_output = num_vs_outputs + 1;
+
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* The elements in the key correspond to vertex shader output
+ * numbers, not to positions in the hw vertex description --
+ * that's handled by the output_offset field.
+ */
+ fse->key.element[vs_output].output_format = output_format;
+ fse->key.element[vs_output].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
+ assert(fse->key.output_stride >= dst_offset);
+ }
+ }
+
+ /* To make psize work, really need to tell the vertex shader to
+ * copy that value from input->output. For 'translate' this was
+ * implicit for all elements.
+ *
+ * The block below is compiled out, so need_psize is set above but
+ * never acted on.
+ */
+#if 0
+ if (need_psize) {
+ unsigned input = num_vs_inputs + 1;
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+ fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
+ fse->key.element[i].input_buffer = 0; //nr_buffers + 1;
+ fse->key.element[i].input_offset = 0;
+
+ fse->key.nr_elements += 1;
+
+ }
+#endif
+
+ fse->constants = draw->pt.user.constants;
+
+ /* Would normally look up a vertex shader and peruse its list of
+ * variants somehow. We omitted that step and put all the
+ * hardcoded "shaders" into an array. We're just making the
+ * assumption that this happens to be a matching shader... ie
+ * you're running isosurf, aren't you?
+ *
+ * The loop does not stop at the first hit, so the last matching
+ * entry wins.
+ */
+ fse->active = NULL;
+ for (i = 0; i < fse->nr_shaders; i++) {
+ if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0)
+ fse->active = &fse->shader[i];
+ }
+
+ if (!fse->active) {
+ assert(0);
+ return ;
+ }
+
+ /* Now set buffer pointers:
+ * src[i] = mapped vertex buffer + buffer offset + element offset,
+ * pitch[i] = pitch of the vertex buffer that feeds input i.
+ */
+ for (i = 0; i < num_vs_inputs; i++) {
+ unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
+
+ fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] +
+ draw->pt.vertex_buffer[buf].buffer_offset +
+ draw->pt.vertex_element[i].src_offset);
+
+ fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch;
+
+ }
+
+
+ //return TRUE;
+}
+
+
+/* Report how a primitive type may be cut into independent batches.
+ *
+ * On return *first holds the number of vertices needed for the first
+ * primitive and *incr the number of additional vertices per further
+ * primitive.  Returns TRUE for the types listed below; for any other type
+ * returns FALSE with *first = 0 and *incr = 1.
+ */
+static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr)
+{
+   unsigned min_verts = 0;
+   unsigned step = 1;      /* one, so that count % incr works */
+   boolean splittable = TRUE;
+
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+      min_verts = 1;
+      step = 1;
+      break;
+   case PIPE_PRIM_LINES:
+      min_verts = 2;
+      step = 2;
+      break;
+   case PIPE_PRIM_LINE_STRIP:
+      min_verts = 2;
+      step = 1;
+      break;
+   case PIPE_PRIM_TRIANGLES:
+      min_verts = 3;
+      step = 3;
+      break;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      min_verts = 3;
+      step = 1;
+      break;
+   case PIPE_PRIM_QUADS:
+      min_verts = 4;
+      step = 4;
+      break;
+   case PIPE_PRIM_QUAD_STRIP:
+      min_verts = 4;
+      step = 2;
+      break;
+   default:
+      splittable = FALSE;
+      break;
+   }
+
+   *first = min_verts;
+   *incr = step;
+   return splittable;
+}
+
+
+
+
+#define INDEX(i) (start + (i))
+
+/* Draw 'length' consecutive hardware vertices beginning at 'start'.
+ *
+ * Line loops, quad strips and quads are expanded into a temporary ushort
+ * index list and sent through render->draw(); every other primitive goes
+ * straight to render->draw_arrays().
+ *
+ *   LINE_LOOP  -> the vertices in order followed by the first one again
+ *   QUAD_STRIP -> two triangles per quad: (0,1,3) and (2,0,3)
+ *   QUADS      -> two triangles per quad: (0,1,3) and (1,2,3)
+ *
+ * If the temporary index list cannot be allocated the draw is dropped;
+ * the function has no way to report the failure.
+ */
+static void fse_render_linear( struct vbuf_render *render,
+                               unsigned prim,
+                               unsigned start,
+                               unsigned length )
+{
+   ushort *tmp = NULL;
+   unsigned i, j;
+
+   switch (prim) {
+   case PIPE_PRIM_LINE_LOOP:
+      tmp = MALLOC( sizeof(ushort) * (length + 1) );
+      if (!tmp)
+         break;
+
+      for (i = 0; i < length; i++)
+         tmp[i] = (ushort)INDEX(i);
+
+      /* Close the loop on the first vertex of this range, which is
+       * 'start' and not necessarily vertex zero.
+       */
+      tmp[length] = (ushort)INDEX(0);
+
+      render->draw( render,
+                    tmp,
+                    length+1 );
+      break;
+
+
+   case PIPE_PRIM_QUAD_STRIP:
+      /* Fewer than four vertices make no quad: nothing to draw. */
+      if (length < 4)
+         break;
+
+      tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) );
+      if (!tmp)
+         break;
+
+      for (j = i = 0; i + 3 < length; i += 2, j += 6) {
+         tmp[j+0] = (ushort)INDEX(i+0);
+         tmp[j+1] = (ushort)INDEX(i+1);
+         tmp[j+2] = (ushort)INDEX(i+3);
+
+         tmp[j+3] = (ushort)INDEX(i+2);
+         tmp[j+4] = (ushort)INDEX(i+0);
+         tmp[j+5] = (ushort)INDEX(i+3);
+      }
+
+      if (j)
+         render->draw( render, tmp, j );
+      break;
+
+   case PIPE_PRIM_QUADS:
+      /* Fewer than four vertices make no quad: nothing to draw. */
+      if (length < 4)
+         break;
+
+      /* The list holds ushort indices, so size it accordingly. */
+      tmp = MALLOC( sizeof(ushort) * (length / 4 * 6) );
+      if (!tmp)
+         break;
+
+      for (j = i = 0; i + 3 < length; i += 4, j += 6) {
+         tmp[j+0] = (ushort)INDEX(i+0);
+         tmp[j+1] = (ushort)INDEX(i+1);
+         tmp[j+2] = (ushort)INDEX(i+3);
+
+         tmp[j+3] = (ushort)INDEX(i+1);
+         tmp[j+4] = (ushort)INDEX(i+2);
+         tmp[j+5] = (ushort)INDEX(i+3);
+      }
+
+      if (j)
+         render->draw( render, tmp, j );
+      break;
+
+   default:
+      render->draw_arrays( render,
+                           start,
+                           length );
+      break;
+   }
+
+   if (tmp)
+      FREE(tmp);
+}
+
+
+
+/* Process one batch: obtain hardware vertex storage for 'count' vertices,
+ * let the active shader routine fetch, shade and emit API vertices
+ * 'start' .. 'start + count - 1' into it, draw them, and give the storage
+ * back to the renderer.
+ *
+ * Returns FALSE if the renderer could not provide vertex storage,
+ * TRUE otherwise.
+ */
+static boolean do_draw( struct fetch_shade_emit *fse,
+                        unsigned start, unsigned count )
+{
+   struct vbuf_render *render = fse->draw->render;
+   char *verts;
+
+   verts = render->allocate_vertices( render,
+                                      (ushort)fse->key.output_stride,
+                                      (ushort)count );
+   if (verts == NULL)
+      return FALSE;
+
+   /* One routine fetches the vertices, runs the shader and writes the
+    * hardware vertices.  Clipping and viewport transformation are done
+    * on hardware.
+    */
+   fse->active->run_linear( fse, start, count, verts );
+
+   /* The emitted vertices sit at the front of 'verts', hence the zero
+    * start.  Uses the draw-arrays path where possible to avoid
+    * re-emitting an index list again and again.
+    */
+   fse_render_linear( render, fse->prim, 0, count );
+
+   render->release_vertices( render,
+                             verts,
+                             fse->key.output_stride,
+                             count );
+
+   return TRUE;
+}
+
+
+/* Front-end 'run' hook: draw 'count' sequential vertices.
+ *
+ * The first vertex index is obtained from elt_func(elt_ptr, 0); the
+ * remaining vertices are taken to follow it consecutively.  The range is
+ * trimmed to a whole number of primitives and then fed to do_draw() in
+ * batches of at most 1024 vertices, each batch ending on a primitive
+ * boundary and overlapping the previous one by the vertices that strip-type
+ * primitives share.
+ *
+ * NOTE(review): for primitive types split_prim_inplace() rejects
+ * (first == 0, incr == 1) the batch advance below is nr + 1, so a draw of
+ * more than 1024 such vertices is not split correctly -- confirm callers
+ * never do that.
+ */
+static void
+fse_run(struct draw_pt_front_end *fe,
+        pt_elt_func elt_func,
+        const void *elt_ptr,
+        unsigned count)
+{
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe;
+   unsigned i = 0;
+   unsigned first, incr;
+   unsigned start = elt_func(elt_ptr, 0);
+
+   //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count);
+
+   split_prim_inplace(fse->prim, &first, &incr);
+
+   /* Nothing to draw.  This must be checked before the trimming below:
+    * all the arithmetic is unsigned, so with count < first the expression
+    * (count - first) wraps around, count becomes enormous and the loop
+    * would draw vertices that do not exist.
+    */
+   if (count == 0 || count < first)
+      return;
+
+   /* drop trailing vertices that do not complete a primitive
+    */
+   count -= (count - first) % incr;
+
+   while (i + first <= count) {
+      unsigned nr = MIN2( count - i, 1024 );
+
+      /* snap to prim boundary
+       */
+      nr -= (nr - first) % incr;
+
+      if (!do_draw( fse, start + i, nr )) {
+         assert(0);
+         return ;
+      }
+
+      /* increment allowing for repeated vertices
+       */
+      i += nr - (first - incr);
+   }
+}
+
+
+/* Front-end 'finish' hook.  Intentionally empty: this front end does
+ * nothing here and 'frontend' is not used.
+ */
+static void fse_finish( struct draw_pt_front_end *frontend )
+{
+}
+
+
+/* Front-end 'destroy' hook: frees the front end object.  'frontend' is the
+ * 'base' member at the start of the struct fetch_shade_emit allocated by
+ * draw_pt_fetch_shade_emit(), so freeing it frees the whole object.
+ */
+static void
+fse_destroy( struct draw_pt_front_end *frontend )
+{
+ FREE(frontend);
+}
+
+/* Create the prototype fetch/shade/emit front end for 'draw'.
+ *
+ * Allocates a zeroed struct fetch_shade_emit, installs the front-end
+ * callbacks and registers the three hard-wired shaders with the translate
+ * keys fse_prepare() matches against:
+ *
+ *   shader[0]  fetch_xyz_rgb_st    xyz + rgb + st        -> 12 floats
+ *   shader[1]  fetch_xyz_rgb       xyz + rgb             ->  8 floats
+ *   shader[2]  fetch_xyz_rgb_psiz  xyz + rgb + psize     ->  9 floats
+ *
+ * Returns the embedded front-end interface, or NULL if the allocation
+ * fails.  The object is released through its destroy() callback.
+ */
+struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw )
+{
+   struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit);
+   if (!fse)
+      return NULL;
+
+   fse->base.prepare = fse_prepare;
+   fse->base.run     = fse_run;
+   fse->base.finish  = fse_finish;
+   fse->base.destroy = fse_destroy;
+   fse->draw = draw;
+
+   /* shader[0]: position, colour, texcoord
+    */
+   fse->shader[0].run_linear = fetch_xyz_rgb_st;
+   fse->shader[0].key.nr_elements = 3;
+   fse->shader[0].key.output_stride = 12 * sizeof(float);
+
+   fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[0].key.element[0].input_buffer = 0;
+   fse->shader[0].key.element[0].input_offset = 0;
+   fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[0].key.element[0].output_offset = 0;
+
+   fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[0].key.element[1].input_buffer = 0;
+   fse->shader[0].key.element[1].input_offset = 0;
+   fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[0].key.element[1].output_offset = 16;
+
+   /* Third element (the two-float texcoord).  This must be element[2]:
+    * writing it to element[1] again would overwrite the colour entry
+    * above and leave element[2] zeroed.
+    */
+   fse->shader[0].key.element[2].input_format = PIPE_FORMAT_R32G32_FLOAT;
+   fse->shader[0].key.element[2].input_buffer = 0;
+   fse->shader[0].key.element[2].input_offset = 0;
+   fse->shader[0].key.element[2].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[0].key.element[2].output_offset = 32;
+
+   /* shader[1]: position, colour
+    */
+   fse->shader[1].run_linear = fetch_xyz_rgb;
+   fse->shader[1].key.nr_elements = 2;
+   fse->shader[1].key.output_stride = 8 * sizeof(float);
+
+   fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[1].key.element[0].input_buffer = 0;
+   fse->shader[1].key.element[0].input_offset = 0;
+   fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[1].key.element[0].output_offset = 0;
+
+   fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[1].key.element[1].input_buffer = 0;
+   fse->shader[1].key.element[1].input_offset = 0;
+   fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[1].key.element[1].output_offset = 16;
+
+   /* shader[2]: position, colour, point size
+    */
+   fse->shader[2].run_linear = fetch_xyz_rgb_psiz;
+   fse->shader[2].key.nr_elements = 3;
+   fse->shader[2].key.output_stride = 9 * sizeof(float);
+
+   fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[2].key.element[0].input_buffer = 0;
+   fse->shader[2].key.element[0].input_offset = 0;
+   fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[2].key.element[0].output_offset = 0;
+
+   fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[2].key.element[1].input_buffer = 0;
+   fse->shader[2].key.element[1].input_offset = 0;
+   fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[2].key.element[1].output_offset = 16;
+
+   /* psize is special
+    *   -- effectively add it here as another input!?!
+    *   -- who knows how to add it as a buffer?
+    */
+   fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT;
+   fse->shader[2].key.element[2].input_buffer = 0;
+   fse->shader[2].key.element[2].input_offset = 0;
+   fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT;
+   fse->shader[2].key.element[2].output_offset = 32;
+
+   fse->nr_shaders = 3;
+
+   return &fse->base;
+}
--
cgit v1.2.3
From 90e86363de7dbcfda3490b5c31d701350a0fa2ef Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 12 May 2008 16:16:04 +0100
Subject: softpipe: make vbuf handle all primitive types
---
src/gallium/drivers/softpipe/sp_prim_setup.c | 15 +-
src/gallium/drivers/softpipe/sp_prim_vbuf.c | 255 +++++++++++++++------------
2 files changed, 155 insertions(+), 115 deletions(-)
(limited to 'src')
diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c
index 1cf9ffa632..941ab62e00 100644
--- a/src/gallium/drivers/softpipe/sp_prim_setup.c
+++ b/src/gallium/drivers/softpipe/sp_prim_setup.c
@@ -64,16 +64,17 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
}
+typedef const float (*cptrf4)[4];
static void
do_tri(struct draw_stage *stage, struct prim_header *prim)
{
struct setup_stage *setup = setup_stage( stage );
-
+
setup_tri( setup->setup,
- prim->v[0]->data,
- prim->v[1]->data,
- prim->v[2]->data );
+ (cptrf4)prim->v[0]->data,
+ (cptrf4)prim->v[1]->data,
+ (cptrf4)prim->v[2]->data );
}
static void
@@ -82,8 +83,8 @@ do_line(struct draw_stage *stage, struct prim_header *prim)
struct setup_stage *setup = setup_stage( stage );
setup_line( setup->setup,
- prim->v[0]->data,
- prim->v[1]->data );
+ (cptrf4)prim->v[0]->data,
+ (cptrf4)prim->v[1]->data );
}
static void
@@ -92,7 +93,7 @@ do_point(struct draw_stage *stage, struct prim_header *prim)
struct setup_stage *setup = setup_stage( stage );
setup_point( setup->setup,
- prim->v[0]->data );
+ (cptrf4)prim->v[0]->data );
}
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index e063fe82ef..1399776ff0 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -129,17 +129,22 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
}
+static INLINE cptrf4 get_vert( const void *vertex_buffer,
+ int index,
+ int stride )
+{
+ return (cptrf4)((char *)vertex_buffer + index * stride);
+}
static void
-sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices)
+sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
{
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
- unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float);
- unsigned i, j;
- void *vertex_buffer = cvbr->vertex_buffer;
- cptrf4 v[3];
+ unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
+ unsigned i;
+ const void *vertex_buffer = cvbr->vertex_buffer;
/* XXX: break this dependency - make setup_context live under
* softpipe, rename the old "setup" draw stage to something else.
@@ -149,40 +154,98 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices)
switch (cvbr->prim) {
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i < nr_indices; i += 3) {
- for (j = 0; j < 3; j++)
- v[j] = (cptrf4)((char *)vertex_buffer +
- indices[i+j] * vertex_size);
-
- setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2]);
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < nr; i++) {
+ setup_point( setup_ctx,
+ get_vert(vertex_buffer, indices[i-0], stride) );
}
break;
case PIPE_PRIM_LINES:
- for (i = 0; i < nr_indices; i += 2) {
- for (j = 0; j < 2; j++)
- v[j] = (cptrf4)((char *)vertex_buffer +
- indices[i+j] * vertex_size);
+ for (i = 1; i < nr; i += 2) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < nr; i ++) {
setup_line( setup_ctx,
- v[0],
- v[1] );
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
}
break;
- case PIPE_PRIM_POINTS:
- for (i = 0; i < nr_indices; i++) {
- v[0] = (cptrf4)((char *)vertex_buffer +
- indices[i] * vertex_size);
+ case PIPE_PRIM_LINE_LOOP:
+ for (i = 1; i < nr; i ++) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ if (nr) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, indices[nr-1], stride),
+ get_vert(vertex_buffer, indices[0], stride) );
+ }
+ break;
- setup_point( setup_ctx,
- v[0] );
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 2; i < nr; i += 3) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ for (i = 2; i < nr; i += 3) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i+(i&1)-2], stride),
+ get_vert(vertex_buffer, indices[i-(i&1)-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
}
break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ for (i = 2; i < nr; i += 3) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[0], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+ }
+ break;
+ case PIPE_PRIM_QUADS:
+ for (i = 3; i < nr; i += 4) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride));
+
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+ }
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ for (i = 3; i < nr; i += 2) {
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride));
+
+ setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-2], stride));
+ }
+ break;
+ default:
+ assert(0);
}
/* XXX: why are we calling this??? If we had to call something, it
@@ -203,130 +266,106 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
struct softpipe_context *softpipe = cvbr->softpipe;
struct draw_stage *setup = softpipe->setup;
const void *vertex_buffer = cvbr->vertex_buffer;
- const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float);
+ const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
unsigned i;
struct setup_context *setup_ctx = sp_draw_setup_context(setup);
- cptrf4 v[3];
-
-#define VERTEX(I) \
- (cptrf4) ((char *) vertex_buffer + (I) * vertex_size)
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < nr; i++) {
- v[0] = VERTEX(i);
- setup_point( setup_ctx, v[0] );
+ setup_point( setup_ctx,
+ get_vert(vertex_buffer, i-0, stride) );
}
break;
+
case PIPE_PRIM_LINES:
- assert(nr % 2 == 0);
- for (i = 0; i < nr; i += 2) {
- v[0] = VERTEX(i);
- v[1] = VERTEX(i + 1);
- setup_line( setup_ctx, v[0], v[1] );
+ for (i = 1; i < nr; i += 2) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
}
break;
+
case PIPE_PRIM_LINE_STRIP:
- for (i = 1; i < nr; i++) {
- v[0] = VERTEX(i - 1);
- v[1] = VERTEX(i);
- setup_line( setup_ctx, v[0], v[1] );
+ for (i = 1; i < nr; i ++) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
}
break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ for (i = 1; i < nr; i ++) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ if (nr) {
+ setup_line( setup_ctx,
+ get_vert(vertex_buffer, nr-1, stride),
+ get_vert(vertex_buffer, 0, stride) );
+ }
+ break;
+
+
case PIPE_PRIM_TRIANGLES:
- assert(nr % 3 == 0);
- for (i = 0; i < nr; i += 3) {
- v[0] = VERTEX(i + 0);
- v[1] = VERTEX(i + 1);
- v[2] = VERTEX(i + 2);
+ for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2] );
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
}
break;
+
case PIPE_PRIM_TRIANGLE_STRIP:
- assert(nr >= 3);
- for (i = 2; i < nr; i++) {
- v[0] = VERTEX(i - 2);
- v[1] = VERTEX(i - 1);
- v[2] = VERTEX(i);
+ for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2] );
+ get_vert(vertex_buffer, i+(i&1)-2, stride),
+ get_vert(vertex_buffer, i-(i&1)-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
}
break;
+
case PIPE_PRIM_TRIANGLE_FAN:
- assert(nr >= 3);
- for (i = 2; i < nr; i++) {
- v[0] = VERTEX(0);
- v[1] = VERTEX(i - 1);
- v[2] = VERTEX(i);
+ case PIPE_PRIM_POLYGON:
+ for (i = 2; i < nr; i += 3) {
setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2] );
+ get_vert(vertex_buffer, 0, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
}
break;
case PIPE_PRIM_QUADS:
assert(nr % 4 == 0);
- for (i = 0; i < nr; i += 4) {
- v[0] = VERTEX(i + 0);
- v[1] = VERTEX(i + 1);
- v[2] = VERTEX(i + 2);
+ for (i = 3; i < nr; i += 4) {
setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2] );
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride));
- v[0] = VERTEX(i + 0);
- v[1] = VERTEX(i + 2);
- v[2] = VERTEX(i + 3);
setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2] );
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
}
break;
case PIPE_PRIM_QUAD_STRIP:
assert(nr >= 4);
- for (i = 2; i < nr; i += 2) {
- v[0] = VERTEX(i - 2);
- v[1] = VERTEX(i);
- v[2] = VERTEX(i + 1);
+ for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2] );
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride));
- v[0] = VERTEX(i - 2);
- v[1] = VERTEX(i + 1);
- v[2] = VERTEX(i - 1);
- setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2] );
- }
- break;
- case PIPE_PRIM_POLYGON:
- /* draw as tri fan */
- for (i = 2; i < nr; i++) {
- v[0] = VERTEX(0);
- v[1] = VERTEX(i - 1);
- v[2] = VERTEX(i);
setup_tri( setup_ctx,
- v[0],
- v[1],
- v[2] );
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-2, stride));
}
break;
default:
- /* XXX finish remaining prim types */
assert(0);
}
-
-#undef VERTEX
}
--
cgit v1.2.3
From f116a149160d50d43a23b02a3416725d6f895d51 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 12 May 2008 17:30:05 +0100
Subject: softpipe: more work to get non-reduced primitives working in vbuf
---
src/gallium/drivers/softpipe/sp_prim_vbuf.c | 45 ++++++++++++-----------------
1 file changed, 19 insertions(+), 26 deletions(-)
(limited to 'src')
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 1399776ff0..e9fae951e0 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -116,15 +116,8 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
- if (prim == PIPE_PRIM_TRIANGLES ||
- prim == PIPE_PRIM_LINES ||
- prim == PIPE_PRIM_POINTS) {
- cvbr->prim = prim;
- return TRUE;
- }
- else {
- return FALSE;
- }
+ cvbr->prim = prim;
+ return TRUE;
}
@@ -201,7 +194,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
break;
case PIPE_PRIM_TRIANGLE_STRIP:
- for (i = 2; i < nr; i += 3) {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i+(i&1)-2], stride),
get_vert(vertex_buffer, indices[i-(i&1)-1], stride),
@@ -211,7 +204,7 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
case PIPE_PRIM_TRIANGLE_FAN:
case PIPE_PRIM_POLYGON:
- for (i = 2; i < nr; i += 3) {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[0], stride),
get_vert(vertex_buffer, indices[i-1], stride),
@@ -223,10 +216,10 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
setup_tri( setup_ctx,
get_vert(vertex_buffer, indices[i-3], stride),
get_vert(vertex_buffer, indices[i-2], stride),
- get_vert(vertex_buffer, indices[i-1], stride));
+ get_vert(vertex_buffer, indices[i-0], stride));
setup_tri( setup_ctx,
- get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride));
}
@@ -234,14 +227,14 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
case PIPE_PRIM_QUAD_STRIP:
for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-3], stride),
get_vert(vertex_buffer, indices[i-2], stride),
- get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-0], stride));
setup_tri( setup_ctx,
+ get_vert(vertex_buffer, indices[i-1], stride),
get_vert(vertex_buffer, indices[i-3], stride),
- get_vert(vertex_buffer, indices[i-0], stride),
- get_vert(vertex_buffer, indices[i-2], stride));
+ get_vert(vertex_buffer, indices[i-0], stride));
}
break;
default:
@@ -265,11 +258,13 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
struct softpipe_context *softpipe = cvbr->softpipe;
struct draw_stage *setup = softpipe->setup;
- const void *vertex_buffer = cvbr->vertex_buffer;
+ const void *vertex_buffer = NULL;
const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float);
unsigned i;
struct setup_context *setup_ctx = sp_draw_setup_context(setup);
+ vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride);
+
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < nr; i++) {
@@ -318,7 +313,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
break;
case PIPE_PRIM_TRIANGLE_STRIP:
- for (i = 2; i < nr; i += 3) {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i+(i&1)-2, stride),
get_vert(vertex_buffer, i-(i&1)-1, stride),
@@ -328,7 +323,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
case PIPE_PRIM_TRIANGLE_FAN:
case PIPE_PRIM_POLYGON:
- for (i = 2; i < nr; i += 3) {
+ for (i = 2; i < nr; i += 1) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, 0, stride),
get_vert(vertex_buffer, i-1, stride),
@@ -336,31 +331,29 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
}
break;
case PIPE_PRIM_QUADS:
- assert(nr % 4 == 0);
for (i = 3; i < nr; i += 4) {
setup_tri( setup_ctx,
get_vert(vertex_buffer, i-3, stride),
get_vert(vertex_buffer, i-2, stride),
- get_vert(vertex_buffer, i-1, stride));
+ get_vert(vertex_buffer, i-0, stride));
setup_tri( setup_ctx,
- get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride),
get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride));
}
break;
case PIPE_PRIM_QUAD_STRIP:
- assert(nr >= 4);
for (i = 3; i < nr; i += 2) {
setup_tri( setup_ctx,
+ get_vert(vertex_buffer, i-3, stride),
get_vert(vertex_buffer, i-2, stride),
- get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-0, stride));
setup_tri( setup_ctx,
+ get_vert(vertex_buffer, i-1, stride),
get_vert(vertex_buffer, i-3, stride),
- get_vert(vertex_buffer, i-0, stride),
- get_vert(vertex_buffer, i-2, stride));
+ get_vert(vertex_buffer, i-0, stride));
}
break;
default:
--
cgit v1.2.3
From 44463b2997826cd14def00abf724a7a65a4fc7cb Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 12 May 2008 17:36:35 +0100
Subject: draw: streamline the varray path
- drop support for running the pipeline (i.e. don't populate the flags values)
- pass through all split-able primitives intact to the middle end
- only primitives that can't be split are shunted on the draw-element path
---
src/gallium/auxiliary/draw/draw_pt.c | 2 +-
src/gallium/auxiliary/draw/draw_pt_varray.c | 106 +++----------
.../auxiliary/draw/draw_pt_varray_tmp_linear.h | 165 +++------------------
3 files changed, 44 insertions(+), 229 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 448deef98c..d9e73a2396 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -81,7 +81,7 @@ draw_pt_arrays(struct draw_context *draw,
/* Pick the right frontend
*/
- if (draw->pt.user.elts) {
+ if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
frontend = draw->pt.front.vcache;
} else if (opt == PT_SHADE && draw->pt.test_fse) {
/* should be a middle end.. */
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index 59a9569270..d92ad4fda1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -154,9 +154,9 @@ static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr
static INLINE void add_draw_el(struct varray_frontend *varray,
- int idx, ushort flags)
+ int idx)
{
- varray->draw_elts[varray->draw_count++] = idx | flags;
+ varray->draw_elts[varray->draw_count++] = idx;
}
@@ -165,106 +165,47 @@ static INLINE void varray_triangle( struct varray_frontend *varray,
unsigned i1,
unsigned i2 )
{
- add_draw_el(varray, i0, 0);
- add_draw_el(varray, i1, 0);
- add_draw_el(varray, i2, 0);
-}
-
-static INLINE void varray_triangle_flags( struct varray_frontend *varray,
- ushort flags,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- add_draw_el(varray, i0, flags);
- add_draw_el(varray, i1, 0);
- add_draw_el(varray, i2, 0);
+ add_draw_el(varray, i0);
+ add_draw_el(varray, i1);
+ add_draw_el(varray, i2);
}
static INLINE void varray_line( struct varray_frontend *varray,
unsigned i0,
unsigned i1 )
{
- add_draw_el(varray, i0, 0);
- add_draw_el(varray, i1, 0);
-}
-
-
-static INLINE void varray_line_flags( struct varray_frontend *varray,
- ushort flags,
- unsigned i0,
- unsigned i1 )
-{
- add_draw_el(varray, i0, flags);
- add_draw_el(varray, i1, 0);
+ add_draw_el(varray, i0);
+ add_draw_el(varray, i1);
}
static INLINE void varray_point( struct varray_frontend *varray,
unsigned i0 )
{
- add_draw_el(varray, i0, 0);
-}
-
-static INLINE void varray_quad( struct varray_frontend *varray,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- varray_triangle( varray, i0, i1, i3 );
- varray_triangle( varray, i1, i2, i3 );
+ add_draw_el(varray, i0);
}
-static INLINE void varray_ef_quad( struct varray_frontend *varray,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
- const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
-
- varray_triangle_flags( varray,
- DRAW_PIPE_RESET_STIPPLE | omitEdge1,
- i0, i1, i3 );
-
- varray_triangle_flags( varray,
- omitEdge2,
- i1, i2, i3 );
-}
-/* At least for now, we're back to using a template include file for
- * this. The two paths aren't too different though - it may be
- * possible to reunify them.
- */
-#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1)
-#define POINT(vc,i0) varray_point(vc,i0)
-#define FUNC varray_run_extras
-#include "draw_pt_varray_tmp.h"
-#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1)
+#define TRIANGLE(vc,i0,i1,i2) varray_triangle(vc,i0,i1,i2)
+#define LINE(vc,i0,i1) varray_line(vc,i0,i1)
#define POINT(vc,i0) varray_point(vc,i0)
#define FUNC varray_run
#include "draw_pt_varray_tmp_linear.h"
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
+static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = {
PIPE_PRIM_POINTS,
PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
+ PIPE_PRIM_LINE_STRIP,
+ PIPE_PRIM_LINES, /* decomposed */
PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES
+ PIPE_PRIM_TRIANGLE_STRIP,
+ PIPE_PRIM_TRIANGLES, /* decomposed */
+ PIPE_PRIM_QUADS,
+ PIPE_PRIM_QUAD_STRIP,
+ PIPE_PRIM_TRIANGLES /* decomposed */
};
@@ -276,17 +217,10 @@ static void varray_prepare(struct draw_pt_front_end *frontend,
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
- if (opt & PT_PIPELINE)
- {
- varray->base.run = varray_run_extras;
- }
- else
- {
- varray->base.run = varray_run;
- }
+ varray->base.run = varray_run;
varray->input_prim = prim;
- varray->output_prim = reduced_prim[prim];
+ varray->output_prim = decompose_prim[prim];
varray->middle = middle;
middle->prepare(middle, varray->output_prim, opt);
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index 6e2b16d9be..b6f1f0cadc 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -1,3 +1,7 @@
+static unsigned trim( unsigned count, unsigned first, unsigned incr )
+{
+ return count - (count - first) % incr;
+}
static void FUNC(struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
@@ -5,12 +9,9 @@ static void FUNC(struct draw_pt_front_end *frontend,
unsigned count)
{
struct varray_frontend *varray = (struct varray_frontend *)frontend;
- struct draw_context *draw = varray->draw;
unsigned start = (unsigned)elts;
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i, j, flags;
+ unsigned i, j;
unsigned first, incr;
varray->fetch_start = start;
@@ -27,26 +28,30 @@ static void FUNC(struct draw_pt_front_end *frontend,
case PIPE_PRIM_POINTS:
case PIPE_PRIM_LINES:
case PIPE_PRIM_TRIANGLES:
- j = 0;
- while (j + first <= count) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- varray_flush_linear(varray, start + j, end);
- j += end;
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+
+ for (j = 0; j < count;) {
+ unsigned remaining = count - j;
+ unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr );
+ varray_flush_linear(varray, start + j, nr);
+ j += nr;
+ if (nr != remaining)
+ j -= (first - incr);
}
break;
case PIPE_PRIM_LINE_LOOP:
if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
+ for (i = 1; i < end; i++) {
+ LINE(varray, i - 1, i);
}
- LINE(varray, flags, i - 1, 0);
+ LINE(varray, i - 1, 0);
i = end;
fetch_init(varray, end);
varray_flush(varray);
@@ -54,145 +59,21 @@ static void FUNC(struct draw_pt_front_end *frontend,
}
break;
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1 + (i&1), i + 2 - (i&1));
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- else {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0 + (i&1), i + 1 - (i&1), i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- break;
+ case PIPE_PRIM_POLYGON:
case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- else {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- }
- break;
-
- case PIPE_PRIM_QUADS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 4) {
- QUAD(varray, i + 0, i + 1, i + 2, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
for (j = 0; j + first <= count; j += i) {
unsigned end = MIN2(FETCH_MAX, count - j);
end -= (end % incr);
- for (i = 0; i+3 < end; i += 2) {
- QUAD(varray, i + 2, i + 0, i + 1, i + 3);
+ for (i = 2; i < end; i++) {
+ TRIANGLE(varray, 0, i - 1, i);
}
i = end;
fetch_init(varray, end);
varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
}
break;
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
default:
assert(0);
break;
--
cgit v1.2.3
From bbda45ec769120324f44febf00c6bb170f594f23 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 12 May 2008 19:40:20 +0100
Subject: draw: turn fse path into a middle end
Also add some util functions in pt_util.c
---
src/gallium/auxiliary/draw/Makefile | 2 +
src/gallium/auxiliary/draw/draw_private.h | 2 +-
src/gallium/auxiliary/draw/draw_pt.c | 34 +-
src/gallium/auxiliary/draw/draw_pt.h | 9 +-
.../auxiliary/draw/draw_pt_fetch_shade_emit.c | 39 +-
src/gallium/auxiliary/draw/draw_pt_middle_fse.c | 705 +++++++++++++++++++++
src/gallium/auxiliary/draw/draw_pt_util.c | 103 +++
src/gallium/auxiliary/draw/draw_pt_varray.c | 45 --
src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 2 +-
.../auxiliary/draw/draw_pt_varray_tmp_linear.h | 9 +-
src/gallium/auxiliary/draw/draw_pt_vcache.c | 15 +-
11 files changed, 847 insertions(+), 118 deletions(-)
create mode 100644 src/gallium/auxiliary/draw/draw_pt_middle_fse.c
create mode 100644 src/gallium/auxiliary/draw/draw_pt_util.c
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 68e7744cc5..67d78bdbbd 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -27,8 +27,10 @@ C_SOURCES = \
draw_pt_fetch.c \
draw_pt_fetch_emit.c \
draw_pt_fetch_shade_emit.c \
+ draw_pt_middle_fse.c \
draw_pt_fetch_shade_pipeline.c \
draw_pt_post_vs.c \
+ draw_pt_util.c \
draw_pt_varray.c \
draw_pt_vcache.c \
draw_vertex.c \
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index cbe64cd290..86b901a3c8 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -124,7 +124,7 @@ struct draw_context
struct {
struct {
struct draw_pt_middle_end *fetch_emit;
- /*struct draw_pt_middle_end *fetch_shade_emit;*/
+ struct draw_pt_middle_end *fetch_shade_emit;
struct draw_pt_middle_end *general;
} middle;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index d9e73a2396..91e35db819 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -75,6 +75,8 @@ draw_pt_arrays(struct draw_context *draw,
if (opt == 0)
middle = draw->pt.middle.fetch_emit;
+ else if (opt == PT_SHADE && draw->pt.test_fse)
+ middle = draw->pt.middle.fetch_shade_emit;
else
middle = draw->pt.middle.general;
@@ -83,9 +85,11 @@ draw_pt_arrays(struct draw_context *draw,
*/
if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
frontend = draw->pt.front.vcache;
+#if 0
} else if (opt == PT_SHADE && draw->pt.test_fse) {
/* should be a middle end.. */
frontend = draw->pt.front.fetch_shade_emit;
+#endif
} else {
frontend = draw->pt.front.varray;
}
@@ -105,6 +109,8 @@ draw_pt_arrays(struct draw_context *draw,
boolean draw_pt_init( struct draw_context *draw )
{
+ draw->pt.test_fse = GETENV("DRAW_FSE") != NULL;
+
draw->pt.front.vcache = draw_pt_vcache( draw );
if (!draw->pt.front.vcache)
return FALSE;
@@ -117,8 +123,11 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_emit)
return FALSE;
- draw->pt.test_fse = GETENV("DRAW_FSE") != NULL;
if (draw->pt.test_fse) {
+ draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
+ if (!draw->pt.middle.fetch_shade_emit)
+ return FALSE;
+
draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw );
if (!draw->pt.front.fetch_shade_emit)
return FALSE;
@@ -145,6 +154,11 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_emit = NULL;
}
+ if (draw->pt.middle.fetch_shade_emit) {
+ draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit );
+ draw->pt.middle.fetch_shade_emit = NULL;
+ }
+
if (draw->pt.front.fetch_shade_emit) {
draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit );
draw->pt.front.fetch_shade_emit = NULL;
@@ -163,19 +177,6 @@ void draw_pt_destroy( struct draw_context *draw )
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES
-};
-
/**
* Draw vertex arrays
@@ -188,9 +189,10 @@ void
draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count)
{
- if (reduced_prim[prim] != draw->reduced_prim) {
+ unsigned reduced_prim = draw_pt_reduced_prim(prim);
+ if (reduced_prim != draw->reduced_prim) {
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->reduced_prim = reduced_prim[prim];
+ draw->reduced_prim = reduced_prim;
}
/* drawing done here: */
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index bcd89f6bd6..cdae46b8d2 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -134,7 +134,7 @@ struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw );
* vertex_elements.
*/
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
-//struct draw_pt_middle_end *draw_pt_fetch_shade_emit( struct draw_context *draw );
+struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);
@@ -213,4 +213,11 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
+/*******************************************************************************
+ * Utils:
+ */
+void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+unsigned draw_pt_reduced_prim(unsigned prim);
+
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 9e1d1add36..f756d3e0bb 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -434,43 +434,6 @@ static void fse_prepare( struct draw_pt_front_end *fe,
}
-static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr)
-{
- switch (prim) {
- case PIPE_PRIM_POINTS:
- *first = 1;
- *incr = 1;
- return TRUE;
- case PIPE_PRIM_LINES:
- *first = 2;
- *incr = 2;
- return TRUE;
- case PIPE_PRIM_LINE_STRIP:
- *first = 2;
- *incr = 1;
- return TRUE;
- case PIPE_PRIM_TRIANGLES:
- *first = 3;
- *incr = 3;
- return TRUE;
- case PIPE_PRIM_TRIANGLE_STRIP:
- *first = 3;
- *incr = 1;
- return TRUE;
- case PIPE_PRIM_QUADS:
- *first = 4;
- *incr = 4;
- return TRUE;
- case PIPE_PRIM_QUAD_STRIP:
- *first = 4;
- *incr = 2;
- return TRUE;
- default:
- *first = 0;
- *incr = 1; /* set to one so that count % incr works */
- return FALSE;
- }
-}
@@ -596,7 +559,7 @@ fse_run(struct draw_pt_front_end *fe,
//debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count);
- split_prim_inplace(fse->prim, &first, &incr);
+ draw_pt_split_prim(fse->prim, &first, &incr);
count -= (count - first) % incr;
diff --git a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c
new file mode 100644
index 0000000000..cdb7d260da
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c
@@ -0,0 +1,705 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell
+ */
+
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+
+#include "translate/translate.h"
+
+struct fetch_shade_emit;
+
+struct fse_shader {
+ struct translate_key key;
+
+ void (*run_linear)( const struct fetch_shade_emit *fse,
+ unsigned start,
+ unsigned count,
+ char *buffer );
+
+ void (*run_elts)( const struct fetch_shade_emit *fse,
+ const unsigned *fetch_elts,
+ unsigned fetch_count,
+ char *buffer );
+
+};
+
+/* Prototype fetch, shade, emit-hw-verts all in one go.
+ */
+struct fetch_shade_emit {
+ struct draw_pt_middle_end base;
+ struct draw_context *draw;
+
+ struct translate_key key;
+
+ /* Temporaries:
+ */
+ const float *constants;
+ unsigned pitch[PIPE_MAX_ATTRIBS];
+ const ubyte *src[PIPE_MAX_ATTRIBS];
+ unsigned prim;
+
+ /* Points to one of the three hardwired example shaders, below:
+ */
+ struct fse_shader *active;
+
+ /* Temporary: A list of hard-wired shaders. Of course the plan
+ * would be to generate these for a given (vertex-shader,
+ * translate-key) pair...
+ */
+ struct fse_shader shader[10];
+ int nr_shaders;
+};
+
+
+
+/* Not quite passthrough yet -- we're still running the 'shader' here,
+ * inlined into the vertex fetch function.
+ */
+static void shader0_run_linear( const struct fetch_shade_emit *fse,
+ unsigned start,
+ unsigned count,
+ char *buffer )
+{
+ unsigned i;
+
+ const float *m = fse->constants;
+ const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
+ const ubyte *rgb = fse->src[1] + start * fse->pitch[1];
+ const ubyte *st = fse->src[2] + start * fse->pitch[2];
+
+ float *out = (float *)buffer;
+
+ /* loop over vertices, reading each one's attributes (vertex shader inputs)
+ */
+ for (i = 0; i < count; i++) {
+ {
+ const float *in = (const float *)xyz;
+ const float ix = in[0], iy = in[1], iz = in[2];
+
+ out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12];
+ out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13];
+ out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14];
+ out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15];
+ xyz += fse->pitch[0];
+ }
+
+ {
+ const float *in = (const float *)rgb;
+ out[4] = in[0];
+ out[5] = in[1];
+ out[6] = in[2];
+ out[7] = 1.0f;
+ rgb += fse->pitch[1];
+ }
+
+ {
+ const float *in = (const float *)st;
+ out[8] = in[0];
+ out[9] = in[1];
+ out[10] = 0.0f;
+ out[11] = 1.0f;
+ st += fse->pitch[2];
+ }
+
+ out += 12;
+ }
+}
+
+
+
+static void shader1_run_linear( const struct fetch_shade_emit *fse,
+ unsigned start,
+ unsigned count,
+ char *buffer )
+{
+ unsigned i;
+ const float *m = (const float *)fse->constants;
+ const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
+ const ubyte *rgb = fse->src[1] + start * fse->pitch[1];
+ float *out = (float *)buffer;
+
+// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]);
+
+
+ for (i = 0; i < count; i++) {
+ {
+ const float *in = (const float *)xyz;
+ const float ix = in[0], iy = in[1], iz = in[2];
+
+ out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12];
+ out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13];
+ out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14];
+ out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15];
+ xyz += fse->pitch[0];
+ }
+
+ {
+ const float *in = (const float *)rgb;
+ out[4] = in[0];
+ out[5] = in[1];
+ out[6] = in[2];
+ out[7] = 1.0f;
+ rgb += fse->pitch[1];
+ }
+
+ out += 8;
+ }
+}
+
+
+
+
+static void shader2_run_linear( const struct fetch_shade_emit *fse,
+ unsigned start,
+ unsigned count,
+ char *buffer )
+{
+ unsigned i;
+ const float *m = (const float *)fse->constants;
+ const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
+ const ubyte *rgb = fse->src[1] + start * fse->pitch[1];
+ const float psiz = 1.0;
+ float *out = (float *)buffer;
+
+
+ assert(fse->pitch[1] == 0);
+
+ for (i = 0; i < count; i++) {
+ {
+ const float *in = (const float *)xyz;
+ const float ix = in[0], iy = in[1], iz = in[2];
+
+ out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12];
+ out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13];
+ out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14];
+ out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15];
+ xyz += fse->pitch[0];
+ }
+
+ {
+ const float *in = (const float *)rgb;
+ out[4] = in[0];
+ out[5] = in[1];
+ out[6] = in[2];
+ out[7] = 1.0f;
+ rgb += fse->pitch[1];
+ }
+
+ {
+ out[8] = psiz;
+ }
+
+ out += 9;
+ }
+}
+
+
+
+
+static void shader0_run_elts( const struct fetch_shade_emit *fse,
+ const unsigned *elts,
+ unsigned count,
+ char *buffer )
+{
+ unsigned i;
+ const float *m = fse->constants;
+ float *out = (float *)buffer;
+
+
+ /* loop over vertices, reading each one's attributes (vertex shader inputs)
+ */
+ for (i = 0; i < count; i++) {
+ unsigned elt = elts[i];
+ {
+ const ubyte *xyz = fse->src[0] + elt * fse->pitch[0];
+ const float *in = (const float *)xyz;
+ const float ix = in[0], iy = in[1], iz = in[2];
+
+ out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12];
+ out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13];
+ out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14];
+ out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15];
+ }
+
+ {
+ const ubyte *rgb = fse->src[1] + elt * fse->pitch[1];
+ const float *in = (const float *)rgb;
+ out[4] = in[0];
+ out[5] = in[1];
+ out[6] = in[2];
+ out[7] = 1.0f;
+ }
+
+ {
+ const ubyte *st = fse->src[2] + elt * fse->pitch[2];
+ const float *in = (const float *)st;
+ out[8] = in[0];
+ out[9] = in[1];
+ out[10] = 0.0f;
+ out[11] = 1.0f;
+ }
+
+ out += 12;
+ }
+}
+
+
+
+static void shader1_run_elts( const struct fetch_shade_emit *fse,
+ const unsigned *elts,
+ unsigned count,
+ char *buffer )
+{
+ unsigned i;
+ const float *m = (const float *)fse->constants;
+ float *out = (float *)buffer;
+
+ for (i = 0; i < count; i++) {
+ unsigned elt = elts[i];
+
+ {
+ const ubyte *xyz = fse->src[0] + elt * fse->pitch[0];
+ const float *in = (const float *)xyz;
+ const float ix = in[0], iy = in[1], iz = in[2];
+
+ out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12];
+ out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13];
+ out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14];
+ out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15];
+ xyz += fse->pitch[0];
+ }
+
+ {
+ const ubyte *rgb = fse->src[1] + elt * fse->pitch[1];
+ const float *in = (const float *)rgb;
+ out[4] = in[0];
+ out[5] = in[1];
+ out[6] = in[2];
+ out[7] = 1.0f;
+ rgb += fse->pitch[1];
+ }
+
+ out += 8;
+ }
+}
+
+
+
+
+static void shader2_run_elts( const struct fetch_shade_emit *fse,
+ const unsigned *elts,
+ unsigned count,
+ char *buffer )
+{
+ unsigned i;
+ const float *m = (const float *)fse->constants;
+ const float psiz = 1.0;
+ float *out = (float *)buffer;
+
+ for (i = 0; i < count; i++) {
+ unsigned elt = elts[i];
+ {
+ const ubyte *xyz = fse->src[0] + elt * fse->pitch[0];
+ const float *in = (const float *)xyz;
+ const float ix = in[0], iy = in[1], iz = in[2];
+
+ out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12];
+ out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13];
+ out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14];
+ out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15];
+ }
+
+ {
+ const ubyte *rgb = fse->src[1] + elt * fse->pitch[1];
+ out[4] = rgb[0];
+ out[5] = rgb[1];
+ out[6] = rgb[2];
+ out[7] = 1.0f;
+ }
+
+ {
+ out[8] = psiz;
+ }
+
+ out += 9;
+ }
+}
+
+
+
+/**
+ * Prepare the fetch-shade-emit middle end for a run of primitives.
+ *
+ * Builds fse->key from the bound vertex shader's input/output counts and
+ * from the vertex_info returned by the render backend, selects the
+ * hardcoded "shader" whose key compares equal to it, and records the
+ * source pointer and pitch of every vertex shader input.
+ *
+ * \param middle the middle end; cast to struct fetch_shade_emit
+ * \param prim primitive type passed on to render->set_primitive()
+ * \param opt not referenced in this function
+ *
+ * On any unsupported condition (indexed draw, set_primitive() failure, no
+ * matching shader) the function asserts and returns early; on the
+ * no-match path fse->active is left NULL.
+ */
+static void fse_prepare( struct draw_pt_middle_end *middle,
+ unsigned prim,
+ unsigned opt )
+{
+ struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+ struct draw_context *draw = fse->draw;
+ unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs;
+ unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs;
+ const struct vertex_info *vinfo;
+ unsigned i;
+ /* Only consumed by the "#if 0" block further down. */
+ boolean need_psize = 0;
+
+
+ /* Indexed drawing (user element list) is not handled by this path. */
+ if (draw->pt.user.elts) {
+ assert(0);
+ return ;
+ }
+
+ if (!draw->render->set_primitive( draw->render,
+ prim )) {
+ assert(0);
+ return;
+ }
+
+ /* Must do this after set_primitive() above:
+ */
+ vinfo = draw->render->get_vertex_info(draw->render);
+
+
+
+ fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */
+ num_vs_inputs); /* inputs - fetch from api format */
+
+ /* vinfo->size is multiplied by 4 here -- presumably it is counted in
+ * 4-byte units; confirm against struct vertex_info.
+ */
+ fse->key.output_stride = vinfo->size * 4;
+ /* Only the first nr_elements entries of the key are cleared. */
+ memset(fse->key.element, 0,
+ fse->key.nr_elements * sizeof(fse->key.element[0]));
+
+ /* Input side of the key: one entry per vertex shader input. */
+ for (i = 0; i < num_vs_inputs; i++) {
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+ fse->key.element[i].input_format = src->src_format;
+
+ /* Consider ignoring these at this point, ie make generated
+ * programs independent of this state:
+ */
+ fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index;
+ fse->key.element[i].input_offset = 0; //src->src_offset;
+ }
+
+
+ /* Output side of the key: walk the hw vertex layout, accumulating the
+ * byte offset of each attribute in dst_offset.
+ */
+ {
+ unsigned dst_offset = 0;
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ unsigned emit_sz = 0;
+ unsigned output_format = PIPE_FORMAT_NONE;
+ unsigned vs_output = vinfo->src_index[i];
+
+ switch (vinfo->emit[i]) {
+ case EMIT_4F:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ emit_sz = 4 * sizeof(float);
+ break;
+ case EMIT_3F:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ emit_sz = 3 * sizeof(float);
+ break;
+ case EMIT_2F:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ emit_sz = 2 * sizeof(float);
+ break;
+ case EMIT_1F:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ need_psize = 1;
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ emit_sz = 1 * sizeof(float);
+ /* NOTE(review): this slot index is num_vs_outputs + 1, which can
+ * lie at or beyond key.nr_elements, i.e. outside the range
+ * cleared by the memset above and one past the slot used for
+ * psize in the hardcoded shader[2] key -- confirm intent.
+ */
+ vs_output = num_vs_outputs + 1;
+
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* The elements in the key correspond to vertex shader output
+ * numbers, not to positions in the hw vertex description --
+ * that's handled by the output_offset field.
+ */
+ fse->key.element[vs_output].output_format = output_format;
+ fse->key.element[vs_output].output_offset = dst_offset;
+
+ dst_offset += emit_sz;
+ assert(fse->key.output_stride >= dst_offset);
+ }
+ }
+
+ /* To make psize work, really need to tell the vertex shader to
+ * copy that value from input->output. For 'translate' this was
+ * implicit for all elements.
+ */
+#if 0
+ if (need_psize) {
+ unsigned input = num_vs_inputs + 1;
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+ fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
+ fse->key.element[i].input_buffer = 0; //nr_buffers + 1;
+ fse->key.element[i].input_offset = 0;
+
+ fse->key.nr_elements += 1;
+
+ }
+#endif
+
+ fse->constants = draw->pt.user.constants;
+
+ /* Would normally look up a vertex shader and peruse its list of
+ * variants somehow. We omitted that step and put all the
+ * hardcoded "shaders" into an array. We're just making the
+ * assumption that this happens to be a matching shader... ie
+ * you're running isosurf, aren't you?
+ */
+ fse->active = NULL;
+ /* No break on a hit: if several keys compare equal, the last one wins. */
+ for (i = 0; i < fse->nr_shaders; i++) {
+ if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0)
+ fse->active = &fse->shader[i];
+ }
+
+ if (!fse->active) {
+ assert(0);
+ return ;
+ }
+
+ /* Now set buffer pointers:
+ */
+ for (i = 0; i < num_vs_inputs; i++) {
+ unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
+
+ /* Address of element 0 of this attribute: mapped buffer base plus
+ * the buffer's own offset plus the attribute's offset in a vertex.
+ */
+ fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] +
+ draw->pt.vertex_buffer[buf].buffer_offset +
+ draw->pt.vertex_element[i].src_offset);
+
+ fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch;
+
+ }
+
+
+ //return TRUE;
+}
+
+
+
+
+
+
+
+/**
+ * Non-indexed run: process 'count' consecutive vertices beginning at
+ * 'start' through the active fetch/shade/emit routine and hand the
+ * resulting hardware vertices to the render backend.
+ *
+ * Asserts and returns without drawing if the backend cannot supply a
+ * vertex buffer.
+ */
+static void fse_run_linear( struct draw_pt_middle_end *middle,
+                            unsigned start,
+                            unsigned count )
+{
+   struct fetch_shade_emit *self = (struct fetch_shade_emit *)middle;
+   struct draw_context *dc = self->draw;
+   char *vbuf;
+
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( dc, DRAW_FLUSH_BACKEND );
+
+   /* Both the stride and the vertex count are narrowed to ushort for
+    * the backend allocator.
+    */
+   vbuf = dc->render->allocate_vertices( dc->render,
+                                         (ushort)self->key.output_stride,
+                                         (ushort)count );
+   if (vbuf == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* One routine fetches the vertices, runs the shader and writes the
+    * HW vertices.  Clipping and viewport transformation are done
+    * elsewhere -- either by the API or on hardware, or for some other
+    * reason not required...
+    */
+   self->active->run_linear( self, start, count, vbuf );
+
+   /* Use the draw-arrays entry point so that no index list has to be
+    * re-emitted again and again.
+    */
+   dc->render->draw_arrays( dc->render, 0, count );
+
+   dc->render->release_vertices( dc->render,
+                                 vbuf,
+                                 self->key.output_stride,
+                                 count );
+}
+
+
+/**
+ * Indexed run: fetch and shade the 'fetch_count' vertices named by
+ * 'fetch_elts', emit them as hardware vertices, then draw them using
+ * the 'draw_count' indices in 'draw_elts'.
+ *
+ * Asserts and returns without drawing if the backend cannot supply a
+ * vertex buffer.
+ */
+static void
+fse_run(struct draw_pt_middle_end *middle,
+        const unsigned *fetch_elts,
+        unsigned fetch_count,
+        const ushort *draw_elts,
+        unsigned draw_count )
+{
+   struct fetch_shade_emit *self = (struct fetch_shade_emit *)middle;
+   struct draw_context *dc = self->draw;
+   void *vbuf;
+
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( dc, DRAW_FLUSH_BACKEND );
+
+   /* Both the stride and the vertex count are narrowed to ushort for
+    * the backend allocator.
+    */
+   vbuf = dc->render->allocate_vertices( dc->render,
+                                         (ushort)self->key.output_stride,
+                                         (ushort)fetch_count );
+   if (vbuf == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* One routine fetches the vertices, runs the shader and writes the
+    * HW vertices.
+    */
+   self->active->run_elts( self, fetch_elts, fetch_count, vbuf );
+
+   dc->render->draw( dc->render, draw_elts, draw_count );
+
+   dc->render->release_vertices( dc->render,
+                                 vbuf,
+                                 self->key.output_stride,
+                                 fetch_count );
+}
+
+
+/**
+ * Finish hook of the middle-end interface.  This middle end keeps no
+ * per-run state, so there is nothing to do.
+ */
+static void fse_finish( struct draw_pt_middle_end *middle )
+{
+   (void) middle;
+}
+
+
+/**
+ * Destroy hook: release the fetch_shade_emit object that was handed out
+ * (through its embedded base) by draw_pt_middle_fse().
+ */
+static void
+fse_destroy( struct draw_pt_middle_end *middle )
+{
+   struct fetch_shade_emit *self = (struct fetch_shade_emit *)middle;
+
+   FREE(self);
+}
+
+/**
+ * Create the fetch-shade-emit middle end.
+ *
+ * Allocates a zeroed fetch_shade_emit object, fills in the middle-end
+ * vtable and registers the three hardcoded "shaders" together with the
+ * translate keys that fse_prepare() compares against.
+ *
+ * \param draw owning draw context, stored in the new object
+ * \return pointer to the embedded middle-end base, or NULL if the
+ *         allocation fails
+ */
+struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
+{
+   struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit);
+   if (!fse)
+      return NULL;
+
+   fse->base.prepare = fse_prepare;
+   fse->base.run = fse_run;
+   fse->base.run_linear = fse_run_linear;
+   fse->base.finish = fse_finish;
+   fse->base.destroy = fse_destroy;
+   fse->draw = draw;
+
+   /* Shader 0: three elements, each emitted as a 4-float attribute
+    * (12 floats per vertex).
+    */
+   fse->shader[0].run_linear = shader0_run_linear;
+   fse->shader[0].run_elts = shader0_run_elts;
+   fse->shader[0].key.nr_elements = 3;
+   fse->shader[0].key.output_stride = 12 * sizeof(float);
+
+   fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[0].key.element[0].input_buffer = 0;
+   fse->shader[0].key.element[0].input_offset = 0;
+   fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[0].key.element[0].output_offset = 0;
+
+   fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[0].key.element[1].input_buffer = 0;
+   fse->shader[0].key.element[1].input_offset = 0;
+   fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[0].key.element[1].output_offset = 16;
+
+   /* Third element.  This group used to be written to element[1] a
+    * second time, clobbering the entry above and leaving element[2]
+    * zeroed even though nr_elements is 3.
+    */
+   fse->shader[0].key.element[2].input_format = PIPE_FORMAT_R32G32_FLOAT;
+   fse->shader[0].key.element[2].input_buffer = 0;
+   fse->shader[0].key.element[2].input_offset = 0;
+   fse->shader[0].key.element[2].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[0].key.element[2].output_offset = 32;
+
+   /* Shader 1: two elements, 8 floats per vertex. */
+   fse->shader[1].run_linear = shader1_run_linear;
+   fse->shader[1].run_elts = shader1_run_elts;
+   fse->shader[1].key.nr_elements = 2;
+   fse->shader[1].key.output_stride = 8 * sizeof(float);
+
+   fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[1].key.element[0].input_buffer = 0;
+   fse->shader[1].key.element[0].input_offset = 0;
+   fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[1].key.element[0].output_offset = 0;
+
+   fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[1].key.element[1].input_buffer = 0;
+   fse->shader[1].key.element[1].input_offset = 0;
+   fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[1].key.element[1].output_offset = 16;
+
+   /* Shader 2: two 4-float elements plus a single-float point size,
+    * 9 floats per vertex.
+    */
+   fse->shader[2].run_linear = shader2_run_linear;
+   fse->shader[2].run_elts = shader2_run_elts;
+   fse->shader[2].key.nr_elements = 3;
+   fse->shader[2].key.output_stride = 9 * sizeof(float);
+
+   fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[2].key.element[0].input_buffer = 0;
+   fse->shader[2].key.element[0].input_offset = 0;
+   fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[2].key.element[0].output_offset = 0;
+
+   fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   fse->shader[2].key.element[1].input_buffer = 0;
+   fse->shader[2].key.element[1].input_offset = 0;
+   fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   fse->shader[2].key.element[1].output_offset = 16;
+
+   /* psize is special
+    *    -- effectively add it here as another input!?!
+    *    -- who knows how to add it as a buffer?
+    */
+   fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT;
+   fse->shader[2].key.element[2].input_buffer = 0;
+   fse->shader[2].key.element[2].input_offset = 0;
+   fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT;
+   fse->shader[2].key.element[2].output_offset = 32;
+
+   fse->nr_shaders = 3;
+
+   return &fse->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
new file mode 100644
index 0000000000..32c8a9632c
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -0,0 +1,103 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell
+ */
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+/**
+ * Report how a primitive type consumes vertices.
+ *
+ * \param prim  one of the PIPE_PRIM_x values
+ * \param first receives the number of vertices needed for the first
+ *              primitive
+ * \param incr  receives the number of additional vertices needed for
+ *              each further primitive
+ *
+ * An unknown primitive type asserts; it then yields first = 0 and
+ * incr = 1.
+ */
+void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
+{
+   unsigned initial;
+   unsigned step;
+
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+      initial = 1;
+      step = 1;
+      break;
+   case PIPE_PRIM_LINES:
+      initial = 2;
+      step = 2;
+      break;
+   case PIPE_PRIM_LINE_STRIP:
+   case PIPE_PRIM_LINE_LOOP:
+      initial = 2;
+      step = 1;
+      break;
+   case PIPE_PRIM_TRIANGLES:
+      initial = 3;
+      step = 3;
+      break;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+   case PIPE_PRIM_TRIANGLE_FAN:
+   case PIPE_PRIM_POLYGON:
+      initial = 3;
+      step = 1;
+      break;
+   case PIPE_PRIM_QUADS:
+      initial = 4;
+      step = 4;
+      break;
+   case PIPE_PRIM_QUAD_STRIP:
+      initial = 4;
+      step = 2;
+      break;
+   default:
+      assert(0);
+      initial = 0;
+      step = 1;       /* set to one so that count % incr works */
+      break;
+   }
+
+   *first = initial;
+   *incr = step;
+}
+
+
+/**
+ * Map a primitive type onto the basic primitive it decomposes into:
+ * points stay points, every line variant becomes lines, and every
+ * triangle, quad or polygon variant becomes triangles.
+ *
+ * An unknown primitive type asserts; it then maps to PIPE_PRIM_POINTS.
+ */
+unsigned draw_pt_reduced_prim(unsigned prim)
+{
+   unsigned reduced;
+
+   switch (prim) {
+   case PIPE_PRIM_POINTS:
+      reduced = PIPE_PRIM_POINTS;
+      break;
+   case PIPE_PRIM_LINES:
+   case PIPE_PRIM_LINE_STRIP:
+   case PIPE_PRIM_LINE_LOOP:
+      reduced = PIPE_PRIM_LINES;
+      break;
+   case PIPE_PRIM_TRIANGLES:
+   case PIPE_PRIM_TRIANGLE_STRIP:
+   case PIPE_PRIM_TRIANGLE_FAN:
+   case PIPE_PRIM_POLYGON:
+   case PIPE_PRIM_QUADS:
+   case PIPE_PRIM_QUAD_STRIP:
+      reduced = PIPE_PRIM_TRIANGLES;
+      break;
+   default:
+      assert(0);
+      reduced = PIPE_PRIM_POINTS;
+      break;
+   }
+
+   return reduced;
+}
+
+
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index d92ad4fda1..af6e2d5157 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -106,51 +106,6 @@ static INLINE void fetch_init(struct varray_frontend *varray,
}
-static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr)
-{
- switch (prim) {
- case PIPE_PRIM_POINTS:
- *first = 1;
- *incr = 1;
- return TRUE;
- case PIPE_PRIM_LINES:
- *first = 2;
- *incr = 2;
- return TRUE;
- case PIPE_PRIM_LINE_STRIP:
- *first = 2;
- *incr = 1;
- return TRUE;
- case PIPE_PRIM_TRIANGLES:
- *first = 3;
- *incr = 3;
- return TRUE;
- case PIPE_PRIM_TRIANGLE_STRIP:
- *first = 3;
- *incr = 1;
- return TRUE;
- case PIPE_PRIM_TRIANGLE_FAN:
- *first = 3;
- *incr = 1;
- return TRUE;
- case PIPE_PRIM_QUADS:
- *first = 4;
- *incr = 4;
- return TRUE;
- case PIPE_PRIM_QUAD_STRIP:
- *first = 4;
- *incr = 2;
- return TRUE;
- case PIPE_PRIM_POLYGON:
- *first = 3;
- *incr = 1;
- return TRUE;
- default:
- *first = 0;
- *incr = 1; /* set to one so that count % incr works */
- return FALSE;
- }
-}
static INLINE void add_draw_el(struct varray_frontend *varray,
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
index 1395275897..6979f6b544 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
@@ -15,7 +15,7 @@ static void FUNC(struct draw_pt_front_end *frontend,
varray->fetch_start = start;
- split_prim_inplace(varray->input_prim, &first, &incr);
+ draw_pt_split_prim(varray->input_prim, &first, &incr);
#if 0
debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index b6f1f0cadc..114ed371a0 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -16,7 +16,13 @@ static void FUNC(struct draw_pt_front_end *frontend,
varray->fetch_start = start;
- split_prim_inplace(varray->input_prim, &first, &incr);
+ draw_pt_split_prim(varray->input_prim, &first, &incr);
+
+ /* Sanitize primitive length:
+ */
+ count = trim(count, first, incr);
+ if (count < first)
+ return;
#if 0
debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
@@ -32,7 +38,6 @@ static void FUNC(struct draw_pt_front_end *frontend,
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_QUADS:
case PIPE_PRIM_QUAD_STRIP:
-
for (j = 0; j < count;) {
unsigned remaining = count - j;
unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr );
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 6b3fb1406b..a3495f2a30 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -204,19 +204,6 @@ static void vcache_ef_quad( struct vcache_frontend *vcache,
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLES
-};
-
static void vcache_prepare( struct draw_pt_front_end *frontend,
@@ -236,7 +223,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
}
vcache->input_prim = prim;
- vcache->output_prim = reduced_prim[prim];
+ vcache->output_prim = draw_pt_reduced_prim(prim);
vcache->middle = middle;
middle->prepare( middle, vcache->output_prim, opt );
--
cgit v1.2.3
From 1c624846a81b0218b4a07328f485e295432c6312 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Tue, 13 May 2008 16:06:09 -0400
Subject: decomposition from keith, adds decomposition of more prim to the
pipeline
---
src/gallium/auxiliary/draw/draw_pipe.c | 74 ++++++++----
src/gallium/auxiliary/draw/draw_pt_decompose.h | 153 +++++++++++++++++++++++++
src/gallium/auxiliary/draw/draw_pt_varray.c | 11 +-
src/gallium/auxiliary/draw/draw_pt_vcache.c | 117 +++++++++----------
4 files changed, 263 insertions(+), 92 deletions(-)
create mode 100644 src/gallium/auxiliary/draw/draw_pt_decompose.h
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index cb97f955b2..1d26706dee 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -212,6 +212,55 @@ void draw_pipeline_run( struct draw_context *draw,
draw->pipeline.vertex_count = 0;
}
+#define QUAD(i0,i1,i2,i3) \
+ do_triangle( draw, \
+ ( DRAW_PIPE_RESET_STIPPLE | \
+ DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_2 ), \
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1), \
+ verts + stride * (i3)); \
+ do_triangle( draw, \
+ ( DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_1 ), \
+ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i2), \
+ verts + stride * (i3))
+
+/* Emit one triangle from three vertex indices into 'verts'; 'flags' is
+ * passed through to do_triangle() unchanged.
+ */
+#define TRIANGLE(flags,i0,i1,i2)                                 \
+   do_triangle( draw,                                            \
+                (flags),                                         \
+                verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK),  \
+                verts + stride * (i1),                           \
+                verts + stride * (i2))
+
+/* Emit one line segment between vertex indices i0 and i1.  The second
+ * endpoint must come from the i1 argument: the previous expansion used
+ * the caller's loop variable (i+1) instead, which picked the wrong
+ * vertex for line strips and for the closing segment of line loops.
+ */
+#define LINE(flags,i0,i1)                                        \
+   do_line( draw,                                                \
+            (flags),                                             \
+            verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK),      \
+            verts + stride * (i1))
+
+/* Emit one point.  The index argument is parenthesized so that an
+ * expression such as "i + 1" is multiplied by the stride as a whole.
+ */
+#define POINT(i0)                                                \
+   do_point( draw,                                               \
+             verts + stride * (i0) )
+
+/* Instantiate the decomposition template from draw_pt_decompose.h as
+ * pipe_run_linear().  The template appends an "unsigned count" parameter
+ * after ARGS.
+ */
+#define FUNC pipe_run_linear
+#define ARGS \
+ struct draw_context *draw, \
+ unsigned prim, \
+ struct vertex_header *vertices, \
+ unsigned stride
+
+/* Locals the template body and the emit macros rely on: the vertex
+ * buffer as a byte pointer, the flatshade-first flag, and the loop
+ * index / per-primitive flags.
+ */
+#define LOCAL_VARS \
+ char *verts = (char *)vertices; \
+ boolean flatfirst = (draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first); \
+ unsigned i, flags
+
+/* Expands to nothing: this instantiation has nothing to flush at the
+ * end of the template function.
+ */
+#define FLUSH
+
+#include "draw_pt_decompose.h"
+
void draw_pipeline_run_linear( struct draw_context *draw,
unsigned prim,
struct vertex_header *vertices,
@@ -219,34 +268,11 @@ void draw_pipeline_run_linear( struct draw_context *draw,
unsigned stride )
{
char *verts = (char *)vertices;
- unsigned i;
-
draw->pipeline.verts = verts;
draw->pipeline.vertex_stride = stride;
draw->pipeline.vertex_count = count;
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i++)
- do_point( draw,
- verts + stride * i );
- break;
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2)
- do_line( draw,
- i+0, /* flags */
- verts + stride * ((i+0) & ~DRAW_PIPE_FLAG_MASK),
- verts + stride * (i+1));
- break;
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3)
- do_triangle( draw,
- (i+0), /* flags */
- verts + stride * ((i+0) & ~DRAW_PIPE_FLAG_MASK),
- verts + stride * (i+1),
- verts + stride * (i+2));
- break;
- }
+ pipe_run_linear(draw, prim, vertices, stride, count);
draw->pipeline.verts = NULL;
draw->pipeline.vertex_count = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
new file mode 100644
index 0000000000..dccfde99dd
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -0,0 +1,153 @@
+
+
+/* Primitive decomposition template.
+ *
+ * The including file must define FUNC (function name), ARGS (leading
+ * parameter list), LOCAL_VARS (local declarations), the emit macros
+ * POINT, LINE, TRIANGLE and QUAD, and FLUSH (statement run at the end).
+ * Between them, ARGS and LOCAL_VARS must make 'prim', 'i', 'flags' and
+ * 'flatfirst' visible, since the body below uses all four.
+ *
+ * The generated function walks 'count' vertices of primitive type
+ * 'prim' and emits them as points, lines, triangles or quads through
+ * those macros.
+ */
+static void FUNC( ARGS,
+ unsigned count )
+{
+ LOCAL_VARS;
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i ++) {
+ POINT( (i + 0) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ /* Independent segments; a trailing unpaired vertex is ignored. */
+ for (i = 0; i+1 < count; i += 2) {
+ LINE( DRAW_PIPE_RESET_STIPPLE,
+ (i + 0),
+ (i + 1));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ /* Only the first segment resets the stipple; the loop increment
+ * clears 'flags' for every later segment.
+ */
+ flags = DRAW_PIPE_RESET_STIPPLE;
+
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( flags,
+ (i - 1),
+ (i ));
+ }
+
+ /* Closing segment: from the last vertex back to vertex 0. */
+ LINE( flags,
+ (i - 1),
+ (0 ));
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ /* As for the loop case, but without the closing segment. */
+ flags = DRAW_PIPE_RESET_STIPPLE;
+ for (i = 1; i < count; i++, flags = 0) {
+ LINE( flags,
+ (i - 1),
+ (i ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 0; i+2 < count; i += 3) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0),
+ (i + 1),
+ (i + 2 ));
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ /* (i&1) swaps two of the three indices on every odd triangle.
+ * With flatfirst the first index stays i+0 and the last two are
+ * swapped; otherwise the last index stays i+2 and the first two
+ * are swapped.
+ */
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0),
+ (i + 1 + (i&1)),
+ (i + 2 - (i&1)));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0 + (i&1)),
+ (i + 1 - (i&1)),
+ (i + 2 ));
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ /* Vertex 0 is shared by every triangle; flatfirst submits it
+ * last (i+1, i+2, 0), otherwise it is submitted first.
+ */
+ if (flatfirst) {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 1),
+ (i + 2),
+ (0 ));
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (0),
+ (i + 1),
+ (i + 2 ));
+ }
+ }
+ }
+ break;
+
+
+ case PIPE_PRIM_QUADS:
+ /* Independent quads; leftover vertices (fewer than 4) are ignored. */
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD( (i + 0),
+ (i + 1),
+ (i + 2),
+ (i + 3));
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ /* Each step of two vertices adds one quad, submitted in the order
+ * (i+2, i+0, i+1, i+3).
+ */
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD( (i + 2),
+ (i + 0),
+ (i + 1),
+ (i + 3));
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ {
+ /* These bitflags look a little odd because we submit the
+ * vertices as (1,2,0) to satisfy flatshade requirements.
+ */
+ const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2;
+ const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0;
+ const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1;
+
+ /* The first triangle resets the stipple and carries the first
+ * edge flag; the loop increment resets 'flags' to edge_middle for
+ * each later triangle.
+ */
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+
+ for (i = 0; i+2 < count; i++, flags = edge_middle) {
+
+ /* Final triangle of the polygon: also flag the last edge. */
+ if (i + 3 == count)
+ flags |= edge_last;
+
+ TRIANGLE( flags,
+ (i + 1),
+ (i + 2),
+ (0));
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ FLUSH;
+}
+
+
+#undef TRIANGLE
+#undef QUAD
+#undef POINT
+#undef LINE
+#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index af6e2d5157..06fd866ccd 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -141,14 +141,19 @@ static INLINE void varray_point( struct varray_frontend *varray,
}
-
+#if 0
+#define TRIANGLE(flags,i0,i1,i2) varray_triangle(varray,i0,i1,i2)
+#define LINE(flags,i0,i1) varray_line(varray,i0,i1)
+#define POINT(i0) varray_point(varray,i0)
+#define FUNC varray_decompose
+#include "draw_pt_decompose.h"
+#else
#define TRIANGLE(vc,i0,i1,i2) varray_triangle(vc,i0,i1,i2)
#define LINE(vc,i0,i1) varray_line(vc,i0,i1)
#define POINT(vc,i0) varray_point(vc,i0)
#define FUNC varray_run
#include "draw_pt_varray_tmp_linear.h"
-
-
+#endif
static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = {
PIPE_PRIM_POINTS,
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index a3495f2a30..6c17edba34 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -104,22 +104,10 @@ static INLINE void vcache_elt( struct vcache_frontend *vcache,
static void vcache_triangle( struct vcache_frontend *vcache,
+ ushort flags,
unsigned i0,
unsigned i1,
unsigned i2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static void vcache_triangle_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
{
vcache_elt(vcache, i0, flags);
vcache_elt(vcache, i1, 0);
@@ -128,19 +116,9 @@ static void vcache_triangle_flags( struct vcache_frontend *vcache,
}
static void vcache_line( struct vcache_frontend *vcache,
+ ushort flags,
unsigned i0,
unsigned i1 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static void vcache_line_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1 )
{
vcache_elt(vcache, i0, flags);
vcache_elt(vcache, i1, 0);
@@ -161,46 +139,63 @@ static void vcache_quad( struct vcache_frontend *vcache,
unsigned i2,
unsigned i3 )
{
- vcache_triangle( vcache, i0, i1, i3 );
- vcache_triangle( vcache, i1, i2, i3 );
-}
-
-static void vcache_ef_quad( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2,
- unsigned i3 )
-{
- const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2;
- const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1;
-
- vcache_triangle_flags( vcache,
- DRAW_PIPE_RESET_STIPPLE | omitEdge1,
- i0, i1, i3 );
-
- vcache_triangle_flags( vcache,
- omitEdge2,
- i1, i2, i3 );
+ vcache_triangle( vcache,
+ ( DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2 ),
+ i0, i1, i3 );
+
+ vcache_triangle( vcache,
+ ( DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1 ),
+ i1, i2, i3 );
}
/* At least for now, we're back to using a template include file for
* this. The two paths aren't too different though - it may be
* possible to reunify them.
*/
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1)
-#define POINT(vc,i0) vcache_point(vc,i0)
-#define FUNC vcache_run_extras
-#include "draw_pt_vcache_tmp.h"
-
-#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
-#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3)
-#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1)
-#define POINT(vc,i0) vcache_point(vc,i0)
+#define TRIANGLE(flags,i0,i1,i2) \
+ vcache_triangle(vcache, \
+ flags, \
+ get_elt(elts,i0), \
+ get_elt(elts,i1), \
+ get_elt(elts,i2))
+
+#define QUAD(i0,i1,i2,i3) \
+ vcache_quad(vcache, \
+ get_elt(elts,i0), \
+ get_elt(elts,i1), \
+ get_elt(elts,i2), \
+ get_elt(elts,i3))
+
+#define LINE(flags,i0,i1) \
+ vcache_line(vcache, \
+ flags, \
+ get_elt(elts,i0), \
+ get_elt(elts,i1))
+
+#define POINT(i0) \
+ vcache_point(vcache, \
+ get_elt(elts,i0))
+
#define FUNC vcache_run
-#include "draw_pt_vcache_tmp.h"
+#define ARGS \
+ struct draw_pt_front_end *frontend, \
+ pt_elt_func get_elt, \
+ const void *elts
+
+#define LOCAL_VARS \
+ struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; \
+ struct draw_context *draw = vcache->draw; \
+ boolean flatfirst = (draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first); \
+ unsigned prim = vcache->input_prim; \
+ unsigned i, flags;
+#define FLUSH vcache_flush( vcache )
+
+#include "draw_pt_decompose.h"
@@ -213,15 +208,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend,
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- if (opt & PT_PIPELINE)
- {
- vcache->base.run = vcache_run_extras;
- }
- else
- {
- vcache->base.run = vcache_run;
- }
-
+ vcache->base.run = vcache_run;
vcache->input_prim = prim;
vcache->output_prim = draw_pt_reduced_prim(prim);
--
cgit v1.2.3
From 8b25b5256fad23e8ea11c6931ecac658ca60c0b0 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Tue, 13 May 2008 09:46:53 +0100
Subject: draw: remove disabled non-sse swizzle code
---
src/gallium/auxiliary/draw/draw_vs_sse.c | 50 +-------------------------------
1 file changed, 1 insertion(+), 49 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index e3f4e67472..edf235cddc 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -47,9 +47,7 @@
#include "tgsi/util/tgsi_parse.h"
#define SSE_MAX_VERTICES 4
-#define SSE_SWIZZLES 1
-#if SSE_SWIZZLES
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input, /* 1 */
struct tgsi_exec_vector *output, /* 2 */
@@ -62,14 +60,6 @@ typedef void (XSTDCALL *codegen_function) (
float (*aos_output)[4], /* 9 */
uint num_outputs, /* 10 */
uint output_stride ); /* 11 */
-#else
-typedef void (XSTDCALL *codegen_function) (
- const struct tgsi_exec_vector *input,
- struct tgsi_exec_vector *output,
- float (*constant)[4],
- struct tgsi_exec_vector *temporary,
- float (*immediates)[4] );
-#endif
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
@@ -111,7 +101,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
-#if SSE_SWIZZLES
/* run compiled shader
*/
shader->func(machine->Inputs,
@@ -128,43 +117,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
output = (float (*)[4])((char *)output + output_stride * max_vertices);
-#else
- unsigned int j, slot;
-
- /* Swizzle inputs.
- */
- for (j = 0; j < max_vertices; j++) {
- for (slot = 0; slot < base->info.num_inputs; slot++) {
- machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
- machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
- machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
- machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
- }
-
- input = (const float (*)[4])((const char *)input + input_stride);
- }
-
- /* run compiled shader
- */
- shader->func(machine->Inputs,
- machine->Outputs,
- (float (*)[4])constants,
- machine->Temps,
- shader->immediates);
-
- /* Unswizzle all output results.
- */
- for (j = 0; j < max_vertices; j++) {
- for (slot = 0; slot < base->info.num_outputs; slot++) {
- output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
-
- output = (float (*)[4])((char *)output + output_stride);
- }
-#endif
}
}
@@ -211,7 +163,7 @@ draw_create_vs_sse(struct draw_context *draw,
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
- &vs->sse2_program, vs->immediates, SSE_SWIZZLES ))
+ &vs->sse2_program, vs->immediates, TRUE ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
--
cgit v1.2.3
From b23706454bb165a62888d264e95a98a2e4cf139c Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Tue, 13 May 2008 13:35:14 +0100
Subject: draw: get rid of fetch-shade-emit frontend hack
The code is now living in its intended place as a pt middle end.
---
src/gallium/auxiliary/draw/Makefile | 1 -
src/gallium/auxiliary/draw/draw_private.h | 1 -
src/gallium/auxiliary/draw/draw_pt.c | 14 -
src/gallium/auxiliary/draw/draw_pt.h | 1 -
.../auxiliary/draw/draw_pt_fetch_shade_emit.c | 677 ---------------------
5 files changed, 694 deletions(-)
delete mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 67d78bdbbd..3053682da8 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -26,7 +26,6 @@ C_SOURCES = \
draw_pt_emit.c \
draw_pt_fetch.c \
draw_pt_fetch_emit.c \
- draw_pt_fetch_shade_emit.c \
draw_pt_middle_fse.c \
draw_pt_fetch_shade_pipeline.c \
draw_pt_post_vs.c \
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 86b901a3c8..fd51a57781 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -131,7 +131,6 @@ struct draw_context
struct {
struct draw_pt_front_end *vcache;
struct draw_pt_front_end *varray;
- struct draw_pt_front_end *fetch_shade_emit; /* temp hack */
} front;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 91e35db819..75f44d503e 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -85,11 +85,6 @@ draw_pt_arrays(struct draw_context *draw,
*/
if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
frontend = draw->pt.front.vcache;
-#if 0
- } else if (opt == PT_SHADE && draw->pt.test_fse) {
- /* should be a middle end.. */
- frontend = draw->pt.front.fetch_shade_emit;
-#endif
} else {
frontend = draw->pt.front.varray;
}
@@ -127,10 +122,6 @@ boolean draw_pt_init( struct draw_context *draw )
draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
if (!draw->pt.middle.fetch_shade_emit)
return FALSE;
-
- draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw );
- if (!draw->pt.front.fetch_shade_emit)
- return FALSE;
}
@@ -159,11 +150,6 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_shade_emit = NULL;
}
- if (draw->pt.front.fetch_shade_emit) {
- draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit );
- draw->pt.front.fetch_shade_emit = NULL;
- }
-
if (draw->pt.front.vcache) {
draw->pt.front.vcache->destroy( draw->pt.front.vcache );
draw->pt.front.vcache = NULL;
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index cdae46b8d2..e03816ebbc 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -121,7 +121,6 @@ const void *draw_pt_elt_ptr( struct draw_context *draw,
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
-struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw );
/* Middle-ends:
*
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
deleted file mode 100644
index f756d3e0bb..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ /dev/null
@@ -1,677 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell
- */
-
-
-#include "pipe/p_util.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_vbuf.h"
-#include "draw/draw_vertex.h"
-#include "draw/draw_pt.h"
-#include "draw/draw_vs.h"
-
-#include "translate/translate.h"
-
-struct fetch_shade_emit;
-
-struct fse_shader {
- struct translate_key key;
-
- void (*run_linear)( const struct fetch_shade_emit *fse,
- unsigned start,
- unsigned count,
- char *buffer );
-};
-
-/* Prototype fetch, shade, emit-hw-verts all in one go.
- */
-struct fetch_shade_emit {
- struct draw_pt_front_end base;
-
- struct draw_context *draw;
-
- struct translate_key key;
-
- /* Temporaries:
- */
- const float *constants;
- unsigned pitch[PIPE_MAX_ATTRIBS];
- const ubyte *src[PIPE_MAX_ATTRIBS];
- unsigned prim;
-
- /* Points to one of the three hardwired example shaders, below:
- */
- struct fse_shader *active;
-
- /* Temporary: A list of hard-wired shaders. Of course the plan
- * would be to generate these for a given (vertex-shader,
- * translate-key) pair...
- */
- struct fse_shader shader[10];
- int nr_shaders;
-};
-
-
-
-/* Not quite passthrough yet -- we're still running the 'shader' here,
- * inlined into the vertex fetch function.
- */
-static void fetch_xyz_rgb_st( const struct fetch_shade_emit *fse,
- unsigned start,
- unsigned count,
- char *buffer )
-{
- unsigned i;
-
- const float *m = fse->constants;
- const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12];
- const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13];
- const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14];
- const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15];
-
- const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
- const ubyte *st = fse->src[2] + start * fse->pitch[2];
-
- float *out = (float *)buffer;
-
-
- assert(fse->pitch[1] == 0);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
- for (i = 0; i < count; i++) {
- {
- const float *in = (const float *)xyz;
- const float ix = in[0], iy = in[1], iz = in[2];
-
- out[0] = m0 * ix + m4 * iy + m8 * iz + m12;
- out[1] = m1 * ix + m5 * iy + m9 * iz + m13;
- out[2] = m2 * ix + m6 * iy + m10 * iz + m14;
- out[3] = m3 * ix + m7 * iy + m11 * iz + m15;
- xyz += fse->pitch[0];
- }
-
- {
- out[4] = 1.0f;
- out[5] = 1.0f;
- out[6] = 1.0f;
- out[7] = 1.0f;
- }
-
- {
- const float *in = (const float *)st; st += fse->pitch[2];
- out[8] = in[0];
- out[9] = in[1];
- out[10] = 0.0f;
- out[11] = 1.0f;
- }
-
- out += 12;
- }
-}
-
-
-
-static void fetch_xyz_rgb( const struct fetch_shade_emit *fse,
- unsigned start,
- unsigned count,
- char *buffer )
-{
- unsigned i;
-
- const float *m = (const float *)fse->constants;
- const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12];
- const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13];
- const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14];
- const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15];
-
- const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
- const ubyte *rgb = fse->src[1] + start * fse->pitch[1];
-
- float *out = (float *)buffer;
-
-// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]);
-
-
- for (i = 0; i < count; i++) {
- {
- const float *in = (const float *)xyz;
- const float ix = in[0], iy = in[1], iz = in[2];
-
- out[0] = m0 * ix + m4 * iy + m8 * iz + m12;
- out[1] = m1 * ix + m5 * iy + m9 * iz + m13;
- out[2] = m2 * ix + m6 * iy + m10 * iz + m14;
- out[3] = m3 * ix + m7 * iy + m11 * iz + m15;
- xyz += fse->pitch[0];
- }
-
- {
- const float *in = (const float *)rgb;
- out[4] = in[0];
- out[5] = in[1];
- out[6] = in[2];
- out[7] = 1.0f;
- rgb += fse->pitch[1];
- }
-
- out += 8;
- }
-}
-
-
-
-
-static void fetch_xyz_rgb_psiz( const struct fetch_shade_emit *fse,
- unsigned start,
- unsigned count,
- char *buffer )
-{
- unsigned i;
-
- const float *m = (const float *)fse->constants;
- const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12];
- const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13];
- const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14];
- const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15];
-
- const ubyte *xyz = fse->src[0] + start * fse->pitch[0];
- const float *rgb = (const float *)(fse->src[1] + start * fse->pitch[1]);
- const float psiz = 1.0;
-
- float *out = (float *)buffer;
-
-
- assert(fse->pitch[1] == 0);
-
- for (i = 0; i < count; i++) {
- {
- const float *in = (const float *)xyz;
- const float ix = in[0], iy = in[1], iz = in[2];
-
- out[0] = m0 * ix + m4 * iy + m8 * iz + m12;
- out[1] = m1 * ix + m5 * iy + m9 * iz + m13;
- out[2] = m2 * ix + m6 * iy + m10 * iz + m14;
- out[3] = m3 * ix + m7 * iy + m11 * iz + m15;
- xyz += fse->pitch[0];
- }
-
- {
- out[4] = rgb[0];
- out[5] = rgb[1];
- out[6] = rgb[2];
- out[7] = 1.0f;
- }
-
- {
- out[8] = psiz;
- }
-
- out += 9;
- }
-}
-
-
-
-
-static boolean set_prim( struct fetch_shade_emit *fse,
- unsigned prim,
- unsigned count )
-{
- struct draw_context *draw = fse->draw;
-
- fse->prim = prim;
-
- switch (prim) {
- case PIPE_PRIM_LINE_LOOP:
- if (count > 1024)
- return FALSE;
- draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP );
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- case PIPE_PRIM_POLYGON:
- if (count > 1024)
- return FALSE;
- draw->render->set_primitive( draw->render, prim );
- break;
-
- case PIPE_PRIM_QUADS:
- case PIPE_PRIM_QUAD_STRIP:
- draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES );
- break;
-
- default:
- draw->render->set_primitive( draw->render, prim );
- break;
- }
-
- return TRUE;
-}
-
-
-
-
-
-
-static void fse_prepare( struct draw_pt_front_end *fe,
- unsigned prim,
- struct draw_pt_middle_end *unused,
- unsigned opt )
-{
- struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe;
- struct draw_context *draw = fse->draw;
- unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs;
- unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs;
- const struct vertex_info *vinfo;
- unsigned i;
- boolean need_psize = 0;
-
-
- if (draw->pt.user.elts) {
- assert(0);
- return ;
- }
-
- if (!set_prim(fse, prim, /*count*/1022 )) {
- assert(0);
- return ;
- }
-
- /* Must do this after set_primitive() above:
- */
- vinfo = draw->render->get_vertex_info(draw->render);
-
-
-
- fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */
- num_vs_inputs); /* inputs - fetch from api format */
-
- fse->key.output_stride = vinfo->size * 4;
- memset(fse->key.element, 0,
- fse->key.nr_elements * sizeof(fse->key.element[0]));
-
- for (i = 0; i < num_vs_inputs; i++) {
- const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
- fse->key.element[i].input_format = src->src_format;
-
- /* Consider ignoring these at this point, ie make generated
- * programs independent of this state:
- */
- fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index;
- fse->key.element[i].input_offset = 0; //src->src_offset;
- }
-
-
- {
- unsigned dst_offset = 0;
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- unsigned emit_sz = 0;
- unsigned output_format = PIPE_FORMAT_NONE;
- unsigned vs_output = vinfo->src_index[i];
-
- switch (vinfo->emit[i]) {
- case EMIT_4F:
- output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- emit_sz = 4 * sizeof(float);
- break;
- case EMIT_3F:
- output_format = PIPE_FORMAT_R32G32B32_FLOAT;
- emit_sz = 3 * sizeof(float);
- break;
- case EMIT_2F:
- output_format = PIPE_FORMAT_R32G32_FLOAT;
- emit_sz = 2 * sizeof(float);
- break;
- case EMIT_1F:
- output_format = PIPE_FORMAT_R32_FLOAT;
- emit_sz = 1 * sizeof(float);
- break;
- case EMIT_1F_PSIZE:
- need_psize = 1;
- output_format = PIPE_FORMAT_R32_FLOAT;
- emit_sz = 1 * sizeof(float);
- vs_output = num_vs_outputs + 1;
-
- break;
- default:
- assert(0);
- break;
- }
-
- /* The elements in the key correspond to vertex shader output
- * numbers, not to positions in the hw vertex description --
- * that's handled by the output_offset field.
- */
- fse->key.element[vs_output].output_format = output_format;
- fse->key.element[vs_output].output_offset = dst_offset;
-
- dst_offset += emit_sz;
- assert(fse->key.output_stride >= dst_offset);
- }
- }
-
- /* To make psize work, really need to tell the vertex shader to
- * copy that value from input->output. For 'translate' this was
- * implicit for all elements.
- */
-#if 0
- if (need_psize) {
- unsigned input = num_vs_inputs + 1;
- const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
- fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
- fse->key.element[i].input_buffer = 0; //nr_buffers + 1;
- fse->key.element[i].input_offset = 0;
-
- fse->key.nr_elements += 1;
-
- }
-#endif
-
- fse->constants = draw->pt.user.constants;
-
- /* Would normally look up a vertex shader and peruse its list of
- * varients somehow. We omitted that step and put all the
- * hardcoded "shaders" into an array. We're just making the
- * assumption that this happens to be a matching shader... ie
- * you're running isosurf, aren't you?
- */
- fse->active = NULL;
- for (i = 0; i < fse->nr_shaders; i++) {
- if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0)
- fse->active = &fse->shader[i];
- }
-
- if (!fse->active) {
- assert(0);
- return ;
- }
-
- /* Now set buffer pointers:
- */
- for (i = 0; i < num_vs_inputs; i++) {
- unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
-
- fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] +
- draw->pt.vertex_buffer[buf].buffer_offset +
- draw->pt.vertex_element[i].src_offset);
-
- fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch;
-
- }
-
-
- //return TRUE;
-}
-
-
-
-
-
-
-#define INDEX(i) (start + (i))
-static void fse_render_linear( struct vbuf_render *render,
- unsigned prim,
- unsigned start,
- unsigned length )
-{
- ushort *tmp = NULL;
- unsigned i, j;
-
- switch (prim) {
- case PIPE_PRIM_LINE_LOOP:
- tmp = MALLOC( sizeof(ushort) * (length + 1) );
-
- for (i = 0; i < length; i++)
- tmp[i] = INDEX(i);
- tmp[length] = 0;
-
- render->draw( render,
- tmp,
- length+1 );
- break;
-
-
- case PIPE_PRIM_QUAD_STRIP:
- tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) );
-
- for (j = i = 0; i + 3 < length; i += 2, j += 6) {
- tmp[j+0] = INDEX(i+0);
- tmp[j+1] = INDEX(i+1);
- tmp[j+2] = INDEX(i+3);
-
- tmp[j+3] = INDEX(i+2);
- tmp[j+4] = INDEX(i+0);
- tmp[j+5] = INDEX(i+3);
- }
-
- if (j)
- render->draw( render, tmp, j );
- break;
-
- case PIPE_PRIM_QUADS:
- tmp = MALLOC( sizeof(int) * (length / 4 * 6) );
-
- for (j = i = 0; i + 3 < length; i += 4, j += 6) {
- tmp[j+0] = INDEX(i+0);
- tmp[j+1] = INDEX(i+1);
- tmp[j+2] = INDEX(i+3);
-
- tmp[j+3] = INDEX(i+1);
- tmp[j+4] = INDEX(i+2);
- tmp[j+5] = INDEX(i+3);
- }
-
- if (j)
- render->draw( render, tmp, j );
- break;
-
- default:
- render->draw_arrays( render,
- start,
- length );
- break;
- }
-
- if (tmp)
- FREE(tmp);
-}
-
-
-
-static boolean do_draw( struct fetch_shade_emit *fse,
- unsigned start, unsigned count )
-{
- struct draw_context *draw = fse->draw;
-
- char *hw_verts =
- draw->render->allocate_vertices( draw->render,
- (ushort)fse->key.output_stride,
- (ushort)count );
-
- if (!hw_verts)
- return FALSE;
-
- /* Single routine to fetch vertices, run shader and emit HW verts.
- * Clipping and viewport transformation are done on hardware.
- */
- fse->active->run_linear( fse,
- start, count,
- hw_verts );
-
- /* Draw arrays path to avoid re-emitting index list again and
- * again.
- */
- fse_render_linear( draw->render,
- fse->prim,
- 0,
- count );
-
-
- draw->render->release_vertices( draw->render,
- hw_verts,
- fse->key.output_stride,
- count );
-
- return TRUE;
-}
-
-
-static void
-fse_run(struct draw_pt_front_end *fe,
- pt_elt_func elt_func,
- const void *elt_ptr,
- unsigned count)
-{
- struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe;
- unsigned i = 0;
- unsigned first, incr;
- unsigned start = elt_func(elt_ptr, 0);
-
- //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count);
-
- draw_pt_split_prim(fse->prim, &first, &incr);
-
- count -= (count - first) % incr;
-
- while (i + first <= count) {
- int nr = MIN2( count - i, 1024 );
-
- /* snap to prim boundary
- */
- nr -= (nr - first) % incr;
-
- if (!do_draw( fse, start + i, nr )) {
- assert(0);
- return ;
- }
-
- /* increment allowing for repeated vertices
- */
- i += nr - (first - incr);
- }
-
- //return TRUE;
-}
-
-
-static void fse_finish( struct draw_pt_front_end *frontend )
-{
-}
-
-
-static void
-fse_destroy( struct draw_pt_front_end *frontend )
-{
- FREE(frontend);
-}
-
-struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw )
-{
- struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit);
- if (!fse)
- return NULL;
-
- fse->base.prepare = fse_prepare;
- fse->base.run = fse_run;
- fse->base.finish = fse_finish;
- fse->base.destroy = fse_destroy;
- fse->draw = draw;
-
- fse->shader[0].run_linear = fetch_xyz_rgb_st;
- fse->shader[0].key.nr_elements = 3;
- fse->shader[0].key.output_stride = 12 * sizeof(float);
-
- fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
- fse->shader[0].key.element[0].input_buffer = 0;
- fse->shader[0].key.element[0].input_offset = 0;
- fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- fse->shader[0].key.element[0].output_offset = 0;
-
- fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
- fse->shader[0].key.element[1].input_buffer = 0;
- fse->shader[0].key.element[1].input_offset = 0;
- fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- fse->shader[0].key.element[1].output_offset = 16;
-
- fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32_FLOAT;
- fse->shader[0].key.element[1].input_buffer = 0;
- fse->shader[0].key.element[1].input_offset = 0;
- fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- fse->shader[0].key.element[1].output_offset = 32;
-
- fse->shader[1].run_linear = fetch_xyz_rgb;
- fse->shader[1].key.nr_elements = 2;
- fse->shader[1].key.output_stride = 8 * sizeof(float);
-
- fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
- fse->shader[1].key.element[0].input_buffer = 0;
- fse->shader[1].key.element[0].input_offset = 0;
- fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- fse->shader[1].key.element[0].output_offset = 0;
-
- fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
- fse->shader[1].key.element[1].input_buffer = 0;
- fse->shader[1].key.element[1].input_offset = 0;
- fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- fse->shader[1].key.element[1].output_offset = 16;
-
- fse->shader[2].run_linear = fetch_xyz_rgb_psiz;
- fse->shader[2].key.nr_elements = 3;
- fse->shader[2].key.output_stride = 9 * sizeof(float);
-
- fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
- fse->shader[2].key.element[0].input_buffer = 0;
- fse->shader[2].key.element[0].input_offset = 0;
- fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- fse->shader[2].key.element[0].output_offset = 0;
-
- fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT;
- fse->shader[2].key.element[1].input_buffer = 0;
- fse->shader[2].key.element[1].input_offset = 0;
- fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- fse->shader[2].key.element[1].output_offset = 16;
-
- /* psize is special
- * -- effectively add it here as another input!?!
- * -- who knows how to add it as a buffer?
- */
- fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT;
- fse->shader[2].key.element[2].input_buffer = 0;
- fse->shader[2].key.element[2].input_offset = 0;
- fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT;
- fse->shader[2].key.element[2].output_offset = 32;
-
- fse->nr_shaders = 3;
-
- return &fse->base;
-}
--
cgit v1.2.3
From 2f0d1396e4c1626b3b1ac799bd29e86a9530369e Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Tue, 13 May 2008 13:40:22 +0100
Subject: draw: move some state into a new 'vs' area
---
src/gallium/auxiliary/draw/draw_context.c | 21 ++++---------
src/gallium/auxiliary/draw/draw_pipe.h | 2 +-
src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 +-
src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 +--
src/gallium/auxiliary/draw/draw_pipe_clip.c | 6 ++--
src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 2 +-
src/gallium/auxiliary/draw/draw_pipe_stipple.c | 2 +-
src/gallium/auxiliary/draw/draw_pipe_twoside.c | 2 +-
src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 4 +--
src/gallium/auxiliary/draw/draw_private.h | 28 +++++++++++------
.../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 6 ++--
src/gallium/auxiliary/draw/draw_pt_middle_fse.c | 4 +--
src/gallium/auxiliary/draw/draw_vs.c | 35 +++++++++++++++++++---
src/gallium/auxiliary/draw/draw_vs_exec.c | 2 +-
src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +-
15 files changed, 75 insertions(+), 47 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 98e23fa830..2242074965 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -56,12 +56,6 @@ struct draw_context *draw_create( void )
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
- tgsi_exec_machine_init(&draw->machine);
-
- /* FIXME: give this machine thing a proper constructor:
- */
- draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
if (!draw_pipeline_init( draw ))
goto fail;
@@ -69,6 +63,9 @@ struct draw_context *draw_create( void )
if (!draw_pt_init( draw ))
goto fail;
+ if (!draw_vs_init( draw ))
+ goto fail;
+
return draw;
fail:
@@ -83,13 +80,6 @@ void draw_destroy( struct draw_context *draw )
return;
- if (draw->machine.Inputs)
- align_free(draw->machine.Inputs);
-
- if (draw->machine.Outputs)
- align_free(draw->machine.Outputs);
-
- tgsi_exec_machine_free_data(&draw->machine);
/* Not so fast -- we're just borrowing this at the moment.
*
@@ -99,6 +89,7 @@ void draw_destroy( struct draw_context *draw )
draw_pipeline_destroy( draw );
draw_pt_destroy( draw );
+ draw_vs_destroy( draw );
FREE( draw );
}
@@ -295,7 +286,7 @@ int
draw_find_vs_output(struct draw_context *draw,
uint semantic_name, uint semantic_index)
{
- const struct draw_vertex_shader *vs = draw->vertex_shader;
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == semantic_name &&
@@ -320,7 +311,7 @@ draw_find_vs_output(struct draw_context *draw,
uint
draw_num_vs_outputs(struct draw_context *draw)
{
- uint count = draw->vertex_shader->info.num_outputs;
+ uint count = draw->vs.vertex_shader->info.num_outputs;
if (draw->extra_vp_outputs.slot > 0)
count++;
return count;
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
index f1cb0891ca..dbad8f98ac 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -116,7 +116,7 @@ dup_vert( struct draw_stage *stage,
{
struct vertex_header *tmp = stage->tmp[idx];
const uint vsize = sizeof(struct vertex_header)
- + stage->draw->num_vs_outputs * 4 * sizeof(float);
+ + stage->draw->vs.num_vs_outputs * 4 * sizeof(float);
memcpy(tmp, vert, vsize);
tmp->vertex_id = UNDEFINED_VERTEX_ID;
return tmp;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index f501b2aed4..d93708ad3c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -651,7 +651,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
}
/* update vertex attrib info */
- aaline->tex_slot = draw->num_vs_outputs;
+ aaline->tex_slot = draw->vs.num_vs_outputs;
assert(aaline->tex_slot > 0); /* output[0] is vertex pos */
/* advertise the extra post-transformed vertex attribute */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 122a48660a..97d74ad693 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -681,7 +681,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
bind_aapoint_fragment_shader(aapoint);
/* update vertex attrib info */
- aapoint->tex_slot = draw->num_vs_outputs;
+ aapoint->tex_slot = draw->vs.num_vs_outputs;
assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
@@ -692,7 +692,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
aapoint->psize_slot = -1;
if (draw->rasterizer->point_size_per_vertex) {
/* find PSIZ vertex output */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index ce80c94163..c11ed934a4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -112,7 +112,7 @@ static void interp( const struct clipper *clip,
const struct vertex_header *out,
const struct vertex_header *in )
{
- const unsigned nr_attrs = clip->stage.draw->num_vs_outputs;
+ const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs;
unsigned j;
/* Vertex header.
@@ -180,7 +180,7 @@ static void emit_poly( struct draw_stage *stage,
header.flags |= edge_last;
if (0) {
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint j, k;
debug_printf("Clipped tri:\n");
for (j = 0; j < 3; j++) {
@@ -425,7 +425,7 @@ clip_init_state( struct draw_stage *stage )
clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE;
if (clipper->flat) {
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
clipper->num_color_attribs = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index 09b68c4559..21a9c3b77f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -159,7 +159,7 @@ static void flatshade_line_1( struct draw_stage *stage,
static void flatshade_init_state( struct draw_stage *stage )
{
struct flat_stage *flat = flat_stage(stage);
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
/* Find which vertex shader outputs are colors, make a list */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 3cbced362e..4673d5dcba 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -71,7 +71,7 @@ screen_interp( struct draw_context *draw,
const struct vertex_header *v1 )
{
uint attr;
- for (attr = 0; attr < draw->num_vs_outputs; attr++) {
+ for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) {
const float *val0 = v0->data[attr];
const float *val1 = v1->data[attr];
float *newv = dst->data[attr];
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 50872fdbe9..3ac825f565 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -105,7 +105,7 @@ static void twoside_first_tri( struct draw_stage *stage,
struct prim_header *header )
{
struct twoside_stage *twoside = twoside_stage(stage);
- const struct draw_vertex_shader *vs = stage->draw->vertex_shader;
+ const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader;
uint i;
twoside->attrib_front0 = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index ed08573382..df92e3f2d0 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -197,7 +197,7 @@ static void widepoint_first_point( struct draw_stage *stage,
if (draw->rasterizer->point_sprite) {
/* find vertex shader texcoord outputs */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i, j = 0;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
@@ -212,7 +212,7 @@ static void widepoint_first_point( struct draw_stage *stage,
wide->psize_slot = -1;
if (draw->rasterizer->point_size_per_vertex) {
/* find PSIZ vertex output */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
+ const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
uint i;
for (i = 0; i < vs->info.num_outputs; i++) {
if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index fd51a57781..3418ee2b88 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -169,13 +169,24 @@ struct draw_context
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
struct pipe_viewport_state viewport;
+ boolean identity_viewport;
- struct draw_vertex_shader *vertex_shader;
+ struct {
+ struct draw_vertex_shader *vertex_shader;
+ uint num_vs_outputs; /**< convenience, from vertex_shader */
- boolean identity_viewport;
- uint num_vs_outputs; /**< convenience, from vertex_shader */
+ /** TGSI program interpreter runtime state */
+ struct tgsi_exec_machine machine;
+
+ /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
+ */
+ struct gallivm_cpu_engine *engine;
+
+ struct translate_cache *fetch_cache;
+ struct translate_cache *emit_cache;
+ } vs;
/* Clip derived state:
*/
@@ -192,16 +203,15 @@ struct draw_context
unsigned reduced_prim;
- /** TGSI program interpreter runtime state */
- struct tgsi_exec_machine machine;
-
- /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private.
- */
- struct gallivm_cpu_engine *engine;
void *driver_private;
};
+/*******************************************************************************
+ * Vertex shader code:
+ */
+boolean draw_vs_init( struct draw_context *draw );
+void draw_vs_destroy( struct draw_context *draw );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index dad54690a5..06718779a5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -55,7 +55,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vs = draw->vertex_shader;
+ struct draw_vertex_shader *vs = draw->vs.vertex_shader;
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
@@ -107,7 +107,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *shader = draw->vertex_shader;
+ struct draw_vertex_shader *shader = draw->vs.vertex_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align_int( fetch_count, 4 );
@@ -183,7 +183,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *shader = draw->vertex_shader;
+ struct draw_vertex_shader *shader = draw->vs.vertex_shader;
unsigned opt = fpme->opt;
unsigned alloc_count = align_int( count, 4 );
diff --git a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c
index cdb7d260da..643ea151c1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c
+++ b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c
@@ -368,8 +368,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
- unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs;
- unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs;
+ unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
+ unsigned num_vs_outputs = draw->vs.vertex_shader->info.num_outputs;
const struct vertex_info *vinfo;
unsigned i;
boolean need_psize = 0;
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 03fe00a951..4142dd9589 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -66,13 +66,13 @@ draw_bind_vertex_shader(struct draw_context *draw,
if (dvs)
{
- draw->vertex_shader = dvs;
- draw->num_vs_outputs = dvs->info.num_outputs;
+ draw->vs.vertex_shader = dvs;
+ draw->vs.num_vs_outputs = dvs->info.num_outputs;
dvs->prepare( dvs, draw );
}
else {
- draw->vertex_shader = NULL;
- draw->num_vs_outputs = 0;
+ draw->vs.vertex_shader = NULL;
+ draw->vs.num_vs_outputs = 0;
}
}
@@ -83,3 +83,30 @@ draw_delete_vertex_shader(struct draw_context *draw,
{
dvs->delete( dvs );
}
+
+
+
+/**
+ * Set up the vertex shader state kept in draw->vs: initialise the TGSI
+ * exec machine and allocate its 16-byte aligned Inputs/Outputs vectors.
+ *
+ * \return TRUE on success.  FALSE if either vector could not be
+ *         allocated; in that case neither Inputs nor Outputs is left
+ *         pointing at allocated memory.
+ */
+boolean
+draw_vs_init( struct draw_context *draw )
+{
+   tgsi_exec_machine_init(&draw->vs.machine);
+
+   /* FIXME: give this machine thing a proper constructor:
+    */
+   draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+   if (!draw->vs.machine.Inputs)
+      return FALSE;
+
+   draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+   if (!draw->vs.machine.Outputs) {
+      /* Undo the first allocation so a failed init never leaves a
+       * half-built machine behind.
+       */
+      align_free(draw->vs.machine.Inputs);
+      draw->vs.machine.Inputs = NULL;
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+/**
+ * Tear down the vertex shader state kept in draw->vs: free the exec
+ * machine's Inputs/Outputs vectors (when present) and then the
+ * machine's own data.
+ */
+void
+draw_vs_destroy( struct draw_context *draw )
+{
+   void *inputs = draw->vs.machine.Inputs;
+   void *outputs = draw->vs.machine.Outputs;
+
+   if (inputs != NULL)
+      align_free(inputs);
+
+   if (outputs != NULL)
+      align_free(outputs);
+
+   tgsi_exec_machine_free_data(&draw->vs.machine);
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 7a02f6334b..cb80d008cd 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -182,7 +182,7 @@ draw_create_vs_exec(struct draw_context *draw,
vs->base.prepare = vs_exec_prepare;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
- vs->machine = &draw->machine;
+ vs->machine = &draw->vs.machine;
return &vs->base;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index edf235cddc..13ad032bd3 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -158,7 +158,7 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
- vs->machine = &draw->machine;
+ vs->machine = &draw->vs.machine;
x86_init_func( &vs->sse2_program );
--
cgit v1.2.3
From 7c99d7fe60e7bb0b7cf103a851aeef4614278ca6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 15 May 2008 12:39:08 +0100
Subject: draw: create specialized vs varients incorporating fetch & emit
---
src/gallium/auxiliary/draw/Makefile | 3 +-
src/gallium/auxiliary/draw/draw_private.h | 2 +
.../auxiliary/draw/draw_pt_fetch_shade_emit.c | 338 +++++++++++++++++++++
src/gallium/auxiliary/draw/draw_vs.c | 83 ++++-
src/gallium/auxiliary/draw/draw_vs.h | 105 +++++++
src/gallium/auxiliary/draw/draw_vs_exec.c | 2 +
src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 +
src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +
src/gallium/auxiliary/draw/draw_vs_varient.c | 229 ++++++++++++++
9 files changed, 764 insertions(+), 2 deletions(-)
create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
create mode 100644 src/gallium/auxiliary/draw/draw_vs_varient.c
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 3053682da8..84877994fb 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -26,7 +26,7 @@ C_SOURCES = \
draw_pt_emit.c \
draw_pt_fetch.c \
draw_pt_fetch_emit.c \
- draw_pt_middle_fse.c \
+ draw_pt_fetch_shade_emit.c \
draw_pt_fetch_shade_pipeline.c \
draw_pt_post_vs.c \
draw_pt_util.c \
@@ -34,6 +34,7 @@ C_SOURCES = \
draw_pt_vcache.c \
draw_vertex.c \
draw_vs.c \
+ draw_vs_varient.c \
draw_vs_exec.c \
draw_vs_llvm.c \
draw_vs_sse.c
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 3418ee2b88..c095bf3d7b 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -184,7 +184,9 @@ struct draw_context
struct gallivm_cpu_engine *engine;
+ struct translate *fetch;
struct translate_cache *fetch_cache;
+ struct translate *emit;
struct translate_cache *emit_cache;
} vs;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
new file mode 100644
index 0000000000..74945dcfe9
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -0,0 +1,338 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell
+ */
+
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
+
+#include "translate/translate.h"
+
+struct fetch_shade_emit;
+
+
+/* Prototype fetch, shade, emit-hw-verts all in one go.
+ *
+ * State for the combined middle end: fse_prepare() fills in 'key' and
+ * selects 'active'; fse_run() and fse_run_linear() then use both.
+ */
+struct fetch_shade_emit {
+ struct draw_pt_middle_end base; /* must stay first: the callbacks cast the middle-end pointer back to this struct */
+ struct draw_context *draw; /* owning draw context, set at creation */
+
+
+ /* Temporaries:
+ *
+ * NOTE(review): none of the functions in this file read or write the
+ * four fields below -- confirm they are still needed.
+ */
+ const float *constants;
+ unsigned pitch[PIPE_MAX_ATTRIBS];
+ const ubyte *src[PIPE_MAX_ATTRIBS];
+ unsigned prim;
+
+ struct draw_vs_varient_key key; /* fetch/emit layout built by fse_prepare() */
+ struct draw_vs_varient *active; /* varient returned by draw_vs_lookup_varient() for 'key' */
+};
+
+
+
+
+/**
+ * Prepare the fetch-shade-emit middle end for a run of primitives.
+ *
+ * Builds a draw_vs_varient_key from the bound vertex elements (the
+ * inputs) and from the renderer's vertex_info (the outputs), looks up
+ * or creates the matching vertex shader varient, and hands that varient
+ * the vertex buffer pointers and the constants.
+ *
+ * Every failure path asserts and returns early without a status.
+ *
+ * \param middle  the middle end; really a struct fetch_shade_emit
+ * \param prim    primitive type, forwarded to the renderer
+ * \param opt     not used by this function
+ */
+static void fse_prepare( struct draw_pt_middle_end *middle,
+                         unsigned prim,
+                         unsigned opt )
+{
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+   struct draw_context *draw = fse->draw;
+   unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs;
+   unsigned num_vs_outputs = draw->vs.vertex_shader->info.num_outputs;
+   const struct vertex_info *vinfo;
+   unsigned i;
+   boolean need_psize = 0;
+
+
+   /* Indexed drawing is not handled by this path. */
+   if (draw->pt.user.elts) {
+      assert(0);
+      return ;
+   }
+
+   if (!draw->render->set_primitive( draw->render,
+                                     prim )) {
+      assert(0);
+      return;
+   }
+
+   /* Must do this after set_primitive() above:
+    */
+   vinfo = draw->render->get_vertex_info(draw->render);
+
+
+
+   fse->key.nr_elements = MAX2(num_vs_outputs,     /* outputs - translate to hw format */
+                               num_vs_inputs);     /* inputs - fetch from api format */
+
+   fse->key.output_stride = vinfo->size * 4;
+   memset(fse->key.element, 0,
+          fse->key.nr_elements * sizeof(fse->key.element[0]));
+
+   for (i = 0; i < num_vs_inputs; i++) {
+      const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+      fse->key.element[i].in.format = src->src_format;
+
+      /* Consider ignoring these, ie make generated programs
+       * independent of this state:
+       */
+      fse->key.element[i].in.buffer = src->vertex_buffer_index;
+      fse->key.element[i].in.offset = src->src_offset;
+   }
+
+
+   {
+      unsigned dst_offset = 0;
+
+      for (i = 0; i < vinfo->num_attribs; i++) {
+         unsigned emit_sz = 0;
+         unsigned output_format = PIPE_FORMAT_NONE;
+         unsigned vs_output = vinfo->src_index[i];
+
+         switch (vinfo->emit[i]) {
+         case EMIT_4F:
+            output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+            emit_sz = 4 * sizeof(float);
+            break;
+         case EMIT_3F:
+            output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+            emit_sz = 3 * sizeof(float);
+            break;
+         case EMIT_2F:
+            output_format = PIPE_FORMAT_R32G32_FLOAT;
+            emit_sz = 2 * sizeof(float);
+            break;
+         case EMIT_1F:
+            output_format = PIPE_FORMAT_R32_FLOAT;
+            emit_sz = 1 * sizeof(float);
+            break;
+         case EMIT_1F_PSIZE:
+            need_psize = 1;
+            output_format = PIPE_FORMAT_R32_FLOAT;
+            emit_sz = 1 * sizeof(float);
+            vs_output = num_vs_outputs + 1;
+
+            break;
+         default:
+            assert(0);
+            break;
+         }
+
+         /* The elements in the key correspond to vertex shader output
+          * numbers, not to positions in the hw vertex description --
+          * that's handled by the output_offset field.
+          *
+          * vs_output comes from the renderer's vertex_info (or is
+          * num_vs_outputs + 1 for psize), so make sure it cannot index
+          * past the end of key.element[] before writing through it.
+          */
+         if (vs_output >= PIPE_MAX_ATTRIBS) {
+            assert(0);
+            return;
+         }
+
+         fse->key.element[vs_output].out.format = output_format;
+         fse->key.element[vs_output].out.offset = dst_offset;
+
+         dst_offset += emit_sz;
+         assert(fse->key.output_stride >= dst_offset);
+      }
+   }
+
+   /* To make psize work, really need to tell the vertex shader to
+    * copy that value from input->output.  For 'translate' this was
+    * implicit for all elements.
+    */
+#if 0
+   if (need_psize) {
+      unsigned input = num_vs_inputs + 1;
+      const struct pipe_vertex_element *src = &draw->pt.vertex_element[i];
+      fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
+      fse->key.element[i].input_buffer = 0; //nr_buffers + 1;
+      fse->key.element[i].input_offset = 0;
+
+      fse->key.nr_elements += 1;
+
+   }
+#endif
+
+   /* Would normally look up a vertex shader and peruse its list of
+    * varients somehow.  We omitted that step and put all the
+    * hardcoded "shaders" into an array.  We're just making the
+    * assumption that this happens to be a matching shader...  ie
+    * you're running isosurf, aren't you?
+    */
+   fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader,
+                                         &fse->key );
+
+   if (!fse->active) {
+      assert(0);
+      return ;
+   }
+
+   /* Now set buffer pointers:
+    */
+   for (i = 0; i < num_vs_inputs; i++) {
+      unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index;
+
+      fse->active->set_input( fse->active,
+                              i,
+
+                              ((const ubyte *) draw->pt.user.vbuffer[buf] +
+                               draw->pt.vertex_buffer[buf].buffer_offset),
+
+                              draw->pt.vertex_buffer[buf].pitch );
+   }
+
+   fse->active->set_constants( fse->active,
+                               (const float (*)[4])draw->pt.user.constants );
+
+   //return TRUE;
+}
+
+
+
+
+
+
+
+/**
+ * Fetch, shade and emit 'count' consecutive vertices beginning at
+ * 'start', then draw them through the renderer's non-indexed path.
+ *
+ * The vertex allocation is rounded up to a multiple of four vertices.
+ */
+static void fse_run_linear( struct draw_pt_middle_end *middle,
+                            unsigned start,
+                            unsigned count )
+{
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+   struct draw_context *draw = fse->draw;
+   unsigned nr_alloc = align(count, 4);
+   unsigned stride;
+   char *verts;
+
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   stride = fse->key.output_stride;
+
+   verts = draw->render->allocate_vertices( draw->render,
+                                            (ushort)stride,
+                                            (ushort)nr_alloc );
+   if (verts == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* One call fetches the vertices, runs the shader and writes the
+    * hardware vertices.  Clipping and viewport transformation are done
+    * elsewhere -- either by the API or on hardware, or for some other
+    * reason not required...
+    */
+   fse->active->run_linear( fse->active, start, count, verts );
+
+   /* Use the draw-arrays path so that no index list has to be
+    * re-emitted again and again.
+    */
+   draw->render->draw_arrays( draw->render, 0, count );
+
+   draw->render->release_vertices( draw->render,
+                                   verts,
+                                   stride,
+                                   count );
+}
+
+
+/**
+ * Fetch, shade and emit the vertices named by 'fetch_elts', then draw
+ * them with the index list 'draw_elts'.
+ *
+ * The vertex allocation is rounded up to a multiple of four vertices.
+ */
+static void
+fse_run(struct draw_pt_middle_end *middle,
+        const unsigned *fetch_elts,
+        unsigned fetch_count,
+        const ushort *draw_elts,
+        unsigned draw_count )
+{
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+   struct draw_context *draw = fse->draw;
+   unsigned nr_alloc = align(fetch_count, 4);
+   unsigned stride;
+   void *verts;
+
+   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
+    */
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+
+   stride = fse->key.output_stride;
+
+   verts = draw->render->allocate_vertices( draw->render,
+                                            (ushort)stride,
+                                            (ushort)nr_alloc );
+   if (verts == NULL) {
+      assert(0);
+      return;
+   }
+
+   /* One call fetches the vertices, runs the shader and writes the
+    * hardware vertices.
+    */
+   fse->active->run_elts( fse->active, fetch_elts, fetch_count, verts );
+
+   draw->render->draw( draw->render, draw_elts, draw_count );
+
+   draw->render->release_vertices( draw->render,
+                                   verts,
+                                   stride,
+                                   fetch_count );
+}
+
+
+/* Nothing to do when a run finishes: this middle end keeps no per-run
+ * state that needs releasing here.
+ */
+static void fse_finish( struct draw_pt_middle_end *middle )
+{
+   (void) middle;
+}
+
+
+/* Free the middle end allocated by draw_pt_middle_fse(). */
+static void
+fse_destroy( struct draw_pt_middle_end *middle )
+{
+   /* 'base' is the first member, so this is the pointer that
+    * CALLOC_STRUCT() returned.
+    */
+   struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
+
+   FREE(fse);
+}
+
+/**
+ * Create the fetch-shade-emit middle end for 'draw'.
+ *
+ * \return the new middle end's base interface, or NULL if the
+ *         allocation fails.
+ */
+struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw )
+{
+   struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit);
+
+   if (fse == NULL)
+      return NULL;
+
+   fse->draw = draw;
+
+   /* Hook up the middle-end interface: */
+   fse->base.prepare    = fse_prepare;
+   fse->base.run        = fse_run;
+   fse->base.run_linear = fse_run_linear;
+   fse->base.finish     = fse_finish;
+   fse->base.destroy    = fse_destroy;
+
+   return &fse->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 4142dd9589..9b899d404e 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -36,6 +36,8 @@
#include "draw_private.h"
#include "draw_context.h"
#include "draw_vs.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
@@ -90,11 +92,25 @@ boolean
draw_vs_init( struct draw_context *draw )
{
tgsi_exec_machine_init(&draw->vs.machine);
+
/* FIXME: give this machine thing a proper constructor:
*/
draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+ if (!draw->vs.machine.Inputs)
+ return FALSE;
+ draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+ if (!draw->vs.machine.Outputs)
+ return FALSE;
+
+ draw->vs.emit_cache = translate_cache_create();
+ if (!draw->vs.emit_cache)
+ return FALSE;
+
+ draw->vs.fetch_cache = translate_cache_create();
+ if (!draw->vs.fetch_cache)
+ return FALSE;
+
return TRUE;
}
@@ -107,6 +123,71 @@ draw_vs_destroy( struct draw_context *draw )
if (draw->vs.machine.Outputs)
align_free(draw->vs.machine.Outputs);
+ if (draw->vs.fetch_cache)
+ translate_cache_destroy(draw->vs.fetch_cache);
+
+ if (draw->vs.emit_cache)
+ translate_cache_destroy(draw->vs.emit_cache);
+
tgsi_exec_machine_free_data(&draw->vs.machine);
}
+
+
+/**
+ * Return the varient of 'vs' that matches 'key', creating it through
+ * vs->create_varient() when no existing varient matches.
+ *
+ * The shader keeps a fixed-size list of varients.  Once that list is
+ * full, a new varient replaces the one in the last slot, which is
+ * destroyed; previously this case was only caught by an assert and
+ * wrote past the end of the list in non-debug builds.
+ *
+ * NOTE(review): a pointer to the destroyed varient becomes invalid.
+ * The caller visible here (fse_prepare) re-does the lookup on every
+ * prepare -- confirm no other caller holds a varient across lookups.
+ *
+ * \return the matching varient, or NULL if one could not be created.
+ */
+struct draw_vs_varient *
+draw_vs_lookup_varient( struct draw_vertex_shader *vs,
+                        const struct draw_vs_varient_key *key )
+{
+   struct draw_vs_varient *varient;
+   unsigned i;
+
+   /* Lookup existing varient:
+    */
+   for (i = 0; i < vs->nr_varients; i++)
+      if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0)
+         return vs->varient[i];
+
+   /* Else have to create a new one:
+    */
+   varient = vs->create_varient( vs, key );
+   if (varient == NULL)
+      return NULL;
+
+   /* Add it to our list:
+    */
+   if (vs->nr_varients < Elements(vs->varient)) {
+      vs->varient[vs->nr_varients++] = varient;
+   }
+   else {
+      /* List is full: recycle the last slot rather than overrunning
+       * the array.
+       */
+      const unsigned last = Elements(vs->varient) - 1;
+
+      vs->varient[last]->destroy( vs->varient[last] );
+      vs->varient[last] = varient;
+   }
+
+   /* Done
+    */
+   return varient;
+}
+
+
+/**
+ * Return a fetch translate object for 'key'.
+ *
+ * The most recently returned object is remembered in draw->vs.fetch and
+ * handed back directly when its key still matches; otherwise 'key' is
+ * sanitized and looked up in draw->vs.fetch_cache.
+ */
+struct translate *
+draw_vs_get_fetch( struct draw_context *draw,
+                   struct translate_key *key )
+{
+   /* Fast path: same key as last time. */
+   if (draw->vs.fetch &&
+       translate_key_compare(&draw->vs.fetch->key, key) == 0)
+      return draw->vs.fetch;
+
+   translate_key_sanitize(key);
+   draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key);
+
+   return draw->vs.fetch;
+}
+
+/**
+ * Return an emit translate object for 'key'.
+ *
+ * The most recently returned object is remembered in draw->vs.emit and
+ * handed back directly when its key still matches; otherwise 'key' is
+ * sanitized and looked up in draw->vs.emit_cache.
+ */
+struct translate *
+draw_vs_get_emit( struct draw_context *draw,
+                  struct translate_key *key )
+{
+   /* Fast path: same key as last time. */
+   if (draw->vs.emit &&
+       translate_key_compare(&draw->vs.emit->key, key) == 0)
+      return draw->vs.emit;
+
+   translate_key_sanitize(key);
+   draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key);
+
+   return draw->vs.emit;
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index f9772b83b8..677be0d28d 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -38,10 +38,63 @@
struct draw_context;
struct pipe_shader_state;
+/* Where one vertex shader input is fetched from; filled in from a
+ * pipe_vertex_element when a varient key is built.
+ */
+struct draw_vs_input
+{
+ enum pipe_format format; /* source format of the attribute */
+ unsigned buffer; /* index of the vertex buffer holding it */
+ unsigned offset; /* source offset of the attribute */
+};
+
+/* How one vertex shader output is written to the emitted vertex. */
+struct draw_vs_output
+{
+ enum pipe_format format; /* format to emit, PIPE_FORMAT_NONE when unset */
+ unsigned offset; /* byte offset within the emitted vertex */
+};
+
+/* One slot of a varient key: slot i describes both shader input i and
+ * shader output i.
+ */
+struct draw_vs_element {
+ struct draw_vs_input in;
+ struct draw_vs_output out;
+};
+
+/* Key identifying a fetch/emit specialisation of a vertex shader.
+ *
+ * Keys are compared with memcmp() over their first
+ * draw_vs_varient_keysize() bytes, which assumes the two unsigned
+ * fields come first and are directly followed by element[].  Do not
+ * reorder or insert fields without updating that calculation.
+ */
+struct draw_vs_varient_key {
+ unsigned output_stride; /* size in bytes of one emitted vertex */
+ unsigned nr_elements; /* number of element[] slots that are part of the key */
+ struct draw_vs_element element[PIPE_MAX_ATTRIBS];
+};
+
+/* A vertex shader specialised for one draw_vs_varient_key: it fetches
+ * its inputs, runs the shader and emits vertices in the layout the key
+ * describes.  Implementations fill in the function pointers.
+ */
+struct draw_vs_varient {
+ struct draw_vs_varient_key key; /* the key this varient was created for */
+
+ struct draw_vertex_shader *vs; /* shader this varient belongs to */
+
+ /* Set the source pointer and stride used for input/buffer 'i'. */
+ void (*set_input)( struct draw_vs_varient *,
+ unsigned i,
+ const void *ptr,
+ unsigned stride );
+
+ /* Set the constants used by subsequent runs. */
+ void (*set_constants)( struct draw_vs_varient *,
+ const float (*constants)[4] );
+
+
+ /* Process 'count' consecutive vertices beginning at 'start',
+ * writing the results to 'output_buffer'.
+ */
+ void (*run_linear)( struct draw_vs_varient *shader,
+ unsigned start,
+ unsigned count,
+ void *output_buffer );
+
+ /* Process the 'count' vertices whose indices are listed in 'elts',
+ * writing the results to 'output_buffer'.
+ */
+ void (*run_elts)( struct draw_vs_varient *shader,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer );
+
+ /* Release the varient. */
+ void (*destroy)( struct draw_vs_varient * );
+};
+
+
/**
* Private version of the compiled vertex_shader
*/
struct draw_vertex_shader {
+ struct draw_context *draw;
/* This member will disappear shortly:
*/
@@ -49,6 +102,14 @@ struct draw_vertex_shader {
struct tgsi_shader_info info;
+ /*
+ */
+ struct draw_vs_varient *varient[16];
+ unsigned nr_varients;
+ struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
+ const struct draw_vs_varient_key *key );
+
+
void (*prepare)( struct draw_vertex_shader *shader,
struct draw_context *draw );
@@ -68,6 +129,15 @@ struct draw_vertex_shader {
};
+struct draw_vs_varient *
+draw_vs_lookup_varient( struct draw_vertex_shader *base,
+ const struct draw_vs_varient_key *key );
+
+
+/********************************************************************************
+ * Internal functions:
+ */
+
struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *templ);
@@ -80,8 +150,43 @@ struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
+/********************************************************************************
+ * Helpers for vs implementations that don't do their own fetch/emit varients.
+ * Means these can be shared between shaders.
+ */
+struct translate;
+struct translate_key;
+
+struct translate *draw_vs_get_fetch( struct draw_context *draw,
+ struct translate_key *key );
+
+
+struct translate *draw_vs_get_emit( struct draw_context *draw,
+ struct translate_key *key );
+
+struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
+
+
+
+/* Number of leading bytes of 'key' that are significant: the two
+ * unsigned header fields plus the first nr_elements element slots.
+ */
+static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key )
+{
+   return (int) (2 * sizeof(int) +
+                 key->nr_elements * sizeof(struct draw_vs_element));
+}
+
+/* memcmp()-style comparison of two keys over the significant bytes of
+ * 'a'; returns 0 when they match.
+ */
+static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a,
+                                               const struct draw_vs_varient_key *b )
+{
+   return memcmp(a, b, draw_vs_varient_keysize(a));
+}
+
+
+
+
#define MAX_TGSI_VERTICES 4
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index cb80d008cd..4501877efc 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -179,9 +179,11 @@ draw_create_vs_exec(struct draw_context *draw,
tgsi_scan_shader(state->tokens, &vs->base.info);
+ vs->base.draw = draw;
vs->base.prepare = vs_exec_prepare;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
+ vs->base.create_varient = draw_vs_varient_generic;
vs->machine = &draw->vs.machine;
return &vs->base;
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 171da51dd5..621472ec7c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -114,7 +114,9 @@ draw_create_vs_llvm(struct draw_context *draw,
tgsi_scan_shader(vs->base.state.tokens, &vs->base.info);
+ vs->base.draw = draw;
vs->base.prepare = vs_llvm_prepare;
+ vs->base.create_varient = draw_vs_varient_generic;
vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
vs->machine = &draw->machine;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 13ad032bd3..df94a7e0c7 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -155,6 +155,8 @@ draw_create_vs_sse(struct draw_context *draw,
tgsi_scan_shader(templ->tokens, &vs->base.info);
+ vs->base.draw = draw;
+ vs->base.create_varient = draw_vs_varient_generic;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
new file mode 100644
index 0000000000..d27b0f6187
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -0,0 +1,229 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell
+ */
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_vs.h"
+#include "translate/translate.h"
+#include "translate/translate_cache.h"
+
+/* A first pass at incorporating vertex fetch/emit functionality into
+ * the vertex shader: a generic varient that wraps an existing shader
+ * with separate fetch and emit translate objects.
+ */
+struct draw_vs_varient_generic {
+ struct draw_vs_varient base; /* must stay first: callbacks cast the varient pointer back to this struct */
+
+
+
+ /* NOTE(review): the constructor in this file never assigns these two
+ * fields, so they stay zeroed by CALLOC_STRUCT -- confirm whether they
+ * are needed (base.vs already holds the shader).
+ */
+ struct draw_vertex_shader *shader;
+ struct draw_context *draw;
+
+ /* Basic plan is to run these two translate functions before/after
+ * the vertex shader's existing run_linear() routine to simulate
+ * the inclusion of this functionality into the shader...
+ *
+ * Next will look at actually including it.
+ */
+ struct translate *fetch;
+ struct translate *emit;
+
+ const float (*constants)[4]; /* last pointer given to set_constants(), passed to the shader on each run */
+};
+
+
+
+
+/* Remember the constants pointer; it is handed to the shader on each
+ * subsequent run.
+ */
+static void vsvg_set_constants( struct draw_vs_varient *varient,
+                                const float (*constants)[4] )
+{
+   ((struct draw_vs_varient_generic *)varient)->constants = constants;
+}
+
+
+/* Forward a vertex buffer pointer and stride to the fetch translate
+ * object.
+ */
+static void vsvg_set_input( struct draw_vs_varient *varient,
+                            unsigned buffer,
+                            const void *ptr,
+                            unsigned stride )
+{
+   struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+   struct translate *fetch = vsvg->fetch;
+
+   fetch->set_buffer(fetch, buffer, ptr, stride);
+}
+
+
+/**
+ * Fetch the vertices listed in 'elts' into 'output_buffer', run the
+ * shader over them in place, then translate the results in place to
+ * the emit format.
+ *
+ * NOTE(review): the shader is told its input stride is
+ * key.output_stride, while the fetch key built by
+ * draw_vs_varient_generic() uses a stride of 16 bytes per shader
+ * input -- confirm these are meant to agree.
+ */
+static void vsvg_run_elts( struct draw_vs_varient *varient,
+                           const unsigned *elts,
+                           unsigned count,
+                           void *output_buffer)
+{
+   struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+   struct draw_vertex_shader *vs = vsvg->base.vs;
+   struct translate *fetch = vsvg->fetch;
+   struct translate *emit = vsvg->emit;
+   const unsigned stride = vsvg->base.key.output_stride;
+
+   /* Want to do this in small batches for cache locality?
+    */
+   fetch->run_elts( fetch, elts, count, output_buffer );
+
+   //if (!vsvg->base.vs->is_passthrough)
+   vs->run_linear( vs,
+                   output_buffer,
+                   output_buffer,
+                   vsvg->constants,
+                   count,
+                   stride,
+                   stride);
+
+   //if (!vsvg->already_in_emit_format)
+   emit->set_buffer( emit, 0, output_buffer, stride );
+
+   emit->run( emit, 0, count, output_buffer );
+}
+
+
+/**
+ * Fetch 'count' consecutive vertices beginning at 'start' into
+ * 'output_buffer', run the shader over them in place, then translate
+ * the results in place to the emit format.
+ *
+ * NOTE(review): the shader is told its input stride is
+ * key.output_stride, while the fetch key built by
+ * draw_vs_varient_generic() uses a stride of 16 bytes per shader
+ * input -- confirm these are meant to agree.
+ */
+static void vsvg_run_linear( struct draw_vs_varient *varient,
+                             unsigned start,
+                             unsigned count,
+                             void *output_buffer )
+{
+   struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+   struct draw_vertex_shader *vs = vsvg->base.vs;
+   struct translate *fetch = vsvg->fetch;
+   struct translate *emit = vsvg->emit;
+   const unsigned stride = vsvg->base.key.output_stride;
+
+   //debug_printf("%s %d %d\n", __FUNCTION__, start, count);
+
+   fetch->run( fetch, start, count, output_buffer );
+
+   //if (!vsvg->base.vs->is_passthrough)
+   vs->run_linear( vs,
+                   output_buffer,
+                   output_buffer,
+                   vsvg->constants,
+                   count,
+                   stride,
+                   stride);
+
+   //if (!vsvg->already_in_emit_format)
+   emit->set_buffer( emit, 0, output_buffer, stride );
+
+   emit->run( emit, 0, count, output_buffer );
+}
+
+
+
+/* Free the varient itself.  The fetch/emit translate objects are not
+ * released here.
+ */
+static void vsvg_destroy( struct draw_vs_varient *varient )
+{
+   struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+
+   FREE(vsvg);
+}
+
+
+/**
+ * Create a generic varient of 'vs' for 'key'.
+ *
+ * The varient does not specialise the shader itself.  It pairs the
+ * shader's existing run_linear() with two translate objects: one that
+ * fetches every shader input as four floats, and one that converts the
+ * shader's four-float outputs to the formats and offsets in 'key'.
+ *
+ * \return the new varient, or NULL if the allocation fails or either
+ *         translate object cannot be obtained.
+ */
+struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
+                                                 const struct draw_vs_varient_key *key )
+{
+   unsigned i;
+   struct translate_key fetch, emit;
+
+   struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
+   if (vsvg == NULL)
+      return NULL;
+
+   vsvg->base.key = *key;
+   vsvg->base.vs = vs;
+   vsvg->base.set_input = vsvg_set_input;
+   vsvg->base.set_constants = vsvg_set_constants;
+   vsvg->base.run_elts = vsvg_run_elts;
+   vsvg->base.run_linear = vsvg_run_linear;
+   vsvg->base.destroy = vsvg_destroy;
+
+
+
+   /* OK, have to build a new one:
+    */
+   fetch.nr_elements = vs->info.num_inputs;
+   fetch.output_stride = 0;
+   for (i = 0; i < vs->info.num_inputs; i++) {
+      fetch.element[i].input_format = key->element[i].in.format;
+      fetch.element[i].input_buffer = key->element[i].in.buffer;
+      fetch.element[i].input_offset = key->element[i].in.offset;
+      fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      fetch.element[i].output_offset = fetch.output_stride;
+      fetch.output_stride += 4 * sizeof(float);
+   }
+
+
+   emit.nr_elements = vs->info.num_outputs;
+   emit.output_stride = key->output_stride;
+   for (i = 0; i < vs->info.num_outputs; i++) {
+      emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+      emit.element[i].input_buffer = 0;
+      emit.element[i].input_offset = i * 4 * sizeof(float);
+      emit.element[i].output_format = key->element[i].out.format;
+      emit.element[i].output_offset = key->element[i].out.offset;
+   }
+
+   vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
+   vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
+
+   /* Every callback dereferences both translate objects, so refuse to
+    * hand out a varient that is missing either one.  Only the varient
+    * itself is freed here; the translate objects are not ours to
+    * destroy.
+    */
+   if (vsvg->fetch == NULL || vsvg->emit == NULL) {
+      FREE(vsvg);
+      return NULL;
+   }
+
+   return &vsvg->base;
+}
+
+
+
+
+
--
cgit v1.2.3
From 9232f0c023af060b12f77dee5e8b6a533c48e146 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 19 May 2008 16:28:53 +0100
Subject: rtasm: remove unused struct member
---
src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 -
1 file changed, 1 deletion(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index eacaeeaf6f..baa10b7d4a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -44,7 +44,6 @@ struct x86_function {
unsigned stack_offset;
int need_emms;
unsigned char error_overflow[4];
- const char *fn;
};
enum x86_reg_file {
--
cgit v1.2.3
From 8618e6aa16bdba2c8b08124261bbaedaf7e22447 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Tue, 20 May 2008 14:34:06 +0100
Subject: translate: remove spurious comment
---
src/gallium/auxiliary/translate/translate_sse.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index a54ac5a82f..582d6f6466 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -472,13 +472,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride));
/* Incr index
- */ /* Emit code for each of the attributes. Currently routes
- * everything through SSE registers, even when it might be more
- * efficient to stick with regular old x86. No optimization or
- * other tricks - enough new ground to cover here just getting
- * things working.
- */
-
+ */
if (linear) {
x86_inc(p->func, idxEBX);
}
--
cgit v1.2.3
From d3e64caef6f8654af1a84825803e517ab8221c68 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 08:28:16 +0100
Subject: rtasm: export debug reg print function
---
src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 20 +++++++++-----------
src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 3 +++
2 files changed, 12 insertions(+), 11 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 4e036d9032..68ac91ed13 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -36,11 +36,8 @@
#define DUMP_SSE 0
-#if DUMP_SSE
-static void
-_print_reg(
- struct x86_reg reg )
+void x86_print_reg( struct x86_reg reg )
{
if (reg.mod != mod_REG)
debug_printf( "[" );
@@ -77,6 +74,7 @@ _print_reg(
debug_printf( "]" );
}
+#if DUMP_SSE
#define DUMP_START() debug_printf( "\n" )
#define DUMP_END() debug_printf( "\n" )
@@ -87,7 +85,7 @@ _print_reg(
foo++; \
if (*foo) \
foo++; \
- debug_printf( "\n% 15s ", foo ); \
+ debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \
} while (0)
#define DUMP_I( I ) do { \
@@ -97,27 +95,27 @@ _print_reg(
#define DUMP_R( R0 ) do { \
DUMP(); \
- _print_reg( R0 ); \
+ x86_print_reg( R0 ); \
} while( 0 )
#define DUMP_RR( R0, R1 ) do { \
DUMP(); \
- _print_reg( R0 ); \
+ x86_print_reg( R0 ); \
debug_printf( ", " ); \
- _print_reg( R1 ); \
+ x86_print_reg( R1 ); \
} while( 0 )
#define DUMP_RI( R0, I ) do { \
DUMP(); \
- _print_reg( R0 ); \
+ x86_print_reg( R0 ); \
debug_printf( ", %u", I ); \
} while( 0 )
#define DUMP_RRI( R0, R1, I ) do { \
DUMP(); \
- _print_reg( R0 ); \
+ x86_print_reg( R0 ); \
debug_printf( ", " ); \
- _print_reg( R1 ); \
+ x86_print_reg( R1 ); \
debug_printf( ", %u", I ); \
} while( 0 )
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index baa10b7d4a..1e02c6e73b 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -106,6 +106,9 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size );
void x86_release_func( struct x86_function *p );
void (*x86_get_func( struct x86_function *p ))( void );
+/* Debugging:
+ */
+void x86_print_reg( struct x86_reg reg );
/* Create and manipulate registers and regmem values:
--
cgit v1.2.3
From 9343779a8c800cf72e38b09b6f5087a0df258c08 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 08:28:53 +0100
Subject: gallium: define PIPE_CDECL calling convention, which really is cdecl
everywhere
---
src/gallium/include/pipe/p_compiler.h | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
(limited to 'src')
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index a4b772bc4f..01d1807b1c 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -103,6 +103,17 @@ typedef unsigned int uintptr_t;
#endif
+/* This should match linux gcc cdecl semantics everywhere, so that we
+ * just codegen one calling convention on all platforms.
+ *
+ * Expands to __cdecl when WIN32 is defined and to nothing otherwise.
+ */
+#ifdef WIN32
+#define PIPE_CDECL __cdecl
+#else
+#define PIPE_CDECL
+#endif
+
+
+
#if defined __GNUC__
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) ))
#define ALIGN16_ASSIGN(NAME) NAME##___aligned
@@ -115,12 +126,16 @@ typedef unsigned int uintptr_t;
-/** For calling code-gen'd functions */
+/**
+ * For calling code-gen'd functions, phase out in favor of
+ * PIPE_CDECL, above, which really means cdecl on all platforms, not
+ * like the below...
+ */
#if !defined(XSTDCALL)
#if defined(WIN32)
-#define XSTDCALL __stdcall
+#define XSTDCALL __stdcall /* phase this out */
#else
-#define XSTDCALL
+#define XSTDCALL /* XXX: NOTE! not STDCALL! */
#endif
#endif
--
cgit v1.2.3
From b5c8b3fba6ac90a0d83e02bfe432142f1adee9e5 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 08:29:19 +0100
Subject: translate: mark functions as PIPE_CDECL
---
src/gallium/auxiliary/translate/translate.h | 18 +++++++++---------
src/gallium/auxiliary/translate/translate_generic.c | 16 ++++++++--------
src/gallium/auxiliary/translate/translate_sse.c | 20 +++++++-------------
3 files changed, 24 insertions(+), 30 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index b8210af50c..c3b754a902 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -71,15 +71,15 @@ struct translate {
const void *ptr,
unsigned stride );
- void (*run_elts)( struct translate *,
- const unsigned *elts,
- unsigned count,
- void *output_buffer);
-
- void (*run)( struct translate *,
- unsigned start,
- unsigned count,
- void *output_buffer);
+ void (PIPE_CDECL *run_elts)( struct translate *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+ void (PIPE_CDECL *run)( struct translate *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
};
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 402780ee53..a25d94f2ca 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -541,10 +541,10 @@ static emit_func get_emit_func( enum pipe_format format )
/**
* Fetch vertex attributes for 'count' vertices.
*/
-static void generic_run_elts( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- void *output_buffer )
+static void PIPE_CDECL generic_run_elts( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
@@ -580,10 +580,10 @@ static void generic_run_elts( struct translate *translate,
-static void generic_run( struct translate *translate,
- unsigned start,
- unsigned count,
- void *output_buffer )
+static void PIPE_CDECL generic_run( struct translate *translate,
+ unsigned start,
+ unsigned count,
+ void *output_buffer )
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index 582d6f6466..2fc8b9d3d0 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -45,22 +45,16 @@
#define W 3
-#ifdef WIN32
-#define RTASM __cdecl
-#else
-#define RTASM
-#endif
-
-typedef void (RTASM *run_func)( struct translate *translate,
- unsigned start,
- unsigned count,
- void *output_buffer );
-
-typedef void (RTASM *run_elts_func)( struct translate *translate,
- const unsigned *elts,
+typedef void (PIPE_CDECL *run_func)( struct translate *translate,
+ unsigned start,
unsigned count,
void *output_buffer );
+typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer );
+
struct translate_sse {
--
cgit v1.2.3
From ba738a3135415de8b381cd8845cd6c435d5747a8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 09:43:30 +0100
Subject: draw: mark varient functions as PIPE_CDECL
---
src/gallium/auxiliary/draw/draw_vs.h | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 677be0d28d..6bfc2c8d75 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -62,6 +62,19 @@ struct draw_vs_varient_key {
struct draw_vs_element element[PIPE_MAX_ATTRIBS];
};
+struct draw_vs_varient;
+
+typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer);
+
+typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *,
+ unsigned start,
+ unsigned count,
+ void *output_buffer);
+
+
struct draw_vs_varient {
struct draw_vs_varient_key key;
@@ -75,16 +88,15 @@ struct draw_vs_varient {
void (*set_constants)( struct draw_vs_varient *,
const float (*constants)[4] );
+ void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
+ unsigned start,
+ unsigned count,
+ void *output_buffer );
- void (*run_linear)( struct draw_vs_varient *shader,
- unsigned start,
- unsigned count,
- void *output_buffer );
-
- void (*run_elts)( struct draw_vs_varient *shader,
- const unsigned *elts,
- unsigned count,
- void *output_buffer );
+ void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader,
+ const unsigned *elts,
+ unsigned count,
+ void *output_buffer );
void (*destroy)( struct draw_vs_varient * );
};
--
cgit v1.2.3
From 1ba10e5ccf5cd0c990922e982e1e9bc6be48a5e4 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 09:44:16 +0100
Subject: draw: add aos vertex shader varient
---
src/gallium/auxiliary/draw/Makefile | 2 +
src/gallium/auxiliary/draw/draw_vs.h | 10 +
src/gallium/auxiliary/draw/draw_vs_aos.c | 1739 +++++++++++++++++++++++++++
src/gallium/auxiliary/draw/draw_vs_aos.h | 181 +++
src/gallium/auxiliary/draw/draw_vs_aos_io.c | 314 +++++
src/gallium/auxiliary/draw/draw_vs_sse.c | 1 +
6 files changed, 2247 insertions(+)
create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos.c
create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos.h
create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos_io.c
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 84877994fb..9a88ecc070 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -35,6 +35,8 @@ C_SOURCES = \
draw_vertex.c \
draw_vs.c \
draw_vs_varient.c \
+ draw_vs_aos.c \
+ draw_vs_aos_io.c \
draw_vs_exec.c \
draw_vs_llvm.c \
draw_vs_sse.c
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 6bfc2c8d75..5a8d0da06d 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -162,6 +162,16 @@ struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
const struct pipe_shader_state *templ);
+
+
+struct draw_vs_varient_key;
+struct draw_vertex_shader;
+
+struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
+ const struct draw_vs_varient_key *key );
+
+
+
/********************************************************************************
* Helpers for vs implementations that don't do their own fetch/emit varients.
* Means these can be shared between shaders.
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
new file mode 100644
index 0000000000..620f5e3592
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -0,0 +1,1739 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 6.3
+ *
+ * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Translate tgsi vertex programs to x86/x87/SSE/SSE2 machine code
+ * using the rtasm runtime assembler. Based on the old
+ * t_vb_arb_program_sse.c
+ */
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/util/tgsi_dump.h"
+
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+#ifdef PIPE_ARCH_X86
+
+
+#define DISASSEM 0
+
+
+
+
+/* Return true when two x86_reg values agree in file, idx, mod and disp. */
+static INLINE boolean eq( struct x86_reg a,
+ struct x86_reg b )
+{
+ return (a.file == b.file &&
+ a.idx == b.idx &&
+ a.mod == b.mod &&
+ a.disp == b.disp);
+}
+
+/* Return a cp->machine_EDX-relative operand for element idx of the aos_machine array selected by file. */
+static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ struct x86_reg ptr = cp->machine_EDX;
+
+ switch (file) {
+ case TGSI_FILE_INPUT:
+ return x86_make_disp(ptr, Offset(struct aos_machine, input[idx]));
+
+ case TGSI_FILE_OUTPUT:
+ return x86_make_disp(ptr, Offset(struct aos_machine, output[idx]));
+
+ case TGSI_FILE_TEMPORARY:
+ return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
+
+ case TGSI_FILE_IMMEDIATE:
+ return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));
+
+ case TGSI_FILE_CONSTANT:
+ return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx]));
+ /* Internal values are addressed through the same immediate[] array as TGSI immediates. */
+ case AOS_FILE_INTERNAL:
+ return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));
+ /* Any other file is reported through ERROR() and answered with x86_make_reg(0,0). */
+ default:
+ ERROR(cp, "unknown reg file");
+ return x86_make_reg(0,0);
+ }
+}
+
+/* Operand for internal value imm; the lookup is made at index imm + 1 of AOS_FILE_INTERNAL. */
+struct x86_reg aos_get_internal( struct aos_compilation *cp,
+ unsigned imm )
+{
+ return get_reg_ptr( cp,
+ AOS_FILE_INTERNAL,
+ imm + 1 );
+}
+/* Write dirty XMM register idx back to the INPUT/OUTPUT/TEMPORARY slot it shadows and clear its dirty flag; anything else is an "invalid spill". */
+static void spill( struct aos_compilation *cp, unsigned idx )
+{
+ if (!cp->xmm[idx].dirty ||
+ (cp->xmm[idx].file != TGSI_FILE_INPUT && /* inputs are fetched into xmm & set dirty */
+ cp->xmm[idx].file != TGSI_FILE_OUTPUT &&
+ cp->xmm[idx].file != TGSI_FILE_TEMPORARY)) {
+ ERROR(cp, "invalid spill");
+ return;
+ }
+ else {
+ struct x86_reg oldval = get_reg_ptr(cp,
+ cp->xmm[idx].file,
+ cp->xmm[idx].idx);
+ /* The guard above already rejected clean registers, so this assert restates it. */
+ assert(cp->xmm[idx].dirty);
+ sse_movups(cp->func, oldval, x86_make_reg(file_XMM, idx));
+ cp->xmm[idx].dirty = 0;
+ }
+}
+/* Hand out the XMM register (0-7) with the smallest last_used value, spilling its old contents first if dirty. */
+struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp )
+{
+ unsigned i;
+ unsigned oldest = 0;
+
+ for (i = 0; i < 8; i++)
+ if (cp->xmm[i].last_used < cp->xmm[oldest].last_used)
+ oldest = i;
+
+ /* Need to write out the old value?
+ */
+ if (cp->xmm[oldest].dirty)
+ spill(cp, oldest);
+ /* The chosen register must not already carry the current cp->insn_counter stamp. */
+ assert(cp->xmm[oldest].last_used != cp->insn_counter);
+ /* Mark the register as owning no shader register and stamp it with cp->insn_counter. */
+ cp->xmm[oldest].file = TGSI_FILE_NULL;
+ cp->xmm[oldest].idx = 0;
+ cp->xmm[oldest].last_used = cp->insn_counter;
+ return x86_make_reg(file_XMM, oldest);
+}
+/* Reset the bookkeeping of XMM register idx (no owner, clean, last_used 0); nothing is written back to memory here. */
+void aos_release_xmm_reg( struct aos_compilation *cp,
+ unsigned idx )
+{
+ cp->xmm[idx].file = TGSI_FILE_NULL;
+ cp->xmm[idx].idx = 0;
+ cp->xmm[idx].dirty = 0;
+ cp->xmm[idx].last_used = 0;
+}
+/* Drop the XMM-resident copy of shader register (file, idx), if any, spilling it first when dirty. */
+static void invalidate_xmm( struct aos_compilation *cp,
+ unsigned file, unsigned idx )
+{
+ unsigned i;
+
+ /* Invalidate any old copy of this register in XMM0-7.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) {
+
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+
+ aos_release_xmm_reg(cp, i);
+ break;
+ }
+ }
+ /* Continue from where the first loop stopped: any further match is treated as a bookkeeping error. */
+ for (; i < 8; i++) {
+ if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) {
+ assert(0);
+ }
+ }
+}
+
+/* Record that XMM register reg now holds shader register (file, idx) with the given dirty flag, after invalidating any other cached copy. */
+void aos_adopt_xmm_reg( struct aos_compilation *cp,
+ struct x86_reg reg,
+ unsigned file,
+ unsigned idx,
+ unsigned dirty )
+{
+ if (reg.file != file_XMM) {
+ assert(0);
+ return;
+ }
+
+ invalidate_xmm(cp, file, idx);
+ cp->xmm[reg.idx].file = file;
+ cp->xmm[reg.idx].idx = idx;
+ cp->xmm[reg.idx].dirty = dirty;
+}
+
+
+/* Return the memory operand for (file, idx), first dropping any XMM-resident copy through invalidate_xmm(). */
+static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ invalidate_xmm( cp, file, idx );
+ return get_reg_ptr( cp, file, idx );
+}
+
+
+/* Resolve a TGSI destination register to a memory operand through
+ * aos_get_shader_reg_ptr(). Note - this pointer may alias those returned by get_arg_ptr().
+ */
+static struct x86_reg get_dst_ptr( struct aos_compilation *cp,
+ const struct tgsi_full_dst_register *dst )
+{
+ return aos_get_shader_reg_ptr( cp, dst->DstRegister.File, dst->DstRegister.Index );
+}
+
+
+
+
+
+/* Return an XMM reg if the argument is resident (refreshing its last_used
+ * stamp to cp->insn_counter), otherwise return a base+offset pointer to the saved value.
+ */
+struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ unsigned i;
+
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file &&
+ cp->xmm[i].idx == idx)
+ {
+ cp->xmm[i].last_used = cp->insn_counter;
+ return x86_make_reg(file_XMM, i);
+ }
+ }
+
+ /* If not found in the XMM register file, return an indirect
+ * reference to the in-memory copy:
+ */
+ return get_reg_ptr( cp, file, idx );
+}
+
+
+
+
+
+/* Shuffle arg0 into dst according to shuf. Emulate pshufd insn in regular SSE, if necessary:
+ * without SSE2, copy arg0 into dst (unless they are the same operand) and shufps dst with itself. */
+static void emit_pshufd( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg arg0,
+ ubyte shuf )
+{
+ if (cp->have_sse2) {
+ sse2_pshufd(cp->func, dst, arg0, shuf);
+ }
+ else {
+ if (!eq(dst, arg0))
+ sse_movups(cp->func, dst, arg0);
+
+ sse_shufps(cp->func, dst, dst, shuf);
+ }
+}
+
+
+
+
+/* Helper for writemask: dst = arg1 with one channel taken from arg0. Both are shuffled by shuf, movss copies
+ * lane X, then shuf is applied again (visible callers pass a shuf that swaps one channel with X). Returns TRUE. */
+static boolean emit_shuf_copy1( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg arg0,
+ struct x86_reg arg1,
+ ubyte shuf )
+{
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ sse_movups(cp->func, dst, arg1);
+ emit_pshufd(cp, dst, dst, shuf);
+ emit_pshufd(cp, tmp, arg0, shuf);
+
+ sse_movss(cp->func, dst, tmp);
+
+ emit_pshufd(cp, dst, dst, shuf);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ return TRUE;
+}
+
+
+/* Helper for writemask: shuffle arg1 into dst and arg0 into tmp by shuf, keep lanes X,Y of dst and take lanes Z,W
+ * from tmp (shufps), then apply shuf once more. Uses and releases one scratch XMM register. Returns TRUE. */
+static boolean emit_shuf_copy2( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg arg0,
+ struct x86_reg arg1,
+ ubyte shuf )
+{
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ emit_pshufd(cp, dst, arg1, shuf);
+ emit_pshufd(cp, tmp, arg0, shuf);
+
+ sse_shufps(cp->func, dst, tmp, SHUF(X, Y, Z, W));
+
+ emit_pshufd(cp, dst, dst, shuf);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ return TRUE;
+}
+
+#define SSE_SWIZZLE_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6))
+
+
+/* Locate a source register and perform any required (simple) swizzle.
+ *
+ * Just fail on complex swizzles at this point.
+ */
+static struct x86_reg fetch_src( struct aos_compilation *cp,
+ const struct tgsi_full_src_register *src )
+{
+ struct x86_reg arg0 = aos_get_shader_reg(cp,
+ src->SrcRegister.File,
+ src->SrcRegister.Index);
+ unsigned i;
+ unsigned swz = 0;
+ unsigned negs = 0;
+ unsigned abs = 0;
+
+ for (i = 0; i < 4; i++) {
+ unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i );
+ unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i );
+
+ switch (swizzle) {
+ case TGSI_EXTSWIZZLE_ZERO:
+ case TGSI_EXTSWIZZLE_ONE:
+ ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2");
+ break;
+
+ default:
+ swz |= (swizzle & 0x3) << (i * 2);
+ break;
+ }
+
+ switch (neg) {
+ case TGSI_UTIL_SIGN_TOGGLE:
+ negs |= (1<func, dst, arg0);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ arg0 = dst;
+ }
+
+ if (abs && abs != 0xf) {
+ ERROR(cp, "unsupported partial abs");
+ }
+
+ if (abs) {
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movups(cp->func, tmp, arg0);
+ sse_mulps(cp->func, tmp, neg);
+ sse_maxps(cp->func, dst, arg0);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ arg0 = dst;
+ }
+ }
+
+ return arg0;
+}
+/* Push one channel of a source operand onto the x87 stack, honouring the extended swizzle (ZERO/ONE constants) and the sign mode. */
+static void x87_fld_src( struct aos_compilation *cp,
+ const struct tgsi_full_src_register *src,
+ unsigned channel )
+{
+ struct x86_reg arg0 = aos_get_shader_reg_ptr(cp,
+ src->SrcRegister.File,
+ src->SrcRegister.Index);
+
+ unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel );
+ unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel );
+
+ switch (swizzle) {
+ case TGSI_EXTSWIZZLE_ZERO:
+ x87_fldz( cp->func );
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ x87_fld1( cp->func );
+ break;
+ /* Ordinary swizzle: load float number (swizzle & 3) of the register from its memory slot. */
+ default:
+ x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) );
+ break;
+ }
+ /* Apply the per-channel sign mode to the value just pushed. */
+
+ switch (neg) {
+ case TGSI_UTIL_SIGN_TOGGLE:
+ /* Flip the sign:
+ */
+ x87_fchs( cp->func );
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+
+ case TGSI_UTIL_SIGN_CLEAR:
+ x87_fabs( cp->func );
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ x87_fabs( cp->func );
+ x87_fchs( cp->func );
+ break;
+
+ default:
+ ERROR(cp, "unsupported sign-mode");
+ break;
+ }
+}
+
+
+/* Used to implement write masking: merge 'result' into the register
+ * named by 'reg', keeping channels outside its WriteMask at their old
+ * value. An empty mask emits nothing; a full mask makes 'result' the
+ * new dirty XMM copy of the register.
+ *
+ * This and most of the other instructions here would be easier to
+ * implement if there had been a translation to a 2 argument format
+ * (dst/arg0, arg1) at the shader level before emitting x86/sse code.
+ */
+static void store_dest( struct aos_compilation *cp,
+                        const struct tgsi_full_dst_register *reg,
+                        struct x86_reg result )
+{
+   if (reg->DstRegister.WriteMask == 0)
+   {
+      return;
+   }
+   else if (reg->DstRegister.WriteMask == TGSI_WRITEMASK_XYZW)
+   {
+      if (result.file == file_XMM) {
+         aos_adopt_xmm_reg(cp,
+                           result,
+                           reg->DstRegister.File,
+                           reg->DstRegister.Index,
+                           TRUE);
+      }
+      else {
+         struct x86_reg dst = aos_get_xmm_reg(cp);
+         aos_adopt_xmm_reg(cp,
+                           dst,
+                           reg->DstRegister.File,
+                           reg->DstRegister.Index,
+                           TRUE);
+         sse_movups(cp->func, dst, result);
+      }
+   }
+   else
+   {
+      /* Previous value of the dest register:
+       */
+      struct x86_reg old_dst = aos_get_shader_reg(cp,
+                                                  reg->DstRegister.File,
+                                                  reg->DstRegister.Index);
+
+
+      /* Alloc an xmm reg to hold the new value of the dest register:
+       */
+      struct x86_reg dst = aos_get_xmm_reg(cp);
+
+      aos_adopt_xmm_reg(cp,
+                        dst,
+                        reg->DstRegister.File,
+                        reg->DstRegister.Index,
+                        TRUE );
+
+      switch (reg->DstRegister.WriteMask) {
+      case TGSI_WRITEMASK_X:
+         if (result.file == file_XMM) {
+            sse_movups(cp->func, dst, old_dst);
+            sse_movss(cp->func, dst, result);
+         }
+         else {
+            struct x86_reg tmp = aos_get_xmm_reg(cp);
+            sse_movups(cp->func, dst, old_dst);
+            sse_movss(cp->func, tmp, result);
+            sse_movss(cp->func, dst, tmp);
+            aos_release_xmm_reg(cp, tmp.idx);
+         }
+         break;
+
+      case TGSI_WRITEMASK_XY: /* shufps keeps lanes X,Y of its first operand and takes Z,W from its second */
+         sse_movups(cp->func, dst, result);
+         sse_shufps(cp->func, dst, old_dst, SHUF(X, Y, Z, W));
+         break;
+
+      case TGSI_WRITEMASK_ZW:
+         sse_movups(cp->func, dst, old_dst);
+         sse_shufps(cp->func, dst, result, SHUF(X, Y, Z, W));
+         break;
+
+      case TGSI_WRITEMASK_YZW:
+         if (old_dst.file == file_XMM) {
+            sse_movups(cp->func, dst, result);
+            sse_movss(cp->func, dst, old_dst);
+         }
+         else {
+            struct x86_reg tmp = aos_get_xmm_reg(cp);
+            sse_movups(cp->func, dst, result);
+            sse_movss(cp->func, tmp, old_dst);
+            sse_movss(cp->func, dst, tmp);
+            aos_release_xmm_reg(cp, tmp.idx);
+         }
+         break;
+
+      case TGSI_WRITEMASK_Y: /* copy1: dst = last vector with the channel swapped against X taken from the first */
+         emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Y,X,Z,W));
+         break;
+
+      case TGSI_WRITEMASK_Z:
+         emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Z,Y,X,W));
+         break;
+
+      case TGSI_WRITEMASK_W:
+         emit_shuf_copy1(cp, dst, result, old_dst, SHUF(W,Y,Z,X));
+         break;
+
+      case TGSI_WRITEMASK_XZ: /* copy2: the two channels shuf places in lanes X,Y come from the last vector (result) */
+         emit_shuf_copy2(cp, dst, old_dst, result, SHUF(X,Z,Y,W));
+         break;
+
+      case TGSI_WRITEMASK_XW:
+         emit_shuf_copy2(cp, dst, old_dst, result, SHUF(X,W,Z,Y));
+         break;
+      case TGSI_WRITEMASK_YZ:
+         emit_shuf_copy2(cp, dst, old_dst, result, SHUF(Z,Y,X,W));
+         break;
+
+      case TGSI_WRITEMASK_YW:
+         emit_shuf_copy2(cp, dst, old_dst, result, SHUF(W,Y,Z,X));
+         break;
+
+      case TGSI_WRITEMASK_XZW: /* three-channel masks: start from result and restore the one unwritten channel */
+         emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Y,X,Z,W));
+         break;
+
+      case TGSI_WRITEMASK_XYW:
+         emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Z,Y,X,W));
+         break;
+
+      case TGSI_WRITEMASK_XYZ:
+         emit_shuf_copy1(cp, dst, old_dst, result, SHUF(W,Y,Z,X));
+         break;
+
+      default:
+         assert(0); /* not possible */
+         break;
+      }
+   }
+}
+
+
+static void x87_fst_or_nop( struct x86_function *func,
+ unsigned writemask,
+ unsigned channel,
+ struct x86_reg ptr )
+{
+ if (writemask & (1<DstRegister.WriteMask;
+
+ x87_fst_or_nop(cp->func, writemask, 0, ptr);
+ x87_fst_or_nop(cp->func, writemask, 1, ptr);
+ x87_fst_or_nop(cp->func, writemask, 2, ptr);
+ x87_fstp_or_pop(cp->func, writemask, 3, ptr);
+}
+
+/* Save current x87 state and put it into single precision mode.
+ * Currently emits nothing: the whole body is compiled out with #if 0. */
+static void save_fpu_state( struct aos_compilation *cp )
+{
+#if 0
+ x87_fnstcw( cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore)));
+ x87_fldcw( cp->func, );
+#endif
+}
+/* Counterpart of save_fpu_state(); also emits nothing while its body is under #if 0. */
+static void restore_fpu_state( struct aos_compilation *cp )
+{
+#if 0
+ x87_fnclex(cp->func);
+ x87_fldcw(cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore)));
+#endif
+}
+/* Placeholder for switching x87 rounding (RND_NEG_FPU in the disabled code); emits nothing because the body is under #if 0. */
+static void set_fpu_round_neg_inf( struct aos_compilation *cp )
+{
+#if 0
+ if (cp->fpucntl != RND_NEG_FPU) {
+ struct x86_reg regEDX = x86_make_reg(file_REG32, reg_DX);
+ struct arb_vp_machine *m = NULL;
+
+ cp->fpucntl = RND_NEG_FPU;
+ x87_fnclex(cp->func);
+ x87_fldcw(cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_rnd_neg)));
+ }
+#endif
+}
+/* Placeholder with an empty (#if 0) body; emits no code. */
+static void set_fpu_round_nearest( struct aos_compilation *cp )
+{
+#if 0
+#endif
+}
+
+/* Emit the x87 sequence that turns st(0) = a into 2^a, following the stack comments below. NOTE(review): set_fpu_round_neg_inf() is a stub here - confirm the rounding mode fprndint sees. */
+static void emit_x87_ex2( struct aos_compilation *cp )
+{
+ struct x86_reg st0 = x86_make_reg(file_x87, 0);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ struct x86_reg st3 = x86_make_reg(file_x87, 3);
+
+ set_fpu_round_neg_inf( cp );
+
+ x87_fld(cp->func, st0); /* a a */
+ x87_fprndint( cp->func ); /* int(a) a */
+ x87_fld(cp->func, st0); /* int(a) int(a) a */
+ x87_fstp(cp->func, st3); /* int(a) a int(a)*/
+ x87_fsubp(cp->func, st1); /* frac(a) int(a) */
+ x87_f2xm1(cp->func); /* (2^frac(a))-1 int(a)*/
+ x87_fld1(cp->func); /* 1 (2^frac(a))-1 int(a)*/
+ x87_faddp(cp->func, st1); /* 2^frac(a) int(a) */
+ x87_fscale(cp->func); /* 2^a */
+}
+
+
+
+/**
+ * The traditional instructions. All operate on internal registers
+ * and ignore write masks and swizzling issues.
+ */
+/* ABS: dst = max(arg0 * IMM_NEGS, arg0); this is |arg0| assuming IMM_NEGS holds -1.0 in every channel - TODO confirm. */
+static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+
+ sse_movups(cp->func, dst, arg0);
+ sse_mulps(cp->func, dst, neg);
+ sse_maxps(cp->func, dst, arg0);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+/* ADD: dst = arg0 + arg1 on all four channels (addps), then stored through the destination write mask. */
+static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+
+ sse_movups(cp->func, dst, arg0);
+ sse_addps(cp->func, dst, arg1);
+
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+/* COS: load channel 0 of source 0 onto the x87 stack, apply fcos, and store the value via x87_fstp_dest4(). */
+static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+ x87_fcos(cp->func);
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+}
+
+
+/* The dotproduct instructions don't really do that well in sse:
+ * DP3 sums the x, y and z products into lane X and broadcasts that lane before the masked store. */
+static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movups(cp->func, dst, arg0);
+ sse_mulps(cp->func, dst, arg1);
+
+ /* Now the hard bit: sum the first 3 values:
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+
+
+/* DP4: four-component dot product of arg0 and arg1, summed into lane X and broadcast before the masked store. */
+static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movups(cp->func, dst, arg0);
+ sse_mulps(cp->func, dst, arg1);
+
+ /* Now the hard bit: sum the values:
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addps(cp->func, dst, tmp); /* a*x+c*z, b*y+d*w, a*x+c*z, b*y+d*w */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+/* DPH: the DP3 sum of arg0 and arg1 plus the W channel of arg1, broadcast to all lanes before the masked store. */
+static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+ sse_movups(cp->func, dst, arg0);
+ sse_mulps(cp->func, dst, arg1);
+
+ /* Now the hard bit: sum the values (from DP3):
+ */
+ sse_movhlps(cp->func, tmp, dst);
+ sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */
+ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
+ sse_addss(cp->func, dst, tmp);
+ emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W));
+ sse_addss(cp->func, dst, tmp);
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+/* DST: per the dst[0..3] formulas below; each operand gets two of its channels replaced by IMM_ONES before the multiply. */
+static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+ struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+
+/* dst[0] = 1.0 * 1.0F; */
+/* dst[1] = arg0[1] * arg1[1]; */
+/* dst[2] = arg0[2] * 1.0; */
+/* dst[3] = 1.0 * arg1[3]; */
+
+ emit_shuf_copy2(cp, dst, arg0, ones, SHUF(X,W,Z,Y));
+ emit_shuf_copy2(cp, tmp, arg1, ones, SHUF(X,Z,Y,W));
+ sse_mulps(cp->func, dst, tmp);
+
+ aos_release_xmm_reg(cp, tmp.idx);
+ store_dest(cp, &op->FullDstRegisters[0], dst);
+ return TRUE;
+}
+/* LG2: log2 of channel 0 of source 0, computed with fyl2x against a pushed 1.0 and stored via x87_fstp_dest4(). */
+static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ x87_fld1(cp->func); /* 1 */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 1 */
+ x87_fyl2x(cp->func); /* log2(a0) */
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+}
+
+/* EX2: load channel 0 of source 0, run the emit_x87_ex2() sequence on it, and store the value via x87_fstp_dest4(). */
+static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+
+ emit_x87_ex2(cp);
+
+ x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+ return TRUE;
+}
+/* EXP: x87 sequence on channel 0 of source 0 that stores, per enabled write-mask bit, the values summarised at the end of the function. */
+static boolean emit_EXP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ struct x86_reg st0 = x86_make_reg(file_x87, 0);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ struct x86_reg st3 = x86_make_reg(file_x87, 3);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+
+ /* CAUTION: dst may alias arg0!
+ */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* arg0.x */
+ x87_fld(cp->func, st0); /* arg arg */
+
+ /* by default, fpu is setup to round-to-nearest. We want to
+ * change this now, and track the state through to the end of the
+ * generated function so that it isn't repeated unnecessarily.
+ * Alternately, could subtract .5 to get round to -inf behaviour.
+ */
+ set_fpu_round_neg_inf( cp );
+ x87_fprndint( cp->func ); /* flr(a) a */
+ x87_fld(cp->func, st0); /* flr(a) flr(a) a */
+ x87_fld1(cp->func); /* 1 floor(a) floor(a) a */
+ x87_fst_or_nop(cp->func, writemask, 3, dst); /* stack unchanged */
+
+ x87_fscale(cp->func); /* 2^floor(a) floor(a) a */
+ x87_fst(cp->func, st3); /* 2^floor(a) floor(a) a 2^floor(a)*/
+
+ x87_fstp_or_pop(cp->func, writemask, 0, dst); /* flr(a) a 2^flr(a) */
+
+ x87_fsubrp(cp->func, st1); /* frac(a) 2^flr(a) */
+
+ x87_fst_or_nop(cp->func, writemask, 1, dst); /* frac(a) 2^flr(a) */
+
+ x87_f2xm1(cp->func); /* (2^frac(a))-1 2^flr(a)*/
+ x87_fld1(cp->func); /* 1 (2^frac(a))-1 2^flr(a)*/
+ x87_faddp(cp->func, st1); /* 2^frac(a) 2^flr(a) */
+ x87_fmulp(cp->func, st1); /* 2^a */
+
+ x87_fstp_or_pop(cp->func, writemask, 2, dst);
+
+/* dst[0] = 2^floor(tmp); */
+/* dst[1] = frac(tmp); */
+/* dst[2] = 2^floor(tmp) * 2^frac(tmp); */
+/* dst[3] = 1.0F; */
+ return TRUE;
+}
+/* LOG: x87 sequence on |source 0 channel 0|; the stack comments on each step show which value each enabled dst channel receives. */
+static boolean emit_LOG( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ struct x86_reg st0 = x86_make_reg(file_x87, 0);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ struct x86_reg st2 = x86_make_reg(file_x87, 2);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+
+ /* CAUTION: dst may alias arg0!
+ */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* arg0.x */
+ x87_fabs(cp->func); /* |arg0.x| */
+ x87_fxtract(cp->func); /* mantissa(arg0.x), exponent(arg0.x) */
+ x87_fst(cp->func, st2); /* mantissa, exponent, mantissa */
+ x87_fld1(cp->func); /* 1, mantissa, exponent, mantissa */
+ x87_fyl2x(cp->func); /* log2(mantissa), exponent, mantissa */
+ x87_fadd(cp->func, st0, st1); /* e+l2(m), e, m */
+
+ x87_fstp_or_pop(cp->func, writemask, 2, dst); /* e, m */
+
+ x87_fld1(cp->func); /* 1, e, m */
+ x87_fsub(cp->func, st1, st0); /* 1, e-1, m */
+
+ x87_fstp_or_pop(cp->func, writemask, 3, dst); /* e-1,m */
+ x87_fstp_or_pop(cp->func, writemask, 0, dst); /* m */
+
+ x87_fadd(cp->func, st0, st0); /* 2m */
+
+ x87_fstp_or_pop( cp->func, writemask, 1, dst );
+
+ return TRUE;
+}
+
+static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_neg_inf( cp );
+
+ /* Load all sources first to avoid aliasing
+ */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<FullSrcRegisters[0], i);
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<func );
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_nearest( cp );
+
+ /* Load all sources first to avoid aliasing
+ */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<FullSrcRegisters[0], i);
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<func );
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ struct x86_reg st0 = x86_make_reg(file_x87, 0);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ int i;
+
+ set_fpu_round_neg_inf( cp );
+
+ /* suck all the source values onto the stack before writing out any
+ * dst, which may alias...
+ */
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<FullSrcRegisters[0], i);
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1<func, st0); /* a a */
+ x87_fprndint( cp->func ); /* flr(a) a */
+ x87_fsubrp(cp->func, st1); /* frc(a) */
+ x87_fstp(cp->func, x86_make_disp(dst, i*4));
+ }
+ }
+
+ return TRUE;
+}
+
+
+/* LIT: x87 implementation; writes 1.0 to dst.x/dst.w and, guarded by comparisons of arg0[0] and arg0[1] against zero, values to dst.y/dst.z. */
+static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ unsigned fixup1, fixup2;
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+
+
+ /* Load the interesting parts of arg0:
+ */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 3);
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 1);
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+
+
+ if (writemask & TGSI_WRITEMASK_XW) {
+ x87_fld1(cp->func);
+ x87_fst_or_nop(cp->func, writemask, 0, dst);
+ x87_fstp_or_pop(cp->func, writemask, 3, dst);
+ }
+
+ if (writemask & TGSI_WRITEMASK_YZ) {
+
+ /* Pre-zero destinations, may be overwritten later... fixme.
+ */
+ x87_fldz(cp->func);
+ x87_fst_or_nop(cp->func, writemask, 1, dst);
+ x87_fstp_or_pop(cp->func, writemask, 2, dst);
+
+
+ /* Check arg0[0]:
+ */
+ x87_fldz(cp->func); /* 0 a0 a1 a3 */
+ x87_fucomp(cp->func, st1); /* a0 a1 a3 */
+ x87_fnstsw(cp->func, cp->tmp_EAX);
+ x86_sahf(cp->func);
+ fixup1 = x86_jcc_forward(cp->func, cc_AE);
+
+ x87_fstp_or_pop(cp->func, writemask, 1, dst); /* a1 a3 */
+
+ /* Check arg0[1]:
+ */
+ x87_fldz(cp->func); /* 0 a1 a3 */
+ x87_fucomp(cp->func, st1); /* a1 a3 */
+ x87_fnstsw(cp->func, cp->tmp_EAX);
+ x86_sahf(cp->func);
+ fixup2 = x86_jcc_forward(cp->func, cc_AE);
+
+ /* Compute pow(a1, a3)
+ */
+ x87_fyl2x(cp->func); /* a3*log2(a1) */
+
+ emit_x87_ex2( cp ); /* 2^(a3*log2(a1)) */
+
+ x87_fstp_or_pop(cp->func, writemask, 2, dst);
+ /* NOTE(review): per the stack comments, both early-out jumps land here with values still on the x87 stack - confirm they get popped. */
+ /* Land jumps:
+ */
+ x86_fixup_fwd_jump(cp->func, fixup1);
+ x86_fixup_fwd_jump(cp->func, fixup2);
+ }
+
+ return TRUE;
+}
+
+
+
+/* MAX: copy the first source into a fresh xmm register, fold the
+ * second source in with maxps and store the result to the destination.
+ */
+static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg src1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   /* result = src0, then result = maxps(result, src1) */
+   sse_movups(cp->func, result, src0);
+   sse_maxps(cp->func, result, src1);
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+
+/* MIN: copy the first source into a fresh xmm register, fold the
+ * second source in with minps and store the result to the destination.
+ */
+static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg src1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   /* result = src0, then result = minps(result, src1) */
+   sse_movups(cp->func, result, src0);
+   sse_minps(cp->func, result, src1);
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+/* MOV: copy the source into a fresh xmm register and store that
+ * register to the destination.
+ */
+static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   sse_movups(cp->func, result, src0);
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+/* MUL: copy the first source into a fresh xmm register, multiply it by
+ * the second source with mulps and store the result to the destination.
+ */
+static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg src1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   /* result = src0, then result *= src1 */
+   sse_movups(cp->func, result, src0);
+   sse_mulps(cp->func, result, src1);
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+
+/* MAD: multiply-add.  A fresh xmm register receives the first source,
+ * is multiplied by the second (mulps) and has the third added (addps)
+ * before being stored to the destination.
+ */
+static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg src1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg src2 = fetch_src(cp, &op->FullSrcRegisters[2]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   /* result = src0 * src1 + src2 */
+   sse_movups(cp->func, result, src0);
+   sse_mulps(cp->func, result, src1);
+   sse_addps(cp->func, result, src2);
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+
+/* POW: x87 implementation working on the first component of each
+ * source.  The exponent is loaded before the base so that fyl2x
+ * produces a1*log2(a0); emit_x87_ex2 then raises two to that power and
+ * the value is handed to x87_fstp_dest4 for the destination.
+ */
+static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   /* stack after each step is shown on the right */
+   x87_fld_src(cp, &op->FullSrcRegisters[1], 0);   /* a1.x */
+   x87_fld_src(cp, &op->FullSrcRegisters[0], 0);   /* a0.x a1.x */
+   x87_fyl2x(cp->func);                            /* a1*log2(a0) */
+   emit_x87_ex2( cp );                             /* 2^(a1*log2(a0)) */
+
+   x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+
+   return TRUE;
+}
+
+
+/* RCP: scalar reciprocal of the source, replicated to all four lanes
+ * of the result with a shufps before it is stored to the destination.
+ */
+static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   if (!cp->have_sse2) {
+      /* No rcpss path: compute 1 / src explicitly */
+      struct x86_reg one = aos_get_internal(cp, IMM_ONES);
+      sse_movss(cp->func, result, one);
+      sse_divss(cp->func, result, src0);
+   }
+   else {
+      sse2_rcpss(cp->func, result, src0);
+      /* extend precision here...
+       */
+   }
+
+   /* Broadcast the scalar result across the register */
+   sse_shufps(cp->func, result, result, SHUF(X, X, X, X));
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+/* RSQ: scalar rsqrtss of the source, replicated to all four lanes of
+ * the result with a shufps before it is stored to the destination.
+ */
+static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   sse_rsqrtss(cp->func, result, src0);
+
+   /* Extend precision here...
+    */
+
+   /* Broadcast the scalar result across the register */
+   sse_shufps(cp->func, result, result, SHUF(X, X, X, X));
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+
+/* SGE: compare the two sources with cmpps/cc_NotLessThan and AND the
+ * resulting mask with the IMM_ONES internal, then store the result to
+ * the destination.
+ */
+static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg src1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+   struct x86_reg one = aos_get_internal(cp, IMM_ONES);
+
+   /* mask = !(src0 < src1); result = mask & ones */
+   sse_movups(cp->func, result, src0);
+   sse_cmpps(cp->func, result, src1, cc_NotLessThan);
+   sse_andps(cp->func, result, one);
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+/* SIN: x87 implementation.  Loads the first component of the source,
+ * applies fsin and hands the value to x87_fstp_dest4 for the
+ * destination.
+ */
+static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   x87_fld_src(cp, &op->FullSrcRegisters[0], 0);   /* a0.x */
+   x87_fsin(cp->func);                             /* sin(a0.x) */
+
+   x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+
+   return TRUE;
+}
+
+
+
+/* SLT: compare the two sources with cmpps/cc_LessThan and AND the
+ * resulting mask with the IMM_ONES internal, then store the result to
+ * the destination.
+ */
+static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg src1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+   struct x86_reg one = aos_get_internal(cp, IMM_ONES);
+
+   /* mask = (src0 < src1); result = mask & ones */
+   sse_movups(cp->func, result, src0);
+   sse_cmpps(cp->func, result, src1, cc_LessThan);
+   sse_andps(cp->func, result, one);
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+/* SUB: copy the first source into a fresh xmm register, subtract the
+ * second source with subps and store the result to the destination.
+ */
+static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg src1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+
+   /* result = src0, then result -= src1 */
+   sse_movups(cp->func, result, src0);
+   sse_subps(cp->func, result, src1);
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+
+/* XPD: cross product of the two sources.
+ *
+ *    dst[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]
+ *    dst[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]
+ *    dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]
+ *    dst[3] is undef
+ *
+ * Two scratch xmm registers are used and released again before the
+ * result is stored.
+ */
+static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+   struct x86_reg src0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg src1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg result = aos_get_xmm_reg(cp);
+   struct x86_reg scratch0 = aos_get_xmm_reg(cp);
+   struct x86_reg scratch1 = aos_get_xmm_reg(cp);
+
+   /* Could avoid the scratch registers if we overwrote the sources.
+    * Need a way to invalidate registers.  This will come with better
+    * analysis (liveness analysis) of the incoming program.
+    */
+
+   /* result = src0.yzx * src1.zxy */
+   emit_pshufd(cp, result, src0, SHUF(Y, Z, X, W));
+   emit_pshufd(cp, scratch1, src1, SHUF(Z, X, Y, W));
+   sse_mulps(cp->func, result, scratch1);
+
+   /* scratch0 = src0.zxy * src1.yzx */
+   emit_pshufd(cp, scratch0, src0, SHUF(Z, X, Y, W));
+   emit_pshufd(cp, scratch1, src1, SHUF(Y, Z, X, W));
+   sse_mulps(cp->func, scratch0, scratch1);
+
+   /* result -= scratch0 */
+   sse_subps(cp->func, result, scratch0);
+
+   aos_release_xmm_reg(cp, scratch0.idx);
+   aos_release_xmm_reg(cp, scratch1.idx);
+
+   store_dest(cp, &op->FullDstRegisters[0], result);
+
+   return TRUE;
+}
+
+
+
+/* Dispatch one TGSI instruction to its emit_XXX() code generator.
+ *
+ * Returns the generator's result, TRUE for TGSI_OPCODE_END, and FALSE
+ * for every opcode that has no generator here.
+ */
+static boolean
+emit_instruction( struct aos_compilation *cp,
+                  struct tgsi_full_instruction *inst )
+{
+   switch( inst->Instruction.Opcode ) {
+   /* Data movement and simple arithmetic */
+   case TGSI_OPCODE_MOV:          return emit_MOV(cp, inst);
+   case TGSI_OPCODE_ADD:          return emit_ADD(cp, inst);
+   case TGSI_OPCODE_SUB:          return emit_SUB(cp, inst);
+   case TGSI_OPCODE_MUL:          return emit_MUL(cp, inst);
+   case TGSI_OPCODE_MAD:          return emit_MAD(cp, inst);
+   case TGSI_OPCODE_ABS:          return emit_ABS(cp, inst);
+   case TGSI_OPCODE_MIN:          return emit_MIN(cp, inst);
+   case TGSI_OPCODE_MAX:          return emit_MAX(cp, inst);
+
+   /* Comparisons */
+   case TGSI_OPCODE_SLT:          return emit_SLT(cp, inst);
+   case TGSI_OPCODE_SGE:          return emit_SGE(cp, inst);
+
+   /* Vector products */
+   case TGSI_OPCODE_DP3:          return emit_DP3(cp, inst);
+   case TGSI_OPCODE_DP4:          return emit_DP4(cp, inst);
+   case TGSI_OPCODE_DPH:          return emit_DPH(cp, inst);
+   case TGSI_OPCODE_DST:          return emit_DST(cp, inst);
+   case TGSI_OPCODE_CROSSPRODUCT: return emit_XPD(cp, inst);
+
+   /* Reciprocals, exponentials, logarithms, trig */
+   case TGSI_OPCODE_RCP:          return emit_RCP(cp, inst);
+   case TGSI_OPCODE_RSQ:          return emit_RSQ(cp, inst);
+   case TGSI_OPCODE_EXP:          return emit_EXP(cp, inst);
+   case TGSI_OPCODE_LOG:          return emit_LOG(cp, inst);
+   case TGSI_OPCODE_EXPBASE2:     return emit_EX2(cp, inst);
+   case TGSI_OPCODE_LOGBASE2:     return emit_LG2(cp, inst);
+   case TGSI_OPCODE_POWER:        return emit_POW(cp, inst);
+   case TGSI_OPCODE_COS:          return emit_COS(cp, inst);
+   case TGSI_OPCODE_SIN:          return emit_SIN(cp, inst);
+
+   /* Rounding */
+   case TGSI_OPCODE_FRAC:         return emit_FRC(cp, inst);
+   case TGSI_OPCODE_FLOOR:        return emit_FLR(cp, inst);
+   case TGSI_OPCODE_ROUND:        return emit_RND(cp, inst);
+
+   /* Lighting */
+   case TGSI_OPCODE_LIT:          return emit_LIT(cp, inst);
+
+   /* Nothing to emit for the end marker */
+   case TGSI_OPCODE_END:
+      return TRUE;
+
+   /* No generator yet: emit_LERP / emit_CLAMP are not hooked up */
+   case TGSI_OPCODE_LERP:
+   case TGSI_OPCODE_CLAMP:
+   default:
+      return FALSE;
+   }
+}
+
+/* Copy one TGSI immediate into the next free row of the machine's
+ * immediate table.
+ *
+ * Returns FALSE, which makes the caller abandon code generation, when
+ * the table (MAX_IMMEDIATES rows of four floats) is already full;
+ * TRUE otherwise.
+ */
+static boolean note_immediate( struct aos_compilation *cp,
+                               struct tgsi_full_immediate *imm )
+{
+   const unsigned pos = cp->num_immediates;
+   unsigned nr = imm->Immediate.Size;
+   unsigned j;
+
+   /* Refuse to write past the end of machine->immediate[] */
+   if (pos >= MAX_IMMEDIATES)
+      return FALSE;
+
+   /* Each row only has room for four floats, so never copy more.
+    * NOTE(review): confirm whether Immediate.Size also counts the
+    * immediate's header token; if it does, a vec4 reports a size of
+    * five and the unclamped loop would spill into the next row.
+    */
+   if (nr > 4)
+      nr = 4;
+
+   cp->num_immediates = pos + 1;
+
+   for (j = 0; j < nr; j++) {
+      cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float;
+   }
+
+   return TRUE;
+}
+
+
+
+
+/* Walk the shader's tokens once and record, for every OUTPUT register
+ * that appears as a destination, the ordinal (counting instruction
+ * tokens only, from zero) of the last instruction writing it.  The
+ * result is left in cp->output_last_write[].
+ */
+static void find_last_write_outputs( struct aos_compilation *cp )
+{
+   struct tgsi_parse_context parse;
+   unsigned insn_nr = 0;
+   unsigned d;
+
+   tgsi_parse_init( &parse, cp->vaos->base.vs->state.tokens );
+
+   while (!tgsi_parse_end_of_tokens( &parse )) {
+      const struct tgsi_full_instruction *inst;
+
+      tgsi_parse_token( &parse );
+
+      /* Declarations, immediates etc. neither count nor write outputs */
+      if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+         inst = &parse.FullToken.FullInstruction;
+
+         for (d = 0; d < TGSI_FULL_MAX_DST_REGISTERS; d++) {
+            if (inst->FullDstRegisters[d].DstRegister.File != TGSI_FILE_OUTPUT)
+               continue;
+
+            cp->output_last_write[inst->FullDstRegisters[d].DstRegister.Index] = insn_nr;
+         }
+
+         insn_nr++;
+      }
+   }
+
+   tgsi_parse_free( &parse );
+}
+
+
+/* Positions of the generated function's arguments, as passed to
+ * x86_fn_arg() below.
+ */
+#define ARG_VARIENT 1
+#define ARG_START_ELTS 2
+#define ARG_COUNT 3
+#define ARG_OUTBUF 4
+
+
+/* Generate the x86 code for one flavour of the vertex program.
+ *
+ * linear selects which of the two functions is built: TRUE fills
+ * varient->func[0], where the index register is incremented by one per
+ * vertex; FALSE fills varient->func[1], where it is advanced by four
+ * bytes per vertex (see the "either start+i or &elt[i]" note on
+ * idx_EBX).
+ *
+ * The generated function has the shape:
+ *    load args; if (count == 0) return;
+ *    do { fetch inputs; run shader; emit outputs; advance; } while (--count);
+ *
+ * Returns FALSE if any input format, token or output format could not
+ * be translated, or if cp.error was raised; TRUE otherwise.  The parse
+ * context is released on both paths.
+ */
+static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
+ boolean linear )
+{
+ struct tgsi_parse_context parse;
+ struct aos_compilation cp;
+ unsigned fixup, label;
+
+ tgsi_parse_init( &parse, varient->base.vs->state.tokens );
+
+ memset(&cp, 0, sizeof(cp));
+
+ cp.insn_counter = 1;
+ cp.vaos = varient;
+ /* NOTE(review): SSE2 is assumed unconditionally here rather than
+ * being detected.
+ */
+ cp.have_sse2 = 1;
+ cp.func = &varient->func[ linear ? 0 : 1 ];
+
+ /* Fixed general-purpose register assignment used by all emitters */
+ cp.tmp_EAX = x86_make_reg(file_REG32, reg_AX);
+ cp.idx_EBX = x86_make_reg(file_REG32, reg_BX);
+ cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
+ cp.machine_EDX = x86_make_reg(file_REG32, reg_DX);
+ cp.count_ESI = x86_make_reg(file_REG32, reg_SI);
+
+ x86_init_func(cp.func);
+
+ find_last_write_outputs(&cp);
+
+ /* Saved here, restored by the matching pops before the ret below */
+ x86_push(cp.func, cp.idx_EBX);
+ x86_push(cp.func, cp.count_ESI);
+
+
+ /* Load arguments into regs:
+ */
+ x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_VARIENT));
+ x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS));
+ x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT));
+ x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF));
+
+
+ /* Compare count to zero and possibly bail.
+ */
+ x86_xor(cp.func, cp.tmp_EAX, cp.tmp_EAX);
+ x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX);
+ fixup = x86_jcc_forward(cp.func, cc_E);
+
+ /* Dig out the machine pointer from inside the varient arg
+ */
+ x86_mov(cp.func, cp.machine_EDX,
+ x86_make_disp(cp.machine_EDX,
+ Offset( struct draw_vs_varient_aos_sse, machine )));
+
+ /* Emitted after the early-out jump, so the zero-count path lands
+ * beyond restore_fpu_state() and touches no FPU state.
+ */
+ save_fpu_state( &cp );
+
+ /* Note address for loop jump
+ */
+ label = x86_get_label(cp.func);
+ {
+ /* Fetch inputs... TODO: fetch lazily...
+ */
+ if (!aos_fetch_inputs( &cp, linear ))
+ goto fail;
+
+ /* Emit the shader:
+ */
+ while( !tgsi_parse_end_of_tokens( &parse ) && !cp.error )
+ {
+ tgsi_parse_token( &parse );
+
+ /* Token types other than these two are skipped */
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
+ goto fail;
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (!emit_instruction( &cp, &parse.FullToken.FullInstruction ))
+ goto fail;
+ break;
+ }
+
+ /* Incremented once per token of any type, not per instruction */
+ cp.insn_counter++;
+ debug_printf("\n");
+ }
+
+ if (cp.error)
+ goto fail;
+
+ /* Emit output... TODO: do this eagerly after the last write to a
+ * given output.
+ */
+ if (!aos_emit_outputs( &cp ))
+ goto fail;
+
+
+ /* Next vertex:
+ */
+ x86_lea(cp.func,
+ cp.outbuf_ECX,
+ x86_make_disp(cp.outbuf_ECX,
+ cp.vaos->base.key.output_stride));
+
+ /* Incr index
+ */
+ if (linear) {
+ x86_inc(cp.func, cp.idx_EBX);
+ }
+ else {
+ x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4));
+ }
+
+ }
+ /* decr count, loop if not zero
+ */
+ x86_dec(cp.func, cp.count_ESI);
+/* x86_test(cp.func, cp.count_ESI, cp.count_ESI); */
+ x86_jcc(cp.func, cc_NZ, label);
+
+ restore_fpu_state(&cp);
+
+ /* Land forward jump here:
+ */
+ x86_fixup_fwd_jump(cp.func, fixup);
+
+ /* Exit mmx state?
+ */
+ if (cp.func->need_emms)
+ mmx_emms(cp.func);
+
+ x86_pop(cp.func, cp.count_ESI);
+ x86_pop(cp.func, cp.idx_EBX);
+
+ x86_ret(cp.func);
+
+ tgsi_parse_free( &parse );
+ return !cp.error;
+
+ fail:
+ tgsi_parse_free( &parse );
+ return FALSE;
+}
+
+
+
+/* Bind a vertex buffer: every shader input whose key element reads
+ * from buffer 'buf' gets its machine input pointer set to ptr plus the
+ * element's offset, and its input stride set to 'stride'.
+ */
+static void vaos_set_buffer( struct draw_vs_varient *varient,
+                             unsigned buf,
+                             const void *ptr,
+                             unsigned stride )
+{
+   struct draw_vs_varient_aos_sse *self = (struct draw_vs_varient_aos_sse *)varient;
+   unsigned attr;
+
+   for (attr = 0; attr < self->base.vs->info.num_inputs; attr++) {
+      /* Inputs sourced from other buffers are left untouched */
+      if (self->base.key.element[attr].in.buffer != buf)
+         continue;
+
+      self->machine->attrib[attr].input_ptr =
+         ((char *)ptr + self->base.key.element[attr].in.offset);
+      self->machine->attrib[attr].input_stride = stride;
+   }
+}
+
+
+/* Destroy a varient: free the machine state if one was allocated,
+ * release both generated functions, then free the varient itself.
+ */
+static void vaos_destroy( struct draw_vs_varient *varient )
+{
+   struct draw_vs_varient_aos_sse *self = (struct draw_vs_varient_aos_sse *)varient;
+   unsigned f;
+
+   if (self->machine)
+      align_free( self->machine );
+
+   /* func[0], then func[1] */
+   for (f = 0; f < 2; f++)
+      x86_release_func( &self->func[f] );
+
+   FREE(self);
+}
+
+/* run_elts entry point: forwards straight to the generated
+ * element-indexed function with the arguments unchanged.
+ */
+static void vaos_run_elts( struct draw_vs_varient *varient,
+                           const unsigned *elts,
+                           unsigned count,
+                           void *output_buffer )
+{
+   struct draw_vs_varient_aos_sse *self = (struct draw_vs_varient_aos_sse *)varient;
+
+   self->gen_run_elts( varient, elts, count, output_buffer );
+}
+
+/* run_linear entry point: forwards straight to the generated linear
+ * function with the arguments unchanged.
+ */
+static void vaos_run_linear( struct draw_vs_varient *varient,
+                             unsigned start,
+                             unsigned count,
+                             void *output_buffer )
+{
+   struct draw_vs_varient_aos_sse *self = (struct draw_vs_varient_aos_sse *)varient;
+
+   self->gen_run_linear( varient, start, count, output_buffer );
+}
+
+
+/* Copy the shader's constants into the machine's private constant
+ * table.
+ *
+ * The number of vec4 rows copied is the shader's highest constant
+ * index plus one, limited to MAX_CONSTANTS because machine->constant
+ * has only that many rows; copying more would overwrite the tables
+ * that follow it in struct aos_machine.  Nothing is copied when the
+ * shader uses no constants.
+ *
+ * NOTE(review): a shader that uses more than MAX_CONSTANTS constants
+ * will now read zeros for the excess ones instead of corrupting
+ * memory.  Such shaders should really be rejected at varient creation,
+ * or the table replaced by a pointer as the fixme in struct
+ * aos_machine suggests.
+ */
+static void vaos_set_constants( struct draw_vs_varient *varient,
+                                const float (*constants)[4] )
+{
+   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+   unsigned nr_consts = vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+   if (nr_consts > MAX_CONSTANTS)
+      nr_consts = MAX_CONSTANTS;
+
+   /* Avoid handing memcpy a possibly-NULL source for an empty copy */
+   if (nr_consts == 0)
+      return;
+
+   memcpy(vaos->machine->constant,
+          constants,
+          nr_consts * 4 * sizeof(float));
+}
+
+
+/* Create an AOS/SSE varient for the given shader and key.
+ *
+ * Allocates the varient and its 16-byte aligned machine state, builds
+ * both the linear (func[0]) and the element-indexed (func[1]) code
+ * paths and fetches their entry points.
+ *
+ * Returns the new varient's base struct, or NULL if an allocation
+ * fails, the shader cannot be translated, or an entry point cannot be
+ * obtained.  Everything acquired so far is released on failure.
+ */
+static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
+                                                 const struct draw_vs_varient_key *key )
+{
+   struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse);
+
+   /* Nothing to clean up yet.  Jumping to the fail label from here
+    * would dereference the NULL pointer.
+    */
+   if (!vaos)
+      return NULL;
+
+   vaos->base.key = *key;
+   vaos->base.vs = vs;
+   vaos->base.set_input = vaos_set_buffer;
+   vaos->base.set_constants = vaos_set_constants;
+   vaos->base.destroy = vaos_destroy;
+   vaos->base.run_linear = vaos_run_linear;
+   vaos->base.run_elts = vaos_run_elts;
+
+   vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
+   if (!vaos->machine)
+      goto fail;
+
+   memset(vaos->machine, 0, sizeof(struct aos_machine));
+
+   tgsi_dump(vs->state.tokens, 0);
+
+   if (!build_vertex_program( vaos, TRUE ))
+      goto fail;
+
+   if (!build_vertex_program( vaos, FALSE ))
+      goto fail;
+
+   vaos->gen_run_linear = (vsv_run_linear_func)x86_get_func(&vaos->func[0]);
+   if (!vaos->gen_run_linear)
+      goto fail;
+
+   vaos->gen_run_elts = (vsv_run_elts_func)x86_get_func(&vaos->func[1]);
+   if (!vaos->gen_run_elts)
+      goto fail;
+
+   return &vaos->base;
+
+ fail:
+   /* vaos is known to be non-NULL on every path that reaches here */
+   if (vaos->machine)
+      align_free( vaos->machine );
+
+   x86_release_func( &vaos->func[0] );
+   x86_release_func( &vaos->func[1] );
+
+   FREE(vaos);
+
+   return NULL;
+}
+
+
+/* Public constructor: try the AOS/SSE varient first.  If that fails,
+ * assert (so debug builds notice) and fall back to the generic
+ * varient.
+ */
+struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs,
+                                                 const struct draw_vs_varient_key *key )
+{
+   struct draw_vs_varient *sse_varient = varient_aos_sse( vs, key );
+
+   if (sse_varient != NULL)
+      return sse_varient;
+
+   assert(0);
+   return draw_vs_varient_generic( vs, key );
+}
+
+
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
new file mode 100644
index 0000000000..1d8a055a90
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -0,0 +1,181 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell
+ */
+
+#ifndef DRAW_VS_AOS_H
+#define DRAW_VS_AOS_H
+
+
+struct tgsi_token;
+struct x86_function;
+
+#include "pipe/p_state.h"
+#include "rtasm/rtasm_x86sse.h"
+
+
+
+
+
+/* Component indices, used with SHUF() and the [4] arrays below */
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+/* Row counts of the per-file tables in struct aos_machine */
+#define MAX_INPUTS PIPE_MAX_ATTRIBS
+#define MAX_OUTPUTS PIPE_MAX_ATTRIBS
+#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */
+#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */
+#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */
+
+/* Must be greater than the highest IMM_* index defined in this header
+ * (IMM_255 == 4).  The previous value of 4 left no row for IMM_255,
+ * assuming aos_get_internal() indexes the internal[] table by IMM_*.
+ */
+#define MAX_INTERNALS 8
+
+#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
+
+/* This is the temporary storage used by all the aos_sse vs varients.
+ * Create one per context and reuse by passing a pointer in at
+ * vs_varient creation??
+ *
+ * Generated code addresses the fields through Offset() displacements
+ * from the machine pointer, so the layout is part of the contract with
+ * the code generators.  Instances are allocated 16-byte aligned by the
+ * varient constructor.
+ */
+struct aos_machine {
+ /* One vec4 row per register of each file */
+ float input [MAX_INPUTS ][4];
+ float output [MAX_OUTPUTS ][4];
+ float temp [MAX_TEMPS ][4];
+ float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
+ float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
+ /* presumably the IMM_* helper constants, one row each -- confirm */
+ float internal [MAX_INTERNALS ][4];
+
+ /* presumably x87 control words for the two rounding modes -- confirm */
+ unsigned fpu_round_nearest;
+ unsigned fpu_round_neg_inf;
+
+ /* Per-attribute vertex buffer binding */
+ struct {
+ const void *input_ptr;
+ unsigned input_stride;
+
+ unsigned output_offset;
+ } attrib[PIPE_MAX_ATTRIBS];
+};
+
+
+
+
+/* State of one code-generation run (one of a varient's two functions).
+ */
+struct aos_compilation {
+ struct x86_function *func; /* function being emitted into */
+ struct draw_vs_varient_aos_sse *vaos; /* varient being built */
+
+ unsigned insn_counter;
+ unsigned num_immediates; /* rows of machine->immediate[] used so far */
+
+ /* Bookkeeping for the eight xmm registers.
+ * NOTE(review): presumably which shader register (file, idx) each one
+ * currently holds, whether it needs writing back, and when it was
+ * last used -- confirm against the register allocator.
+ */
+ struct {
+ unsigned idx:16;
+ unsigned file:8;
+ unsigned dirty:8;
+ unsigned last_used;
+ } xmm[8];
+
+
+ boolean input_fetched[PIPE_MAX_ATTRIBS];
+ unsigned output_last_write[PIPE_MAX_ATTRIBS]; /* filled by find_last_write_outputs() */
+
+ boolean have_sse2;
+ boolean error; /* set by the ERROR() macro */
+ short fpucntl;
+
+ /* these are actually known values, but putting them in a struct
+ * like this is helpful to keep them in sync across the file.
+ */
+ struct x86_reg tmp_EAX;
+ struct x86_reg idx_EBX; /* either start+i or &elt[i] */
+ struct x86_reg outbuf_ECX;
+ struct x86_reg machine_EDX;
+ struct x86_reg count_ESI; /* decrements to zero */
+};
+
+/* xmm register allocation.  Callers in the code generators obtain a
+ * scratch register with aos_get_xmm_reg() and hand it back by index
+ * with aos_release_xmm_reg().
+ */
+struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
+void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
+
+/* NOTE(review): presumably records that 'reg' now holds shader
+ * register (file, idx) -- confirm against the definition.
+ */
+void aos_adopt_xmm_reg( struct aos_compilation *cp,
+ struct x86_reg reg,
+ unsigned file,
+ unsigned idx,
+ unsigned dirty );
+
+/* Operand for shader register (file, idx).  The result is not
+ * necessarily an xmm register: callers check .file against file_XMM.
+ */
+struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx );
+
+/* Emit code loading every shader input; FALSE on an unhandled format. */
+boolean aos_fetch_inputs( struct aos_compilation *cp,
+ boolean linear );
+
+/* Emit code storing the shader outputs; FALSE on an unhandled format. */
+boolean aos_emit_outputs( struct aos_compilation *cp );
+
+
+/* Selectors accepted by aos_get_internal(); the trailing comment on
+ * each gives the vec4 value it stands for.
+ */
+#define IMM_ONES 0 /* 1, 1,1,1 */
+#define IMM_NEGS 1 /* 1,-1,0,0 */
+#define IMM_IDENTITY 2 /* 0, 0,0,1 */
+#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
+#define IMM_255 4 /* 255, 255, 255, 255 */
+
+struct x86_reg aos_get_internal( struct aos_compilation *cp,
+ unsigned imm );
+
+
+/* Report a translation failure: print the calling function's name and
+ * msg, flag the compilation as failed and assert.  The cp argument is
+ * parenthesized so that any pointer expression can be passed.
+ */
+#define ERROR(cp, msg)                                                  \
+do {                                                                    \
+   debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \
+   (cp)->error = 1;                                                     \
+   assert(0);                                                           \
+} while (0)
+
+
+
+
+
+
+/* A vertex shader varient implemented with runtime-generated x86 code.
+ * 'base' comes first and the varient callbacks cast their
+ * draw_vs_varient pointer back to this struct.
+ */
+struct draw_vs_varient_aos_sse {
+ struct draw_vs_varient base;
+ struct draw_context *draw;
+
+#if 0
+ struct {
+ const void *ptr;
+ unsigned stride;
+ } attrib[PIPE_MAX_ATTRIBS];
+#endif
+
+ struct aos_machine *machine; /* XXX: temporarily unshared */
+
+ /* Entry points of the generated code, obtained from func[0] and
+ * func[1] respectively.
+ */
+ vsv_run_linear_func gen_run_linear;
+ vsv_run_elts_func gen_run_elts;
+
+
+ /* [0] is the linear variant, [1] the element-indexed one */
+ struct x86_function func[2];
+};
+
+
+
+#endif
+
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
new file mode 100644
index 0000000000..72b2b3d11d
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -0,0 +1,314 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+#ifdef PIPE_ARCH_X86
+
+/* Note - don't yet have to worry about interacting with the code in
+ * draw_vs_aos.c as there is no intermingling of generated code...
+ * That may have to change, we'll see.
+ */
+/* Load a four-float attribute: one unaligned 16-byte move from src_ptr
+ * into data.
+ */
+static void emit_load_R32G32B32A32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movups(cp->func, data, src_ptr);
+}
+
+/* Load a three-float attribute without reading a fourth float from
+ * memory.  The lane comments assume IMM_IDENTITY is (0,0,0,1), as its
+ * definition comment states, and that SHUF() lists the selected lanes
+ * in X..W order.
+ */
+static void emit_load_R32G32B32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ /* data = (z, 0, 0, 0) */
+ sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
+ /* data = (z, 0, 0, 1): upper half taken from the identity constant */
+ sse_shufps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
+ /* data = (0, 0, z, 1) */
+ sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
+ /* data = (x, y, z, 1) */
+ sse_movlps(cp->func, data, src_ptr);
+}
+
+/* Load a two-float attribute: start from the IMM_IDENTITY constant,
+ * then overwrite the low two lanes from memory, giving (x, y, 0, 1)
+ * for an identity of (0,0,0,1).
+ */
+static void emit_load_R32G32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movups(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
+ sse_movlps(cp->func, data, src_ptr);
+}
+
+
+/* Load a one-float attribute: movss loads the scalar into the low
+ * lane, then the IMM_IDENTITY constant is OR-ed in.  Presumably movss
+ * from memory zeroes the upper lanes, giving (x, 0, 0, 1) -- this
+ * relies on the identity's X lane being all-zero bits.
+ */
+static void emit_load_R32( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movss(cp->func, data, src_ptr);
+ sse_orps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
+}
+
+
+/* Load four unsigned-normalized bytes as four floats: the bytes are
+ * widened by two byte interleaves, converted from integer to float,
+ * and scaled by IMM_INV_255.
+ *
+ * NOTE(review): the interleaves use IMM_IDENTITY as their second
+ * operand; this looks like it relies on the low bytes of that constant
+ * being zero so that each source byte is zero-extended to a dword --
+ * confirm.
+ */
+static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
+ struct x86_reg data,
+ struct x86_reg src_ptr )
+{
+ sse_movss(cp->func, data, src_ptr);
+ sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ));
+ sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ));
+ sse2_cvtdq2ps(cp->func, data, data);
+ sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
+}
+
+
+
+/* Emit code leaving the address of attribute 'a' for the current
+ * vertex in 'src':
+ *
+ *    src = attrib[a].input_stride * elt + attrib[a].input_ptr
+ *
+ * Both attrib fields are read from the aos_machine that 'machine'
+ * points to.
+ */
+static void get_src_ptr( struct x86_function *func,
+                         struct x86_reg src,
+                         struct x86_reg machine,
+                         struct x86_reg elt,
+                         unsigned a )
+{
+   /* Memory operands for this attribute's base pointer and stride */
+   struct x86_reg base_operand =
+      x86_make_disp(machine, Offset(struct aos_machine, attrib[a].input_ptr));
+   struct x86_reg stride_operand =
+      x86_make_disp(machine, Offset(struct aos_machine, attrib[a].input_stride));
+
+   x86_mov(func, src, stride_operand);    /* src  = stride */
+   x86_imul(func, src, elt);              /* src *= elt    */
+   x86_add(func, src, base_operand);      /* src += base   */
+}
+
+
+/* Extended swizzles? Maybe later.
+ *
+ * For now a swizzle is a single shufps of dest with src using the
+ * given shuffle immediate.
+ */
+static void emit_swizzle( struct aos_compilation *cp,
+ struct x86_reg dest,
+ struct x86_reg src,
+ unsigned shuffle )
+{
+ sse_shufps(cp->func, dest, src, shuffle);
+}
+
+
+/* Emit code fetching shader input 'idx' for the current vertex into a
+ * freshly allocated xmm register, which is adopted as that INPUT
+ * register.
+ *
+ * In linear mode the index register is used as the element number
+ * itself, otherwise it is dereferenced to read the element number.
+ *
+ * Returns FALSE (after raising ERROR) for an unhandled input format.
+ */
+static boolean load_input( struct aos_compilation *cp,
+                           unsigned idx,
+                           boolean linear )
+{
+   unsigned fmt = cp->vaos->base.key.element[idx].in.format;
+   struct x86_reg addr = cp->tmp_EAX;
+   struct x86_reg reg = aos_get_xmm_reg(cp);
+
+   /* Figure out source pointer address:
+    */
+   get_src_ptr(cp->func,
+               addr,
+               cp->machine_EDX,
+               linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
+               idx);
+
+   /* From here on use the register as a memory operand */
+   addr = x86_deref(addr);
+
+   aos_adopt_xmm_reg( cp, reg, TGSI_FILE_INPUT, idx, TRUE );
+
+   switch (fmt) {
+   case PIPE_FORMAT_R32_FLOAT:
+      emit_load_R32(cp, reg, addr);
+      return TRUE;
+
+   case PIPE_FORMAT_R32G32_FLOAT:
+      emit_load_R32G32(cp, reg, addr);
+      return TRUE;
+
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      emit_load_R32G32B32(cp, reg, addr);
+      return TRUE;
+
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      emit_load_R32G32B32A32(cp, reg, addr);
+      return TRUE;
+
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      /* Same load as RGBA, followed by a lane swizzle */
+      emit_load_R8G8B8A8_UNORM(cp, reg, addr);
+      emit_swizzle(cp, reg, reg, SHUF(Z,Y,X,W));
+      return TRUE;
+
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      emit_load_R8G8B8A8_UNORM(cp, reg, addr);
+      return TRUE;
+
+   default:
+      break;
+   }
+
+   ERROR(cp, "unhandled input format");
+   return FALSE;
+}
+
+
+/* Emit the loads for every shader input, in index order.  Each
+ * successful load bumps the instruction counter.  Stops and returns
+ * FALSE at the first input that cannot be loaded.
+ */
+boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
+{
+   unsigned attr = 0;
+
+   while (attr < cp->vaos->base.vs->info.num_inputs) {
+      if (!load_input( cp, attr, linear ))
+         return FALSE;
+
+      cp->insn_counter++;
+      debug_printf("\n");
+
+      attr++;
+   }
+
+   return TRUE;
+}
+
+
+
+
+
+
+
+/* Store four floats: one unaligned 16-byte move of dataXMM to dst_ptr.
+ */
+static void emit_store_R32G32B32A32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movups(cp->func, dst_ptr, dataXMM);
+}
+
+/* Store three floats: the low two lanes with movlps, then lane Z is
+ * broadcast within dataXMM and stored as a scalar at byte offset 8.
+ * The contents of dataXMM are clobbered by the broadcast.
+ */
+static void emit_store_R32G32B32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movlps(cp->func, dst_ptr, dataXMM);
+ sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
+ sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
+}
+
+/* Store two floats: the low two lanes of dataXMM with movlps.
+ */
+static void emit_store_R32G32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movlps(cp->func, dst_ptr, dataXMM);
+}
+
+/* Store one float: the low lane of dataXMM with movss.
+ */
+static void emit_store_R32( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_movss(cp->func, dst_ptr, dataXMM);
+}
+
+
+
+/* Store four floats as four unsigned-normalized bytes: scale by
+ * IMM_255, convert to 32-bit integers, pack down to words and then to
+ * bytes, and write the low dword to dst_ptr.  dataXMM is modified in
+ * place by every step.
+ */
+static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
+ struct x86_reg dst_ptr,
+ struct x86_reg dataXMM )
+{
+ sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
+ sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
+ sse2_packssdw(cp->func, dataXMM, dataXMM);
+ sse2_packuswb(cp->func, dataXMM, dataXMM);
+ sse_movss(cp->func, dst_ptr, dataXMM);
+}
+
+
+
+
+
+/* Emit the store of dataXMM to ptr in the given output format.
+ *
+ * Some formats modify dataXMM in place (the BGRA swizzle and the
+ * destructive store helpers), so the register's contents must not be
+ * relied on afterwards.
+ *
+ * Returns FALSE (after raising ERROR) for an unhandled output format.
+ */
+static boolean emit_output( struct aos_compilation *cp,
+                            struct x86_reg ptr,
+                            struct x86_reg dataXMM,
+                            unsigned format )
+{
+   switch (format) {
+   case PIPE_FORMAT_R32_FLOAT:
+      emit_store_R32(cp, ptr, dataXMM);
+      return TRUE;
+
+   case PIPE_FORMAT_R32G32_FLOAT:
+      emit_store_R32G32(cp, ptr, dataXMM);
+      return TRUE;
+
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      emit_store_R32G32B32(cp, ptr, dataXMM);
+      return TRUE;
+
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      emit_store_R32G32B32A32(cp, ptr, dataXMM);
+      return TRUE;
+
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      /* Swizzle the lanes first, then store as RGBA */
+      emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
+      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+      return TRUE;
+
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
+      return TRUE;
+
+   default:
+      break;
+   }
+
+   ERROR(cp, "unhandled output format");
+   return FALSE;
+}
+
+
+
+/* Emit the stores writing every shader output of the current vertex to
+ * the output buffer, in the format and at the offset given by the
+ * varient key.
+ *
+ * The loop is bounded by the shader's output count.  It was previously
+ * bounded by the input count, which drops outputs when a shader has
+ * more outputs than inputs and emits unwritten ones in the opposite
+ * case.
+ * NOTE(review): this assumes key.element[i].out describes shader
+ * output i for every i below num_outputs -- confirm against the code
+ * that builds the key.
+ *
+ * Returns FALSE if an output format is not handled.
+ */
+boolean aos_emit_outputs( struct aos_compilation *cp )
+{
+   unsigned i;
+
+   for (i = 0; i < cp->vaos->base.vs->info.num_outputs; i++) {
+      unsigned format = cp->vaos->base.key.element[i].out.format;
+      unsigned offset = cp->vaos->base.key.element[i].out.offset;
+
+      struct x86_reg data = aos_get_shader_reg( cp,
+                                                TGSI_FILE_OUTPUT,
+                                                i );
+
+      /* The store helpers need the value in an xmm register */
+      if (data.file != file_XMM) {
+         struct x86_reg tmp = aos_get_xmm_reg( cp );
+         sse_movups(cp->func, tmp, data);
+         data = tmp;
+      }
+
+      if (!emit_output( cp,
+                        x86_make_disp( cp->outbuf_ECX, offset ),
+                        data,
+                        format ))
+         return FALSE;
+
+      /* emit_output may have clobbered the register; give it back */
+      aos_release_xmm_reg( cp, data.idx );
+
+      cp->insn_counter++;
+      debug_printf("\n");
+   }
+
+   return TRUE;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index df94a7e0c7..0581c3042f 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -157,6 +157,7 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.draw = draw;
vs->base.create_varient = draw_vs_varient_generic;
+// vs->base.create_varient = draw_vs_varient_aos_sse;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
--
cgit v1.2.3
From 030af06691bc5bc82ca141a576da7a2edffe9d1c Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 20:14:55 +0100
Subject: rtasm: add x87 instructions and debug-check for x87 stack usage
---
src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 120 +++++++++++++++++++++++++++++
src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 20 ++++-
2 files changed, 138 insertions(+), 2 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 68ac91ed13..a2e8af343b 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -986,6 +986,26 @@ void sse2_movd( struct x86_function *p,
/***********************************************************************
* x87 instructions
*/
+/* Bookkeeping for an emitted instruction that pops the x87 register
+ * stack: lower the tracked depth, assert it did not go negative, and
+ * print the new depth to the debug output.
+ */
+static void note_x87_pop( struct x86_function *p )
+{
+   p->x87_stack -= 1;
+   assert(0 <= p->x87_stack);
+   debug_printf("\nstack: %d\n", p->x87_stack);
+}
+
+/* Bookkeeping for an emitted instruction that pushes onto the x87
+ * register stack: raise the tracked depth, assert it stays at or below
+ * 7, and print the new depth to the debug output.
+ */
+static void note_x87_push( struct x86_function *p )
+{
+   p->x87_stack += 1;
+   assert(7 >= p->x87_stack);
+   debug_printf("\nstack: %d\n", p->x87_stack);
+}
+
+/* Debug check: assert that the tracked x87 stack depth of the function
+ * being assembled is zero, i.e. every push recorded so far has been
+ * matched by a recorded pop.  Emits no code.
+ */
+void x87_assert_stack_empty( struct x86_function *p )
+{
+ assert (p->x87_stack == 0);
+}
+
+
void x87_fist( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@@ -998,6 +1018,7 @@ void x87_fistp( struct x86_function *p, struct x86_reg dst )
DUMP_R( dst );
emit_1ub(p, 0xdb);
emit_modrm_noreg(p, 3, dst);
+ note_x87_pop(p);
}
void x87_fild( struct x86_function *p, struct x86_reg arg )
@@ -1005,12 +1026,14 @@ void x87_fild( struct x86_function *p, struct x86_reg arg )
DUMP_R( arg );
emit_1ub(p, 0xdf);
emit_modrm_noreg(p, 0, arg);
+ note_x87_push(p);
}
void x87_fldz( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xee);
+ note_x87_push(p);
}
@@ -1027,18 +1050,21 @@ void x87_fld1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xe8);
+ note_x87_push(p);
}
void x87_fldl2e( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xea);
+ note_x87_push(p);
}
void x87_fldln2( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xed);
+ note_x87_push(p);
}
void x87_fwait( struct x86_function *p )
@@ -1059,6 +1085,49 @@ void x87_fclex( struct x86_function *p )
x87_fnclex(p);
}
+/* Emit FCMOVB st(0), st(i) -- bytes DA C0+i.  Conditional move of st(i)
+ * into st(0) if below (CF set).  'arg' must be an x87 stack register.
+ * The tracked stack depth is not changed.
+ */
+void x87_fcmovb( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xda, 0xc0+arg.idx);
+}
+
+/* Emit FCMOVE st(0), st(i) -- bytes DA C8+i.  Conditional move of st(i)
+ * into st(0) if equal (ZF set).  'arg' must be an x87 stack register.
+ * The tracked stack depth is not changed.
+ */
+void x87_fcmove( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xda, 0xc8+arg.idx);
+}
+
+/* Emit FCMOVBE st(0), st(i) -- bytes DA D0+i.  Conditional move of st(i)
+ * into st(0) if below or equal (CF or ZF set).  'arg' must be an x87
+ * stack register.  The tracked stack depth is not changed.
+ */
+void x87_fcmovbe( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xda, 0xd0+arg.idx);
+}
+
+/* Emit FCMOVNB st(0), st(i) -- bytes DB C0+i.  Conditional move of st(i)
+ * into st(0) if not below (CF clear).  'arg' must be an x87 stack
+ * register.  The tracked stack depth is not changed.
+ */
+void x87_fcmovnb( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xdb, 0xc0+arg.idx);
+}
+
+/* Emit FCMOVNE st(0), st(i) -- bytes DB C8+i.  Conditional move of st(i)
+ * into st(0) if not equal (ZF clear).  'arg' must be an x87 stack
+ * register.  The tracked stack depth is not changed.
+ */
+void x87_fcmovne( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xdb, 0xc8+arg.idx);
+}
+
+/* Emit FCMOVNBE st(0), st(i) -- bytes DB D0+i.  Conditional move of
+ * st(i) into st(0) if neither below nor equal (CF and ZF both clear).
+ * 'arg' must be an x87 stack register.  The tracked stack depth is not
+ * changed.
+ */
+void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg )
+{
+ DUMP_R( arg );
+ assert(arg.file == file_x87);
+ emit_2ub(p, 0xdb, 0xd0+arg.idx);
+}
+
+
static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
unsigned char dst0ub0,
@@ -1146,6 +1215,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc8+dst.idx);
+ note_x87_pop(p);
}
void x87_fsubp( struct x86_function *p, struct x86_reg dst )
@@ -1154,6 +1224,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe8+dst.idx);
+ note_x87_pop(p);
}
void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
@@ -1162,6 +1233,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xe0+dst.idx);
+ note_x87_pop(p);
}
void x87_faddp( struct x86_function *p, struct x86_reg dst )
@@ -1170,6 +1242,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xc0+dst.idx);
+ note_x87_pop(p);
}
void x87_fdivp( struct x86_function *p, struct x86_reg dst )
@@ -1178,6 +1251,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf8+dst.idx);
+ note_x87_pop(p);
}
void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
@@ -1186,6 +1260,13 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
assert(dst.file == file_x87);
assert(dst.idx >= 1);
emit_2ub(p, 0xde, 0xf0+dst.idx);
+ note_x87_pop(p);
+}
+
+/* Emit FTST -- bytes D9 E4: compare st(0) against 0.0 and report the
+ * result in the x87 status-word condition codes.  Nothing is pushed or
+ * popped, so the tracked stack depth is left alone.
+ */
+void x87_ftst( struct x86_function *p )
+{
+ DUMP();
+ emit_2ub(p, 0xd9, 0xe4);
}
void x87_fucom( struct x86_function *p, struct x86_reg arg )
@@ -1200,12 +1281,15 @@ void x87_fucomp( struct x86_function *p, struct x86_reg arg )
DUMP_R( arg );
assert(arg.file == file_x87);
emit_2ub(p, 0xdd, 0xe8+arg.idx);
+ note_x87_pop(p);
}
void x87_fucompp( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xda, 0xe9);
+ note_x87_pop(p); /* pop twice */
+ note_x87_pop(p); /* pop twice */
}
void x87_fxch( struct x86_function *p, struct x86_reg arg )
@@ -1287,6 +1371,7 @@ void x87_fyl2x( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf1);
+ note_x87_pop(p);
}
/* st1 = st1 * log2(st0 + 1.0);
@@ -1298,6 +1383,7 @@ void x87_fyl2xp1( struct x86_function *p )
{
DUMP();
emit_2ub(p, 0xd9, 0xf9);
+ note_x87_pop(p);
}
@@ -1310,6 +1396,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg )
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 0, arg);
}
+ note_x87_push(p);
}
void x87_fst( struct x86_function *p, struct x86_reg dst )
@@ -1332,8 +1419,15 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst )
emit_1ub(p, 0xd9);
emit_modrm_noreg(p, 3, dst);
}
+ note_x87_pop(p);
+}
+
+/* Discard the top of the x87 stack by emitting a store-and-pop of st(0)
+ * onto itself.  The stack-depth bookkeeping is done inside x87_fstp().
+ */
+void x87_fpop( struct x86_function *p )
+{
+ x87_fstp( p, x86_make_reg( file_x87, 0 ));
}
+
void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@@ -1345,6 +1439,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst )
}
}
+
void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
DUMP_R( dst );
@@ -1354,6 +1449,20 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst )
emit_1ub(p, 0xd8);
emit_modrm_noreg(p, 3, dst);
}
+ note_x87_pop(p);
+}
+
+/* Emit FCOMI st(0), st(i) -- bytes DB F0+i: compare st(0) with st(i) and
+ * set ZF/PF/CF in EFLAGS directly (no fnstsw/sahf needed afterwards).
+ * Nothing is popped.
+ *
+ * The instruction only exists in register form, so 'arg' must be an x87
+ * stack register; this is asserted the same way the other register-only
+ * x87 emitters in this file do it.
+ */
+void x87_fcomi( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xdb, 0xf0+arg.idx);
+}
+
+/* Emit FCOMIP st(0), st(i) -- bytes DF F0+i: compare st(0) with st(i),
+ * set ZF/PF/CF in EFLAGS, then pop the x87 stack.
+ *
+ * The first opcode byte must be 0xDF: 0xDB F0+i is the non-popping FCOMI,
+ * which would leave the real stack one deeper than the depth recorded by
+ * note_x87_pop() below.
+ *
+ * The instruction only exists in register form, so 'arg' must be an x87
+ * stack register.
+ */
+void x87_fcomip( struct x86_function *p, struct x86_reg arg )
+{
+   DUMP_R( arg );
+   assert(arg.file == file_x87);
+   emit_2ub(p, 0xdf, 0xf0+arg.idx);
+   note_x87_pop(p);
}
@@ -1372,6 +1481,17 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
}
+/* Emit a store of the x87 control word to 'dst'.
+ *
+ * 'dst' is asserted to be in file_REG32; it is encoded through
+ * emit_modrm_noreg() with /7 as the opcode extension of D9.
+ *
+ * NOTE(review): a 0x9B (WAIT) byte is emitted first, which makes the
+ * sequence the waiting FSTCW form rather than the no-wait FNSTCW the
+ * function name suggests -- confirm whether the prefix is wanted.
+ */
+void x87_fnstcw( struct x86_function *p, struct x86_reg dst )
+{
+ DUMP_R( dst );
+ assert(dst.file == file_REG32);
+
+ emit_1ub(p, 0x9b); /* WAIT -- needed? */
+ emit_1ub(p, 0xd9);
+ emit_modrm_noreg(p, 7, dst);
+}
+
+
/***********************************************************************
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 1e02c6e73b..9f7e31e055 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -41,8 +41,11 @@ struct x86_function {
unsigned size;
unsigned char *store;
unsigned char *csr;
- unsigned stack_offset;
- int need_emms;
+
+ unsigned stack_offset:16;
+ unsigned need_emms:8;
+ int x87_stack:8;
+
unsigned char error_overflow[4];
};
@@ -229,13 +232,23 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
+void x87_assert_stack_empty( struct x86_function *p );
+
void x87_f2xm1( struct x86_function *p );
void x87_fabs( struct x86_function *p );
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_faddp( struct x86_function *p, struct x86_reg dst );
void x87_fchs( struct x86_function *p );
void x87_fclex( struct x86_function *p );
+void x87_fcmovb( struct x86_function *p, struct x86_reg src );
+void x87_fcmovbe( struct x86_function *p, struct x86_reg src );
+void x87_fcmove( struct x86_function *p, struct x86_reg src );
+void x87_fcmovnb( struct x86_function *p, struct x86_reg src );
+void x87_fcmovnbe( struct x86_function *p, struct x86_reg src );
+void x87_fcmovne( struct x86_function *p, struct x86_reg src );
void x87_fcom( struct x86_function *p, struct x86_reg dst );
+void x87_fcomi( struct x86_function *p, struct x86_reg dst );
+void x87_fcomip( struct x86_function *p, struct x86_reg dst );
void x87_fcomp( struct x86_function *p, struct x86_reg dst );
void x87_fcos( struct x86_function *p );
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
@@ -255,6 +268,7 @@ void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fmulp( struct x86_function *p, struct x86_reg dst );
void x87_fnclex( struct x86_function *p );
void x87_fprndint( struct x86_function *p );
+void x87_fpop( struct x86_function *p );
void x87_fscale( struct x86_function *p );
void x87_fsin( struct x86_function *p );
void x87_fsincos( struct x86_function *p );
@@ -265,11 +279,13 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubp( struct x86_function *p, struct x86_reg dst );
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
+void x87_ftst( struct x86_function *p );
void x87_fxch( struct x86_function *p, struct x86_reg dst );
void x87_fxtract( struct x86_function *p );
void x87_fyl2x( struct x86_function *p );
void x87_fyl2xp1( struct x86_function *p );
void x87_fwait( struct x86_function *p );
+void x87_fnstcw( struct x86_function *p, struct x86_reg dst );
void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
void x87_fucompp( struct x86_function *p );
void x87_fucomp( struct x86_function *p, struct x86_reg arg );
--
cgit v1.2.3
From 889473b3f5a216bd753c357974d6bae29fe3c41d Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 20:28:56 +0100
Subject: draw: add viewport to varient state
---
.../auxiliary/draw/draw_pt_fetch_shade_emit.c | 9 +++-
src/gallium/auxiliary/draw/draw_vs.h | 8 +++-
src/gallium/auxiliary/draw/draw_vs_aos.c | 50 ++++++++++++++++++++++
src/gallium/auxiliary/draw/draw_vs_aos.h | 9 +++-
src/gallium/auxiliary/draw/draw_vs_sse.c | 4 +-
src/gallium/auxiliary/draw/draw_vs_varient.c | 10 +++++
6 files changed, 84 insertions(+), 6 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 74945dcfe9..984fbb6767 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -95,10 +95,14 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
+ fse->key.output_stride = vinfo->size * 4;
fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */
num_vs_inputs); /* inputs - fetch from api format */
- fse->key.output_stride = vinfo->size * 4;
+ fse->key.viewport = 1;
+ fse->key.clip = 0;
+ fse->key.pad = 0;
+
memset(fse->key.element, 0,
fse->key.nr_elements * sizeof(fse->key.element[0]));
@@ -211,6 +215,9 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
fse->active->set_constants( fse->active,
(const float (*)[4])draw->pt.user.constants );
+ fse->active->set_viewport( fse->active,
+ &draw->viewport );
+
//return TRUE;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 5a8d0da06d..ff3e19b2a8 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -58,7 +58,10 @@ struct draw_vs_element {
struct draw_vs_varient_key {
unsigned output_stride;
- unsigned nr_elements;
+ unsigned nr_elements:16;
+ unsigned viewport:1;
+ unsigned clip:1;
+ unsigned pad:14;
struct draw_vs_element element[PIPE_MAX_ATTRIBS];
};
@@ -88,6 +91,9 @@ struct draw_vs_varient {
void (*set_constants)( struct draw_vs_varient *,
const float (*constants)[4] );
+ void (*set_viewport)( struct draw_vs_varient *,
+ const struct pipe_viewport_state * );
+
void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
unsigned start,
unsigned count,
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 620f5e3592..b8e66e8b78 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1401,6 +1401,37 @@ emit_instruction( struct aos_compilation *cp,
}
}
+
+/**
+ * Emit the viewport transform for shader output 0:
+ *
+ *    out0 = out0 * machine->scale + machine->translate
+ *
+ * The scale and translate vectors are read from the aos_machine at run
+ * time through the machine pointer register.  The result register is
+ * then adopted as the (dirty) home of output 0.
+ *
+ * \return always TRUE.
+ */
+static boolean emit_viewport( struct aos_compilation *cp )
+{
+   struct x86_reg position = aos_get_shader_reg(cp, TGSI_FILE_OUTPUT, 0);
+   struct x86_reg vp_scale =
+      x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, scale));
+   struct x86_reg vp_translate =
+      x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, translate));
+
+   /* The arithmetic below needs its destination in an XMM register. */
+   if (position.file != file_XMM) {
+      struct x86_reg loaded = aos_get_xmm_reg(cp);
+      sse_movups(cp->func, loaded, position);
+      position = loaded;
+   }
+
+   sse_mulps(cp->func, position, vp_scale);
+   sse_addps(cp->func, position, vp_translate);
+
+   aos_adopt_xmm_reg( cp, position, TGSI_FILE_OUTPUT, 0, TRUE );
+   return TRUE;
+}
+
+
static boolean note_immediate( struct aos_compilation *cp,
struct tgsi_full_immediate *imm )
{
@@ -1540,6 +1571,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
if (cp.error)
goto fail;
+ if (cp.vaos->base.key.viewport) {
+ emit_viewport(&cp);
+ }
+
/* Emit output... TODO: do this eagerly after the last write to a
* given output.
*/
@@ -1665,11 +1700,25 @@ static void vaos_set_constants( struct draw_vs_varient *varient,
}
+/* draw_vs_varient::set_viewport hook for the AOS/SSE varient: copy the
+ * four-float viewport scale and translate vectors into the aos_machine,
+ * where the code produced by emit_viewport() reads them.
+ */
+static void vaos_set_viewport( struct draw_vs_varient *varient,
+                               const struct pipe_viewport_state *viewport )
+{
+   struct draw_vs_varient_aos_sse *self =
+      (struct draw_vs_varient_aos_sse *)varient;
+   struct aos_machine *machine = self->machine;
+
+   memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
+   memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
+}
+
+
+
static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key )
{
struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse);
+ if (key->clip)
+ return NULL;
+
if (!vaos)
goto fail;
@@ -1677,6 +1726,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
vaos->base.vs = vs;
vaos->base.set_input = vaos_set_buffer;
vaos->base.set_constants = vaos_set_constants;
+ vaos->base.set_viewport = vaos_set_viewport;
vaos->base.destroy = vaos_destroy;
vaos->base.run_linear = vaos_run_linear;
vaos->base.run_elts = vaos_run_elts;
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 1d8a055a90..16fef6451c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -68,8 +68,13 @@ struct aos_machine {
float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
float internal [MAX_INTERNALS ][4];
- unsigned fpu_round_nearest;
- unsigned fpu_round_neg_inf;
+ float scale[4]; /* viewport */
+ float translate[4]; /* viewport */
+
+ ushort fpu_round_nearest;
+ ushort fpu_round_neg_inf;
+ ushort fpu_restore;
+ ushort fpucntl; /* one of FPU_* above */
struct {
const void *input_ptr;
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0581c3042f..7781782ae8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -156,8 +156,8 @@ draw_create_vs_sse(struct draw_context *draw,
tgsi_scan_shader(templ->tokens, &vs->base.info);
vs->base.draw = draw;
- vs->base.create_varient = draw_vs_varient_generic;
-// vs->base.create_varient = draw_vs_varient_aos_sse;
+ vs->base.create_varient = draw_vs_varient_aos_sse;
+// vs->base.create_varient = draw_vs_varient_generic;
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index d27b0f6187..f6f621a748 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -167,6 +167,12 @@ static void vsvg_run_linear( struct draw_vs_varient *varient,
+
+/* set_viewport hook for the generic varient.  Intentionally empty: both
+ * arguments are ignored.  draw_vs_varient_generic() refuses keys that
+ * request the viewport transform, so there is no viewport state to keep.
+ */
+static void vsvg_set_viewport( struct draw_vs_varient *varient,
+ const struct pipe_viewport_state *viewport )
+{
+}
+
static void vsvg_destroy( struct draw_vs_varient *varient )
{
FREE(varient);
@@ -179,6 +185,9 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
unsigned i;
struct translate_key fetch, emit;
+ if (key->viewport || key->clip)
+ return NULL;
+
struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
if (vsvg == NULL)
return NULL;
@@ -187,6 +196,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
vsvg->base.vs = vs;
vsvg->base.set_input = vsvg_set_input;
vsvg->base.set_constants = vsvg_set_constants;
+ vsvg->base.set_viewport = vsvg_set_viewport;
vsvg->base.run_elts = vsvg_run_elts;
vsvg->base.run_linear = vsvg_run_linear;
vsvg->base.destroy = vsvg_destroy;
--
cgit v1.2.3
From 194a7be28f6eed502f2475d9a637cb3610ca75f6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 20:31:08 +0100
Subject: draw: fix vs aos internal/machine state
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 59 ++++++++++++++++++++++++++++++--
src/gallium/auxiliary/draw/draw_vs_aos.h | 9 +++--
2 files changed, 63 insertions(+), 5 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index b8e66e8b78..67761f881d 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -83,7 +83,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx]));
case AOS_FILE_INTERNAL:
- return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));
+ return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
default:
ERROR(cp, "unknown reg file");
@@ -97,9 +97,63 @@ struct x86_reg aos_get_internal( struct aos_compilation *cp,
{
return get_reg_ptr( cp,
AOS_FILE_INTERNAL,
- imm + 1 );
+ imm );
+}
+
+/* Bit layout of the x87 FPU control word, used by init_internals() to
+ * build the values loaded with fldcw:
+ *   bits 0-5   one bit per exception class (these are the mask bits)
+ *   bits 8-9   precision control
+ *   bits 10-11 rounding control
+ *   bit  12    infinity control
+ */
+#define X87_CW_EXCEPTION_INV_OP (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
+#define X87_CW_EXCEPTION_PRECISION (1<<5)
+#define X87_CW_PRECISION_SINGLE (0<<8)
+#define X87_CW_PRECISION_RESERVED (1<<8)
+#define X87_CW_PRECISION_DOUBLE (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
+#define X87_CW_PRECISION_MASK (3<<8)
+#define X87_CW_ROUND_NEAREST (0<<10)
+#define X87_CW_ROUND_DOWN (1<<10)
+#define X87_CW_ROUND_UP (2<<10)
+#define X87_CW_ROUND_ZERO (3<<10)
+#define X87_CW_ROUND_MASK (3<<10)
+#define X87_CW_INFINITY (1<<12)
+
+/**
+ * Fill in the parts of the aos_machine that the generated code treats as
+ * read-only constants:
+ *  - the internal[] vectors (all ones, all minus ones, (0,0,0,1),
+ *    1/255 and 255 replicated across the four channels);
+ *  - the two x87 control words loaded when switching rounding mode.
+ *
+ * Both control words set all six exception bits, bit 6 and extended
+ * double precision; they differ only in the rounding field.  The
+ * round-to-nearest word is asserted to equal 0x37f.
+ */
+static void init_internals( struct aos_machine *machine )
+{
+   const float one_over_255 = 1.0f/255.0f;
+   const float two_five_five = 255.0f;
+
+   /* Everything the two control words have in common. */
+   const int cw_common = (X87_CW_EXCEPTION_INV_OP |
+                          X87_CW_EXCEPTION_DENORM_OP |
+                          X87_CW_EXCEPTION_ZERO_DIVIDE |
+                          X87_CW_EXCEPTION_OVERFLOW |
+                          X87_CW_EXCEPTION_UNDERFLOW |
+                          X87_CW_EXCEPTION_PRECISION |
+                          (1<<6) |
+                          X87_CW_PRECISION_DOUBLE_EXT);
+
+   ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
+   ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
+   ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
+   ASSIGN_4V(machine->internal[IMM_INV_255],
+             one_over_255, one_over_255, one_over_255, one_over_255);
+   ASSIGN_4V(machine->internal[IMM_255],
+             two_five_five, two_five_five, two_five_five, two_five_five);
+
+   machine->fpu_rnd_nearest = cw_common | X87_CW_ROUND_NEAREST;
+
+   assert(machine->fpu_rnd_nearest == 0x37f);
+
+   machine->fpu_rnd_neg_inf = cw_common | X87_CW_ROUND_DOWN;
+}
+
static void spill( struct aos_compilation *cp, unsigned idx )
{
if (!cp->xmm[idx].dirty ||
@@ -1736,6 +1790,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
goto fail;
memset(vaos->machine, 0, sizeof(struct aos_machine));
+ init_internals(vaos->machine);
tgsi_dump(vs->state.tokens, 0);
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 16fef6451c..c2afd4e9a0 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -52,10 +52,13 @@ struct x86_function;
#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */
#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */
#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */
-#define MAX_INTERNALS 4
+#define MAX_INTERNALS 8
#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
+#define FPU_RND_NEG 1
+#define FPU_RND_NEAREST 2
+
/* This is the temporary storage used by all the aos_sse vs varients.
* Create one per context and reuse by passing a pointer in at
* vs_varient creation??
@@ -71,8 +74,8 @@ struct aos_machine {
float scale[4]; /* viewport */
float translate[4]; /* viewport */
- ushort fpu_round_nearest;
- ushort fpu_round_neg_inf;
+ ushort fpu_rnd_nearest;
+ ushort fpu_rnd_neg_inf;
ushort fpu_restore;
ushort fpucntl; /* one of FPU_* above */
--
cgit v1.2.3
From 2302a5d3c1ea2c682dfc034012a054b8327a81de Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 20:32:43 +0100
Subject: draw: fix fpu control word manipulations
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 33 +++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 67761f881d..e736990acc 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -690,41 +690,47 @@ static void x87_fstp_dest4( struct aos_compilation *cp,
x87_fstp_or_pop(cp->func, writemask, 3, ptr);
}
+#define FPU_MANIP 1
/* Save current x87 state and put it into single precision mode.
*/
static void save_fpu_state( struct aos_compilation *cp )
{
-#if 0
- x87_fnstcw( cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore)));
- x87_fldcw( cp->func, );
+#if FPU_MANIP
+ x87_fnstcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_restore)));
#endif
}
static void restore_fpu_state( struct aos_compilation *cp )
{
-#if 0
+#if FPU_MANIP
x87_fnclex(cp->func);
- x87_fldcw(cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore)));
+ x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_restore)));
#endif
}
static void set_fpu_round_neg_inf( struct aos_compilation *cp )
{
-#if 0
- if (cp->fpucntl != RND_NEG_FPU) {
- struct x86_reg regEDX = x86_make_reg(file_REG32, reg_DX);
- struct arb_vp_machine *m = NULL;
-
- cp->fpucntl = RND_NEG_FPU;
+#if FPU_MANIP
+ if (cp->fpucntl != FPU_RND_NEG) {
+ cp->fpucntl = FPU_RND_NEG;
x87_fnclex(cp->func);
- x87_fldcw(cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_rnd_neg)));
+ x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_rnd_neg_inf)));
}
#endif
}
static void set_fpu_round_nearest( struct aos_compilation *cp )
{
-#if 0
+#if FPU_MANIP
+ if (cp->fpucntl != FPU_RND_NEAREST) {
+ cp->fpucntl = FPU_RND_NEAREST;
+ x87_fnclex(cp->func);
+ x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, fpu_rnd_nearest)));
+ }
#endif
}
@@ -1590,6 +1596,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
Offset( struct draw_vs_varient_aos_sse, machine )));
save_fpu_state( &cp );
+ set_fpu_round_nearest( &cp );
/* Note address for loop jump
*/
--
cgit v1.2.3
From 0a7a0d79f64de9794878c42bc5b79a04772d7ed8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 20:34:52 +0100
Subject: draw: fix x87_ex2 and partially fix lit insn
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 112 +++++++++++++++----------------
1 file changed, 56 insertions(+), 56 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index e736990acc..a365d456d1 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -735,23 +735,26 @@ static void set_fpu_round_nearest( struct aos_compilation *cp )
}
-static void emit_x87_ex2( struct aos_compilation *cp )
+static void x87_emit_ex2( struct aos_compilation *cp )
{
struct x86_reg st0 = x86_make_reg(file_x87, 0);
struct x86_reg st1 = x86_make_reg(file_x87, 1);
- struct x86_reg st3 = x86_make_reg(file_x87, 3);
+ int stack = cp->func->x87_stack;
set_fpu_round_neg_inf( cp );
- x87_fld(cp->func, st0); /* a a */
- x87_fprndint( cp->func ); /* int(a) a */
- x87_fld(cp->func, st0); /* int(a) int(a) a */
- x87_fstp(cp->func, st3); /* int(a) a int(a)*/
- x87_fsubp(cp->func, st1); /* frac(a) int(a) */
- x87_f2xm1(cp->func); /* (2^frac(a))-1 int(a)*/
- x87_fld1(cp->func); /* 1 (2^frac(a))-1 int(a)*/
- x87_faddp(cp->func, st1); /* 2^frac(a) int(a) */
- x87_fscale(cp->func); /* 2^a */
+ x87_fld(cp->func, st0); /* a a */
+ x87_fld(cp->func, st0); /* a a a */
+ x87_fprndint( cp->func ); /* flr(a) a a*/
+ x87_fsubp(cp->func, st1); /* frac(a) a */
+ x87_f2xm1(cp->func); /* (2^frac(a))-1 a */
+ x87_fld1(cp->func); /* 1 (2^frac(a))-1 a */
+ x87_faddp(cp->func, st1); /* 2^frac(a) a */
+ x87_fscale(cp->func); /* 2^a a */
+ x87_fstp(cp->func, st1);
+
+ assert( stack == cp->func->x87_stack);
+
}
@@ -907,9 +910,7 @@ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_inst
static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
-
- emit_x87_ex2(cp);
-
+ x87_emit_ex2(cp);
x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
return TRUE;
}
@@ -1084,63 +1085,62 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
- struct x86_reg st1 = x86_make_reg(file_x87, 1);
- unsigned fixup1, fixup2;
unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
- /* Load the interesting parts of arg0:
- */
- x87_fld_src(cp, &op->FullSrcRegisters[0], 3);
- x87_fld_src(cp, &op->FullSrcRegisters[0], 1);
- x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
-
- if (writemask & TGSI_WRITEMASK_XW) {
- x87_fld1(cp->func);
- x87_fst_or_nop(cp->func, writemask, 0, dst);
- x87_fstp_or_pop(cp->func, writemask, 3, dst);
- }
if (writemask & TGSI_WRITEMASK_YZ) {
+ struct x86_reg st1 = x86_make_reg(file_x87, 1);
+ struct x86_reg st2 = x86_make_reg(file_x87, 2);
+
+
- /* Pre-zero destinations, may be overwritten later... fixme.
- */
- x87_fldz(cp->func);
- x87_fst_or_nop(cp->func, writemask, 1, dst);
- x87_fstp_or_pop(cp->func, writemask, 2, dst);
+ /* a1' = a1 <= 0 ? 1 : a1;
+ */
+ x87_fldz(cp->func); /* 0 */
+ x87_fld1(cp->func); /* 1 0 */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */
+ x87_fcomi(cp->func, st2); /* a1 1 0 */
+ x87_fcmovb(cp->func, st1); /* a1' 1 0 */
+ x87_fstp(cp->func, st1); /* a1' 0 */
+ x87_fstp(cp->func, st1); /* a1' */
+
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1' */
+ x87_fxch(cp->func, st1); /* a1' a3 */
+
- /* Check arg0[0]:
+ /* Compute pow(a1, a3)
*/
- x87_fldz(cp->func); /* 0 a0 a1 a3 */
- x87_fucomp(cp->func, st1); /* a0 a1 a3 */
- x87_fnstsw(cp->func, cp->tmp_EAX);
- x86_sahf(cp->func);
- fixup1 = x86_jcc_forward(cp->func, cc_AE);
-
- x87_fstp_or_pop(cp->func, writemask, 1, dst); /* a1 a3 */
+ x87_fyl2x(cp->func); /* a3*log2(a1) */
+ x87_emit_ex2( cp ); /* 2^(a3*log2(a1)) */
- /* Check arg0[1]:
- */
- x87_fldz(cp->func); /* 0 a1 a3 */
- x87_fucomp(cp->func, st1); /* a1 a3 */
- x87_fnstsw(cp->func, cp->tmp_EAX);
- x86_sahf(cp->func);
- fixup2 = x86_jcc_forward(cp->func, cc_AE);
- /* Compute pow(a1, a3)
+ /* a0' = max2(a0, 0):
*/
- x87_fyl2x(cp->func); /* a3*log2(a1) */
+ x87_fldz(cp->func); /* 0 r2 */
+ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */
+ x87_fcomi(cp->func, st1);
+ x87_fcmovb(cp->func, st1); /* a0' 0 r2 */
+ x87_fstp(cp->func, st1); /* a0' r2 */
- emit_x87_ex2( cp ); /* 2^(a3*log2(a1)) */
+ x87_fxch(cp->func, st1); /* a0' r2 */
+ x87_fst_or_nop(cp->func, writemask, 1, dst); /* result[1] = a0' */
+
+ x87_fldz(cp->func); /* 0 a0' r2 */
+ x87_fcomi(cp->func, st1); /* 0 a0' r2 */
+ x87_fcmovnbe(cp->func, st2); /* r2' a0' r2 */
x87_fstp_or_pop(cp->func, writemask, 2, dst);
-
- /* Land jumps:
- */
- x86_fixup_fwd_jump(cp->func, fixup1);
- x86_fixup_fwd_jump(cp->func, fixup2);
+ x87_fpop(cp->func);
+ x87_fpop(cp->func);
+ }
+
+ if (writemask & TGSI_WRITEMASK_XW) {
+ x87_fld1(cp->func);
+ x87_fst_or_nop(cp->func, writemask, 0, dst);
+ x87_fstp_or_pop(cp->func, writemask, 3, dst);
}
return TRUE;
@@ -1222,7 +1222,7 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */
x87_fyl2x(cp->func); /* a1*log2(a0) */
- emit_x87_ex2( cp ); /* 2^(a1*log2(a0)) */
+ x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */
x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
return TRUE;
--
cgit v1.2.3
From 083f3f5c32a28d2993a8a5a8b4f5ef81224a5ec3 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 20:38:23 +0100
Subject: draw: avoid a pointless mov in many sse opcodes
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 94 +++++++++++++++++++++-----------
1 file changed, 63 insertions(+), 31 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index a365d456d1..97de43c232 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -174,14 +174,44 @@ static void spill( struct aos_compilation *cp, unsigned idx )
}
}
+/* Return TRUE when 'reg' is an XMM register whose tracking slot is not
+ * bound to any shader register file (its file is TGSI_FILE_NULL), i.e.
+ * it holds a scratch value.
+ */
+static boolean is_xmm_tmp( struct aos_compilation *cp,
+                           struct x86_reg reg )
+{
+   if (reg.file != file_XMM)
+      return FALSE;
+
+   return cp->xmm[reg.idx].file == TGSI_FILE_NULL;
+}
+
+/* Return an XMM register holding the value of 'reg' that the caller may
+ * use as a destination.  If 'reg' is already a scratch XMM register (see
+ * is_xmm_tmp) it is returned unchanged and no code is emitted; otherwise
+ * a register is allocated and a movups copying 'reg' into it is emitted.
+ */
+static struct x86_reg get_xmm_tmp( struct aos_compilation *cp,
+                                   struct x86_reg reg )
+{
+   struct x86_reg scratch;
+
+   if (is_xmm_tmp(cp, reg))
+      return reg;
+
+   scratch = aos_get_xmm_reg(cp);
+   sse_movups(cp->func, scratch, reg);
+   return scratch;
+}
+
+
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp )
{
unsigned i;
unsigned oldest = 0;
+ boolean found = FALSE;
for (i = 0; i < 8; i++)
- if (cp->xmm[i].last_used < cp->xmm[oldest].last_used)
+ if (cp->xmm[i].last_used != cp->insn_counter &&
+ cp->xmm[i].file == TGSI_FILE_NULL) {
oldest = i;
+ found = TRUE;
+ }
+
+ if (!found) {
+ for (i = 0; i < 8; i++)
+ if (cp->xmm[i].last_used < cp->xmm[oldest].last_used)
+ oldest = i;
+ }
/* Need to write out the old value?
*/
@@ -237,15 +267,24 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
unsigned idx,
unsigned dirty )
{
+ unsigned i;
+
if (reg.file != file_XMM) {
assert(0);
return;
}
- invalidate_xmm(cp, file, idx);
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file &&
+ cp->xmm[i].idx == idx) {
+ aos_release_xmm_reg(cp, i);
+ }
+ }
+
cp->xmm[reg.idx].file = file;
cp->xmm[reg.idx].idx = idx;
cp->xmm[reg.idx].dirty = dirty;
+ cp->xmm[reg.idx].last_used = cp->insn_counter;
}
@@ -659,6 +698,7 @@ static void x87_fst_or_nop( struct x86_function *func,
unsigned channel,
struct x86_reg ptr )
{
+ assert(ptr.file == file_REG32);
if (writemask & (1<FullSrcRegisters[0]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_mulps(cp->func, dst, neg);
sse_maxps(cp->func, dst, arg0);
@@ -782,9 +822,8 @@ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_addps(cp->func, dst, arg1);
store_dest(cp, &op->FullDstRegisters[0], dst);
@@ -806,10 +845,9 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_mulps(cp->func, dst, arg1);
/* Now the hard bit: sum the first 3 values:
@@ -831,10 +869,9 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_mulps(cp->func, dst, arg1);
/* Now the hard bit: sum the values:
@@ -854,10 +891,9 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_mulps(cp->func, dst, arg1);
/* Now the hard bit: sum the values (from DP3):
@@ -1152,9 +1188,8 @@ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_maxps(cp->func, dst, arg1);
store_dest(cp, &op->FullDstRegisters[0], dst);
@@ -1166,9 +1201,8 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_minps(cp->func, dst, arg1);
store_dest(cp, &op->FullDstRegisters[0], dst);
@@ -1178,9 +1212,9 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
+ /* potentially nothing to do */
store_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
@@ -1190,9 +1224,8 @@ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_mulps(cp->func, dst, arg1);
store_dest(cp, &op->FullDstRegisters[0], dst);
@@ -1205,13 +1238,15 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, arg0);
- sse_mulps(cp->func, dst, arg1);
- sse_addps(cp->func, dst, arg2);
+ /* If we can't clobber old contents of arg0, get a temporary & copy
+ * it there, then clobber it...
+ */
+ arg0 = get_xmm_tmp(cp, arg0);
- store_dest(cp, &op->FullDstRegisters[0], dst);
+ sse_mulps(cp->func, arg0, arg1);
+ sse_addps(cp->func, arg0, arg2);
+ store_dest(cp, &op->FullDstRegisters[0], arg0);
return TRUE;
}
@@ -1272,10 +1307,9 @@ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_cmpps(cp->func, dst, arg1, cc_NotLessThan);
sse_andps(cp->func, dst, ones);
@@ -1297,10 +1331,9 @@ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_cmpps(cp->func, dst, arg1, cc_LessThan);
sse_andps(cp->func, dst, ones);
@@ -1312,9 +1345,8 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg dst = get_xmm_tmp(cp, arg0);
- sse_movups(cp->func, dst, arg0);
sse_subps(cp->func, dst, arg1);
store_dest(cp, &op->FullDstRegisters[0], dst);
--
cgit v1.2.3
From 5b1bd30f22ffa3955150ec008631d0f4754d340f Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 20:41:01 +0100
Subject: draw: when preloading args to x87 stack, need to use reverse order
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 97de43c232..fde92c7226 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -980,7 +980,7 @@ static boolean emit_EXP( struct aos_compilation *cp, const struct tgsi_full_inst
x87_fstp_or_pop(cp->func, writemask, 0, dst); /* flr(a) a 2^flr(a) */
- x87_fsubrp(cp->func, st1); /* frac(a) 2^flr(a) */
+ x87_fsubp(cp->func, st1); /* frac(a) 2^flr(a) */
x87_fst_or_nop(cp->func, writemask, 1, dst); /* frac(a) 2^flr(a) */
@@ -1041,9 +1041,9 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst
/* Load all sources first to avoid aliasing
*/
- for (i = 0; i < 4; i++) {
+ for (i = 3; i >= 0; i--) {
if (writemask & (1<<i)) {
- x87_fld_src(cp, &op->FullSrcRegisters[0], i);
+ x87_fld_src(cp, &op->FullSrcRegisters[0], i);
}
}
@@ -1068,9 +1068,9 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst
/* Load all sources first to avoid aliasing
*/
- for (i = 0; i < 4; i++) {
+ for (i = 3; i >= 0; i--) {
if (writemask & (1<<i)) {
- x87_fld_src(cp, &op->FullSrcRegisters[0], i);
+ x87_fld_src(cp, &op->FullSrcRegisters[0], i);
}
}
@@ -1098,7 +1098,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
/* suck all the source values onto the stack before writing out any
* dst, which may alias...
*/
- for (i = 0; i < 4; i++) {
+ for (i = 3; i >= 0; i--) {
if (writemask & (1<<i)) {
x87_fld_src(cp, &op->FullSrcRegisters[0], i);
}
@@ -1108,7 +1108,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
if (writemask & (1<<i)) {
x87_fld(cp->func, st0); /* a a */
x87_fprndint( cp->func ); /* flr(a) a */
- x87_fsubrp(cp->func, st1); /* frc(a) */
+ x87_fsubp(cp->func, st1); /* frc(a) */
x87_fstp(cp->func, x86_make_disp(dst, i*4));
}
}
@@ -1392,6 +1392,8 @@ static boolean
emit_instruction( struct aos_compilation *cp,
struct tgsi_full_instruction *inst )
{
+ x87_assert_stack_empty(cp->func);
+
switch( inst->Instruction.Opcode ) {
case TGSI_OPCODE_MOV:
return emit_MOV( cp, inst );
@@ -1657,6 +1659,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
break;
}
+ x87_assert_stack_empty(cp.func);
cp.insn_counter++;
debug_printf("\n");
}
@@ -1712,6 +1715,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
x86_pop(cp.func, cp.count_ESI);
x86_pop(cp.func, cp.idx_EBX);
+ x87_assert_stack_empty(cp.func);
x86_ret(cp.func);
tgsi_parse_free( &parse );
--
cgit v1.2.3
From 6f407b072453eb2bb7077a952257a099db4da025 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Wed, 21 May 2008 20:50:36 +0100
Subject: rtasm: remove debug
---
src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 --
1 file changed, 2 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index a2e8af343b..d78676b8f3 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -990,14 +990,12 @@ static void note_x87_pop( struct x86_function *p )
{
p->x87_stack--;
assert(p->x87_stack >= 0);
- debug_printf("\nstack: %d\n", p->x87_stack);
}
static void note_x87_push( struct x86_function *p )
{
p->x87_stack++;
assert(p->x87_stack <= 7);
- debug_printf("\nstack: %d\n", p->x87_stack);
}
void x87_assert_stack_empty( struct x86_function *p )
--
cgit v1.2.3
From a5c3b499fa40f46298389900e74f1db04f99166a Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 13:37:48 +0100
Subject: draw: fse works with elts, remove assert
---
src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 984fbb6767..7fefd391a6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -78,11 +78,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
boolean need_psize = 0;
- if (draw->pt.user.elts) {
- assert(0);
- return ;
- }
-
if (!draw->render->set_primitive( draw->render,
prim )) {
assert(0);
@@ -250,9 +245,8 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
}
/* Single routine to fetch vertices, run shader and emit HW verts.
- * Clipping and viewport transformation are done elsewhere --
- * either by the API or on hardware, or for some other reason not
- * required...
+ * Clipping is done elsewhere -- either by the API or on hardware,
+ * or for some other reason not required...
*/
fse->active->run_linear( fse->active,
start, count,
--
cgit v1.2.3
From c684ffa02d8d43ee04b99ee63ccd1adb66e81c1a Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 13:41:49 +0100
Subject: draw: clean up internal immediates in aos sse
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 64 +++++++++++++++++++++--------
src/gallium/auxiliary/draw/draw_vs_aos.h | 5 ++-
src/gallium/auxiliary/draw/draw_vs_aos_io.c | 10 ++---
3 files changed, 55 insertions(+), 24 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index fde92c7226..0b8600696a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -44,12 +44,6 @@
#ifdef PIPE_ARCH_X86
-#define DISASSEM 0
-
-
-
-
-
static INLINE boolean eq( struct x86_reg a,
struct x86_reg b )
{
@@ -92,13 +86,6 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
}
-struct x86_reg aos_get_internal( struct aos_compilation *cp,
- unsigned imm )
-{
- return get_reg_ptr( cp,
- AOS_FILE_INTERNAL,
- imm );
-}
#define X87_CW_EXCEPTION_INV_OP (1<<0)
#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
@@ -123,6 +110,9 @@ static void init_internals( struct aos_machine *machine )
float inv = 1.0f/255.0f;
float f255 = 255.0f;
+ ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
+ *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
+
ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
@@ -337,6 +327,39 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
+ unsigned file,
+ unsigned idx )
+{
+ struct x86_reg reg = aos_get_shader_reg( cp, file, idx );
+
+ if (reg.file != file_XMM) {
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ sse_movups(cp->func, tmp, reg);
+ aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE );
+ reg = tmp;
+ }
+
+ return reg;
+}
+
+
+
+struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+ unsigned imm )
+{
+ return aos_get_shader_reg_xmm( cp, AOS_FILE_INTERNAL, imm );
+}
+
+
+struct x86_reg aos_get_internal( struct aos_compilation *cp,
+ unsigned imm )
+{
+ return aos_get_shader_reg( cp, AOS_FILE_INTERNAL, imm );
+}
+
+
+
/* Emulate pshufd insn in regular SSE, if necessary:
@@ -461,15 +484,15 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
arg0 = dst;
}
- if (negs) {
- struct x86_reg imm_negs = aos_get_internal(cp, IMM_NEGS);
+ if (negs && negs != 0xf) {
+ struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
struct x86_reg tmp = aos_get_xmm_reg(cp);
/* Load 1,-1,0,0
* Use neg as arg to pshufd
* Multiply
*/
- emit_pshufd(cp, tmp, imm_negs,
+ emit_pshufd(cp, tmp, imm_swz,
SHUF((negs & 1) ? 1 : 0,
(negs & 2) ? 1 : 0,
(negs & 4) ? 1 : 0,
@@ -479,12 +502,17 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
aos_release_xmm_reg(cp, tmp.idx);
arg0 = dst;
}
+ else if (negs) {
+ struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS);
+ sse_mulps(cp->func, dst, imm_negs);
+ arg0 = dst;
+ }
+
if (abs && abs != 0xf) {
ERROR(cp, "unsupported partial abs");
}
-
- if (abs) {
+ else if (abs) {
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
struct x86_reg tmp = aos_get_xmm_reg(cp);
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index c2afd4e9a0..efdc9a38f4 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -142,13 +142,16 @@ boolean aos_emit_outputs( struct aos_compilation *cp );
#define IMM_ONES 0 /* 1, 1,1,1 */
-#define IMM_NEGS 1 /* 1,-1,0,0 */
+#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
#define IMM_IDENTITY 2 /* 0, 0,0,1 */
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
#define IMM_255 4 /* 255, 255, 255, 255 */
+#define IMM_NEGS 5 /* -1,-1,-1,-1 */
struct x86_reg aos_get_internal( struct aos_compilation *cp,
unsigned imm );
+struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+ unsigned imm );
#define ERROR(cp, msg) \
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 72b2b3d11d..0dda9df97d 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -54,7 +54,7 @@ static void emit_load_R32G32B32( struct aos_compilation *cp,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
- sse_shufps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
+ sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
sse_movlps(cp->func, data, src_ptr);
}
@@ -63,7 +63,7 @@ static void emit_load_R32G32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
- sse_movups(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
+ sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
sse_movlps(cp->func, data, src_ptr);
}
@@ -73,7 +73,7 @@ static void emit_load_R32( struct aos_compilation *cp,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
- sse_orps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) );
+ sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
}
@@ -82,8 +82,8 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
- sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ));
- sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ));
+ sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
+ sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
sse2_cvtdq2ps(cp->func, data, data);
sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
}
--
cgit v1.2.3
From 05029c919d46299ca259ee8af880d0a65f95ce7c Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 13:46:06 +0100
Subject: draw: clean up masked writes in aos sse, make some xmm function names
clearer
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 236 +++++++++++--------------------
1 file changed, 82 insertions(+), 154 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 0b8600696a..708ecadbac 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -171,7 +171,7 @@ static boolean is_xmm_tmp( struct aos_compilation *cp,
cp->xmm[reg.idx].file == TGSI_FILE_NULL);
}
-static struct x86_reg get_xmm_tmp( struct aos_compilation *cp,
+static struct x86_reg get_xmm_clone( struct aos_compilation *cp,
struct x86_reg reg )
{
if (!is_xmm_tmp(cp, reg)) {
@@ -380,31 +380,37 @@ static void emit_pshufd( struct aos_compilation *cp,
}
}
-
-
-
-/* Helper for writemask:
+/* load masks (pack into negs??)
+ * pshufd - shuffle according to writemask
+ * and - result, mask
+ * nand - dest, mask
+ * or - dest, result
*/
-static boolean emit_shuf_copy1( struct aos_compilation *cp,
- struct x86_reg dst,
- struct x86_reg arg0,
- struct x86_reg arg1,
- ubyte shuf )
+static boolean mask_write( struct aos_compilation *cp,
+ struct x86_reg dst,
+ struct x86_reg result,
+ unsigned mask )
{
+ struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, arg1);
- emit_pshufd(cp, dst, dst, shuf);
- emit_pshufd(cp, tmp, arg0, shuf);
-
- sse_movss(cp->func, dst, tmp);
+
+ emit_pshufd(cp, tmp, imm_swz,
+ SHUF((mask & 1) ? 2 : 3,
+ (mask & 2) ? 2 : 3,
+ (mask & 4) ? 2 : 3,
+ (mask & 8) ? 2 : 3));
- emit_pshufd(cp, dst, dst, shuf);
+ sse_andps(cp->func, dst, tmp);
+ sse_andnps(cp->func, tmp, result);
+ sse_orps(cp->func, dst, tmp);
aos_release_xmm_reg(cp, tmp.idx);
return TRUE;
}
+
+
/* Helper for writemask:
*/
static boolean emit_shuf_copy2( struct aos_compilation *cp,
@@ -414,17 +420,18 @@ static boolean emit_shuf_copy2( struct aos_compilation *cp,
ubyte shuf )
{
struct x86_reg tmp = aos_get_xmm_reg(cp);
+
emit_pshufd(cp, dst, arg1, shuf);
emit_pshufd(cp, tmp, arg0, shuf);
-
sse_shufps(cp->func, dst, tmp, SHUF(X, Y, Z, W));
-
emit_pshufd(cp, dst, dst, shuf);
aos_release_xmm_reg(cp, tmp.idx);
return TRUE;
}
+
+
#define SSE_SWIZZLE_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6))
@@ -593,131 +600,58 @@ static void store_dest( struct aos_compilation *cp,
const struct tgsi_full_dst_register *reg,
struct x86_reg result )
{
- if (reg->DstRegister.WriteMask == 0)
- {
- return;
- }
- else if (reg->DstRegister.WriteMask == TGSI_WRITEMASK_XYZW)
- {
- if (result.file == file_XMM) {
- aos_adopt_xmm_reg(cp,
- result,
- reg->DstRegister.File,
- reg->DstRegister.Index,
- TRUE);
- }
- else {
- struct x86_reg dst = aos_get_xmm_reg(cp);
- aos_adopt_xmm_reg(cp,
- dst,
- reg->DstRegister.File,
- reg->DstRegister.Index,
- TRUE);
- sse_movups(cp->func, dst, result);
- }
- }
- else
- {
- /* Previous value of the dest register:
- */
- struct x86_reg old_dst = aos_get_shader_reg(cp,
- reg->DstRegister.File,
- reg->DstRegister.Index);
-
-
- /* Alloc an xmm reg to hold the new value of the dest register:
- */
- struct x86_reg dst = aos_get_xmm_reg(cp);
+ struct x86_reg dst;
+ switch (reg->DstRegister.WriteMask) {
+ case 0:
+ return;
+
+ case TGSI_WRITEMASK_XYZW:
aos_adopt_xmm_reg(cp,
- dst,
+ get_xmm_clone(cp, result),
reg->DstRegister.File,
reg->DstRegister.Index,
- TRUE );
-
- switch (reg->DstRegister.WriteMask) {
- case TGSI_WRITEMASK_X:
- if (result.file == file_XMM) {
- sse_movups(cp->func, dst, old_dst);
- sse_movss(cp->func, dst, result);
- }
- else {
- struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, old_dst);
- sse_movss(cp->func, tmp, result);
- sse_movss(cp->func, dst, tmp);
- aos_release_xmm_reg(cp, tmp.idx);
- }
- break;
-
- case TGSI_WRITEMASK_XY:
- sse_movups(cp->func, dst, old_dst);
- sse_shufps(cp->func, dst, result, SHUF(X, Y, Z, W));
- break;
-
- case TGSI_WRITEMASK_ZW:
- sse_movups(cp->func, dst, result);
- sse_shufps(cp->func, dst, old_dst, SHUF(X, Y, Z, W));
- break;
-
- case TGSI_WRITEMASK_YZW:
- if (old_dst.file == file_XMM) {
- sse_movups(cp->func, dst, result);
- sse_movss(cp->func, dst, old_dst);
- }
- else {
- struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, result);
- sse_movss(cp->func, tmp, old_dst);
- sse_movss(cp->func, dst, tmp);
- aos_release_xmm_reg(cp, tmp.idx);
- }
- break;
-
- case TGSI_WRITEMASK_Y:
- emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Y,X,Z,W));
- break;
-
- case TGSI_WRITEMASK_Z:
- emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Z,Y,X,W));
- break;
-
- case TGSI_WRITEMASK_W:
- emit_shuf_copy1(cp, dst, result, old_dst, SHUF(W,Y,Z,X));
- break;
-
- case TGSI_WRITEMASK_XZ:
- emit_shuf_copy2(cp, dst, result, old_dst, SHUF(X,Z,Y,W));
- break;
+ TRUE);
+ return;
+ default:
+ break;
+ }
- case TGSI_WRITEMASK_XW:
- emit_shuf_copy2(cp, dst, result, old_dst, SHUF(X,W,Z,Y));
+ dst = aos_get_shader_reg_xmm(cp,
+ reg->DstRegister.File,
+ reg->DstRegister.Index);
- case TGSI_WRITEMASK_YZ:
- emit_shuf_copy2(cp, dst, result, old_dst, SHUF(Z,Y,X,W));
- break;
+ switch (reg->DstRegister.WriteMask) {
+ case TGSI_WRITEMASK_X:
+ sse_movss(cp->func, dst, get_xmm_clone(cp, result));
+ break;
+
+ case TGSI_WRITEMASK_XY:
+ sse_shufps(cp->func, dst, get_xmm_clone(cp, result), SHUF(X, Y, Z, W));
+ break;
- case TGSI_WRITEMASK_YW:
- emit_shuf_copy2(cp, dst, result, old_dst, SHUF(W,Y,Z,X));
- break;
+ case TGSI_WRITEMASK_ZW:
+ result = get_xmm_clone(cp, result);
+ sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W));
+ dst = result;
+ break;
- case TGSI_WRITEMASK_XZW:
- emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Y,X,Z,W));
- break;
+ case TGSI_WRITEMASK_YZW:
+ sse_movss(cp->func, result, dst);
+ dst = result;
+ break;
- case TGSI_WRITEMASK_XYW:
- emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Z,Y,X,W));
- break;
+ default:
+ mask_write(cp, dst, result, reg->DstRegister.WriteMask);
+ break;
+ }
- case TGSI_WRITEMASK_XYZ:
- emit_shuf_copy1(cp, dst, old_dst, result, SHUF(W,Y,Z,X));
- break;
+ aos_adopt_xmm_reg(cp,
+ dst,
+ reg->DstRegister.File,
+ reg->DstRegister.Index,
+ TRUE);
- default:
- assert(0); /* not possible */
- break;
- }
- }
}
@@ -837,7 +771,7 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, neg);
sse_maxps(cp->func, dst, arg0);
@@ -850,7 +784,7 @@ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_addps(cp->func, dst, arg1);
@@ -874,7 +808,7 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -898,7 +832,7 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -920,7 +854,7 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -1216,7 +1150,7 @@ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_maxps(cp->func, dst, arg1);
@@ -1229,7 +1163,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_minps(cp->func, dst, arg1);
@@ -1240,7 +1174,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
/* potentially nothing to do */
@@ -1252,7 +1186,7 @@ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -1270,7 +1204,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
/* If we can't clobber old contents of arg0, get a temporary & copy
* it there, then clobber it...
*/
- arg0 = get_xmm_tmp(cp, arg0);
+ arg0 = get_xmm_clone(cp, arg0);
sse_mulps(cp->func, arg0, arg1);
sse_addps(cp->func, arg0, arg2);
@@ -1336,7 +1270,7 @@ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_cmpps(cp->func, dst, arg1, cc_NotLessThan);
sse_andps(cp->func, dst, ones);
@@ -1360,7 +1294,7 @@ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_cmpps(cp->func, dst, arg1, cc_LessThan);
sse_andps(cp->func, dst, ones);
@@ -1373,7 +1307,7 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_tmp(cp, arg0);
+ struct x86_reg dst = get_xmm_clone(cp, arg0);
sse_subps(cp->func, dst, arg1);
@@ -1526,9 +1460,9 @@ emit_instruction( struct aos_compilation *cp,
static boolean emit_viewport( struct aos_compilation *cp )
{
- struct x86_reg pos = aos_get_shader_reg(cp,
- TGSI_FILE_OUTPUT,
- 0);
+ struct x86_reg pos = aos_get_shader_reg_xmm(cp,
+ TGSI_FILE_OUTPUT,
+ 0);
struct x86_reg scale = x86_make_disp(cp->machine_EDX,
Offset(struct aos_machine, scale));
@@ -1536,12 +1470,6 @@ static boolean emit_viewport( struct aos_compilation *cp )
struct x86_reg translate = x86_make_disp(cp->machine_EDX,
Offset(struct aos_machine, translate));
- if (pos.file != file_XMM) {
- struct x86_reg dst = aos_get_xmm_reg(cp);
- sse_movups(cp->func, dst, pos);
- pos = dst;
- }
-
sse_mulps(cp->func, pos, scale);
sse_addps(cp->func, pos, translate);
--
cgit v1.2.3
From 7b25c1a4032960752d8a8e950bdf75740b2de2e8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 13:47:08 +0100
Subject: draw: remove FPU_MANIP ifdef
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 11 +----------
1 file changed, 1 insertion(+), 10 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 708ecadbac..d60940bb7a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -693,48 +693,39 @@ static void x87_fstp_dest4( struct aos_compilation *cp,
x87_fstp_or_pop(cp->func, writemask, 3, ptr);
}
-#define FPU_MANIP 1
/* Save current x87 state and put it into single precision mode.
*/
static void save_fpu_state( struct aos_compilation *cp )
{
-#if FPU_MANIP
x87_fnstcw( cp->func, x86_make_disp(cp->machine_EDX,
Offset(struct aos_machine, fpu_restore)));
-#endif
}
static void restore_fpu_state( struct aos_compilation *cp )
{
-#if FPU_MANIP
x87_fnclex(cp->func);
x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
Offset(struct aos_machine, fpu_restore)));
-#endif
}
static void set_fpu_round_neg_inf( struct aos_compilation *cp )
{
-#if FPU_MANIP
if (cp->fpucntl != FPU_RND_NEG) {
cp->fpucntl = FPU_RND_NEG;
x87_fnclex(cp->func);
x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
Offset(struct aos_machine, fpu_rnd_neg_inf)));
}
-#endif
}
static void set_fpu_round_nearest( struct aos_compilation *cp )
{
-#if FPU_MANIP
if (cp->fpucntl != FPU_RND_NEAREST) {
cp->fpucntl = FPU_RND_NEAREST;
x87_fnclex(cp->func);
x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
Offset(struct aos_machine, fpu_rnd_nearest)));
}
-#endif
}
@@ -754,7 +745,7 @@ static void x87_emit_ex2( struct aos_compilation *cp )
x87_fld1(cp->func); /* 1 (2^frac(a))-1 a */
x87_faddp(cp->func, st1); /* 2^frac(a) a */
x87_fscale(cp->func); /* 2^a a */
- x87_fstp(cp->func, st1);
+ x87_fstp(cp->func, st1); /* 2^a */
assert( stack == cp->func->x87_stack);
--
cgit v1.2.3
From 6780a6dede31e7f2eb465e1d7b507b3e64fe6ec9 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 13:48:07 +0100
Subject: draw: shortcircuit shuffle in aos_sse when possible
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index d60940bb7a..b8fad231ca 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -809,7 +809,9 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */
emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
sse_addss(cp->func, dst, tmp);
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
aos_release_xmm_reg(cp, tmp.idx);
store_dest(cp, &op->FullDstRegisters[0], dst);
@@ -833,7 +835,9 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst
sse_addps(cp->func, dst, tmp); /* a*x+c*z, b*y+d*w, a*x+c*z, b*y+d*w */
emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
sse_addss(cp->func, dst, tmp);
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
aos_release_xmm_reg(cp, tmp.idx);
store_dest(cp, &op->FullDstRegisters[0], dst);
@@ -857,7 +861,9 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst
sse_addss(cp->func, dst, tmp);
emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W));
sse_addss(cp->func, dst, tmp);
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
aos_release_xmm_reg(cp, tmp.idx);
store_dest(cp, &op->FullDstRegisters[0], dst);
@@ -1233,7 +1239,8 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
sse_divss(cp->func, dst, arg0);
}
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
store_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
@@ -1249,7 +1256,8 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst
/* Extend precision here...
*/
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+ if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
+ sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
store_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
--
cgit v1.2.3
From 65cb09249e750b45ec3fc9a57670fc77250efc5e Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 13:49:38 +0100
Subject: draw: for debug, do rhw divide in aos_sse viewport calcs
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 45 +++++++++++++++++++++++++++++++-
1 file changed, 44 insertions(+), 1 deletion(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index b8fad231ca..40de13a98c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1481,6 +1481,46 @@ static boolean emit_viewport( struct aos_compilation *cp )
}
+/* This is useful to be able to see the results on softpipe. Doesn't
+ * do proper clipping, just assumes the backend can do it during
+ * rasterization -- for debug only...
+ */
+static boolean emit_rhw_viewport( struct aos_compilation *cp )
+{
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ struct x86_reg pos = aos_get_shader_reg_xmm(cp,
+ TGSI_FILE_OUTPUT,
+ 0);
+
+ struct x86_reg scale = x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, scale));
+
+ struct x86_reg translate = x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, translate));
+
+
+
+ emit_pshufd(cp, tmp, pos, SHUF(W, W, W, W));
+ sse2_rcpss(cp->func, tmp, tmp);
+ sse_shufps(cp->func, tmp, tmp, SHUF(X, X, X, X));
+
+ sse_mulps(cp->func, pos, scale);
+ sse_mulps(cp->func, pos, tmp);
+ sse_addps(cp->func, pos, translate);
+
+ /* Set pos[3] = 1/w (rhw)
+ */
+ mask_write(cp, pos, tmp, TGSI_WRITEMASK_W);
+
+ aos_adopt_xmm_reg( cp,
+ pos,
+ TGSI_FILE_OUTPUT,
+ 0,
+ TRUE );
+ return TRUE;
+}
+
+
static boolean note_immediate( struct aos_compilation *cp,
struct tgsi_full_immediate *imm )
{
@@ -1623,7 +1663,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
goto fail;
if (cp.vaos->base.key.viewport) {
- emit_viewport(&cp);
+ if (0)
+ emit_viewport(&cp);
+ else
+ emit_rhw_viewport(&cp);
}
/* Emit output... TODO: do this eagerly after the last write to a
--
cgit v1.2.3
From 260001430bbd28ea17201f1980ab1ebed93b246f Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 15:24:02 +0100
Subject: draw: use aligned movs within draw_vs_aos.c
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 40de13a98c..039e233fe8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -159,7 +159,7 @@ static void spill( struct aos_compilation *cp, unsigned idx )
cp->xmm[idx].idx);
assert(cp->xmm[idx].dirty);
- sse_movups(cp->func, oldval, x86_make_reg(file_XMM, idx));
+ sse_movaps(cp->func, oldval, x86_make_reg(file_XMM, idx));
cp->xmm[idx].dirty = 0;
}
}
@@ -176,7 +176,7 @@ static struct x86_reg get_xmm_clone( struct aos_compilation *cp,
{
if (!is_xmm_tmp(cp, reg)) {
struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, tmp, reg);
+ sse_movaps(cp->func, tmp, reg);
reg = tmp;
}
@@ -335,7 +335,7 @@ static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
if (reg.file != file_XMM) {
struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, tmp, reg);
+ sse_movaps(cp->func, tmp, reg);
aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE );
reg = tmp;
}
@@ -374,7 +374,7 @@ static void emit_pshufd( struct aos_compilation *cp,
}
else {
if (!eq(dst, arg0))
- sse_movups(cp->func, dst, arg0);
+ sse_movaps(cp->func, dst, arg0);
sse_shufps(cp->func, dst, dst, shuf);
}
@@ -523,7 +523,7 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movups(cp->func, tmp, arg0);
+ sse_movaps(cp->func, tmp, arg0);
sse_mulps(cp->func, tmp, neg);
sse_maxps(cp->func, dst, arg0);
--
cgit v1.2.3
From 43df4642f1d2f3d2673a1d5e4f5126f5175fb899 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 20:21:49 +0100
Subject: draw: tweak x87_emit_ex2 to avoid changing x87 fpu settings
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 039e233fe8..93bb4f9bc0 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -735,16 +735,17 @@ static void x87_emit_ex2( struct aos_compilation *cp )
struct x86_reg st1 = x86_make_reg(file_x87, 1);
int stack = cp->func->x87_stack;
- set_fpu_round_neg_inf( cp );
+// set_fpu_round_neg_inf( cp );
x87_fld(cp->func, st0); /* a a */
- x87_fld(cp->func, st0); /* a a a */
- x87_fprndint( cp->func ); /* flr(a) a a*/
- x87_fsubp(cp->func, st1); /* frac(a) a */
- x87_f2xm1(cp->func); /* (2^frac(a))-1 a */
- x87_fld1(cp->func); /* 1 (2^frac(a))-1 a */
- x87_faddp(cp->func, st1); /* 2^frac(a) a */
- x87_fscale(cp->func); /* 2^a a */
+ x87_fprndint( cp->func ); /* int(a) a*/
+ x87_fsubr(cp->func, st1, st0); /* int(a) frc(a) */
+ x87_fxch(cp->func, st1); /* frc(a) int(a) */
+ x87_f2xm1(cp->func); /* (2^frc(a))-1 int(a) */
+ x87_fld1(cp->func); /* 1 (2^frc(a))-1 int(a) */
+ x87_faddp(cp->func, st1); /* 2^frac(a) int(a) */
+ x87_fscale(cp->func); /* (2^frac(a)*2^int(int(a))) int(a) */
+ /* 2^a int(a) */
x87_fstp(cp->func, st1); /* 2^a */
assert( stack == cp->func->x87_stack);
--
cgit v1.2.3
From 7106da136069f865747e03c30ca245bc030b241b Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Thu, 22 May 2008 20:22:15 +0100
Subject: draw: correct but slow LIT() in aos varient
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 93bb4f9bc0..930914f609 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1093,10 +1093,12 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
- /* a1' = a1 <= 0 ? 1 : a1;
+ /* a1' = a1 <= 0 ? 1 : a1;
+ *
+ * Note: use 1.0 to avoid passing zero to fyl2x (the log2 step).
*/
- x87_fldz(cp->func); /* 0 */
- x87_fld1(cp->func); /* 1 0 */
+ x87_fldz(cp->func); /* 1 0 */
+ x87_fldz(cp->func); /* 1 0 */
x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */
x87_fcomi(cp->func, st2); /* a1 1 0 */
x87_fcmovb(cp->func, st1); /* a1' 1 0 */
@@ -1119,17 +1121,14 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */
x87_fcomi(cp->func, st1);
x87_fcmovb(cp->func, st1); /* a0' 0 r2 */
- x87_fstp(cp->func, st1); /* a0' r2 */
- x87_fxch(cp->func, st1); /* a0' r2 */
x87_fst_or_nop(cp->func, writemask, 1, dst); /* result[1] = a0' */
- x87_fldz(cp->func); /* 0 a0' r2 */
- x87_fcomi(cp->func, st1); /* 0 a0' r2 */
- x87_fcmovnbe(cp->func, st2); /* r2' a0' r2 */
+ x87_fcomi(cp->func, st1); /* a0' 0 r2 */
+ x87_fcmovnbe(cp->func, st2); /* r2' 0' r2 */
- x87_fstp_or_pop(cp->func, writemask, 2, dst);
- x87_fpop(cp->func);
+ x87_fstp_or_pop(cp->func, writemask, 2, dst); /* 0 r2 */
+ x87_fpop(cp->func); /* r2 */
x87_fpop(cp->func);
}
--
cgit v1.2.3
From adc1f88fc9278bdbb3b24a6d48f91a0bd98e9f1c Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 23 May 2008 09:10:59 +0100
Subject: mesa: do object-space lighting in ffvertex_prog.c
Start pulling over some of the optimizations from the fixed function
paths.
---
src/mesa/main/ffvertex_prog.c | 79 +++++++++++++++++++++++++---------------
src/mesa/shader/prog_statevars.c | 42 +++++++++++++++++----
src/mesa/shader/prog_statevars.h | 6 ++-
3 files changed, 88 insertions(+), 39 deletions(-)
(limited to 'src')
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index 810af9e33e..adf15b03c2 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -54,6 +54,7 @@ struct state_key {
unsigned light_color_material_mask:12;
unsigned light_material_mask:12;
+ unsigned need_eye_coords:1;
unsigned normalize:1;
unsigned rescale_normals:1;
unsigned fog_source_is_depth:1;
@@ -167,6 +168,8 @@ static struct state_key *make_state_key( GLcontext *ctx )
*/
assert(fp);
+ key->need_eye_coords = ctx->_NeedEyeCoords;
+
key->fragprog_inputs_read = fp->Base.InputsRead;
if (ctx->RenderMode == GL_FEEDBACK) {
@@ -310,7 +313,7 @@ struct tnl_program {
struct ureg eye_position;
struct ureg eye_position_normalized;
- struct ureg eye_normal;
+ struct ureg transformed_normal;
struct ureg identity;
GLuint materials;
@@ -653,9 +656,9 @@ static void emit_normalize_vec3( struct tnl_program *p,
struct ureg src )
{
struct ureg tmp = get_temp(p);
- emit_op2(p, OPCODE_DP3, tmp, 0, src, src);
- emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
- emit_op2(p, OPCODE_MUL, dest, 0, src, tmp);
+ emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
+ emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
+ emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
release_temp(p, tmp);
}
@@ -705,36 +708,53 @@ static struct ureg get_eye_position_normalized( struct tnl_program *p )
}
-static struct ureg get_eye_normal( struct tnl_program *p )
+static struct ureg get_transformed_normal( struct tnl_program *p )
{
- if (is_undef(p->eye_normal)) {
+ if (is_undef(p->transformed_normal) &&
+ !p->state->need_eye_coords &&
+ !p->state->normalize &&
+ !(p->state->need_eye_coords == p->state->rescale_normals))
+ {
+ p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
+ }
+ else if (is_undef(p->transformed_normal))
+ {
struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
struct ureg mvinv[3];
+ struct ureg transformed_normal = reserve_temp(p);
- register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
- STATE_MATRIX_INVTRANS, mvinv );
+ if (p->state->need_eye_coords) {
+ register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
+ STATE_MATRIX_INVTRANS, mvinv );
- p->eye_normal = reserve_temp(p);
-
- /* Transform to eye space:
- */
- emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal );
+ /* Transform to eye space:
+ */
+ emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
+ normal = transformed_normal;
+ }
/* Normalize/Rescale:
*/
if (p->state->normalize) {
- emit_normalize_vec3( p, p->eye_normal, p->eye_normal );
+ emit_normalize_vec3( p, transformed_normal, normal );
+ normal = transformed_normal;
}
- else if (p->state->rescale_normals) {
+ else if (p->state->need_eye_coords == p->state->rescale_normals) {
+ /* This is already adjusted for eye/non-eye rendering:
+ */
struct ureg rescale = register_param2(p, STATE_INTERNAL,
- STATE_NORMAL_SCALE);
+ STATE_NORMAL_SCALE);
- emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal,
+ emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal,
swizzle1(rescale, X));
+ normal = transformed_normal;
}
+
+ assert(normal.file == PROGRAM_TEMPORARY);
+ p->transformed_normal = normal;
}
- return p->eye_normal;
+ return p->transformed_normal;
}
@@ -856,7 +876,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
*/
if (!p->state->unit[i].light_spotcutoff_is_180) {
struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
- STATE_SPOT_DIR_NORMALIZED, i);
+ STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
struct ureg spot = get_temp(p);
struct ureg slt = get_temp(p);
@@ -907,7 +927,7 @@ static void build_lighting( struct tnl_program *p )
const GLboolean twoside = p->state->light_twoside;
const GLboolean separate = p->state->separate_specular;
GLuint nr_lights = 0, count = 0;
- struct ureg normal = get_eye_normal(p);
+ struct ureg normal = get_transformed_normal(p);
struct ureg lit = get_temp(p);
struct ureg dots = get_temp(p);
struct ureg _col0 = undef, _col1 = undef;
@@ -984,20 +1004,21 @@ static void build_lighting( struct tnl_program *p )
/* Can used precomputed constants in this case.
* Attenuation never applies to infinite lights.
*/
- VPpli = register_param3(p, STATE_LIGHT, i,
- STATE_POSITION_NORMALIZED);
+ VPpli = register_param3(p, STATE_INTERNAL,
+ STATE_LIGHT_POSITION_NORMALIZED, i);
if (p->state->light_local_viewer) {
struct ureg eye_hat = get_eye_position_normalized(p);
half = get_temp(p);
emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
emit_normalize_vec3(p, half, half);
} else {
- half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+ half = register_param3(p, STATE_INTERNAL,
+ STATE_LIGHT_HALF_VECTOR, i);
}
}
else {
- struct ureg Ppli = register_param3(p, STATE_LIGHT, i,
- STATE_POSITION);
+ struct ureg Ppli = register_param3(p, STATE_INTERNAL,
+ STATE_LIGHT_POSITION, i);
struct ureg V = get_eye_position(p);
struct ureg dist = get_temp(p);
@@ -1201,7 +1222,7 @@ static void build_reflect_texgen( struct tnl_program *p,
struct ureg dest,
GLuint writemask )
{
- struct ureg normal = get_eye_normal(p);
+ struct ureg normal = get_transformed_normal(p);
struct ureg eye_hat = get_eye_position_normalized(p);
struct ureg tmp = get_temp(p);
@@ -1219,7 +1240,7 @@ static void build_sphere_texgen( struct tnl_program *p,
struct ureg dest,
GLuint writemask )
{
- struct ureg normal = get_eye_normal(p);
+ struct ureg normal = get_transformed_normal(p);
struct ureg eye_hat = get_eye_position_normalized(p);
struct ureg tmp = get_temp(p);
struct ureg half = register_scalar_const(p, .5);
@@ -1338,7 +1359,7 @@ static void build_texture_transform( struct tnl_program *p )
}
if (normal_mask) {
- struct ureg normal = get_eye_normal(p);
+ struct ureg normal = get_transformed_normal(p);
emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
}
@@ -1475,7 +1496,7 @@ create_new_program( const struct state_key *key,
p.program = program;
p.eye_position = undef;
p.eye_position_normalized = undef;
- p.eye_normal = undef;
+ p.transformed_normal = undef;
p.identity = undef;
p.temp_in_use = 0;
diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c
index ba3c988445..37bd17ba4a 100644
--- a/src/mesa/shader/prog_statevars.c
+++ b/src/mesa/shader/prog_statevars.c
@@ -134,10 +134,6 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
value[3] = 1.0;
}
return;
- case STATE_POSITION_NORMALIZED:
- COPY_4V(value, ctx->Light.Light[ln].EyePosition);
- NORMALIZE_3FV( value );
- return;
default:
_mesa_problem(ctx, "Invalid light state in fetch_state");
return;
@@ -431,15 +427,46 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
value[2] = ctx->Fog.Density * ONE_DIV_LN2;
value[3] = ctx->Fog.Density * ONE_DIV_SQRT_LN2;
return;
- case STATE_SPOT_DIR_NORMALIZED: {
+
+ case STATE_LIGHT_SPOT_DIR_NORMALIZED: {
/* here, state[2] is the light number */
/* pre-normalize spot dir */
const GLuint ln = (GLuint) state[2];
- COPY_3V(value, ctx->Light.Light[ln].EyeDirection);
- NORMALIZE_3FV(value);
+ COPY_3V(value, ctx->Light.Light[ln]._NormDirection);
value[3] = ctx->Light.Light[ln]._CosCutoff;
return;
}
+
+ case STATE_LIGHT_POSITION: {
+ const GLuint ln = (GLuint) state[2];
+ COPY_4V(value, ctx->Light.Light[ln]._Position);
+ return;
+ }
+
+ case STATE_LIGHT_POSITION_NORMALIZED: {
+ const GLuint ln = (GLuint) state[2];
+ COPY_4V(value, ctx->Light.Light[ln]._Position);
+ NORMALIZE_3FV( value );
+ return;
+ }
+
+ case STATE_LIGHT_HALF_VECTOR: {
+ const GLuint ln = (GLuint) state[2];
+ GLfloat p[3];
+ /* Compute infinite half angle vector:
+ * halfVector = normalize(normalize(light._Position) + ctx->_EyeZDir)
+ * light.EyePosition.w should be 0 for infinite lights.
+ */
+ COPY_3V(p, ctx->Light.Light[ln]._Position);
+ NORMALIZE_3FV(p);
+ ADD_3V(value, p, ctx->_EyeZDir);
+ NORMALIZE_3FV(value);
+ value[3] = 1.0;
+ return;
+ }
+
+
+
case STATE_PT_SCALE:
value[0] = ctx->Pixel.RedScale;
value[1] = ctx->Pixel.GreenScale;
@@ -696,7 +723,6 @@ append_token(char *dst, gl_state_index k)
append(dst, "normalScale");
break;
case STATE_INTERNAL:
- case STATE_POSITION_NORMALIZED:
append(dst, "(internal)");
break;
case STATE_PT_SCALE:
diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h
index d12142055f..a515fda3aa 100644
--- a/src/mesa/shader/prog_statevars.h
+++ b/src/mesa/shader/prog_statevars.h
@@ -106,9 +106,11 @@ typedef enum gl_state_index_ {
STATE_INTERNAL, /* Mesa additions */
STATE_NORMAL_SCALE,
STATE_TEXRECT_SCALE,
- STATE_POSITION_NORMALIZED, /* normalized light position */
STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */
- STATE_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */
+ STATE_LIGHT_SPOT_DIR_NORMALIZED, /* pre-normalized spot dir */
+ STATE_LIGHT_POSITION, /* object vs eye space */
+ STATE_LIGHT_POSITION_NORMALIZED, /* object vs eye space */
+ STATE_LIGHT_HALF_VECTOR, /* object vs eye space */
STATE_PT_SCALE, /**< Pixel transfer RGBA scale */
STATE_PT_BIAS, /**< Pixel transfer RGBA bias */
STATE_PCM_SCALE, /**< Post color matrix RGBA scale */
--
cgit v1.2.3
From 3b41d619a1b7cc8c356c32af777486461ddd7926 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 23 May 2008 09:14:17 +0100
Subject: draw: faster LIT(), incorrect though
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 6 ++++++
1 file changed, 6 insertions(+)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 930914f609..b0c3ac49d2 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1098,7 +1098,13 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
* Note: use 1.0 to avoid passing zero to
*/
x87_fldz(cp->func); /* 1 0 */
+#if 1
+ x87_fld1(cp->func); /* 1 0 */
+#else
+ /* Correct but slow due to fp exceptions generated in fyl2x - fix me.
+ */
x87_fldz(cp->func); /* 1 0 */
+#endif
x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */
x87_fcomi(cp->func, st2); /* a1 1 0 */
x87_fcmovb(cp->func, st1); /* a1' 1 0 */
--
cgit v1.2.3
From 0ac2f7955c01749e122f67ff03e79a0d8bd0f8e5 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 23 May 2008 19:17:02 +0100
Subject: mesa: don't emit LIT instruction when mat shininess known to be zero
Use a faster path in that case & make gears go faster.
---
src/mesa/main/ffvertex_prog.c | 133 ++++++++++++++++++++++++++++++++----------
1 file changed, 102 insertions(+), 31 deletions(-)
(limited to 'src')
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index adf15b03c2..623c2a64b5 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -53,6 +53,7 @@ struct state_key {
unsigned light_color_material:1;
unsigned light_color_material_mask:12;
unsigned light_material_mask:12;
+ unsigned material_shininess_is_zero:1;
unsigned need_eye_coords:1;
unsigned normalize:1;
@@ -155,6 +156,26 @@ tnl_get_per_vertex_fog(GLcontext *ctx)
#endif
}
+static GLboolean check_active_shininess( GLcontext *ctx,
+ const struct state_key *key,
+ GLuint side )
+{
+ GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side);
+
+ if (key->light_color_material_mask & bit)
+ return GL_TRUE;
+
+ if (key->light_material_mask & bit)
+ return GL_TRUE;
+
+ if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+
+
static struct state_key *make_state_key( GLcontext *ctx )
{
@@ -214,6 +235,17 @@ static struct state_key *make_state_key( GLcontext *ctx )
key->unit[i].light_attenuated = 1;
}
}
+
+ if (check_active_shininess(ctx, key, 0)) {
+ key->material_shininess_is_zero = 0;
+ }
+ else if (key->light_twoside &&
+ check_active_shininess(ctx, key, 1)) {
+ key->material_shininess_is_zero = 0;
+ }
+ else {
+ key->material_shininess_is_zero = 1;
+ }
}
if (ctx->Transform.Normalize)
@@ -915,7 +947,26 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,
}
+static void emit_degenerate_lit( struct tnl_program *p,
+ struct ureg lit,
+ struct ureg dots )
+{
+ struct ureg id = get_identity_param(p);
+
+ /* 1, 0, 0, 1
+ */
+ emit_op1(p, OPCODE_MOV, lit, 0, swizzle(id, Z, X, X, Z));
+ /* 1, MAX2(in[0], 0), 0, 1
+ */
+ emit_op2(p, OPCODE_MAX, lit, WRITEMASK_Y, lit, swizzle1(dots, X));
+
+ /* 1, MAX2(in[0], 0), (in[0] > 0 ? 1 : 0), 1
+ */
+ emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z,
+ lit, /* 0 */
+ swizzle1(dots, X)); /* in[0] */
+}
/* Need to add some addtional parameters to allow lighting in object
@@ -941,9 +992,11 @@ static void build_lighting( struct tnl_program *p )
set_material_flags(p);
{
- struct ureg shininess = get_material(p, 0, STATE_SHININESS);
- emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
- release_temp(p, shininess);
+ if (!p->state->material_shininess_is_zero) {
+ struct ureg shininess = get_material(p, 0, STATE_SHININESS);
+ emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
+ release_temp(p, shininess);
+ }
_col0 = make_temp(p, get_scenecolor(p, 0));
if (separate)
@@ -954,10 +1007,12 @@ static void build_lighting( struct tnl_program *p )
}
if (twoside) {
- struct ureg shininess = get_material(p, 1, STATE_SHININESS);
- emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
- negate(swizzle1(shininess,X)));
- release_temp(p, shininess);
+ if (!p->state->material_shininess_is_zero) {
+ struct ureg shininess = get_material(p, 1, STATE_SHININESS);
+ emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
+ negate(swizzle1(shininess,X)));
+ release_temp(p, shininess);
+ }
_bfc0 = make_temp(p, get_scenecolor(p, 1));
if (separate)
@@ -1006,14 +1061,17 @@ static void build_lighting( struct tnl_program *p )
*/
VPpli = register_param3(p, STATE_INTERNAL,
STATE_LIGHT_POSITION_NORMALIZED, i);
- if (p->state->light_local_viewer) {
- struct ureg eye_hat = get_eye_position_normalized(p);
- half = get_temp(p);
- emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
- emit_normalize_vec3(p, half, half);
- } else {
- half = register_param3(p, STATE_INTERNAL,
- STATE_LIGHT_HALF_VECTOR, i);
+
+ if (!p->state->material_shininess_is_zero) {
+ if (p->state->light_local_viewer) {
+ struct ureg eye_hat = get_eye_position_normalized(p);
+ half = get_temp(p);
+ emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+ emit_normalize_vec3(p, half, half);
+ } else {
+ half = register_param3(p, STATE_INTERNAL,
+ STATE_LIGHT_HALF_VECTOR, i);
+ }
}
}
else {
@@ -1023,7 +1081,6 @@ static void build_lighting( struct tnl_program *p )
struct ureg dist = get_temp(p);
VPpli = get_temp(p);
- half = get_temp(p);
/* Calculate VPpli vector
*/
@@ -1045,16 +1102,20 @@ static void build_lighting( struct tnl_program *p )
/* Calculate viewer direction, or use infinite viewer:
*/
- if (p->state->light_local_viewer) {
- struct ureg eye_hat = get_eye_position_normalized(p);
- emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
- }
- else {
- struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
- emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
- }
-
- emit_normalize_vec3(p, half, half);
+ if (!p->state->material_shininess_is_zero) {
+ half = get_temp(p);
+
+ if (p->state->light_local_viewer) {
+ struct ureg eye_hat = get_eye_position_normalized(p);
+ emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+ }
+ else {
+ struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
+ emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
+ }
+
+ emit_normalize_vec3(p, half, half);
+ }
release_temp(p, dist);
}
@@ -1062,7 +1123,9 @@ static void build_lighting( struct tnl_program *p )
/* Calculate dot products:
*/
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
- emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
+
+ if (!p->state->material_shininess_is_zero)
+ emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
/* Front face lighting:
*/
@@ -1073,7 +1136,11 @@ static void build_lighting( struct tnl_program *p )
struct ureg res0, res1;
GLuint mask0, mask1;
- emit_op1(p, OPCODE_LIT, lit, 0, dots);
+ if (p->state->material_shininess_is_zero) {
+ emit_degenerate_lit(p, lit, dots);
+ } else {
+ emit_op1(p, OPCODE_LIT, lit, 0, dots);
+ }
if (!is_undef(att))
emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
@@ -1099,7 +1166,7 @@ static void build_lighting( struct tnl_program *p )
res1 = _col1;
}
- emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
+ emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
@@ -1117,7 +1184,11 @@ static void build_lighting( struct tnl_program *p )
struct ureg res0, res1;
GLuint mask0, mask1;
- emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z)));
+ if (p->state->material_shininess_is_zero) {
+ emit_degenerate_lit(p, lit, negate(swizzle(dots,X,Y,W,Z)));
+ } else {
+ emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z)));
+ }
if (!is_undef(att))
emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
@@ -1142,7 +1213,7 @@ static void build_lighting( struct tnl_program *p )
mask1 = 0;
}
- emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
+ emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
--
cgit v1.2.3
From 333d377bbda4f598292108f91cd8ec4f0f647c20 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 23 May 2008 19:17:57 +0100
Subject: glapi: fix include path & make build work
---
src/mesa/x86/glapi_x86.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'src')
diff --git a/src/mesa/x86/glapi_x86.S b/src/mesa/x86/glapi_x86.S
index 9a5d875e21..b99c2b6520 100644
--- a/src/mesa/x86/glapi_x86.S
+++ b/src/mesa/x86/glapi_x86.S
@@ -27,7 +27,7 @@
*/
#include "assyntax.h"
-#include "glapioffsets.h"
+#include "glapi/glapioffsets.h"
#if defined(STDCALL_API)
# if defined(USE_MGL_NAMESPACE)
--
cgit v1.2.3
From e841b92d9c8bf48085b4996df828ae745977f931 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 23 May 2008 20:05:36 +0100
Subject: mesa: further degenerate the special case lit substitute
---
src/mesa/main/ffvertex_prog.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
(limited to 'src')
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index 623c2a64b5..90b156f812 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -953,19 +953,19 @@ static void emit_degenerate_lit( struct tnl_program *p,
{
struct ureg id = get_identity_param(p);
- /* 1, 0, 0, 1
+ /* Note that result.x & result.w will not be examined. Note also that
+ * dots.xyzw == dots.xxxx.
*/
- emit_op1(p, OPCODE_MOV, lit, 0, swizzle(id, Z, X, X, Z));
- /* 1, MAX2(in[0], 0), 0, 1
+ /* result[1] = MAX2(in, 0)
*/
- emit_op2(p, OPCODE_MAX, lit, WRITEMASK_Y, lit, swizzle1(dots, X));
+ emit_op2(p, OPCODE_MAX, lit, 0, id, dots);
- /* 1, MAX2(in[0], 0), (in[0] > 0 ? 1 : 0), 1
+ /* result[2] = (in > 0 ? 1 : 0)
*/
emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z,
lit, /* 0 */
- swizzle1(dots, X)); /* in[0] */
+ dots); /* in[0] */
}
@@ -1122,10 +1122,13 @@ static void build_lighting( struct tnl_program *p )
/* Calculate dot products:
*/
- emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
-
- if (!p->state->material_shininess_is_zero)
+ if (p->state->material_shininess_is_zero) {
+ emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
+ }
+ else {
+ emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
+ }
/* Front face lighting:
*/
--
cgit v1.2.3
From feceb43948f76cc4d4c8ecbb86b1b1f438c6daee Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Fri, 23 May 2008 20:37:50 +0100
Subject: mesa: save a temp on normalizes
---
src/mesa/main/ffvertex_prog.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
(limited to 'src')
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index 90b156f812..e36f1f69a4 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -305,7 +305,7 @@ static struct state_key *make_state_key( GLcontext *ctx )
* generated program with line/function references for each
* instruction back into this file:
*/
-#define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM)
+#define DISASSEM 1
/* Should be tunable by the driver - do we want to do matrix
* multiplications with DP4's or with MUL/MAD's? SSE works better
@@ -687,11 +687,9 @@ static void emit_normalize_vec3( struct tnl_program *p,
struct ureg dest,
struct ureg src )
{
- struct ureg tmp = get_temp(p);
- emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
- emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
- emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
- release_temp(p, tmp);
+ emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, src);
+ emit_op1(p, OPCODE_RSQ, dest, WRITEMASK_X, dest);
+ emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(dest, X));
}
static void emit_passthrough( struct tnl_program *p,
--
cgit v1.2.3
From 6b3723ee8d084a1abbc971b21c58f7c1e66949a7 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sat, 24 May 2008 13:22:15 +0100
Subject: rtasm: add some helpers for calling out from generated code
---
src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 44 ++++++++++++++++++++++++++++--
src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 6 ++++
2 files changed, 47 insertions(+), 3 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index d78676b8f3..2415b0156b 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -218,6 +218,8 @@ static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1
/* Build a modRM byte + possible displacement. No treatment of SIB
* indexing. BZZT - no way to encode an absolute address.
+ *
+ * This is the "/r" field in the x86 manuals...
*/
static void emit_modrm( struct x86_function *p,
struct x86_reg reg,
@@ -256,7 +258,8 @@ static void emit_modrm( struct x86_function *p,
}
}
-
+/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes.
+ */
static void emit_modrm_noreg( struct x86_function *p,
unsigned op,
struct x86_reg regmem )
@@ -365,8 +368,7 @@ void x86_jcc( struct x86_function *p,
DUMP_I(cc);
if (offset < 0) {
- int amt = p->csr - p->store;
- assert(amt > -offset);
+ assert(p->csr - p->store > -offset);
}
if (offset <= 127 && offset >= -128) {
@@ -443,6 +445,16 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
emit_1i(p, imm);
}
+void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm )
+{
+ DUMP_RI( dst, imm );
+ assert(dst.mod == mod_REG);
+ emit_1ub(p, 0x80);
+ emit_modrm_noreg(p, 0, dst);
+ emit_1ub(p, imm);
+}
+
+
void x86_push( struct x86_function *p,
struct x86_reg reg )
{
@@ -459,6 +471,17 @@ void x86_push( struct x86_function *p,
p->stack_offset += 4;
}
+void x86_push_imm32( struct x86_function *p,
+ int imm32 )
+{
+ DUMP_I( imm32 );
+ emit_1ub(p, 0x68);
+ emit_1i(p, imm32);
+
+ p->stack_offset += 4;
+}
+
+
void x86_pop( struct x86_function *p,
struct x86_reg reg )
{
@@ -1558,6 +1581,21 @@ void mmx_movq( struct x86_function *p,
*/
+void x86_cdecl_caller_push_regs( struct x86_function *p )
+{
+ x86_push(p, x86_make_reg(file_REG32, reg_AX));
+ x86_push(p, x86_make_reg(file_REG32, reg_CX));
+ x86_push(p, x86_make_reg(file_REG32, reg_DX));
+}
+
+void x86_cdecl_caller_pop_regs( struct x86_function *p )
+{
+ x86_pop(p, x86_make_reg(file_REG32, reg_DX));
+ x86_pop(p, x86_make_reg(file_REG32, reg_CX));
+ x86_pop(p, x86_make_reg(file_REG32, reg_AX));
+}
+
+
/* Retreive a reference to one of the function arguments, taking into
* account any push/pop activity:
*/
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 9f7e31e055..63e812fac9 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -155,6 +155,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg);
* I load the immediate into general purpose register and use it.
*/
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
+void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm );
/* Macro for sse_shufps() and sse2_pshufd():
@@ -225,6 +226,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
+void x86_push_imm32( struct x86_function *p, int imm );
void x86_ret( struct x86_function *p );
void x86_retw( struct x86_function *p, unsigned short imm );
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -232,6 +234,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
+
+void x86_cdecl_caller_push_regs( struct x86_function *p );
+void x86_cdecl_caller_pop_regs( struct x86_function *p );
+
void x87_assert_stack_empty( struct x86_function *p );
void x87_f2xm1( struct x86_function *p );
--
cgit v1.2.3
From 6172f1295cf812108d8ceba15a83ba87880360d3 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sat, 24 May 2008 13:22:29 +0100
Subject: draw: add a debug-print which can be called from inside generated
shaders
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 67 ++++++++++++++++++++++++++++++++
src/gallium/auxiliary/draw/draw_vs_aos.h | 1 +
2 files changed, 68 insertions(+)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index b0c3ac49d2..aa119f242e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -752,7 +752,63 @@ static void x87_emit_ex2( struct aos_compilation *cp )
}
+static void PIPE_CDECL print_reg( const char *msg,
+ const float *reg )
+{
+ debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]);
+}
+
+static void emit_print( struct aos_compilation *cp,
+ const char *message, /* must point to a static string! */
+ unsigned file,
+ unsigned idx )
+{
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+ struct x86_reg arg = get_reg_ptr( cp, file, idx );
+ unsigned i;
+
+ /* There shouldn't be anything on the x87 stack. Can add this
+ * capacity later if need be.
+ */
+ assert(cp->func->x87_stack == 0);
+
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too. We're obviously not concerned about performance on this
+ * debug path, so here goes:
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+
+ /* Push the arguments:
+ */
+ x86_lea( cp->func, ecx, arg );
+ x86_push( cp->func, ecx );
+ x86_push_imm32( cp->func, (int)message );
+
+ /* Call the helper. Could call debug_printf directly, but
+ * print_reg is a nice place to put a breakpoint if need be.
+ */
+ x86_mov_reg_imm( cp->func, ecx, (int)print_reg );
+ x86_call( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+
+ /* Pop caller-save regs
+ */
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ /* Done...
+ */
+}
/**
* The traditional instructions. All operate on internal registers
@@ -1798,6 +1854,17 @@ static void vaos_set_constants( struct draw_vs_varient *varient,
memcpy(vaos->machine->constant,
constants,
(vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float));
+
+#if 0
+ unsigned i;
+ for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
+ debug_printf("state %d: %f %f %f %f\n",
+ i,
+ constants[i][0],
+ constants[i][1],
+ constants[i][2],
+ constants[i][3]);
+#endif
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index efdc9a38f4..a0680ec63d 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -96,6 +96,7 @@ struct aos_compilation {
unsigned insn_counter;
unsigned num_immediates;
+ unsigned count;
struct {
unsigned idx:16;
--
cgit v1.2.3
From a2b1c46535d02bb4cc154f26481eda264a65abe8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sat, 24 May 2008 13:22:39 +0100
Subject: mesa: evaluate _NeedEyeCoords prior to generating internal vertex
shader
---
src/mesa/main/state.c | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
(limited to 'src')
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index f8cb943e64..cdf1249cd0 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -1209,18 +1209,6 @@ _mesa_update_state_locked( GLcontext *ctx )
| _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR))
update_tricaps( ctx, new_state );
- if (ctx->FragmentProgram._MaintainTexEnvProgram) {
- prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
- }
- if (ctx->VertexProgram._MaintainTnlProgram) {
- prog_flags |= (_NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
- _NEW_TRANSFORM | _NEW_POINT |
- _NEW_FOG | _NEW_LIGHT);
- }
- if (new_state & prog_flags)
- update_program( ctx );
-
-
/* ctx->_NeedEyeCoords is now up to date.
*
* If the truth value of this variable has changed, update for the
@@ -1233,6 +1221,20 @@ _mesa_update_state_locked( GLcontext *ctx )
if (new_state & _MESA_NEW_NEED_EYE_COORDS)
_mesa_update_tnl_spaces( ctx, new_state );
+ if (ctx->FragmentProgram._MaintainTexEnvProgram) {
+ prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
+ }
+ if (ctx->VertexProgram._MaintainTnlProgram) {
+ prog_flags |= (_NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
+ _NEW_TRANSFORM | _NEW_POINT |
+ _NEW_FOG | _NEW_LIGHT |
+ _MESA_NEW_NEED_EYE_COORDS);
+ }
+ if (new_state & prog_flags)
+ update_program( ctx );
+
+
+
/*
* Give the driver a chance to act upon the new_state flags.
* The driver might plug in different span functions, for example.
--
cgit v1.2.3
From e1590abb17f1effd92c136207f363de6cf52df18 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sat, 24 May 2008 13:23:06 +0100
Subject: mesa: pre-swizzle normal scale state value
---
src/mesa/main/ffvertex_prog.c | 3 +--
src/mesa/shader/prog_statevars.c | 6 +++++-
2 files changed, 6 insertions(+), 3 deletions(-)
(limited to 'src')
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index e36f1f69a4..7a099b2376 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -775,8 +775,7 @@ static struct ureg get_transformed_normal( struct tnl_program *p )
struct ureg rescale = register_param2(p, STATE_INTERNAL,
STATE_NORMAL_SCALE);
- emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal,
- swizzle1(rescale, X));
+ emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
normal = transformed_normal;
}
diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c
index 37bd17ba4a..44fbfdcd04 100644
--- a/src/mesa/shader/prog_statevars.c
+++ b/src/mesa/shader/prog_statevars.c
@@ -397,7 +397,11 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
case STATE_INTERNAL:
switch (state[1]) {
case STATE_NORMAL_SCALE:
- ASSIGN_4V(value, ctx->_ModelViewInvScale, 0, 0, 1);
+ ASSIGN_4V(value,
+ ctx->_ModelViewInvScale,
+ ctx->_ModelViewInvScale,
+ ctx->_ModelViewInvScale,
+ 1);
return;
case STATE_TEXRECT_SCALE:
{
--
cgit v1.2.3
From 86e529ad90411d21bca3d70984b2db202e7a0cd6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sat, 24 May 2008 16:31:11 +0100
Subject: draw: use lookup tables to avoid calling pow() in LIT opcode
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 251 ++++++++++++++++++++++++++++++-
src/gallium/auxiliary/draw/draw_vs_aos.h | 27 ++++
2 files changed, 272 insertions(+), 6 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index aa119f242e..1fbb7088ca 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -105,8 +105,31 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
#define X87_CW_ROUND_MASK (3<<10)
#define X87_CW_INFINITY (1<<12)
+static void do_populate_lut( struct shine_tab *tab,
+ float unclamped_exponent )
+{
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
+ unsigned i;
+
+ tab->exponent = unclamped_exponent; /* for later comparison */
+
+ tab->values[0] = 0;
+ if (exponent == 0) {
+ for (i = 1; i < 258; i++) {
+ tab->values[i] = 1.0;
+ }
+ }
+ else {
+ for (i = 1; i < 258; i++) {
+ tab->values[i] = powf((float)i * epsilon, exponent);
+ }
+ }
+}
+
static void init_internals( struct aos_machine *machine )
{
+ unsigned i;
float inv = 1.0f/255.0f;
float f255 = 255.0f;
@@ -141,6 +164,9 @@ static void init_internals( struct aos_machine *machine )
(1<<6) |
X87_CW_ROUND_DOWN |
X87_CW_PRECISION_DOUBLE_EXT);
+
+ for (i = 0; i < MAX_SHINE_TAB; i++)
+ do_populate_lut( &machine->shine_tab[i], 1.0f );
}
@@ -1132,26 +1158,231 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
+static PIPE_CDECL void do_lit( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ if (in[0] > 0)
+ {
+ if (in[1] <= 0.0)
+ {
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = 0.0; /* specular term is zero when in[1] <= 0 */
+ result[3] = 1.0F;
+ }
+ else
+ {
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = powf(in[1], exponent);
+ result[3] = 1.0;
+ }
+ }
+ else
+ {
+ result[0] = 1.0F;
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 1.0F;
+ }
+}
+
+
+static PIPE_CDECL void do_lit_lut( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ if (in[0] > 0)
+ {
+ if (in[1] <= 0.0)
+ {
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = 0.0; /* specular term is zero when in[1] <= 0 */
+ result[3] = 1.0F;
+ return;
+ }
+
+ if (machine->lit_info[count].shine_tab->exponent != in[3]) {
+ machine->lit_info[count].func = do_lit;
+ goto no_luck;
+ }
+
+ if (in[1] <= 1.0)
+ {
+ const float *tab = machine->lit_info[count].shine_tab->values;
+ float f = in[1] * 256;
+ int k = (int)f;
+ float frac = f - (float)k;
+
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
+ result[3] = 1.0;
+ return;
+ }
+
+ no_luck:
+ {
+ const float epsilon = 1.0F / 256.0F;
+ float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+ result[0] = 1.0F;
+ result[1] = in[0];
+ result[2] = powf(in[1], exponent);
+ result[3] = 1.0;
+ }
+ }
+ else
+ {
+ result[0] = 1.0F;
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 1.0F;
+ }
+}
+
+
+
+static void PIPE_CDECL populate_lut( struct aos_machine *machine,
+ float *result,
+ const float *in,
+ unsigned count )
+{
+ unsigned i, tab;
+
+ /* Search for an existing table for this value. Note that without
+ * static analysis we don't really know if in[3] will be constant,
+ * but it usually is...
+ */
+ for (tab = 0; tab < MAX_SHINE_TAB; tab++) {
+ if (machine->shine_tab[tab].exponent == in[3]) {
+ goto found;
+ }
+ }
+
+ for (tab = 0, i = 1; i < MAX_SHINE_TAB; i++) {
+ if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
+ tab = i;
+ }
+
+ if (machine->shine_tab[tab].last_used == machine->now) {
+ /* No unused tables (this is not a ffvertex program...). Just
+ * call pow each time:
+ */
+ machine->lit_info[count].func = do_lit;
+ machine->lit_info[count].func( machine, result, in, count );
+ return;
+ }
+ else {
+ do_populate_lut( &machine->shine_tab[tab], in[3] );
+ }
+
+ found:
+ machine->shine_tab[tab].last_used = machine->now;
+ machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
+ machine->lit_info[count].func = do_lit_lut;
+ machine->lit_info[count].func( machine, result, in, count );
+}
+
+
+
static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
- struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+ unsigned lit_count = cp->lit_count++;
+ struct x86_reg result, arg0;
+ unsigned i;
+
+#if 1
+ /* For absolute correctness, need to spill/invalidate all XMM regs
+ * too.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+ aos_release_xmm_reg(cp, i);
+ }
+#endif
+
+ if (writemask != TGSI_WRITEMASK_XYZW)
+ result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0]));
+ else
+ result = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+
+
+ arg0 = fetch_src( cp, &op->FullSrcRegisters[0] );
+ if (arg0.file == file_XMM) {
+ struct x86_reg tmp = x86_make_disp(cp->machine_EDX,
+ Offset(struct aos_machine, tmp[1]));
+ sse_movaps( cp->func, tmp, arg0 );
+ arg0 = tmp;
+ }
+
+
+
+ /* Push caller-save (ie scratch) regs.
+ */
+ x86_cdecl_caller_push_regs( cp->func );
+
+ /* Push the arguments:
+ */
+ x86_push_imm32( cp->func, lit_count );
+
+ x86_lea( cp->func, ecx, arg0 );
+ x86_push( cp->func, ecx );
+ x86_lea( cp->func, ecx, result );
+ x86_push( cp->func, ecx );
+ x86_push( cp->func, cp->machine_EDX );
+ if (lit_count < MAX_LIT_INFO) {
+ x86_mov( cp->func, ecx, x86_make_disp( cp->machine_EDX,
+ Offset(struct aos_machine, lit_info) +
+ lit_count * sizeof(struct lit_info) +
+ Offset(struct lit_info, func)));
+ }
+ else {
+ x86_mov_reg_imm( cp->func, ecx, (int)do_lit );
+ }
+
+ x86_call( cp->func, ecx );
+
+ x86_pop( cp->func, ecx ); /* fixme... */
+ x86_pop( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+ x86_pop( cp->func, ecx );
+
+ x86_cdecl_caller_pop_regs( cp->func );
+
+ if (writemask != TGSI_WRITEMASK_XYZW) {
+ store_dest( cp,
+ &op->FullDstRegisters[0],
+ get_xmm_clone( cp, result ) );
+ }
+
+ return TRUE;
+}
+
+
+static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
+{
+ struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
+ unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
if (writemask & TGSI_WRITEMASK_YZ) {
struct x86_reg st1 = x86_make_reg(file_x87, 1);
struct x86_reg st2 = x86_make_reg(file_x87, 2);
-
-
-
/* a1' = a1 <= 0 ? 1 : a1;
- *
- * Note: use 1.0 to avoid passing zero to
*/
x87_fldz(cp->func); /* 1 0 */
#if 1
@@ -1865,6 +2096,14 @@ static void vaos_set_constants( struct draw_vs_varient *varient,
constants[i][2],
constants[i][3]);
#endif
+
+ {
+ unsigned i;
+ for (i = 0; i < MAX_LIT_INFO; i++) {
+ vaos->machine->lit_info[i].func = populate_lut;
+ vaos->machine->now++;
+ }
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index a0680ec63d..c08c73d4bc 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -59,6 +59,25 @@ struct x86_function;
#define FPU_RND_NEG 1
#define FPU_RND_NEAREST 2
+struct aos_machine;
+typedef void PIPE_CDECL (*lit_func)( struct aos_machine *,
+ float *result,
+ const float *in,
+ unsigned count );
+struct shine_tab {
+ float exponent;
+ float values[258];
+ unsigned last_used;
+};
+
+struct lit_info {
+ lit_func func;
+ struct shine_tab *shine_tab;
+};
+
+#define MAX_SHINE_TAB 4
+#define MAX_LIT_INFO 16
+
/* This is the temporary storage used by all the aos_sse vs varients.
* Create one per context and reuse by passing a pointer in at
* vs_varient creation??
@@ -74,6 +93,13 @@ struct aos_machine {
float scale[4]; /* viewport */
float translate[4]; /* viewport */
+ float tmp[2][4]; /* scratch space for LIT */
+
+ struct shine_tab shine_tab[MAX_SHINE_TAB];
+ struct lit_info lit_info[MAX_LIT_INFO];
+ unsigned now;
+
+
ushort fpu_rnd_nearest;
ushort fpu_rnd_neg_inf;
ushort fpu_restore;
@@ -97,6 +123,7 @@ struct aos_compilation {
unsigned insn_counter;
unsigned num_immediates;
unsigned count;
+ unsigned lit_count;
struct {
unsigned idx:16;
--
cgit v1.2.3
From 48a24f0ff7e3aad000b8acc55c16bbeaca58abe6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sat, 24 May 2008 16:32:08 +0100
Subject: Revert "mesa: save a temp on normalizes"
This reverts commit feceb43948f76cc4d4c8ecbb86b1b1f438c6daee.
---
src/mesa/main/ffvertex_prog.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
(limited to 'src')
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index 7a099b2376..a627a21f65 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -305,7 +305,7 @@ static struct state_key *make_state_key( GLcontext *ctx )
* generated program with line/function references for each
* instruction back into this file:
*/
-#define DISASSEM 1
+#define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM)
/* Should be tunable by the driver - do we want to do matrix
* multiplications with DP4's or with MUL/MAD's? SSE works better
@@ -687,9 +687,11 @@ static void emit_normalize_vec3( struct tnl_program *p,
struct ureg dest,
struct ureg src )
{
- emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, src);
- emit_op1(p, OPCODE_RSQ, dest, WRITEMASK_X, dest);
- emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(dest, X));
+ struct ureg tmp = get_temp(p);
+ emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
+ emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
+ emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
+ release_temp(p, tmp);
}
static void emit_passthrough( struct tnl_program *p,
--
cgit v1.2.3
From a6fca8acb5e8ce0e5e6ce91a524e2bb4c180d3ac Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sun, 25 May 2008 11:20:38 +0100
Subject: draw: fix input vs output typo in emit
---
src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 0dda9df97d..f39ebb7a17 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -282,7 +282,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp )
{
unsigned i;
- for (i = 0; i < cp->vaos->base.vs->info.num_inputs; i++) {
+ for (i = 0; i < cp->vaos->base.vs->info.num_outputs; i++) {
unsigned format = cp->vaos->base.key.element[i].out.format;
unsigned offset = cp->vaos->base.key.element[i].out.offset;
--
cgit v1.2.3
From 584a3dcf8e4042cc1a5d48d83ea63d0a3c9706c1 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sun, 25 May 2008 11:25:09 +0100
Subject: draw: add viewport support to generic vs varient code
---
src/gallium/auxiliary/draw/draw_vs_varient.c | 43 ++++++++++++++++++++++++++--
1 file changed, 41 insertions(+), 2 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index f6f621a748..c15c648527 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -44,7 +44,7 @@
struct draw_vs_varient_generic {
struct draw_vs_varient base;
-
+ struct pipe_viewport_state viewport;
struct draw_vertex_shader *shader;
struct draw_context *draw;
@@ -87,6 +87,30 @@ static void vsvg_set_input( struct draw_vs_varient *varient,
}
+/* Mainly for debug at this stage:
+ */
+static void do_viewport( struct draw_vs_varient_generic *vsvg,
+ unsigned count,
+ void *output_buffer )
+{
+ char *ptr = (char *)output_buffer;
+ const float *scale = vsvg->viewport.scale;
+ const float *trans = vsvg->viewport.translate;
+ unsigned stride = vsvg->base.key.output_stride;
+ unsigned j;
+
+ for (j = 0; j < count; j++, ptr += stride) {
+ float *data = (float *)ptr;
+ float w = 1.0f / data[3];
+
+ data[0] = data[0] * w * scale[0] + trans[0];
+ data[1] = data[1] * w * scale[1] + trans[1];
+ data[2] = data[2] * w * scale[2] + trans[2];
+ data[3] = w;
+ }
+}
+
+
static void vsvg_run_elts( struct draw_vs_varient *varient,
const unsigned *elts,
unsigned count,
@@ -112,6 +136,12 @@ static void vsvg_run_elts( struct draw_vs_varient *varient,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
+ if (vsvg->base.key.viewport)
+ do_viewport( vsvg,
+ count,
+ output_buffer );
+
+
//if (!vsvg->already_in_emit_format)
vsvg->emit->set_buffer( vsvg->emit,
@@ -152,6 +182,12 @@ static void vsvg_run_linear( struct draw_vs_varient *varient,
vsvg->base.key.output_stride,
vsvg->base.key.output_stride);
+ if (vsvg->base.key.viewport)
+ do_viewport( vsvg,
+ count,
+ output_buffer );
+
+
//if (!vsvg->already_in_emit_format)
vsvg->emit->set_buffer( vsvg->emit,
0,
@@ -171,6 +207,9 @@ static void vsvg_run_linear( struct draw_vs_varient *varient,
static void vsvg_set_viewport( struct draw_vs_varient *varient,
const struct pipe_viewport_state *viewport )
{
+ struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
+
+ vsvg->viewport = *viewport;
}
static void vsvg_destroy( struct draw_vs_varient *varient )
@@ -185,7 +224,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
unsigned i;
struct translate_key fetch, emit;
- if (key->viewport || key->clip)
+ if (key->clip)
return NULL;
struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
--
cgit v1.2.3
From dc52622fcf5660a9675ed61c359cf7068aa4861b Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sun, 25 May 2008 15:28:30 +0100
Subject: cso: use memcpy rather than structure assignment for copying
Apparently gcc will omit to copy hidden padding bytes under some circumstances,
which means later on memcmp() will indicate a difference between structs
even though all the visible members are identical.
---
src/gallium/auxiliary/cso_cache/cso_context.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index a1a3a9efaf..7236bff592 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -292,7 +292,7 @@ enum pipe_error cso_set_blend(struct cso_context *ctx,
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
- cso->state = *templ;
+ memcpy(&cso->state, templ, sizeof(*templ));
cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state;
cso->context = ctx->pipe;
@@ -350,7 +350,7 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx,
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
- cso->state = *templ;
+ memcpy(&cso->state, templ, sizeof(*templ));
cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state;
cso->context = ctx->pipe;
@@ -508,7 +508,7 @@ enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx,
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
- cso->state = *templ;
+ memcpy(&cso->state, templ, sizeof(*templ));
cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state;
cso->context = ctx->pipe;
@@ -564,7 +564,7 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
- cso->state = *templ;
+ memcpy(&cso->state, templ, sizeof(*templ));
cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state;
cso->context = ctx->pipe;
@@ -726,7 +726,7 @@ enum pipe_error cso_set_vertex_shader(struct cso_context *ctx,
if (!cso)
return PIPE_ERROR_OUT_OF_MEMORY;
- cso->state = *templ;
+ memcpy(&cso->state, templ, sizeof(*templ));
cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state);
cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state;
cso->context = ctx->pipe;
--
cgit v1.2.3
From caadc8d944c558e1fa9f23c3616d726337a19862 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sun, 25 May 2008 15:37:47 +0100
Subject: draw: clean up some of the xmm register manipulation function names
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 141 +++++++++++++++++--------------
1 file changed, 78 insertions(+), 63 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 1fbb7088ca..17b9442d6b 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -190,17 +190,26 @@ static void spill( struct aos_compilation *cp, unsigned idx )
}
}
-static boolean is_xmm_tmp( struct aos_compilation *cp,
- struct x86_reg reg )
+
+static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
+ struct x86_reg reg )
{
- return (reg.file == file_XMM &&
- cp->xmm[reg.idx].file == TGSI_FILE_NULL);
+ if (reg.file != file_XMM ||
+ cp->xmm[reg.idx].file != TGSI_FILE_NULL)
+ {
+ struct x86_reg tmp = aos_get_xmm_reg(cp);
+ sse_movaps(cp->func, tmp, reg);
+ reg = tmp;
+ }
+
+ return reg;
}
-static struct x86_reg get_xmm_clone( struct aos_compilation *cp,
- struct x86_reg reg )
+static struct x86_reg get_xmm( struct aos_compilation *cp,
+ struct x86_reg reg )
{
- if (!is_xmm_tmp(cp, reg)) {
+ if (reg.file != file_XMM)
+ {
struct x86_reg tmp = aos_get_xmm_reg(cp);
sse_movaps(cp->func, tmp, reg);
reg = tmp;
@@ -210,6 +219,9 @@ static struct x86_reg get_xmm_clone( struct aos_compilation *cp,
}
+/* Allocate an empty xmm register, either as a temporary or later to
+ * "adopt" as a shader reg.
+ */
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp )
{
unsigned i;
@@ -251,32 +263,11 @@ void aos_release_xmm_reg( struct aos_compilation *cp,
cp->xmm[idx].last_used = 0;
}
-static void invalidate_xmm( struct aos_compilation *cp,
- unsigned file, unsigned idx )
-{
- unsigned i;
-
- /* Invalidate any old copy of this register in XMM0-7.
- */
- for (i = 0; i < 8; i++) {
- if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) {
-
- if (cp->xmm[i].dirty)
- spill(cp, i);
-
- aos_release_xmm_reg(cp, i);
- break;
- }
- }
- for (; i < 8; i++) {
- if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) {
- assert(0);
- }
- }
-}
-
+
+/* Mark an xmm reg as holding the current copy of a shader reg.
+ */
void aos_adopt_xmm_reg( struct aos_compilation *cp,
struct x86_reg reg,
unsigned file,
@@ -290,6 +281,9 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
return;
}
+ /* If any xmm reg thinks it holds this shader reg, break the
+ * illusion.
+ */
for (i = 0; i < 8; i++) {
if (cp->xmm[i].file == file &&
cp->xmm[i].idx == idx) {
@@ -304,12 +298,24 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
}
-
+/* Return a pointer to the in-memory copy of the reg, making sure it is up to date.
+ */
static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
unsigned file,
unsigned idx )
{
- invalidate_xmm( cp, file, idx );
+ unsigned i;
+
+ /* Ensure the in-memory copy of this reg is up-to-date
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file &&
+ cp->xmm[i].idx == idx &&
+ cp->xmm[i].dirty) {
+ spill(cp, i);
+ }
+ }
+
return get_reg_ptr( cp, file, idx );
}
@@ -320,7 +326,26 @@ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
static struct x86_reg get_dst_ptr( struct aos_compilation *cp,
const struct tgsi_full_dst_register *dst )
{
- return aos_get_shader_reg_ptr( cp, dst->DstRegister.File, dst->DstRegister.Index );
+ unsigned file = dst->DstRegister.File;
+ unsigned idx = dst->DstRegister.Index;
+ unsigned i;
+
+
+ /* Ensure in-memory copy of this reg is up-to-date and invalidate
+ * any xmm copies.
+ */
+ for (i = 0; i < 8; i++) {
+ if (cp->xmm[i].file == file &&
+ cp->xmm[i].idx == idx)
+ {
+ if (cp->xmm[i].dirty)
+ spill(cp, i);
+
+ aos_release_xmm_reg(cp, i);
+ }
+ }
+
+ return get_reg_ptr( cp, file, idx );
}
@@ -358,15 +383,7 @@ static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
unsigned idx )
{
struct x86_reg reg = aos_get_shader_reg( cp, file, idx );
-
- if (reg.file != file_XMM) {
- struct x86_reg tmp = aos_get_xmm_reg(cp);
- sse_movaps(cp->func, tmp, reg);
- aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE );
- reg = tmp;
- }
-
- return reg;
+ return get_xmm( cp, reg );
}
@@ -634,7 +651,7 @@ static void store_dest( struct aos_compilation *cp,
case TGSI_WRITEMASK_XYZW:
aos_adopt_xmm_reg(cp,
- get_xmm_clone(cp, result),
+ get_xmm_writable(cp, result),
reg->DstRegister.File,
reg->DstRegister.Index,
TRUE);
@@ -649,15 +666,15 @@ static void store_dest( struct aos_compilation *cp,
switch (reg->DstRegister.WriteMask) {
case TGSI_WRITEMASK_X:
- sse_movss(cp->func, dst, get_xmm_clone(cp, result));
+ sse_movss(cp->func, dst, get_xmm(cp, result));
break;
case TGSI_WRITEMASK_XY:
- sse_shufps(cp->func, dst, get_xmm_clone(cp, result), SHUF(X, Y, Z, W));
+ sse_shufps(cp->func, dst, get_xmm(cp, result), SHUF(X, Y, Z, W));
break;
case TGSI_WRITEMASK_ZW:
- result = get_xmm_clone(cp, result);
+ result = get_xmm_writable(cp, result);
sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W));
dst = result;
break;
@@ -845,7 +862,7 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_mulps(cp->func, dst, neg);
sse_maxps(cp->func, dst, arg0);
@@ -858,7 +875,7 @@ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_addps(cp->func, dst, arg1);
@@ -882,10 +899,9 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_mulps(cp->func, dst, arg1);
-
/* Now the hard bit: sum the first 3 values:
*/
sse_movhlps(cp->func, tmp, dst);
@@ -908,7 +924,7 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -932,7 +948,7 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg tmp = aos_get_xmm_reg(cp);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -1366,7 +1382,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
if (writemask != TGSI_WRITEMASK_XYZW) {
store_dest( cp,
&op->FullDstRegisters[0],
- get_xmm_clone( cp, result ) );
+ get_xmm_writable( cp, result ) );
}
return TRUE;
@@ -1440,7 +1456,7 @@ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_maxps(cp->func, dst, arg1);
@@ -1453,7 +1469,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_minps(cp->func, dst, arg1);
@@ -1464,7 +1480,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst
static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
/* potentially nothing to do */
@@ -1476,7 +1492,7 @@ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_mulps(cp->func, dst, arg1);
@@ -1494,7 +1510,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
/* If we can't clobber old contents of arg0, get a temporary & copy
* it there, then clobber it...
*/
- arg0 = get_xmm_clone(cp, arg0);
+ arg0 = get_xmm_writable(cp, arg0);
sse_mulps(cp->func, arg0, arg1);
sse_addps(cp->func, arg0, arg2);
@@ -1562,7 +1578,7 @@ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_cmpps(cp->func, dst, arg1, cc_NotLessThan);
sse_andps(cp->func, dst, ones);
@@ -1586,7 +1602,7 @@ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_inst
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_cmpps(cp->func, dst, arg1, cc_LessThan);
sse_andps(cp->func, dst, ones);
@@ -1599,7 +1615,7 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = get_xmm_clone(cp, arg0);
+ struct x86_reg dst = get_xmm_writable(cp, arg0);
sse_subps(cp->func, dst, arg1);
@@ -1989,7 +2005,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
/* decr count, loop if not zero
*/
x86_dec(cp.func, cp.count_ESI);
-/* x86_test(cp.func, cp.count_ESI, cp.count_ESI); */
x86_jcc(cp.func, cc_NZ, label);
restore_fpu_state(&cp);
--
cgit v1.2.3
From ce331e3a5e2a0505e01637861bdd7f5e6cfbd041 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sun, 25 May 2008 15:44:17 +0100
Subject: draw: special case for writing out scalar results
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 127 +++++++++++++++++++++++++------
1 file changed, 102 insertions(+), 25 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 17b9442d6b..aebc230858 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -697,6 +697,72 @@ static void store_dest( struct aos_compilation *cp,
}
+static void inject_scalar( struct aos_compilation *cp, /* emits code that writes one scalar into a single component of dst */
+ struct x86_reg dst, /* register that is shuffled and partially overwritten */
+ struct x86_reg result, /* source operand handed to sse_movss() */
+ unsigned swizzle ) /* shuffle selector; store_scalar_dest() passes swaps of X with Y, Z or W */
+{
+ sse_shufps(cp->func, dst, dst, swizzle); /* permute dst; with the callers' swizzles this moves the target component into X */
+ sse_movss(cp->func, dst, result); /* presumably emits MOVSS, replacing only dst's X component -- confirm in the emitter */
+ sse_shufps(cp->func, dst, dst, swizzle); /* same permutation again; undoes the first one when swizzle is a plain two-lane swap */
+}
+
+
+static void store_scalar_dest( struct aos_compilation *cp,
+ const struct tgsi_full_dst_register *reg,
+ struct x86_reg result )
+{
+ unsigned writemask = reg->DstRegister.WriteMask;
+ struct x86_reg dst;
+ /* Store a scalar result (the X component of 'result') into the destination 'reg' according to its writemask. */
+ if (writemask != TGSI_WRITEMASK_X &&
+ writemask != TGSI_WRITEMASK_Y &&
+ writemask != TGSI_WRITEMASK_Z &&
+ writemask != TGSI_WRITEMASK_W &&
+ writemask != 0)
+ {
+ result = get_xmm_writable(cp, result); /* already true, right? */
+ sse_shufps(cp->func, result, result, SHUF(X,X,X,X)); /* replicate X into all four components */
+ store_dest(cp, reg, result); /* general path; presumably store_dest() applies the writemask itself */
+ return;
+ }
+ /* From here on the writemask names exactly one component, or is 0. */
+ result = get_xmm(cp, result);
+ dst = aos_get_shader_reg_xmm(cp,
+ reg->DstRegister.File,
+ reg->DstRegister.Index);
+
+ /* X needs only a movss; for Y, Z and W the swizzle handed to inject_scalar() */
+ /* exchanges X with the target component, and it is applied before and after the movss. */
+ switch (reg->DstRegister.WriteMask) {
+ case TGSI_WRITEMASK_X:
+ sse_movss(cp->func, dst, result);
+ break;
+
+ case TGSI_WRITEMASK_Y:
+ inject_scalar(cp, dst, result, SHUF(Y, X, Z, W));
+ break;
+
+ case TGSI_WRITEMASK_Z:
+ inject_scalar(cp, dst, result, SHUF(Z, Y, X, W));
+ break;
+
+ case TGSI_WRITEMASK_W:
+ inject_scalar(cp, dst, result, SHUF(W, Y, Z, X));
+ break;
+
+ default:
+ break; /* only reachable with a writemask of 0: no component is written */
+ }
+ /* Hand dst back to the register tracker as the holder of this shader register. */
+ aos_adopt_xmm_reg(cp,
+ dst,
+ reg->DstRegister.File,
+ reg->DstRegister.Index,
+ TRUE); /* presumably the 'dirty' flag -- confirm against aos_adopt_xmm_reg() */
+}
+
+
static void x87_fst_or_nop( struct x86_function *func,
unsigned writemask,
@@ -909,11 +975,8 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
sse_addss(cp->func, dst, tmp);
- if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
-
aos_release_xmm_reg(cp, tmp.idx);
- store_dest(cp, &op->FullDstRegisters[0], dst);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
}
@@ -935,11 +998,8 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst
emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z));
sse_addss(cp->func, dst, tmp);
- if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
-
aos_release_xmm_reg(cp, tmp.idx);
- store_dest(cp, &op->FullDstRegisters[0], dst);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
}
@@ -961,11 +1021,8 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst
emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W));
sse_addss(cp->func, dst, tmp);
- if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
-
aos_release_xmm_reg(cp, tmp.idx);
- store_dest(cp, &op->FullDstRegisters[0], dst);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
}
@@ -1518,7 +1575,9 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-
+/* Really not sufficient -- need to check for conditions that could
+ * generate inf/nan values, which will slow things down hugely.
+ */
static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */
@@ -1548,27 +1607,45 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
sse_divss(cp->func, dst, arg0);
}
- if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
-
- store_dest(cp, &op->FullDstRegisters[0], dst);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
}
+
+/* Although rsqrtps() and rcpps() are low precision on some/all SSE
+ * implementations, it is possible to improve their precision at
+ * fairly low cost, using a Newton-Raphson step, as below:
+ *
+ * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
+ * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+ *
+ * See: http://softwarecommunity.intel.com/articles/eng/1818.htm
+ */
static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg dst = aos_get_xmm_reg(cp);
- sse_rsqrtss(cp->func, dst, arg0);
-
- /* Extend precision here...
- */
-
- if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1)
- sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X));
+ if (1) {
+ sse_rsqrtss(cp->func, dst, arg0);
+ }
+ else {
+#if 0
+ /* Extend precision here...
+ */
+ sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
+ sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
+
+ sse_rsqrtss( func, tmp1, src ); /* rsqrtss(a) */
+ sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) */
+ sse_mulss( func, dst, tmp1 ); /* .5 * rsqrtss(a) */
+ sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) * rsqrtss(a) */
+ sse_subss( func, tmp0, src ); /* 3.0 - (a * rsqrtss(a) * rsqrtss(a)) */
+ sse_mulss( func, dst, tmp0 ); /* .5 * r * (3.0 - (a * r * r)) */
+#endif
+ }
- store_dest(cp, &op->FullDstRegisters[0], dst);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
return TRUE;
}
--
cgit v1.2.3
From 3afb7198e01516dba38bb3248d4c0161e54650fe Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sun, 25 May 2008 15:45:27 +0100
Subject: draw: remove EXP & LOG from vs_aos.c
These don't get hit & look like bug magnets to me...
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 85 ++------------------------------
1 file changed, 4 insertions(+), 81 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index aebc230858..34dc09ead7 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1066,85 +1066,6 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst
return TRUE;
}
-static boolean emit_EXP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
-{
- struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
- struct x86_reg st0 = x86_make_reg(file_x87, 0);
- struct x86_reg st1 = x86_make_reg(file_x87, 1);
- struct x86_reg st3 = x86_make_reg(file_x87, 3);
- unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
-
- /* CAUTION: dst may alias arg0!
- */
- x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* arg0.x */
- x87_fld(cp->func, st0); /* arg arg */
-
- /* by default, fpu is setup to round-to-nearest. We want to
- * change this now, and track the state through to the end of the
- * generated function so that it isn't repeated unnecessarily.
- * Alternately, could subtract .5 to get round to -inf behaviour.
- */
- set_fpu_round_neg_inf( cp );
- x87_fprndint( cp->func ); /* flr(a) a */
- x87_fld(cp->func, st0); /* flr(a) flr(a) a */
- x87_fld1(cp->func); /* 1 floor(a) floor(a) a */
- x87_fst_or_nop(cp->func, writemask, 3, dst); /* stack unchanged */
-
- x87_fscale(cp->func); /* 2^floor(a) floor(a) a */
- x87_fst(cp->func, st3); /* 2^floor(a) floor(a) a 2^floor(a)*/
-
- x87_fstp_or_pop(cp->func, writemask, 0, dst); /* flr(a) a 2^flr(a) */
-
- x87_fsubp(cp->func, st1); /* frac(a) 2^flr(a) */
-
- x87_fst_or_nop(cp->func, writemask, 1, dst); /* frac(a) 2^flr(a) */
-
- x87_f2xm1(cp->func); /* (2^frac(a))-1 2^flr(a)*/
- x87_fld1(cp->func); /* 1 (2^frac(a))-1 2^flr(a)*/
- x87_faddp(cp->func, st1); /* 2^frac(a) 2^flr(a) */
- x87_fmulp(cp->func, st1); /* 2^a */
-
- x87_fstp_or_pop(cp->func, writemask, 2, dst);
-
-/* dst[0] = 2^floor(tmp); */
-/* dst[1] = frac(tmp); */
-/* dst[2] = 2^floor(tmp) * 2^frac(tmp); */
-/* dst[3] = 1.0F; */
- return TRUE;
-}
-
-static boolean emit_LOG( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
-{
- struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]);
- struct x86_reg st0 = x86_make_reg(file_x87, 0);
- struct x86_reg st1 = x86_make_reg(file_x87, 1);
- struct x86_reg st2 = x86_make_reg(file_x87, 2);
- unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
-
- /* CAUTION: dst may alias arg0!
- */
- x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* arg0.x */
- x87_fabs(cp->func); /* |arg0.x| */
- x87_fxtract(cp->func); /* mantissa(arg0.x), exponent(arg0.x) */
- x87_fst(cp->func, st2); /* mantissa, exponent, mantissa */
- x87_fld1(cp->func); /* 1, mantissa, exponent, mantissa */
- x87_fyl2x(cp->func); /* log2(mantissa), exponent, mantissa */
- x87_fadd(cp->func, st0, st1); /* e+l2(m), e, m */
-
- x87_fstp_or_pop(cp->func, writemask, 2, dst); /* e, m */
-
- x87_fld1(cp->func); /* 1, e, m */
- x87_fsub(cp->func, st1, st0); /* 1, e-1, m */
-
- x87_fstp_or_pop(cp->func, writemask, 3, dst); /* e-1,m */
- x87_fstp_or_pop(cp->func, writemask, 0, dst); /* m */
-
- x87_fadd(cp->func, st0, st0); /* 2m */
-
- x87_fstp_or_pop( cp->func, writemask, 1, dst );
-
- return TRUE;
-}
static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
@@ -1755,10 +1676,12 @@ emit_instruction( struct aos_compilation *cp,
return emit_RSQ(cp, inst);
case TGSI_OPCODE_EXP:
- return emit_EXP(cp, inst);
+ /*return emit_EXP(cp, inst);*/
+ return FALSE;
case TGSI_OPCODE_LOG:
- return emit_LOG(cp, inst);
+ /*return emit_LOG(cp, inst);*/
+ return FALSE;
case TGSI_OPCODE_MUL:
return emit_MUL(cp, inst);
--
cgit v1.2.3
From 9c7568965c00dcc2e9403a2f94f1cd09dcd783ae Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Sun, 25 May 2008 15:47:04 +0100
Subject: draw: slight tweak for XPD opcode
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 21 +++++++--------------
1 file changed, 7 insertions(+), 14 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 34dc09ead7..37d04e45a6 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1626,31 +1626,24 @@ static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_inst
{
struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
struct x86_reg tmp0 = aos_get_xmm_reg(cp);
struct x86_reg tmp1 = aos_get_xmm_reg(cp);
- /* Could avoid tmp0, tmp1 if we overwrote arg0, arg1. Need a way
- * to invalidate registers. This will come with better analysis
- * (liveness analysis) of the incoming program.
- */
- emit_pshufd(cp, dst, arg0, SHUF(Y, Z, X, W));
- emit_pshufd(cp, tmp1, arg1, SHUF(Z, X, Y, W));
- sse_mulps(cp->func, dst, tmp1);
- emit_pshufd(cp, tmp0, arg0, SHUF(Z, X, Y, W));
emit_pshufd(cp, tmp1, arg1, SHUF(Y, Z, X, W));
- sse_mulps(cp->func, tmp0, tmp1);
- sse_subps(cp->func, dst, tmp0);
+ sse_mulps(cp->func, tmp1, arg0);
+ emit_pshufd(cp, tmp0, arg0, SHUF(Y, Z, X, W));
+ sse_mulps(cp->func, tmp0, arg1);
+ sse_subps(cp->func, tmp1, tmp0);
+ sse_shufps(cp->func, tmp1, tmp1, SHUF(Y, Z, X, W));
+/* dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; */
/* dst[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; */
/* dst[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; */
-/* dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; */
/* dst[3] is undef */
aos_release_xmm_reg(cp, tmp0.idx);
- aos_release_xmm_reg(cp, tmp1.idx);
- store_dest(cp, &op->FullDstRegisters[0], dst);
+ store_dest(cp, &op->FullDstRegisters[0], tmp1);
return TRUE;
}
--
cgit v1.2.3
From 721fb5597e687fc1446119002ab03cc428104b29 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 26 May 2008 00:09:02 +0100
Subject: draw: more aos tweaks
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 26 ++++++++++++++++++++++++--
1 file changed, 24 insertions(+), 2 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 37d04e45a6..916203c66b 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -202,6 +202,7 @@ static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
reg = tmp;
}
+ cp->xmm[reg.idx].last_used = cp->insn_counter;
return reg;
}
@@ -215,6 +216,7 @@ static struct x86_reg get_xmm( struct aos_compilation *cp,
reg = tmp;
}
+ cp->xmm[reg.idx].last_used = cp->insn_counter;
return reg;
}
@@ -281,6 +283,18 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp,
return;
}
+ /* If this xmm reg is already holding this shader reg, just update
+ * last_used, and don't clobber the dirty flag...
+ */
+ if (cp->xmm[reg.idx].file == file &&
+ cp->xmm[reg.idx].idx == idx)
+ {
+ cp->xmm[reg.idx].dirty |= dirty;
+ cp->xmm[reg.idx].last_used = cp->insn_counter;
+ return;
+ }
+
+
/* If any xmm reg thinks it holds this shader reg, break the
* illusion.
*/
@@ -382,8 +396,16 @@ static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
unsigned file,
unsigned idx )
{
- struct x86_reg reg = aos_get_shader_reg( cp, file, idx );
- return get_xmm( cp, reg );
+ struct x86_reg reg = get_xmm( cp,
+ aos_get_shader_reg( cp, file, idx ) );
+
+ aos_adopt_xmm_reg( cp,
+ reg,
+ file,
+ idx,
+ FALSE );
+
+ return reg;
}
--
cgit v1.2.3
From 351eca365c0ba488000c3826d5093de6170381e4 Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 26 May 2008 11:03:00 +0100
Subject: draw: extend precision in RSQ opcode
---
src/gallium/auxiliary/draw/draw_vs_aos.c | 48 ++++++++++++++++++--------------
src/gallium/auxiliary/draw/draw_vs_aos.h | 1 +
2 files changed, 28 insertions(+), 21 deletions(-)
(limited to 'src')
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 916203c66b..1622358ae1 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -140,7 +140,8 @@ static void init_internals( struct aos_machine *machine )
ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
- ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
+ ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
+ ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
@@ -1561,35 +1562,40 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
*
* x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a)
* x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)]
+ * or:
+ * x1 = rsqrtps(a) * [1.5 - .5 * a * rsqrtps(a) * rsqrtps(a)]
+ *
*
* See: http://softwarecommunity.intel.com/articles/eng/1818.htm
*/
static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
{
- struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
- struct x86_reg dst = aos_get_xmm_reg(cp);
- if (1) {
- sse_rsqrtss(cp->func, dst, arg0);
+ if (0) {
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
+ sse_rsqrtss(cp->func, r, arg0);
+ store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+ return TRUE;
}
else {
-#if 0
- /* Extend precision here...
- */
- sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
- sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
-
- sse_rsqrtss( func, tmp1, src ); /* rsqrtss(a) */
- sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) */
- sse_mulss( func, dst, tmp1 ); /* .5 * rsqrtss(a) */
- sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) * rsqrtss(a) */
- sse_subss( func, tmp0, src ); /* 3.0 - (a * rsqrtss(a) * rsqrtss(a)) */
- sse_mulss( func, dst, tmp0 ); /* .5 * r * (3.0 - (a * r * r)) */
-#endif
- }
+ struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+ struct x86_reg r = aos_get_xmm_reg(cp);
- store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
- return TRUE;
+ struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
+ struct x86_reg one_point_five = x86_make_disp( neg_half, 4 );
+ struct x86_reg src = get_xmm_writable( cp, arg0 );
+
+ sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */
+ sse_mulss( cp->func, src, neg_half ); /* -.5 * a */
+ sse_mulss( cp->func, src, r ); /* -.5 * a * r */
+ sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */
+ sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */
+ sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */
+
+ store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+ return TRUE;
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index c08c73d4bc..fffe2e4658 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -175,6 +175,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp );
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
#define IMM_255 4 /* 255, 255, 255, 255 */
#define IMM_NEGS 5 /* -1,-1,-1,-1 */
+#define IMM_RSQ 6 /* -.5,1.5,_,_ */
struct x86_reg aos_get_internal( struct aos_compilation *cp,
unsigned imm );
--
cgit v1.2.3
From fc72d7e032fc0a4130fae53106f03aa3fbe4e99e Mon Sep 17 00:00:00 2001
From: José Fonseca
Date: Mon, 26 May 2008 20:39:26 +0900
Subject: Remove CVS keywords.
---
docs/MESA_packed_depth_stencil.spec | 1 -
docs/MESA_program_debug.spec | 1 -
docs/MESA_resize_buffers.spec | 1 -
docs/MESA_shader_debug.spec | 1 -
docs/MESA_sprite_point.spec | 1 -
docs/MESA_texture_array.spec | 1 -
docs/MESA_trace.spec | 1 -
docs/MESA_window_pos.spec | 1 -
docs/README.BEOS | 1 -
docs/README.QUAKE | 1 -
docs/RELNOTES-3.1 | 1 -
docs/RELNOTES-3.2 | 1 -
docs/RELNOTES-3.2.1 | 1 -
docs/RELNOTES-3.3 | 1 -
docs/RELNOTES-3.4 | 1 -
docs/RELNOTES-3.4.1 | 1 -
docs/RELNOTES-3.4.2 | 1 -
docs/RELNOTES-3.5 | 1 -
docs/RELNOTES-4.0 | 1 -
docs/RELNOTES-4.0.1 | 1 -
docs/RELNOTES-4.0.2 | 1 -
docs/RELNOTES-4.0.3 | 1 -
docs/RELNOTES-4.1 | 1 -
docs/RELNOTES-5.0 | 1 -
docs/RELNOTES-5.0.1 | 1 -
docs/RELNOTES-5.0.2 | 1 -
docs/RELNOTES-6.0 | 1 -
docs/RELNOTES-6.0.1 | 1 -
docs/RELNOTES-6.1 | 1 -
docs/RELNOTES-6.2 | 1 -
docs/RELNOTES-6.2.1 | 1 -
docs/RELNOTES-6.3 | 1 -
docs/RELNOTES-6.3.1 | 1 -
docs/RELNOTES-6.3.2 | 1 -
docs/RELNOTES-6.4 | 1 -
docs/news.html | 1 -
include/GL/internal/sarea.h | 2 --
progs/beos/demo.cpp | 1 -
progs/ggi/gears.c | 1 -
progs/miniglx/glfbdevtest.c | 1 -
progs/miniglx/manytex.c | 1 -
progs/miniglx/sample_server.c | 1 -
progs/miniglx/sample_server2.c | 1 -
progs/miniglx/texline.c | 1 -
progs/tests/Makefile.win | 1 -
progs/tests/antialias.c | 1 -
progs/tests/cva.c | 1 -
progs/tests/getprocaddress.py | 1 -
progs/tests/jkrahntest.c | 1 -
progs/tests/manytex.c | 1 -
progs/tests/multipal.c | 1 -
progs/tests/multiwindow.c | 2 --
progs/tests/sharedtex.c | 1 -
progs/tests/texline.c | 1 -
progs/tests/texrect.c | 1 -
progs/tests/texwrap.c | 1 -
progs/util/README | 1 -
progs/util/glstate.c | 2 --
progs/util/glstate.h | 2 --
progs/util/sampleMakefile | 2 --
progs/windml/ugldrawpix.c | 1 -
progs/windml/ugltexcyl.c | 1 -
progs/xdemos/vgears.c | 1 -
src/gallium/winsys/dri/intel/server/i830_common.h | 1 -
src/gallium/winsys/dri/intel/server/i830_dri.h | 1 -
src/glu/mini/all.h | 1 -
src/glu/mini/glu.c | 1 -
src/glu/mini/gluP.h | 1 -
src/glu/mini/mipmap.c | 1 -
src/glu/mini/nurbs.c | 1 -
src/glu/mini/nurbs.h | 1 -
src/glu/mini/nurbscrv.c | 1 -
src/glu/mini/polytest.c | 1 -
src/glu/mini/project.c | 1 -
src/glu/mini/quadric.c | 1 -
src/glu/mini/tess.c | 1 -
src/glu/mini/tess.h | 1 -
src/glu/mini/tesselat.c | 1 -
src/glu/sgi/dummy.cc | 1 -
src/glu/sgi/libnurbs/interface/bezierEval.h | 2 --
src/glu/sgi/libnurbs/interface/bezierPatch.cc | 2 --
src/glu/sgi/libnurbs/interface/bezierPatch.h | 2 --
src/glu/sgi/libnurbs/interface/bezierPatchMesh.cc | 2 --
src/glu/sgi/libnurbs/interface/bezierPatchMesh.h | 2 --
src/glu/sgi/libnurbs/interface/glcurveval.cc | 2 --
src/glu/sgi/libnurbs/interface/glimports.h | 2 --
src/glu/sgi/libnurbs/interface/glinterface.cc | 2 --
src/glu/sgi/libnurbs/interface/glrenderer.h | 2 --
src/glu/sgi/libnurbs/interface/incurveeval.cc | 2 --
src/glu/sgi/libnurbs/interface/insurfeval.cc | 2 --
src/glu/sgi/libnurbs/interface/mystdio.h | 2 --
src/glu/sgi/libnurbs/interface/mystdlib.h | 2 --
src/glu/sgi/libnurbs/internals/arc.h | 2 --
src/glu/sgi/libnurbs/internals/arcsorter.cc | 2 --
src/glu/sgi/libnurbs/internals/arcsorter.h | 2 --
src/glu/sgi/libnurbs/internals/arctess.h | 2 --
src/glu/sgi/libnurbs/internals/backend.cc | 2 --
src/glu/sgi/libnurbs/internals/backend.h | 2 --
src/glu/sgi/libnurbs/internals/basiccrveval.h | 2 --
src/glu/sgi/libnurbs/internals/basicsurfeval.h | 2 --
src/glu/sgi/libnurbs/internals/bezierarc.h | 2 --
src/glu/sgi/libnurbs/internals/bin.cc | 2 --
src/glu/sgi/libnurbs/internals/bin.h | 2 --
src/glu/sgi/libnurbs/internals/bufpool.cc | 2 --
src/glu/sgi/libnurbs/internals/bufpool.h | 2 --
src/glu/sgi/libnurbs/internals/cachingeval.cc | 2 --
src/glu/sgi/libnurbs/internals/cachingeval.h | 2 --
src/glu/sgi/libnurbs/internals/ccw.cc | 2 --
src/glu/sgi/libnurbs/internals/coveandtiler.h | 2 --
src/glu/sgi/libnurbs/internals/curve.cc | 2 --
src/glu/sgi/libnurbs/internals/curve.h | 2 --
src/glu/sgi/libnurbs/internals/curvelist.cc | 2 --
src/glu/sgi/libnurbs/internals/curvelist.h | 2 --
src/glu/sgi/libnurbs/internals/curvesub.cc | 2 --
src/glu/sgi/libnurbs/internals/dataTransform.cc | 2 --
src/glu/sgi/libnurbs/internals/dataTransform.h | 2 --
src/glu/sgi/libnurbs/internals/defines.h | 2 --
src/glu/sgi/libnurbs/internals/displaylist.cc | 2 --
src/glu/sgi/libnurbs/internals/displaylist.h | 2 --
src/glu/sgi/libnurbs/internals/displaymode.h | 2 --
src/glu/sgi/libnurbs/internals/flist.cc | 2 --
src/glu/sgi/libnurbs/internals/flist.h | 2 --
src/glu/sgi/libnurbs/internals/flistsorter.cc | 2 --
src/glu/sgi/libnurbs/internals/flistsorter.h | 2 --
src/glu/sgi/libnurbs/internals/gridline.h | 2 --
src/glu/sgi/libnurbs/internals/gridtrimvertex.h | 2 --
src/glu/sgi/libnurbs/internals/gridvertex.h | 2 --
src/glu/sgi/libnurbs/internals/hull.cc | 2 --
src/glu/sgi/libnurbs/internals/hull.h | 2 --
src/glu/sgi/libnurbs/internals/intersect.cc | 2 --
src/glu/sgi/libnurbs/internals/jarcloc.h | 2 --
src/glu/sgi/libnurbs/internals/knotvector.h | 2 --
src/glu/sgi/libnurbs/internals/mapdesc.cc | 2 --
src/glu/sgi/libnurbs/internals/mapdesc.h | 2 --
src/glu/sgi/libnurbs/internals/mapdescv.cc | 2 --
src/glu/sgi/libnurbs/internals/maplist.cc | 2 --
src/glu/sgi/libnurbs/internals/maplist.h | 2 --
src/glu/sgi/libnurbs/internals/mesher.cc | 2 --
src/glu/sgi/libnurbs/internals/mesher.h | 2 --
src/glu/sgi/libnurbs/internals/monoTriangulationBackend.cc | 2 --
src/glu/sgi/libnurbs/internals/monotonizer.cc | 2 --
src/glu/sgi/libnurbs/internals/monotonizer.h | 1 -
src/glu/sgi/libnurbs/internals/myassert.h | 2 --
src/glu/sgi/libnurbs/internals/mycode.cc | 2 --
src/glu/sgi/libnurbs/internals/mystring.h | 2 --
src/glu/sgi/libnurbs/internals/nurbsconsts.h | 2 --
src/glu/sgi/libnurbs/internals/nurbstess.cc | 2 --
src/glu/sgi/libnurbs/internals/patch.cc | 2 --
src/glu/sgi/libnurbs/internals/patch.h | 2 --
src/glu/sgi/libnurbs/internals/patchlist.cc | 2 --
src/glu/sgi/libnurbs/internals/patchlist.h | 2 --
src/glu/sgi/libnurbs/internals/pwlarc.h | 2 --
src/glu/sgi/libnurbs/internals/quilt.cc | 2 --
src/glu/sgi/libnurbs/internals/quilt.h | 2 --
src/glu/sgi/libnurbs/internals/reader.cc | 2 --
src/glu/sgi/libnurbs/internals/reader.h | 2 --
src/glu/sgi/libnurbs/internals/renderhints.cc | 2 --
src/glu/sgi/libnurbs/internals/renderhints.h | 2 --
src/glu/sgi/libnurbs/internals/simplemath.h | 2 --
src/glu/sgi/libnurbs/internals/slicer.cc | 2 --
src/glu/sgi/libnurbs/internals/slicer.h | 2 --
src/glu/sgi/libnurbs/internals/sorter.cc | 2 --
src/glu/sgi/libnurbs/internals/sorter.h | 2 --
src/glu/sgi/libnurbs/internals/splitarcs.cc | 2 --
src/glu/sgi/libnurbs/internals/subdivider.h | 2 --
src/glu/sgi/libnurbs/internals/tobezier.cc | 2 --
src/glu/sgi/libnurbs/internals/trimline.cc | 2 --
src/glu/sgi/libnurbs/internals/trimline.h | 2 --
src/glu/sgi/libnurbs/internals/trimregion.cc | 2 --
src/glu/sgi/libnurbs/internals/trimregion.h | 2 --
src/glu/sgi/libnurbs/internals/trimvertex.h | 2 --
src/glu/sgi/libnurbs/internals/trimvertpool.cc | 2 --
src/glu/sgi/libnurbs/internals/trimvertpool.h | 2 --
src/glu/sgi/libnurbs/internals/types.h | 2 --
src/glu/sgi/libnurbs/internals/uarray.cc | 2 --
src/glu/sgi/libnurbs/internals/uarray.h | 2 --
src/glu/sgi/libnurbs/internals/varray.cc | 2 --
src/glu/sgi/libnurbs/internals/varray.h | 2 --
src/glu/sgi/libnurbs/nurbtess/definitions.h | 2 --
src/glu/sgi/libnurbs/nurbtess/directedLine.h | 2 --
src/glu/sgi/libnurbs/nurbtess/glimports.h | 2 --
src/glu/sgi/libnurbs/nurbtess/gridWrap.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/gridWrap.h | 2 --
src/glu/sgi/libnurbs/nurbtess/monoChain.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/monoChain.h | 2 --
src/glu/sgi/libnurbs/nurbtess/monoPolyPart.cc | 1 -
src/glu/sgi/libnurbs/nurbtess/monoPolyPart.h | 1 -
src/glu/sgi/libnurbs/nurbtess/monoTriangulation.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/monoTriangulation.h | 2 --
src/glu/sgi/libnurbs/nurbtess/mystdio.h | 2 --
src/glu/sgi/libnurbs/nurbtess/mystdlib.h | 2 --
src/glu/sgi/libnurbs/nurbtess/partitionX.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/partitionX.h | 2 --
src/glu/sgi/libnurbs/nurbtess/partitionY.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/partitionY.h | 2 --
src/glu/sgi/libnurbs/nurbtess/polyDBG.h | 2 --
src/glu/sgi/libnurbs/nurbtess/polyUtil.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/polyUtil.h | 2 --
src/glu/sgi/libnurbs/nurbtess/primitiveStream.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/primitiveStream.h | 2 --
src/glu/sgi/libnurbs/nurbtess/quicksort.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/quicksort.h | 2 --
src/glu/sgi/libnurbs/nurbtess/rectBlock.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/rectBlock.h | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleComp.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleComp.h | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleCompBot.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleCompBot.h | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleCompRight.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleCompRight.h | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleCompTop.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleCompTop.h | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleMonoPoly.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/sampleMonoPoly.h | 2 --
src/glu/sgi/libnurbs/nurbtess/sampledLine.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/sampledLine.h | 2 --
src/glu/sgi/libnurbs/nurbtess/searchTree.cc | 2 --
src/glu/sgi/libnurbs/nurbtess/searchTree.h | 2 --
src/glu/sgi/libnurbs/nurbtess/zlassert.h | 2 --
src/glu/sgi/libtess/README | 1 -
src/glu/sgi/libtess/alg-outline | 1 -
src/glu/sgi/libtess/dict-list.h | 2 --
src/glu/sgi/libtess/dict.c | 2 --
src/glu/sgi/libtess/dict.h | 2 --
src/glu/sgi/libtess/geom.c | 2 --
src/glu/sgi/libtess/memalloc.c | 2 --
src/glu/sgi/libtess/memalloc.h | 2 --
src/glu/sgi/libtess/mesh.c | 2 --
src/glu/sgi/libtess/mesh.h | 2 --
src/glu/sgi/libtess/normal.h | 2 --
src/glu/sgi/libtess/priorityq-heap.c | 2 --
src/glu/sgi/libtess/priorityq-heap.h | 2 --
src/glu/sgi/libtess/priorityq-sort.h | 2 --
src/glu/sgi/libtess/priorityq.c | 2 --
src/glu/sgi/libtess/priorityq.h | 2 --
src/glu/sgi/libtess/render.c | 2 --
src/glu/sgi/libtess/render.h | 2 --
src/glu/sgi/libtess/sweep.h | 2 --
src/glu/sgi/libtess/tess.h | 2 --
src/glu/sgi/libtess/tessmono.c | 2 --
src/glu/sgi/libtess/tessmono.h | 2 --
src/glu/sgi/libutil/error.c | 2 --
src/glu/sgi/libutil/glue.c | 2 --
src/glu/sgi/libutil/gluint.h | 2 --
src/glu/sgi/libutil/project.c | 2 --
src/glu/sgi/libutil/registry.c | 2 --
src/glut/beos/beos_x11.cpp | 1 -
src/glut/ggi/debug.h | 2 +-
src/glut/glx/stroke.h | 1 -
src/glut/glx/win32_x11.c | 1 -
src/glx/mini/miniglx_events.c | 1 -
src/glx/x11/XF86dri.c | 1 -
src/glx/x11/clientattrib.c | 1 -
src/glx/x11/compsize.c | 1 -
src/glx/x11/dri_glx.c | 1 -
src/glx/x11/eval.c | 1 -
src/glx/x11/glxclient.h | 1 -
src/glx/x11/glxcmds.c | 1 -
src/glx/x11/glxext.c | 1 -
src/glx/x11/indirect_init.h | 1 -
src/glx/x11/packrender.h | 1 -
src/glx/x11/packsingle.h | 1 -
src/glx/x11/pixel.c | 1 -
src/glx/x11/pixelstore.c | 1 -
src/glx/x11/render2.c | 1 -
src/glx/x11/renderpix.c | 1 -
src/glx/x11/single2.c | 1 -
src/glx/x11/singlepix.c | 1 -
src/glx/x11/vertarr.c | 1 -
src/glx/x11/xf86dri.h | 1 -
src/glx/x11/xf86dristr.h | 1 -
src/glx/x11/xfont.c | 1 -
src/mesa/drivers/dri/common/stenciltmp.h | 1 -
src/mesa/drivers/dri/common/texmem.c | 1 -
src/mesa/drivers/dri/common/texmem.h | 1 -
src/mesa/drivers/dri/common/utils.h | 1 -
src/mesa/drivers/dri/common/vblank.c | 1 -
src/mesa/drivers/dri/common/vblank.h | 1 -
src/mesa/drivers/dri/ffb/ffb_bitmap.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_bitmap.h | 1 -
src/mesa/drivers/dri/ffb/ffb_clear.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_context.h | 1 -
src/mesa/drivers/dri/ffb/ffb_dd.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_dd.h | 2 +-
src/mesa/drivers/dri/ffb/ffb_depth.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_depth.h | 1 -
src/mesa/drivers/dri/ffb/ffb_fifo.h | 1 -
src/mesa/drivers/dri/ffb/ffb_lines.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_lines.h | 1 -
src/mesa/drivers/dri/ffb/ffb_linetmp.h | 1 -
src/mesa/drivers/dri/ffb/ffb_lock.h | 1 -
src/mesa/drivers/dri/ffb/ffb_points.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_points.h | 1 -
src/mesa/drivers/dri/ffb/ffb_pointtmp.h | 1 -
src/mesa/drivers/dri/ffb/ffb_rendertmp.h | 1 -
src/mesa/drivers/dri/ffb/ffb_span.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_span.h | 1 -
src/mesa/drivers/dri/ffb/ffb_state.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_state.h | 1 -
src/mesa/drivers/dri/ffb/ffb_stencil.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_stencil.h | 1 -
src/mesa/drivers/dri/ffb/ffb_tex.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_tex.h | 2 +-
src/mesa/drivers/dri/ffb/ffb_tris.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_tris.h | 1 -
src/mesa/drivers/dri/ffb/ffb_tritmp.h | 1 -
src/mesa/drivers/dri/ffb/ffb_vb.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_vb.h | 1 -
src/mesa/drivers/dri/ffb/ffb_vbtmp.h | 1 -
src/mesa/drivers/dri/ffb/ffb_vtxfmt.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_vtxfmt.h | 1 -
src/mesa/drivers/dri/ffb/ffb_xmesa.c | 2 +-
src/mesa/drivers/dri/ffb/ffb_xmesa.h | 1 -
src/mesa/drivers/dri/ffb/server/ffb_dac.h | 1 -
src/mesa/drivers/dri/ffb/server/ffb_drishare.h | 1 -
src/mesa/drivers/dri/ffb/server/ffb_regs.h | 1 -
src/mesa/drivers/dri/gamma/gamma_client.h | 1 -
src/mesa/drivers/dri/gamma/gamma_context.h | 1 -
src/mesa/drivers/dri/gamma/gamma_inithw.c | 1 -
src/mesa/drivers/dri/gamma/gamma_lock.c | 1 -
src/mesa/drivers/dri/gamma/gamma_macros.h | 1 -
src/mesa/drivers/dri/gamma/gamma_regs.h | 1 -
src/mesa/drivers/dri/gamma/gamma_span.c | 1 -
src/mesa/drivers/dri/gamma/gamma_state.c | 1 -
src/mesa/drivers/dri/gamma/gamma_tex.c | 1 -
src/mesa/drivers/dri/gamma/gamma_texmem.c | 1 -
src/mesa/drivers/dri/gamma/gamma_texstate.c | 1 -
src/mesa/drivers/dri/gamma/gamma_tritmp.h | 1 -
src/mesa/drivers/dri/gamma/gamma_vb.c | 1 -
src/mesa/drivers/dri/gamma/gamma_xmesa.c | 1 -
src/mesa/drivers/dri/gamma/server/glint_common.h | 1 -
src/mesa/drivers/dri/gamma/server/glint_dri.h | 1 -
src/mesa/drivers/dri/i810/i810_3d_reg.h | 1 -
src/mesa/drivers/dri/i810/i810context.c | 1 -
src/mesa/drivers/dri/i810/i810context.h | 1 -
src/mesa/drivers/dri/i810/i810ioctl.c | 1 -
src/mesa/drivers/dri/i810/i810ioctl.h | 1 -
src/mesa/drivers/dri/i810/i810screen.c | 1 -
src/mesa/drivers/dri/i810/i810state.c | 1 -
src/mesa/drivers/dri/i810/i810tex.c | 1 -
src/mesa/drivers/dri/i810/i810tris.c | 1 -
src/mesa/drivers/dri/i810/i810tris.h | 1 -
src/mesa/drivers/dri/i810/i810vb.c | 1 -
src/mesa/drivers/dri/i810/i810vb.h | 1 -
src/mesa/drivers/dri/i810/server/i810_common.h | 1 -
src/mesa/drivers/dri/i810/server/i810_dri.h | 1 -
src/mesa/drivers/dri/i810/server/i810_reg.h | 1 -
src/mesa/drivers/dri/i915/server/i830_common.h | 1 -
src/mesa/drivers/dri/i915/server/i830_dri.h | 1 -
src/mesa/drivers/dri/i965/server/i830_common.h | 1 -
src/mesa/drivers/dri/i965/server/i830_dri.h | 1 -
src/mesa/drivers/dri/mach64/mach64_context.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_context.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_dd.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_dd.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_ioctl.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_ioctl.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_lock.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_lock.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_native_vb.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_native_vbtmp.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_reg.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_screen.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_screen.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_span.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_span.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_state.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_state.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_tex.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_tex.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_texmem.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_texstate.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_tris.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_tris.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_vb.c | 2 +-
src/mesa/drivers/dri/mach64/mach64_vb.h | 2 +-
src/mesa/drivers/dri/mach64/mach64_vbtmp.h | 2 +-
src/mesa/drivers/dri/mach64/server/mach64_dri.h | 2 +-
src/mesa/drivers/dri/mga/mga_texstate.c | 1 -
src/mesa/drivers/dri/mga/mga_xmesa.c | 1 -
src/mesa/drivers/dri/mga/mga_xmesa.h | 1 -
src/mesa/drivers/dri/mga/mgacontext.h | 1 -
src/mesa/drivers/dri/mga/mgadd.c | 1 -
src/mesa/drivers/dri/mga/mgadd.h | 1 -
src/mesa/drivers/dri/mga/mgaioctl.h | 1 -
src/mesa/drivers/dri/mga/mgapixel.c | 1 -
src/mesa/drivers/dri/mga/mgapixel.h | 1 -
src/mesa/drivers/dri/mga/mgaregs.h | 1 -
src/mesa/drivers/dri/mga/mgarender.c | 1 -
src/mesa/drivers/dri/mga/mgaspan.h | 1 -
src/mesa/drivers/dri/mga/mgastate.h | 1 -
src/mesa/drivers/dri/mga/mgatex.c | 1 -
src/mesa/drivers/dri/mga/mgatex.h | 1 -
src/mesa/drivers/dri/mga/mgatexmem.c | 1 -
src/mesa/drivers/dri/mga/mgatris.c | 1 -
src/mesa/drivers/dri/mga/mgatris.h | 1 -
src/mesa/drivers/dri/mga/mgavb.c | 1 -
src/mesa/drivers/dri/mga/mgavb.h | 1 -
src/mesa/drivers/dri/mga/server/mga.h | 1 -
src/mesa/drivers/dri/mga/server/mga_bios.h | 2 --
src/mesa/drivers/dri/mga/server/mga_dri.c | 1 -
src/mesa/drivers/dri/mga/server/mga_dri.h | 1 -
src/mesa/drivers/dri/mga/server/mga_macros.h | 1 -
src/mesa/drivers/dri/mga/server/mga_reg.h | 2 --
src/mesa/drivers/dri/r128/r128_context.c | 1 -
src/mesa/drivers/dri/r128/r128_context.h | 1 -
src/mesa/drivers/dri/r128/r128_dd.c | 1 -
src/mesa/drivers/dri/r128/r128_dd.h | 1 -
src/mesa/drivers/dri/r128/r128_ioctl.c | 1 -
src/mesa/drivers/dri/r128/r128_ioctl.h | 1 -
src/mesa/drivers/dri/r128/r128_lock.c | 1 -
src/mesa/drivers/dri/r128/r128_lock.h | 1 -
src/mesa/drivers/dri/r128/r128_screen.c | 1 -
src/mesa/drivers/dri/r128/r128_screen.h | 1 -
src/mesa/drivers/dri/r128/r128_span.c | 1 -
src/mesa/drivers/dri/r128/r128_span.h | 1 -
src/mesa/drivers/dri/r128/r128_state.c | 1 -
src/mesa/drivers/dri/r128/r128_state.h | 1 -
src/mesa/drivers/dri/r128/r128_tex.c | 1 -
src/mesa/drivers/dri/r128/r128_tex.h | 1 -
src/mesa/drivers/dri/r128/r128_texmem.c | 1 -
src/mesa/drivers/dri/r128/r128_texobj.h | 1 -
src/mesa/drivers/dri/r128/r128_texstate.c | 1 -
src/mesa/drivers/dri/r128/r128_tris.c | 2 +-
src/mesa/drivers/dri/r128/r128_tris.h | 1 -
src/mesa/drivers/dri/r128/server/r128.h | 1 -
src/mesa/drivers/dri/r128/server/r128_dri.c | 1 -
src/mesa/drivers/dri/r128/server/r128_dri.h | 1 -
src/mesa/drivers/dri/r128/server/r128_macros.h | 1 -
src/mesa/drivers/dri/r128/server/r128_reg.h | 1 -
src/mesa/drivers/dri/r128/server/r128_version.h | 1 -
src/mesa/drivers/dri/radeon/radeon_compat.c | 1 -
src/mesa/drivers/dri/radeon/radeon_context.c | 1 -
src/mesa/drivers/dri/radeon/radeon_ioctl.c | 1 -
src/mesa/drivers/dri/radeon/radeon_ioctl.h | 1 -
src/mesa/drivers/dri/radeon/radeon_lighting.c | 1 -
src/mesa/drivers/dri/radeon/radeon_maos.h | 1 -
src/mesa/drivers/dri/radeon/radeon_maos_arrays.c | 1 -
src/mesa/drivers/dri/radeon/radeon_maos_verts.c | 1 -
src/mesa/drivers/dri/radeon/radeon_sanity.c | 1 -
src/mesa/drivers/dri/radeon/radeon_screen.c | 1 -
src/mesa/drivers/dri/radeon/radeon_screen.h | 1 -
src/mesa/drivers/dri/radeon/radeon_state.c | 1 -
src/mesa/drivers/dri/radeon/radeon_state.h | 1 -
src/mesa/drivers/dri/radeon/radeon_state_init.c | 1 -
src/mesa/drivers/dri/radeon/radeon_swtcl.c | 1 -
src/mesa/drivers/dri/radeon/radeon_swtcl.h | 1 -
src/mesa/drivers/dri/radeon/radeon_tcl.c | 1 -
src/mesa/drivers/dri/radeon/radeon_tcl.h | 1 -
src/mesa/drivers/dri/radeon/radeon_tex.c | 1 -
src/mesa/drivers/dri/radeon/radeon_tex.h | 1 -
src/mesa/drivers/dri/radeon/radeon_texmem.c | 1 -
src/mesa/drivers/dri/radeon/radeon_texstate.c | 1 -
src/mesa/drivers/dri/radeon/server/radeon.h | 1 -
src/mesa/drivers/dri/radeon/server/radeon_dri.h | 1 -
src/mesa/drivers/dri/radeon/server/radeon_macros.h | 1 -
src/mesa/drivers/dri/radeon/server/radeon_reg.h | 1 -
src/mesa/drivers/dri/savage/savagetris.c | 2 +-
src/mesa/drivers/dri/savage/savagetris.h | 1 -
src/mesa/drivers/dri/sis/server/sis_common.h | 2 +-
src/mesa/drivers/dri/sis/server/sis_dri.h | 1 -
src/mesa/drivers/dri/sis/sis_alloc.c | 1 -
src/mesa/drivers/dri/sis/sis_alloc.h | 1 -
src/mesa/drivers/dri/sis/sis_clear.c | 1 -
src/mesa/drivers/dri/sis/sis_context.c | 1 -
src/mesa/drivers/dri/sis/sis_context.h | 1 -
src/mesa/drivers/dri/sis/sis_dd.c | 1 -
src/mesa/drivers/dri/sis/sis_dd.h | 1 -
src/mesa/drivers/dri/sis/sis_fog.c | 1 -
src/mesa/drivers/dri/sis/sis_lock.c | 1 -
src/mesa/drivers/dri/sis/sis_lock.h | 1 -
src/mesa/drivers/dri/sis/sis_reg.h | 1 -
src/mesa/drivers/dri/sis/sis_screen.c | 1 -
src/mesa/drivers/dri/sis/sis_screen.h | 1 -
src/mesa/drivers/dri/sis/sis_span.c | 1 -
src/mesa/drivers/dri/sis/sis_span.h | 1 -
src/mesa/drivers/dri/sis/sis_state.c | 1 -
src/mesa/drivers/dri/sis/sis_state.h | 1 -
src/mesa/drivers/dri/sis/sis_stencil.c | 1 -
src/mesa/drivers/dri/sis/sis_stencil.h | 1 -
src/mesa/drivers/dri/sis/sis_tex.c | 1 -
src/mesa/drivers/dri/sis/sis_tex.h | 1 -
src/mesa/drivers/dri/sis/sis_texstate.c | 1 -
src/mesa/drivers/dri/sis/sis_tris.h | 1 -
src/mesa/drivers/dri/tdfx/X86/fx_3dnow_fastpath.S | 1 -
src/mesa/drivers/dri/tdfx/X86/fx_3dnow_fasttmp.h | 1 -
src/mesa/drivers/dri/tdfx/dri_glide.h | 1 -
src/mesa/drivers/dri/tdfx/server/tdfx_dri.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_context.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_dd.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_glide.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_lock.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_lock.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_pixels.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_pixels.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_render.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_render.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_screen.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_screen.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_span.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_span.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_state.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_state.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_tex.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_tex.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_texman.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_texman.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_texstate.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_texstate.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_tris.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_tris.h | 1 -
src/mesa/drivers/dri/tdfx/tdfx_vb.c | 1 -
src/mesa/drivers/dri/tdfx/tdfx_vb.h | 1 -
src/mesa/drivers/dri/unichrome/server/via_dri.c | 1 -
src/mesa/drivers/dri/unichrome/server/via_driver.h | 1 -
src/mesa/drivers/dri/unichrome/server/via_priv.h | 1 -
src/mesa/drivers/ggi/default/genkgi.h | 2 +-
src/mesa/drivers/ggi/default/genkgi_mode.c | 2 +-
src/mesa/drivers/ggi/default/genkgi_visual.c | 2 +-
src/mesa/drivers/ggi/include/ggi/mesa/debug.h | 2 +-
src/mesa/drivers/svga/svgamesa.c | 1 -
src/mesa/drivers/svga/svgamesa15.c | 1 -
src/mesa/drivers/svga/svgamesa15.h | 1 -
src/mesa/drivers/svga/svgamesa16.c | 1 -
src/mesa/drivers/svga/svgamesa16.h | 1 -
src/mesa/drivers/svga/svgamesa24.c | 1 -
src/mesa/drivers/svga/svgamesa24.h | 1 -
src/mesa/drivers/svga/svgamesa32.c | 1 -
src/mesa/drivers/svga/svgamesa32.h | 1 -
src/mesa/drivers/svga/svgamesa8.c | 1 -
src/mesa/drivers/svga/svgamesa8.h | 1 -
src/mesa/drivers/svga/svgapix.h | 1 -
src/mesa/drivers/windows/gdi/wgl.c | 1 -
src/mesa/drivers/windows/gldirect/dx7/gld_vb_mesa_render_dx7.c | 1 -
src/mesa/drivers/windows/gldirect/dx8/gld_vb_mesa_render_dx8.c | 1 -
src/mesa/drivers/windows/gldirect/dx9/gld_vb_mesa_render_dx9.c | 1 -
src/mesa/drivers/windows/gldirect/gld_debug_clip.c | 1 -
src/mesa/drivers/windows/gldirect/gld_debug_norm.c | 1 -
src/mesa/drivers/windows/gldirect/gld_debug_xform.c | 1 -
src/mesa/drivers/windows/gldirect/mesasw/colors.h | 7 ++-----
src/mesa/glapi/mesadef.py | 1 -
src/mesa/sparc/norm.S | 1 -
src/mesa/sparc/sparc.h | 1 -
src/mesa/sparc/xform.S | 1 -
src/mesa/x86-64/x86-64.c | 1 -
src/mesa/x86-64/x86-64.h | 1 -
src/mesa/x86-64/xform4.S | 1 -
src/mesa/x86/3dnow.c | 1 -
src/mesa/x86/3dnow.h | 1 -
src/mesa/x86/3dnow_normal.S | 1 -
src/mesa/x86/3dnow_xform1.S | 1 -
src/mesa/x86/3dnow_xform2.S | 1 -
src/mesa/x86/3dnow_xform3.S | 1 -
src/mesa/x86/3dnow_xform4.S | 1 -
src/mesa/x86/clip_args.h | 1 -
src/mesa/x86/common_x86_asm.h | 1 -
src/mesa/x86/common_x86_features.h | 1 -
src/mesa/x86/common_x86_macros.h | 1 -
src/mesa/x86/norm_args.h | 1 -
src/mesa/x86/sse.h | 1 -
src/mesa/x86/sse_normal.S | 1 -
src/mesa/x86/sse_xform1.S | 1 -
src/mesa/x86/sse_xform2.S | 1 -
src/mesa/x86/sse_xform3.S | 1 -
src/mesa/x86/sse_xform4.S | 1 -
src/mesa/x86/x86.c | 1 -
src/mesa/x86/x86.h | 1 -
src/mesa/x86/x86_cliptest.S | 1 -
src/mesa/x86/x86_xform2.S | 1 -
src/mesa/x86/x86_xform3.S | 1 -
src/mesa/x86/x86_xform4.S | 1 -
src/mesa/x86/xform_args.h | 1 -
572 files changed, 53 insertions(+), 745 deletions(-)
(limited to 'src')
diff --git a/docs/MESA_packed_depth_stencil.spec b/docs/MESA_packed_depth_stencil.spec
index 4f7ab1e28c..112b730ecc 100644
--- a/docs/MESA_packed_depth_stencil.spec
+++ b/docs/MESA_packed_depth_stencil.spec
@@ -17,7 +17,6 @@ Status
Version
- $Id: MESA_packed_depth_stencil.spec,v 1.2 2003/09/19 14:58:21 brianp Exp $
Number
diff --git a/docs/MESA_program_debug.spec b/docs/MESA_program_debug.spec
index 391d39fa70..7694fdcc42 100644
--- a/docs/MESA_program_debug.spec
+++ b/docs/MESA_program_debug.spec
@@ -18,7 +18,6 @@ Version
Last Modified Date: July 20, 2003
Author Revision: 1.0
- $Date: 2004/03/25 01:42:41 $ $Revision: 1.4 $
Number
diff --git a/docs/MESA_resize_buffers.spec b/docs/MESA_resize_buffers.spec
index f79d29c405..533d017c9a 100644
--- a/docs/MESA_resize_buffers.spec
+++ b/docs/MESA_resize_buffers.spec
@@ -16,7 +16,6 @@ Status
Version
- $Id: MESA_resize_buffers.spec,v 1.3 2004/03/25 01:42:42 brianp Exp $
Number
diff --git a/docs/MESA_shader_debug.spec b/docs/MESA_shader_debug.spec
index dbd22b3c66..1f7d42ac91 100644
--- a/docs/MESA_shader_debug.spec
+++ b/docs/MESA_shader_debug.spec
@@ -19,7 +19,6 @@ Version
Last Modified Date: July 30, 2006
Author Revision: 0.2
- $Date: 2006/07/30 14:28:38 $ $Revision: 1.2 $
Number
diff --git a/docs/MESA_sprite_point.spec b/docs/MESA_sprite_point.spec
index 9422ff5729..b50d78e9e7 100644
--- a/docs/MESA_sprite_point.spec
+++ b/docs/MESA_sprite_point.spec
@@ -16,7 +16,6 @@ Status
Version
- $Id: MESA_sprite_point.spec,v 1.2 2003/09/19 14:58:21 brianp Exp $
Number
diff --git a/docs/MESA_texture_array.spec b/docs/MESA_texture_array.spec
index d3b7752115..9dee65b045 100644
--- a/docs/MESA_texture_array.spec
+++ b/docs/MESA_texture_array.spec
@@ -20,7 +20,6 @@ Status
Version
- $Date: 2007/05/16$ $Revision: 0.4$
Number
diff --git a/docs/MESA_trace.spec b/docs/MESA_trace.spec
index f0a79c7df9..dc4166e6b6 100644
--- a/docs/MESA_trace.spec
+++ b/docs/MESA_trace.spec
@@ -17,7 +17,6 @@ Status
Version
- $Id: MESA_trace.spec,v 1.4 2004/03/25 01:42:42 brianp Exp $
Number
diff --git a/docs/MESA_window_pos.spec b/docs/MESA_window_pos.spec
index eb1d0d1f06..4d01f1814c 100644
--- a/docs/MESA_window_pos.spec
+++ b/docs/MESA_window_pos.spec
@@ -16,7 +16,6 @@ Status
Version
- $Id: MESA_window_pos.spec,v 1.4 2004/03/25 01:42:42 brianp Exp $
Number
diff --git a/docs/README.BEOS b/docs/README.BEOS
index 5847730af0..efd84e888c 100644
--- a/docs/README.BEOS
+++ b/docs/README.BEOS
@@ -134,4 +134,3 @@ as of February, 1999.
----------------------------------------------------------------------
-$Id: README.BEOS,v 1.12 2004/10/13 00:35:55 phoudoin Exp $
diff --git a/docs/README.QUAKE b/docs/README.QUAKE
index 5a13b7a498..e90c76a083 100644
--- a/docs/README.QUAKE
+++ b/docs/README.QUAKE
@@ -205,4 +205,3 @@ http://www.linuxgames.com/quake2/
----------------------------------------------------------------------
-$Id: README.QUAKE,v 1.3 1998/08/23 15:26:26 brianp Exp $
diff --git a/docs/RELNOTES-3.1 b/docs/RELNOTES-3.1
index 4d6e3c2f44..65324eb496 100644
--- a/docs/RELNOTES-3.1
+++ b/docs/RELNOTES-3.1
@@ -143,4 +143,3 @@ code). Anyone want to help?
----------------------------------------------------------------------
-$Id: RELNOTES-3.1,v 1.2 2000/04/07 17:08:06 brianp Exp $
diff --git a/docs/RELNOTES-3.2 b/docs/RELNOTES-3.2
index 7737c28e80..ec7d4f8dc3 100644
--- a/docs/RELNOTES-3.2
+++ b/docs/RELNOTES-3.2
@@ -9,4 +9,3 @@ have been added. For a list of bug fixes please read the VERSIONS file.
----------------------------------------------------------------------
-$Id: RELNOTES-3.2,v 1.2 2000/04/07 17:08:06 brianp Exp $
diff --git a/docs/RELNOTES-3.2.1 b/docs/RELNOTES-3.2.1
index 2ad5b9046a..d34efcc867 100644
--- a/docs/RELNOTES-3.2.1
+++ b/docs/RELNOTES-3.2.1
@@ -29,4 +29,3 @@ GLU library.
----------------------------------------------------------------------
-$Id: RELNOTES-3.2.1,v 1.2 2000/07/21 16:32:33 brianp Exp $
diff --git a/docs/RELNOTES-3.3 b/docs/RELNOTES-3.3
index 362a74ee31..3850767bb1 100644
--- a/docs/RELNOTES-3.3
+++ b/docs/RELNOTES-3.3
@@ -268,4 +268,3 @@ image convolution. This will (hopefully) be done for Mesa 3.5/3.6.
----------------------------------------------------------------------
-$Id: RELNOTES-3.3,v 1.8 2000/07/21 16:26:41 brianp Exp $
diff --git a/docs/RELNOTES-3.4 b/docs/RELNOTES-3.4
index 4aa607a37c..657ccdaab6 100644
--- a/docs/RELNOTES-3.4
+++ b/docs/RELNOTES-3.4
@@ -19,4 +19,3 @@ see the VERSIONS file.
----------------------------------------------------------------------
-$Id: RELNOTES-3.4,v 1.2 2002/03/23 02:37:17 brianp Exp $
diff --git a/docs/RELNOTES-3.4.1 b/docs/RELNOTES-3.4.1
index 18443507c2..73d75c64d2 100644
--- a/docs/RELNOTES-3.4.1
+++ b/docs/RELNOTES-3.4.1
@@ -19,4 +19,3 @@ the Mesa 3.4 release. For details, see the VERSIONS file.
----------------------------------------------------------------------
-$Id: RELNOTES-3.4.1,v 1.2 2001/05/23 14:45:01 brianp Exp $
diff --git a/docs/RELNOTES-3.4.2 b/docs/RELNOTES-3.4.2
index 894ed199ff..9caea900d8 100644
--- a/docs/RELNOTES-3.4.2
+++ b/docs/RELNOTES-3.4.2
@@ -19,4 +19,3 @@ the Mesa 3.4.1 release. For details, see the VERSIONS file.
----------------------------------------------------------------------
-$Id: RELNOTES-3.4.2,v 1.2 2001/05/23 14:45:01 brianp Exp $
diff --git a/docs/RELNOTES-3.5 b/docs/RELNOTES-3.5
index 52097a1cd6..b2aa1b852e 100644
--- a/docs/RELNOTES-3.5
+++ b/docs/RELNOTES-3.5
@@ -225,4 +225,3 @@ In the future I hope to implement support for 32-bit, floating point
color channels.
----------------------------------------------------------------------
-$Id: RELNOTES-3.5,v 1.14 2001/06/20 19:02:48 brianp Exp $
diff --git a/docs/RELNOTES-4.0 b/docs/RELNOTES-4.0
index e4249cfa17..2f729db158 100644
--- a/docs/RELNOTES-4.0
+++ b/docs/RELNOTES-4.0
@@ -160,4 +160,3 @@ See the VERSIONS file for more details about bug fixes, etc. in Mesa 4.0.
----------------------------------------------------------------------
-$Id: RELNOTES-4.0,v 3.2 2001/10/17 14:59:21 brianp Exp $
diff --git a/docs/RELNOTES-4.0.1 b/docs/RELNOTES-4.0.1
index b4d7efca81..e84df6bf89 100644
--- a/docs/RELNOTES-4.0.1
+++ b/docs/RELNOTES-4.0.1
@@ -19,4 +19,3 @@ Mesa 4.0.1 only contains bug fixes since version 4.0.
See the docs/VERSIONS file for the list of bug fixes.
----------------------------------------------------------------------
-$Id: RELNOTES-4.0.1,v 1.2 2001/12/18 14:08:23 brianp Exp $
diff --git a/docs/RELNOTES-4.0.2 b/docs/RELNOTES-4.0.2
index 1b7eaaa8fe..b476956ba2 100644
--- a/docs/RELNOTES-4.0.2
+++ b/docs/RELNOTES-4.0.2
@@ -47,4 +47,3 @@ D3D needs updating
----------------------------------------------------------------------
-$Id: RELNOTES-4.0.2,v 1.2 2002/03/23 02:38:39 brianp Exp $
diff --git a/docs/RELNOTES-4.0.3 b/docs/RELNOTES-4.0.3
index c69b6a279e..0b3e34befe 100644
--- a/docs/RELNOTES-4.0.3
+++ b/docs/RELNOTES-4.0.3
@@ -49,4 +49,3 @@ D3D needs updating
----------------------------------------------------------------------
-$Id: RELNOTES-4.0.3,v 1.2 2002/06/26 02:36:34 brianp Exp $
diff --git a/docs/RELNOTES-4.1 b/docs/RELNOTES-4.1
index 92cf9196f0..24e9299eb2 100644
--- a/docs/RELNOTES-4.1
+++ b/docs/RELNOTES-4.1
@@ -305,4 +305,3 @@ are some things to change:
----------------------------------------------------------------------
-$Id: RELNOTES-4.1,v 1.22 2002/10/29 15:06:37 brianp Exp $
diff --git a/docs/RELNOTES-5.0 b/docs/RELNOTES-5.0
index 565e4ad78e..1b22996d83 100644
--- a/docs/RELNOTES-5.0
+++ b/docs/RELNOTES-5.0
@@ -82,4 +82,3 @@ driver call the _mesa_enable_1_4_extensions() function.
----------------------------------------------------------------------
-$Id: RELNOTES-5.0,v 3.2 2002/11/13 15:33:51 brianp Exp $
diff --git a/docs/RELNOTES-5.0.1 b/docs/RELNOTES-5.0.1
index 8d72cc44c1..f37e9c4a7f 100644
--- a/docs/RELNOTES-5.0.1
+++ b/docs/RELNOTES-5.0.1
@@ -43,4 +43,3 @@ driver call the _mesa_enable_1_4_extensions() function.
----------------------------------------------------------------------
-$Id: RELNOTES-5.0.1,v 3.1 2003/03/30 16:17:54 brianp Exp $
diff --git a/docs/RELNOTES-5.0.2 b/docs/RELNOTES-5.0.2
index cfc9ad04fd..d0e05b2c73 100644
--- a/docs/RELNOTES-5.0.2
+++ b/docs/RELNOTES-5.0.2
@@ -43,4 +43,3 @@ driver call the _mesa_enable_1_4_extensions() function.
----------------------------------------------------------------------
-$Id: RELNOTES-5.0.2,v 1.1 2003/09/04 23:10:38 brianp Exp $
diff --git a/docs/RELNOTES-6.0 b/docs/RELNOTES-6.0
index de01a879a4..1a3c2fb1aa 100644
--- a/docs/RELNOTES-6.0
+++ b/docs/RELNOTES-6.0
@@ -84,4 +84,3 @@ See the VERSIONS file for more details about bug fixes, etc. in Mesa 6.0.
----------------------------------------------------------------------
-$Id: RELNOTES-6.0,v 1.3 2004/01/15 15:47:57 brianp Exp $
diff --git a/docs/RELNOTES-6.0.1 b/docs/RELNOTES-6.0.1
index e72d9fe891..1444b9fc87 100644
--- a/docs/RELNOTES-6.0.1
+++ b/docs/RELNOTES-6.0.1
@@ -47,4 +47,3 @@ D3D needs updating
----------------------------------------------------------------------
-$Id: RELNOTES-6.0.1,v 3.1 2004/04/02 23:37:02 brianp Exp $
diff --git a/docs/RELNOTES-6.1 b/docs/RELNOTES-6.1
index 830f1e47e7..8de64d1f1c 100644
--- a/docs/RELNOTES-6.1
+++ b/docs/RELNOTES-6.1
@@ -109,4 +109,3 @@ See the VERSIONS file for more details about bug fixes, etc. in Mesa 6.1.
----------------------------------------------------------------------
-$Id: RELNOTES-6.1,v 3.5 2004/08/17 22:58:23 brianp Exp $
diff --git a/docs/RELNOTES-6.2 b/docs/RELNOTES-6.2
index 4043a5655e..06cfba0c75 100644
--- a/docs/RELNOTES-6.2
+++ b/docs/RELNOTES-6.2
@@ -49,4 +49,3 @@ D3D needs updating
----------------------------------------------------------------------
-$Id: RELNOTES-6.2,v 3.4 2004/10/02 15:43:14 brianp Exp $
diff --git a/docs/RELNOTES-6.2.1 b/docs/RELNOTES-6.2.1
index d72560e5af..c7baa5d421 100644
--- a/docs/RELNOTES-6.2.1
+++ b/docs/RELNOTES-6.2.1
@@ -47,4 +47,3 @@ D3D needs updating
----------------------------------------------------------------------
-$Id: RELNOTES-6.2.1,v 3.1 2004/12/09 23:21:36 brianp Exp $
diff --git a/docs/RELNOTES-6.3 b/docs/RELNOTES-6.3
index dde335eec1..6b4dfaaf9a 100644
--- a/docs/RELNOTES-6.3
+++ b/docs/RELNOTES-6.3
@@ -112,4 +112,3 @@ D3D needs updating
----------------------------------------------------------------------
-$Id: RELNOTES-6.3,v 3.13 2005/07/21 15:57:29 brianp Exp $
diff --git a/docs/RELNOTES-6.3.1 b/docs/RELNOTES-6.3.1
index cc6e8be1b2..eacc952aeb 100644
--- a/docs/RELNOTES-6.3.1
+++ b/docs/RELNOTES-6.3.1
@@ -46,4 +46,3 @@ D3D needs updating
----------------------------------------------------------------------
-$Id: RELNOTES-6.3.1,v 3.1 2005/07/21 18:45:54 brianp Exp $
diff --git a/docs/RELNOTES-6.3.2 b/docs/RELNOTES-6.3.2
index f2d47bff19..e5243ef783 100644
--- a/docs/RELNOTES-6.3.2
+++ b/docs/RELNOTES-6.3.2
@@ -34,4 +34,3 @@ D3D needs updating
----------------------------------------------------------------------
-$Id: RELNOTES-6.3.2,v 3.2 2005/08/19 16:57:50 brianp Exp $
diff --git a/docs/RELNOTES-6.4 b/docs/RELNOTES-6.4
index a12600c3c8..1a945a1039 100644
--- a/docs/RELNOTES-6.4
+++ b/docs/RELNOTES-6.4
@@ -47,4 +47,3 @@ in Mesa 6.3.
----------------------------------------------------------------------
-$Id: RELNOTES-6.4,v 3.1 2005/10/24 23:33:27 brianp Exp $
diff --git a/docs/news.html b/docs/news.html
index 58aca31858..b766ce7c75 100644
--- a/docs/news.html
+++ b/docs/news.html
@@ -1117,6 +1117,5 @@ source code.
-$Id: news.html,v 3.33 2006/12/02 18:18:41 brianp Exp $