summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c17
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c190
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h112
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c115
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py29
-rw-r--r--src/gallium/drivers/nouveau/nouveau_stateobj.h10
-rw-r--r--src/gallium/drivers/nouveau/nouveau_util.h100
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h5
-rw-r--r--src/gallium/drivers/nv50/nv50_push.c8
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c8
-rw-r--r--src/gallium/drivers/r300/r300_blit.c90
-rw-r--r--src/gallium/drivers/r300/r300_context.c89
-rw-r--r--src/gallium/drivers/r300/r300_context.h11
-rw-r--r--src/gallium/drivers/r300/r300_emit.c41
-rw-r--r--src/gallium/drivers/r300/r300_emit.h1
-rw-r--r--src/gallium/drivers/r300/r300_flush.c3
-rw-r--r--src/gallium/drivers/r300/r300_fs.c19
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c71
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h2
-rw-r--r--src/gallium/drivers/r300/r300_reg.h21
-rw-r--r--src/gallium/drivers/r300/r300_render.c38
-rw-r--r--src/gallium/drivers/r300/r300_screen.c6
-rw-r--r--src/gallium/drivers/r300/r300_shader_semantics.h2
-rw-r--r--src/gallium/drivers/r300/r300_state.c20
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c88
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c6
-rw-r--r--src/gallium/drivers/r300/r300_vs.c5
-rw-r--r--src/gallium/drivers/r600/r600_asm.c10
-rw-r--r--src/gallium/drivers/r600/r600_asm.h1
-rw-r--r--src/gallium/drivers/r600/r600_context.c29
-rw-r--r--src/gallium/drivers/r600/r600_context.h5
-rw-r--r--src/gallium/drivers/r600/r600_draw.c2
-rw-r--r--src/gallium/drivers/r600/r600_screen.c105
-rw-r--r--src/gallium/drivers/r600/r600_screen.h2
-rw-r--r--src/gallium/drivers/r600/r600_shader.c111
-rw-r--r--src/gallium/drivers/r600/r600_shader.h1
-rw-r--r--src/gallium/drivers/r600/r600_state.c117
-rw-r--r--src/gallium/drivers/r600/r600_state_inlines.h4
-rw-r--r--src/gallium/drivers/r600/r600_texture.c248
-rw-r--r--src/gallium/drivers/r600/r600d.h135
-rw-r--r--src/gallium/drivers/r600/radeon.h61
43 files changed, 1347 insertions, 602 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 78744da500..2cf6f38c4b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -141,7 +141,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
else {
dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
- dadxy = LLVMBuildAdd(builder, dadx, dady, "");
+ dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
attrib_name(dadx, attrib, chan, ".dadx");
attrib_name(dady, attrib, chan, ".dady");
attrib_name(dadxy, attrib, chan, ".dadxy");
@@ -177,7 +177,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* dadq2 = 2 * dq
*/
- dadq2 = LLVMBuildAdd(builder, dadq, dadq, "");
+ dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
/*
* a = a0 + x * dadx + y * dady
@@ -193,12 +193,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
a = a0;
if (interp != LP_INTERP_CONSTANT &&
interp != LP_INTERP_FACING) {
- a = LLVMBuildAdd(builder, a,
- LLVMBuildMul(builder, bld->x, dadx, ""),
- "");
- a = LLVMBuildAdd(builder, a,
- LLVMBuildMul(builder, bld->y, dady, ""),
- "");
+ LLVMValueRef tmp;
+ tmp = LLVMBuildFMul(builder, bld->x, dadx, "");
+ a = LLVMBuildFAdd(builder, a, tmp, "");
+ tmp = LLVMBuildFMul(builder, bld->y, dady, "");
+ a = LLVMBuildFAdd(builder, a, tmp, "");
}
}
@@ -212,7 +211,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* Compute the attrib values on the upper-left corner of each quad.
*/
- a = LLVMBuildAdd(builder, a, dadq2, "");
+ a = LLVMBuildFAdd(builder, a, dadq2, "");
/*
* a *= 1 / w
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 28793682ed..7543bd7b2b 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -47,7 +47,7 @@
#include "lp_setup.h"
-DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(lp_no_rast, "LP_NO_RAST", FALSE)
static void llvmpipe_destroy( struct pipe_context *pipe )
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index eaf2a6f334..102e902d02 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -104,9 +104,6 @@ struct lp_rast_plane {
int dcdx;
int dcdy;
-
- /* edge/step info for 3 edges and 4x4 block of pixels */
- const int *step;
};
/**
@@ -119,8 +116,6 @@ struct lp_rast_triangle {
/* inputs for the shader */
struct lp_rast_shader_inputs inputs;
- int step[3][16];
-
#ifdef DEBUG
float v[3][2];
#endif
@@ -261,5 +256,9 @@ void lp_rast_begin_query(struct lp_rasterizer_task *,
void lp_rast_end_query(struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg);
+
#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index ebe9a8e92b..673f67386b 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -37,52 +37,6 @@
#include "lp_tile_soa.h"
-/**
- * Map an index in [0,15] to an x,y position, multiplied by 4.
- * This is used to get the position of each subtile in a 4x4
- * grid of edge step values.
- * Note: we can use some bit twiddling to compute these values instead
- * of using a look-up table, but there's no measurable performance
- * difference.
- */
-static const int pos_table4[16][2] = {
- { 0, 0 },
- { 4, 0 },
- { 0, 4 },
- { 4, 4 },
- { 8, 0 },
- { 12, 0 },
- { 8, 4 },
- { 12, 4 },
- { 0, 8 },
- { 4, 8 },
- { 0, 12 },
- { 4, 12 },
- { 8, 8 },
- { 12, 8 },
- { 8, 12 },
- { 12, 12 }
-};
-
-
-static const int pos_table16[16][2] = {
- { 0, 0 },
- { 16, 0 },
- { 0, 16 },
- { 16, 16 },
- { 32, 0 },
- { 48, 0 },
- { 32, 16 },
- { 48, 16 },
- { 0, 32 },
- { 16, 32 },
- { 0, 48 },
- { 16, 48 },
- { 32, 32 },
- { 48, 32 },
- { 32, 48 },
- { 48, 48 }
-};
/**
@@ -113,6 +67,68 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
+
+static INLINE unsigned
+build_mask(int c, int dcdx, int dcdy)
+{
+ int mask = 0;
+
+ int c0 = c;
+ int c1 = c0 + dcdx;
+ int c2 = c1 + dcdx;
+ int c3 = c2 + dcdx;
+
+ mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0);
+ mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2);
+ mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8);
+ mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10);
+ mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1);
+ mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3);
+ mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9);
+ mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11);
+ mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4);
+ mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6);
+ mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12);
+ mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14);
+ mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5);
+ mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7);
+ mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13);
+ mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15);
+
+ return mask;
+}
+
+static INLINE unsigned
+build_mask_linear(int c, int dcdx, int dcdy)
+{
+ int mask = 0;
+
+ int c0 = c;
+ int c1 = c0 + dcdy;
+ int c2 = c1 + dcdy;
+ int c3 = c2 + dcdy;
+
+ mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0);
+ mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1);
+ mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2);
+ mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3);
+ mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4);
+ mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5);
+ mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6);
+ mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7);
+ mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8);
+ mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9);
+ mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10);
+ mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11);
+ mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12);
+ mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13);
+ mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14);
+ mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15);
+
+ return mask;
+}
+
+
#define TAG(x) x##_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"
@@ -141,3 +157,85 @@ block_full_16(struct lp_rasterizer_task *task,
#define NR_PLANES 7
#include "lp_rast_tri_tmp.h"
+
+/* Special case for 3 plane triangle which is contained entirely
+ * within a 16x16 block.
+ */
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = tri->plane;
+ unsigned mask = arg.triangle.plane_mask;
+ const int x = task->x + (mask & 0xf) * 16;
+ const int y = task->y + (mask >> 4) * 16;
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned j;
+ int c[3];
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < 3; j++) {
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+
+ {
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ const int cox = c[j] + plane[j].eo * 4;
+ const int cio = c[j] + plane[j].ei * 4 - 1;
+
+ outmask |= build_mask_linear(cox, dcdx, dcdy);
+ partmask |= build_mask_linear(cio, dcdx, dcdy);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+ int cx[3];
+
+ partial_mask &= ~(1 << i);
+
+ for (j = 0; j < 3; j++)
+ cx[j] = (c[j]
+ - plane[j].dcdx * ix
+ + plane[j].dcdy * iy);
+
+ do_block_4_3(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+
+ inmask &= ~(1 << i);
+
+ block_full_4(task, tri, px, py);
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
index a410c611a3..43f72d8ca8 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -46,19 +46,13 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
int x, int y,
const int *c)
{
- unsigned mask = 0;
- int i;
+ unsigned mask = 0xffff;
+ int j;
- for (i = 0; i < 16; i++) {
- int any_negative = 0;
- int j;
-
- for (j = 0; j < NR_PLANES; j++)
- any_negative |= (c[j] - 1 + plane[j].step[i]);
-
- any_negative >>= 31;
-
- mask |= (~any_negative) & (1 << i);
+ for (j = 0; j < NR_PLANES; j++) {
+ mask &= ~build_mask(c[j] - 1,
+ -plane[j].dcdx,
+ plane[j].dcdy);
}
/* Now pass to the shader:
@@ -79,24 +73,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
const int *c)
{
unsigned outmask, inmask, partmask, partial_mask;
- unsigned i, j;
+ unsigned j;
outmask = 0; /* outside one or more trivial reject planes */
partmask = 0; /* outside one or more trivial accept planes */
for (j = 0; j < NR_PLANES; j++) {
- const int *step = plane[j].step;
- const int eo = plane[j].eo * 4;
- const int ei = plane[j].ei * 4;
- const int cox = c[j] + eo;
- const int cio = ei - 1 - eo;
-
- for (i = 0; i < 16; i++) {
- int out = cox + step[i] * 4;
- int part = out + cio;
- outmask |= (out >> 31) & (1 << i);
- partmask |= (part >> 31) & (1 << i);
- }
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ const int cox = c[j] + plane[j].eo * 4;
+ const int cio = c[j] + plane[j].ei * 4 - 1;
+
+ outmask |= build_mask_linear(cox, dcdx, dcdy);
+ partmask |= build_mask_linear(cio, dcdx, dcdy);
}
if (outmask == 0xffff)
@@ -117,15 +106,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
*/
while (partial_mask) {
int i = ffs(partial_mask) - 1;
- int px = x + pos_table4[i][0];
- int py = y + pos_table4[i][1];
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
int cx[NR_PLANES];
- for (j = 0; j < NR_PLANES; j++)
- cx[j] = c[j] + plane[j].step[i] * 4;
-
partial_mask &= ~(1 << i);
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = (c[j]
+ - plane[j].dcdx * ix
+ + plane[j].dcdy * iy);
+
TAG(do_block_4)(task, tri, plane, px, py, cx);
}
@@ -133,8 +126,10 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
*/
while (inmask) {
int i = ffs(inmask) - 1;
- int px = x + pos_table4[i][0];
- int py = y + pos_table4[i][1];
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
inmask &= ~(1 << i);
@@ -157,35 +152,28 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
struct lp_rast_plane plane[NR_PLANES];
int c[NR_PLANES];
unsigned outmask, inmask, partmask, partial_mask;
- unsigned i, j, nr_planes = 0;
+ unsigned j = 0;
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
while (plane_mask) {
int i = ffs(plane_mask) - 1;
- plane[nr_planes] = tri->plane[i];
+ plane[j] = tri->plane[i];
plane_mask &= ~(1 << i);
- nr_planes++;
- };
-
- assert(nr_planes == NR_PLANES);
- outmask = 0; /* outside one or more trivial reject planes */
- partmask = 0; /* outside one or more trivial accept planes */
+ c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
- for (j = 0; j < NR_PLANES; j++) {
- const int *step = plane[j].step;
- const int eo = plane[j].eo * 16;
- const int ei = plane[j].ei * 16;
- int cox, cio;
+ {
+ const int dcdx = -plane[j].dcdx * 16;
+ const int dcdy = plane[j].dcdy * 16;
+ const int cox = c[j] + plane[j].eo * 16;
+ const int cio = c[j] + plane[j].ei * 16 - 1;
- c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
- cox = c[j] + eo;
- cio = ei - 1 - eo;
-
- for (i = 0; i < 16; i++) {
- int out = cox + step[i] * 16;
- int part = out + cio;
- outmask |= (out >> 31) & (1 << i);
- partmask |= (part >> 31) & (1 << i);
+ outmask |= build_mask_linear(cox, dcdx, dcdy);
+ partmask |= build_mask_linear(cio, dcdx, dcdy);
}
+
+ j++;
}
if (outmask == 0xffff)
@@ -206,12 +194,16 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
*/
while (partial_mask) {
int i = ffs(partial_mask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
+ int ix = (i & 3) * 16;
+ int iy = (i >> 2) * 16;
+ int px = x + ix;
+ int py = y + iy;
int cx[NR_PLANES];
for (j = 0; j < NR_PLANES; j++)
- cx[j] = c[j] + plane[j].step[i] * 16;
+ cx[j] = (c[j]
+ - plane[j].dcdx * ix
+ + plane[j].dcdy * iy);
partial_mask &= ~(1 << i);
@@ -223,8 +215,10 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
*/
while (inmask) {
int i = ffs(inmask) - 1;
- int px = x + pos_table16[i][0];
- int py = y + pos_table16[i][1];
+ int ix = (i & 3) * 16;
+ int iy = (i >> 2) * 16;
+ int px = x + ix;
+ int py = y + iy;
inmask &= ~(1 << i);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 7e432503c1..614a6372b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -61,36 +61,6 @@ struct tri_info {
-static const int step_scissor_minx[16] = {
- 0, 1, 0, 1,
- 2, 3, 2, 3,
- 0, 1, 0, 1,
- 2, 3, 2, 3
-};
-
-static const int step_scissor_maxx[16] = {
- 0, -1, 0, -1,
- -2, -3, -2, -3,
- 0, -1, 0, -1,
- -2, -3, -2, -3
-};
-
-static const int step_scissor_miny[16] = {
- 0, 0, 1, 1,
- 0, 0, 1, 1,
- 2, 2, 3, 3,
- 2, 2, 3, 3
-};
-
-static const int step_scissor_maxy[16] = {
- 0, 0, -1, -1,
- 0, 0, -1, -1,
- -2, -2, -3, -3,
- -2, -2, -3, -3
-};
-
-
-
static INLINE int
subpixel_snap(float a)
@@ -260,13 +230,13 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
{
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
+ unsigned i;
/* setup interpolation for all the remaining attributes:
*/
for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
unsigned vert_attr = setup->fs.input[slot].src_index;
unsigned usage_mask = setup->fs.input[slot].usage_mask;
- unsigned i;
switch (setup->fs.input[slot].interp) {
case LP_INTERP_CONSTANT:
@@ -316,6 +286,34 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask);
+
+ if (0) {
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ float a0 = tri->inputs.a0 [0][i];
+ float dadx = tri->inputs.dadx[0][i];
+ float dady = tri->inputs.dady[0][i];
+
+ debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
+ "xyzw"[i],
+ a0, dadx, dady);
+ }
+
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i)) {
+ float a0 = tri->inputs.a0 [1 + slot][i];
+ float dadx = tri->inputs.dadx[1 + slot][i];
+ float dady = tri->inputs.dady[1 + slot][i];
+
+ debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
+ slot,
+ "xyzw"[i],
+ a0, dadx, dady);
+ }
+ }
+ }
+ }
}
@@ -525,7 +523,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
info.dx20 = info.v2[0][0] - info.v0[0][0];
info.dy01 = info.v0[0][1] - info.v1[0][1];
info.dy20 = info.v2[0][1] - info.v0[0][1];
- info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
+ info.oneoverarea = 1.0f / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
info.frontfacing = frontfacing;
/* Setup parameter interpolants:
@@ -590,35 +588,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Calculate trivial accept offsets from the above.
*/
plane->ei = plane->dcdy - plane->dcdx - plane->eo;
-
- plane->step = tri->step[i];
-
- /* Fill in the inputs.step[][] arrays.
- * We've manually unrolled some loops here.
- */
-#define SETUP_STEP(j, x, y) \
- tri->step[i][j] = y * plane->dcdy - x * plane->dcdx
-
- SETUP_STEP(0, 0, 0);
- SETUP_STEP(1, 1, 0);
- SETUP_STEP(2, 0, 1);
- SETUP_STEP(3, 1, 1);
-
- SETUP_STEP(4, 2, 0);
- SETUP_STEP(5, 3, 0);
- SETUP_STEP(6, 2, 1);
- SETUP_STEP(7, 3, 1);
-
- SETUP_STEP(8, 0, 2);
- SETUP_STEP(9, 1, 2);
- SETUP_STEP(10, 0, 3);
- SETUP_STEP(11, 1, 3);
-
- SETUP_STEP(12, 2, 2);
- SETUP_STEP(13, 3, 2);
- SETUP_STEP(14, 2, 3);
- SETUP_STEP(15, 3, 3);
-#undef STEP
}
@@ -641,28 +610,24 @@ do_triangle_ccw(struct lp_setup_context *setup,
* these planes elsewhere.
*/
if (nr_planes == 7) {
- tri->plane[3].step = step_scissor_minx;
tri->plane[3].dcdx = -1;
tri->plane[3].dcdy = 0;
tri->plane[3].c = 1-minx;
tri->plane[3].ei = 0;
tri->plane[3].eo = 1;
- tri->plane[4].step = step_scissor_maxx;
tri->plane[4].dcdx = 1;
tri->plane[4].dcdy = 0;
tri->plane[4].c = maxx;
tri->plane[4].ei = -1;
tri->plane[4].eo = 0;
- tri->plane[5].step = step_scissor_miny;
tri->plane[5].dcdx = 0;
tri->plane[5].dcdy = 1;
tri->plane[5].c = 1-miny;
tri->plane[5].ei = 0;
tri->plane[5].eo = 1;
- tri->plane[6].step = step_scissor_maxy;
tri->plane[6].dcdx = 0;
tri->plane[6].dcdy = -1;
tri->plane[6].c = maxy;
@@ -678,6 +643,26 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Convert to tile coordinates, and inclusive ranges:
*/
+ if (nr_planes == 3) {
+ int ix0 = minx / 16;
+ int iy0 = miny / 16;
+ int ix1 = (maxx-1) / 16;
+ int iy1 = (maxy-1) / 16;
+
+ if (iy0 == iy1 && ix0 == ix1)
+ {
+
+ /* Triangle is contained in a single 16x16 block:
+ */
+ int mask = (ix0 & 3) | ((iy0 & 3) << 4);
+
+ lp_scene_bin_command( scene, ix0/4, iy0/4,
+ lp_rast_triangle_3_16,
+ lp_rast_arg_triangle(tri, mask) );
+ return;
+ }
+ }
+
ix0 = minx / TILE_SIZE;
iy0 = miny / TILE_SIZE;
ix1 = (maxx-1) / TILE_SIZE;
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index c71ec8066c..2ba39052ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -293,34 +293,7 @@ def generate_ssse3():
print '''
#if defined(PIPE_ARCH_SSE)
-
-#if defined(PIPE_ARCH_SSSE3)
-
-#include <tmmintrin.h>
-
-#else
-
-#include <emmintrin.h>
-
-/**
- * Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
- * where -mssse3 is not supported/enabled.
- *
- * MSVC will never get in here as its intrinsics support do not rely on
- * compiler command line options.
- */
-static __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_epi8(__m128i a, __m128i mask)
-{
- __m128i result;
- __asm__("pshufb %1, %0"
- : "=x" (result)
- : "xm" (mask), "0" (a));
- return result;
-}
-
-#endif
-
+#include "util/u_sse.h"
static void
lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst,
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index f5c1c5ca2c..e920cf9f3b 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -151,9 +151,9 @@ so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
if (so->start_alloc <= so->cur_start) {
debug_printf("exceeding num_start size\n");
assert(0);
- } else
+ }
#endif /* DEBUG_NOUVEAU_STATEOBJ */
- start = so->start;
+ start = so->start;
#ifdef DEBUG_NOUVEAU_STATEOBJ
if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) {
@@ -162,7 +162,6 @@ so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
}
#endif /* DEBUG_NOUVEAU_STATEOBJ */
- so->start = start;
start[so->cur_start].gr = gr;
start[so->cur_start].mthd = mthd;
start[so->cur_start].size = size;
@@ -193,11 +192,10 @@ so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo,
if (so->reloc_alloc <= so->cur_reloc) {
debug_printf("exceeding num_reloc size\n");
assert(0);
- } else
+ }
#endif /* DEBUG_NOUVEAU_STATEOBJ */
- r = so->reloc;
+ r = so->reloc;
- so->reloc = r;
r[so->cur_reloc].bo = NULL;
nouveau_bo_ref(bo, &(r[so->cur_reloc].bo));
r[so->cur_reloc].gr = so->start[so->cur_start-1].gr;
diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h
index a5e8537533..b165f7a611 100644
--- a/src/gallium/drivers/nouveau/nouveau_util.h
+++ b/src/gallium/drivers/nouveau/nouveau_util.h
@@ -88,104 +88,4 @@ static INLINE unsigned log2i(unsigned i)
return r;
}
-struct u_split_prim {
- void *priv;
- void (*emit)(void *priv, unsigned start, unsigned count);
- void (*edge)(void *priv, boolean enabled);
-
- unsigned mode;
- unsigned start;
- unsigned p_start;
- unsigned p_end;
-
- uint repeat_first:1;
- uint close_first:1;
- uint edgeflag_off:1;
-};
-
-static INLINE void
-u_split_prim_init(struct u_split_prim *s,
- unsigned mode, unsigned start, unsigned count)
-{
- if (mode == PIPE_PRIM_LINE_LOOP) {
- s->mode = PIPE_PRIM_LINE_STRIP;
- s->close_first = 1;
- } else {
- s->mode = mode;
- s->close_first = 0;
- }
- s->start = start;
- s->p_start = start;
- s->p_end = start + count;
- s->edgeflag_off = 0;
- s->repeat_first = 0;
-}
-
-static INLINE boolean
-u_split_prim_next(struct u_split_prim *s, unsigned max_verts)
-{
- int repeat = 0;
-
- if (s->repeat_first) {
- s->emit(s->priv, s->start, 1);
- max_verts--;
- if (s->edgeflag_off) {
- s->edge(s->priv, TRUE);
- s->edgeflag_off = FALSE;
- }
- }
-
- if (s->p_start + s->close_first + max_verts >= s->p_end) {
- s->emit(s->priv, s->p_start, s->p_end - s->p_start);
- if (s->close_first)
- s->emit(s->priv, s->start, 1);
- return TRUE;
- }
-
- switch (s->mode) {
- case PIPE_PRIM_LINES:
- max_verts &= ~1;
- break;
- case PIPE_PRIM_LINE_STRIP:
- repeat = 1;
- break;
- case PIPE_PRIM_POLYGON:
- max_verts--;
- s->emit(s->priv, s->p_start, max_verts);
- s->edge(s->priv, FALSE);
- s->emit(s->priv, s->p_start + max_verts, 1);
- s->p_start += max_verts;
- s->repeat_first = TRUE;
- s->edgeflag_off = TRUE;
- return FALSE;
- case PIPE_PRIM_TRIANGLES:
- max_verts = max_verts - (max_verts % 3);
- break;
- case PIPE_PRIM_TRIANGLE_STRIP:
- /* to ensure winding stays correct, always split
- * on an even number of generated triangles
- */
- max_verts = max_verts & ~1;
- repeat = 2;
- break;
- case PIPE_PRIM_TRIANGLE_FAN:
- s->repeat_first = TRUE;
- repeat = 1;
- break;
- case PIPE_PRIM_QUADS:
- max_verts &= ~3;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- max_verts &= ~1;
- repeat = 2;
- break;
- default:
- break;
- }
-
- s->emit (s->priv, s->p_start, max_verts);
- s->p_start += (max_verts - repeat);
- return FALSE;
-}
-
#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index df79ca89ca..c6c93d40b8 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -24,11 +24,10 @@ nouveau_screen_transfer_flags(unsigned pipe)
flags |= NOUVEAU_BO_WR;
if (pipe & PIPE_TRANSFER_DISCARD)
flags |= NOUVEAU_BO_INVAL;
- if (pipe & PIPE_TRANSFER_DONTBLOCK)
- flags |= NOUVEAU_BO_NOWAIT;
- else
if (pipe & PIPE_TRANSFER_UNSYNCHRONIZED)
flags |= NOUVEAU_BO_NOSYNC;
+ else if (pipe & PIPE_TRANSFER_DONTBLOCK)
+ flags |= NOUVEAU_BO_NOWAIT;
return flags;
}
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
index c3ac804146..6a2ffd5a3c 100644
--- a/src/gallium/drivers/nv50/nv50_push.c
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -2,8 +2,8 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_split_prim.h"
-#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
#include "nv50_resource.h"
@@ -217,7 +217,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
4; /* potential edgeflag enable/disable */
const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
2; /* potential edgeflag modification */
- struct u_split_prim s;
+ struct util_split_prim s;
unsigned vtx_size;
boolean nzi = FALSE;
int i;
@@ -335,7 +335,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride;
}
- u_split_prim_init(&s, mode, start, count);
+ util_split_prim_init(&s, mode, start, count);
do {
if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
FIRE_RING(chan);
@@ -351,7 +351,7 @@ nv50_push_elements_instanced(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
- done = u_split_prim_next(&s, max_verts);
+ done = util_split_prim_next(&s, max_verts);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
} while (!done);
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index e7f8fe33ed..1f11950199 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -24,8 +24,8 @@
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_split_prim.h"
-#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
#include "nv50_resource.h"
@@ -311,7 +311,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
struct pipe_transfer *transfer;
struct instance a[16];
struct inline_ctx ctx;
- struct u_split_prim s;
+ struct util_split_prim s;
boolean nzi = FALSE;
unsigned overhead;
@@ -347,7 +347,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
unsigned max_verts;
boolean done;
- u_split_prim_init(&s, mode, start, count);
+ util_split_prim_init(&s, mode, start, count);
do {
if (AVAIL_RING(chan) < (overhead + 6)) {
FIRE_RING(chan);
@@ -366,7 +366,7 @@ nv50_draw_elements_inline(struct pipe_context *pipe,
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
- done = u_split_prim_next(&s, max_verts);
+ done = util_split_prim_next(&s, max_verts);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
} while (!done);
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 6f8d9abfc8..47ffc0cb3c 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -21,6 +21,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_hyperz.h"
#include "r300_texture.h"
#include "r300_winsys.h"
@@ -99,9 +101,6 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
- if (r300->z_fastfill)
- clear_buffers &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
-
/* Only color clear allowed, and only one colorbuffer. */
if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1)
return FALSE;
@@ -173,22 +172,25 @@ static void r300_clear(struct pipe_context* pipe,
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_hyperz_state *hyperz =
(struct r300_hyperz_state*)r300->hyperz_state.state;
+ struct r300_texture *zstex =
+ fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
uint32_t width = fb->width;
uint32_t height = fb->height;
boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ);
- uint32_t hyperz_dcv = 0;
+ uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
- if ((buffers & (PIPE_CLEAR_DEPTH|PIPE_CLEAR_STENCIL)) && has_hyperz) {
-
+ if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && has_hyperz) {
hyperz_dcv = hyperz->zb_depthclearvalue =
r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
r300_mark_fb_state_dirty(r300, R300_CHANGED_ZCLEAR_FLAG);
- if (r300->z_compression || r300->z_fastfill)
+ if (zstex->zmask_mem[fb->zsbuf->level]) {
r300->zmask_clear.dirty = TRUE;
- if (r300->hiz_enable)
+ buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
+ }
+ if (zstex->hiz_mem[fb->zsbuf->level])
r300->hiz_clear.dirty = TRUE;
}
@@ -207,13 +209,43 @@ static void r300_clear(struct pipe_context* pipe,
}
/* Clear. */
- r300_blitter_begin(r300, R300_CLEAR);
- util_blitter_clear(r300->blitter,
- width,
- height,
- fb->nr_cbufs,
- buffers, rgba, depth, stencil);
- r300_blitter_end(r300);
+ if (buffers) {
+ /* Clear using the blitter. */
+ r300_blitter_begin(r300, R300_CLEAR);
+ util_blitter_clear(r300->blitter,
+ width,
+ height,
+ fb->nr_cbufs,
+ buffers, rgba, depth, stencil);
+ r300_blitter_end(r300);
+ } else if (r300->zmask_clear.dirty) {
+ /* Just clear zmask and hiz now, this does not use a standard draw
+ * procedure. */
+ unsigned dwords;
+
+ /* Calculate zmask_clear and hiz_clear atom sizes. */
+ r300_update_hyperz_state(r300);
+ dwords = r300->zmask_clear.size +
+ (r300->hiz_clear.dirty ? r300->hiz_clear.size : 0) +
+ r300_get_num_cs_end_dwords(r300);
+
+ /* Reserve CS space. */
+ if (dwords > (r300->cs->ndw - r300->cs->cdw)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ }
+
+ /* Emit clear packets. */
+ r300_emit_zmask_clear(r300, r300->zmask_clear.size,
+ r300->zmask_clear.state);
+ r300->zmask_clear.dirty = FALSE;
+ if (r300->hiz_clear.dirty) {
+ r300_emit_hiz_clear(r300, r300->hiz_clear.size,
+ r300->hiz_clear.state);
+ r300->hiz_clear.dirty = FALSE;
+ }
+ } else {
+ assert(0);
+ }
/* Disable CBZB clear. */
if (r300->cbzb_clear) {
@@ -222,6 +254,16 @@ static void r300_clear(struct pipe_context* pipe,
r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
}
+ /* Enable fastfill and/or hiz.
+ *
+ * If we cleared zmask/hiz, it's in use now. The Hyper-Z state update
+ * looks if zmask/hiz is in use and enables fastfill accordingly. */
+ if (zstex &&
+ (zstex->zmask_in_use[fb->zsbuf->level] ||
+ zstex->hiz_in_use[fb->zsbuf->level])) {
+ r300->hyperz_state.dirty = TRUE;
+ }
+
/* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
if (r300->flush_counter == 0)
pipe->flush(pipe, 0, NULL);
@@ -259,27 +301,31 @@ static void r300_clear_depth_stencil(struct pipe_context *pipe,
r300_blitter_end(r300);
}
-/* Clear a region of a depth stencil surface. */
-static void r300_flush_depth_stencil(struct pipe_context *pipe,
- struct pipe_resource *dst,
- struct pipe_subresource subdst)
+/* Flush a depth stencil buffer. */
+void r300_flush_depth_stencil(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned zslice)
{
struct r300_context *r300 = r300_context(pipe);
struct pipe_surface *dstsurf;
struct r300_texture *tex = r300_texture(dst);
- /* only flush the zmask if we have one attached to this texture */
if (!tex->zmask_mem[subdst.level])
return;
+ if (!tex->zmask_in_use[subdst.level])
+ return;
dstsurf = pipe->screen->get_tex_surface(pipe->screen, dst,
- subdst.face, subdst.level, 0,
+ subdst.face, subdst.level, zslice,
PIPE_BIND_DEPTH_STENCIL);
r300->z_decomp_rd = TRUE;
r300_blitter_begin(r300, R300_CLEAR_SURFACE);
util_blitter_flush_depth_stencil(r300->blitter, dstsurf);
r300_blitter_end(r300);
r300->z_decomp_rd = FALSE;
+
+ tex->zmask_in_use[subdst.level] = FALSE;
}
/* Copy a block of pixels from one surface to another using HW. */
@@ -342,7 +388,7 @@ static void r300_resource_copy_region(struct pipe_context *pipe,
is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
if (is_depth) {
- r300_flush_depth_stencil(pipe, src, subsrc);
+ r300_flush_depth_stencil(pipe, src, subsrc, srcz);
}
if (old_format != new_format) {
dst->format = new_format;
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e8b6c4f7af..a83ad892ea 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -99,8 +99,10 @@ static void r300_destroy_context(struct pipe_context* context)
struct r300_context* r300 = r300_context(context);
struct r300_atom *atom;
- util_blitter_destroy(r300->blitter);
- draw_destroy(r300->draw);
+ if (r300->blitter)
+ util_blitter_destroy(r300->blitter);
+ if (r300->draw)
+ draw_destroy(r300->draw);
/* Print stats, if enabled. */
if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
@@ -112,40 +114,48 @@ static void r300_destroy_context(struct pipe_context* context)
}
}
- u_upload_destroy(r300->upload_vb);
- u_upload_destroy(r300->upload_ib);
+ if (r300->upload_vb)
+ u_upload_destroy(r300->upload_vb);
+ if (r300->upload_ib)
+ u_upload_destroy(r300->upload_ib);
- /* setup hyper-z mm */
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
+ if (r300->zmask_mm)
r300_hyperz_destroy_mm(r300);
- translate_cache_destroy(r300->tran.translate_cache);
+ if (r300->tran.translate_cache)
+ translate_cache_destroy(r300->tran.translate_cache);
+ /* XXX: This function assumes r300->query_list was initialized */
r300_release_referenced_objects(r300);
- r300->rws->cs_destroy(r300->cs);
+ if (r300->cs)
+ r300->rws->cs_destroy(r300->cs);
+ /* XXX: No way to tell if this was initialized or not? */
util_mempool_destroy(&r300->pool_transfers);
r300_update_num_contexts(r300->screen, -1);
- FREE(r300->aa_state.state);
- FREE(r300->blend_color_state.state);
- FREE(r300->clip_state.state);
- FREE(r300->fb_state.state);
- FREE(r300->gpu_flush.state);
- FREE(r300->hyperz_state.state);
- FREE(r300->invariant_state.state);
- FREE(r300->rs_block_state.state);
- FREE(r300->scissor_state.state);
- FREE(r300->textures_state.state);
- FREE(r300->vap_invariant_state.state);
- FREE(r300->viewport_state.state);
- FREE(r300->ztop_state.state);
- FREE(r300->fs_constants.state);
- FREE(r300->vs_constants.state);
- if (!r300->screen->caps.has_tcl) {
- FREE(r300->vertex_stream_state.state);
+ /* Free the structs allocated in r300_setup_atoms() */
+ if (r300->aa_state.state) {
+ FREE(r300->aa_state.state);
+ FREE(r300->blend_color_state.state);
+ FREE(r300->clip_state.state);
+ FREE(r300->fb_state.state);
+ FREE(r300->gpu_flush.state);
+ FREE(r300->hyperz_state.state);
+ FREE(r300->invariant_state.state);
+ FREE(r300->rs_block_state.state);
+ FREE(r300->scissor_state.state);
+ FREE(r300->textures_state.state);
+ FREE(r300->vap_invariant_state.state);
+ FREE(r300->viewport_state.state);
+ FREE(r300->ztop_state.state);
+ FREE(r300->fs_constants.state);
+ FREE(r300->vs_constants.state);
+ if (!r300->screen->caps.has_tcl) {
+ FREE(r300->vertex_stream_state.state);
+ }
}
FREE(r300);
}
@@ -158,12 +168,14 @@ void r300_flush_cb(void *data)
}
#define R300_INIT_ATOM(atomname, atomsize) \
+ do { \
r300->atomname.name = #atomname; \
r300->atomname.state = NULL; \
r300->atomname.size = atomsize; \
r300->atomname.emit = r300_emit_##atomname; \
r300->atomname.dirty = FALSE; \
- insert_at_tail(&r300->atom_list, &r300->atomname);
+ insert_at_tail(&r300->atom_list, &r300->atomname); \
+ } while (0)
static void r300_setup_atoms(struct r300_context* r300)
{
@@ -404,12 +416,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.destroy = r300_destroy_context;
- r300->cs = rws->cs_create(rws);
+ make_empty_list(&r300->query_list);
util_mempool_create(&r300->pool_transfers,
sizeof(struct pipe_transfer), 64,
UTIL_MEMPOOL_SINGLETHREADED);
+ r300->cs = rws->cs_create(rws);
+ if (r300->cs == NULL)
+ goto fail;
+
if (!r300screen->caps.has_tcl) {
/* Create a Draw. This is used for SW TCL. */
r300->draw = draw_create(&r300->context);
@@ -424,8 +440,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_setup_atoms(r300);
- make_empty_list(&r300->query_list);
-
r300_init_blit_functions(r300);
r300_init_flush_functions(r300);
r300_init_query_functions(r300);
@@ -433,6 +447,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300_init_resource_functions(r300);
r300->blitter = util_blitter_create(&r300->context);
+ if (r300->blitter == NULL)
+ goto fail;
/* Render functions must be initialized after blitter. */
r300_init_render_functions(r300);
@@ -441,22 +457,25 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
/* setup hyper-z mm */
if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300_hyperz_init_mm(r300);
+ if (!r300_hyperz_init_mm(r300))
+ goto fail;
r300->upload_ib = u_upload_create(&r300->context,
32 * 1024, 16,
PIPE_BIND_INDEX_BUFFER);
if (r300->upload_ib == NULL)
- goto no_upload_ib;
+ goto fail;
r300->upload_vb = u_upload_create(&r300->context,
128 * 1024, 16,
PIPE_BIND_VERTEX_BUFFER);
if (r300->upload_vb == NULL)
- goto no_upload_vb;
+ goto fail;
r300->tran.translate_cache = translate_cache_create();
+ if (r300->tran.translate_cache == NULL)
+ goto fail;
r300_init_states(&r300->context);
@@ -486,10 +505,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
return &r300->context;
- no_upload_ib:
- u_upload_destroy(r300->upload_ib);
- no_upload_vb:
- FREE(r300);
+ fail:
+ r300_destroy_context(&r300->context);
return NULL;
}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index d86a5c8fc9..6fa7f470f9 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -397,6 +397,8 @@ struct r300_texture {
/* hyper-z memory allocs */
struct mem_block *hiz_mem[R300_MAX_TEXTURE_LEVELS];
struct mem_block *zmask_mem[R300_MAX_TEXTURE_LEVELS];
+ boolean zmask_in_use[R300_MAX_TEXTURE_LEVELS];
+ boolean hiz_in_use[R300_MAX_TEXTURE_LEVELS];
/* This is the level tiling flags were last time set for.
* It's used to prevent redundant tiling-flags changes from happening.*/
@@ -564,12 +566,9 @@ struct r300_context {
boolean two_sided_color;
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
boolean incompatible_vb_layout;
- /* Whether fast zclear is enabled. */
- boolean z_fastfill;
#define R300_Z_COMPRESS_44 1
#define RV350_Z_COMPRESS_88 2
int z_compression;
- boolean hiz_enable;
boolean cbzb_clear;
boolean z_decomp_rd;
@@ -628,6 +627,12 @@ void r300_init_render_functions(struct r300_context *r300);
void r300_init_state_functions(struct r300_context* r300);
void r300_init_resource_functions(struct r300_context* r300);
+/* r300_blit.c */
+void r300_flush_depth_stencil(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned zslice);
+
/* r300_query.c */
void r300_resume_query(struct r300_context *r300,
struct r300_query *query);
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 17e180a79a..d0fd45349e 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -393,7 +393,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
/* HiZ RAM. */
if (r300->screen->caps.hiz_ram) {
if (tex->hiz_mem[level]) {
- OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs);
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2);
OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch);
} else {
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
@@ -402,7 +402,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
}
/* Z Mask RAM. (compressed zbuffer) */
if (tex->zmask_mem[level]) {
- OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs);
+ OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2);
OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch);
} else {
OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0);
@@ -936,6 +936,22 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_TABLE(data, 4);
}
}
+
+ /* Emit flow control instructions. */
+ if (code->num_fc_ops) {
+
+ OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
+ if (r300screen->caps.is_r500) {
+ OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2);
+ OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2);
+ } else {
+ OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops);
+ OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops);
+ }
+ OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops);
+ OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops);
+ }
+
END_CS;
}
@@ -1008,6 +1024,8 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
int i;
tex = r300_texture(fb->zsbuf->texture);
+
+ offset = tex->hiz_mem[fb->zsbuf->level]->ofs;
stride = tex->desc.stride_in_pixels[fb->zsbuf->level];
/* convert from pixels to 4x4 blocks */
@@ -1028,6 +1046,9 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state)
r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff);
}
z->current_func = -1;
+
+ /* Mark the current zbuffer's hiz ram as in use. */
+ tex->hiz_in_use[fb->zsbuf->level] = TRUE;
}
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state)
@@ -1043,6 +1064,8 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
tex = r300_texture(fb->zsbuf->texture);
stride = tex->desc.stride_in_pixels[fb->zsbuf->level];
+ offset = tex->zmask_mem[fb->zsbuf->level]->ofs;
+
if (r300->z_compression == RV350_Z_COMPRESS_88)
mult = 8;
else
@@ -1065,6 +1088,9 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state
offset <<= offset_shift;
r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff);
}
+
+ /* Mark the current zbuffer's zmask as in use. */
+ tex->zmask_in_use[fb->zsbuf->level] = TRUE;
}
void r300_emit_ztop_state(struct r300_context* r300,
@@ -1186,6 +1212,17 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300)
return dwords;
}
+unsigned r300_get_num_cs_end_dwords(struct r300_context *r300)
+{
+ unsigned dwords = 0;
+
+ /* Emitted in flush. */
+ dwords += 26; /* emit_query_end */
+ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
+
+ return dwords;
+}
+
/* Emit all dirty state. */
void r300_emit_dirty_state(struct r300_context* r300)
{
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 2f2c2f2dcb..bae2525634 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -116,6 +116,7 @@ void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state);
void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state);
unsigned r300_get_num_dirty_dwords(struct r300_context *r300);
+unsigned r300_get_num_cs_end_dwords(struct r300_context *r300);
/* Emit all dirty state. */
void r300_emit_dirty_state(struct r300_context* r300);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 7fed9b5d07..fe182b6615 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -44,8 +44,7 @@ static void r300_flush(struct pipe_context* pipe,
u_upload_flush(r300->upload_ib);
if (r300->dirty_hw) {
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300_emit_hyperz_end(r300);
+ r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
r300->flush_counter++;
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 87ff49a90c..2a0c30620a 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -72,6 +72,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
fs_inputs->wpos = i;
break;
+ case TGSI_SEMANTIC_FACE:
+ assert(index == 0);
+ fs_inputs->face = i;
+ break;
+
default:
fprintf(stderr, "r300: FP: Unknown input semantic: %i\n",
info->input_semantic_name[i]);
@@ -120,6 +125,9 @@ static void allocate_hardware_inputs(
allocate(mydata, inputs->color[i], reg++);
}
}
+ if (inputs->face != ATTR_UNUSED) {
+ allocate(mydata, inputs->face, reg++);
+ }
for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
if (inputs->generic[i] != ATTR_UNUSED) {
allocate(mydata, inputs->generic[i], reg++);
@@ -360,13 +368,14 @@ static void r300_translate_fragment_shader(
{
struct r300_fragment_program_compiler compiler;
struct tgsi_to_rc ttr;
- int wpos;
+ int wpos, face;
unsigned i;
tgsi_scan_shader(tokens, &shader->info);
r300_shader_read_fs_inputs(&shader->info, &shader->inputs);
wpos = shader->inputs.wpos;
+ face = shader->inputs.face;
/* Setup the compiler. */
memset(&compiler, 0, sizeof(compiler));
@@ -383,7 +392,7 @@ static void r300_translate_fragment_shader(
find_output_registers(&compiler, shader);
if (compiler.Base.Debug) {
- debug_printf("r300: Initial fragment program\n");
+ DBG(r300, DBG_FP, "r300: Initial fragment program\n");
tgsi_dump(tokens, 0);
}
@@ -406,6 +415,10 @@ static void r300_translate_fragment_shader(
rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
}
+ if (face != ATTR_UNUSED) {
+ rc_transform_fragment_face(&compiler.Base, face);
+ }
+
/* Invoke the compiler */
r3xx_compile_fragment_program(&compiler);
@@ -418,7 +431,7 @@ static void r300_translate_fragment_shader(
}
if (compiler.Base.Error) {
- fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
+ DBG(r300, DBG_FP, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
" instead.\nIf there's an 'unknown opcode' message, please"
" file a bug report and attach this log.\n", compiler.Base.ErrorMsg);
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
index 10e440ce30..a471b7353b 100644
--- a/src/gallium/drivers/r300/r300_hyperz.c
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -21,12 +21,14 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
-#include "util/u_format.h"
-#include "util/u_mm.h"
#include "r300_context.h"
#include "r300_hyperz.h"
#include "r300_reg.h"
#include "r300_fs.h"
+#include "r300_winsys.h"
+
+#include "util/u_format.h"
+#include "util/u_mm.h"
/*
HiZ rules - taken from various docs
@@ -127,6 +129,12 @@ static void r300_update_hyperz(struct r300_context* r300)
{
struct r300_hyperz_state *z =
(struct r300_hyperz_state*)r300->hyperz_state.state;
+ struct pipe_framebuffer_state *fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_texture *zstex =
+ fb->zsbuf ? r300_texture(fb->zsbuf->texture) : NULL;
+ boolean zmask_in_use = FALSE;
+ boolean hiz_in_use = FALSE;
z->gb_z_peq_config = 0;
z->zb_bw_cntl = 0;
@@ -138,22 +146,32 @@ static void r300_update_hyperz(struct r300_context* r300)
return;
}
+ if (!zstex)
+ return;
+
+ if (!r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
+ return;
+
+ zmask_in_use = zstex->zmask_in_use[fb->zsbuf->level];
+ hiz_in_use = zstex->hiz_in_use[fb->zsbuf->level];
+
+ /* Z fastfill. */
+ if (zmask_in_use) {
+ z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
+ }
+
/* Zbuffer compression. */
- if (r300->z_compression) {
+ if (zmask_in_use && r300->z_compression) {
z->zb_bw_cntl |= R300_RD_COMP_ENABLE;
if (r300->z_decomp_rd == false)
z->zb_bw_cntl |= R300_WR_COMP_ENABLE;
- /* RV350 and up optimizations. */
- if (r300->z_compression == RV350_Z_COMPRESS_88)
- z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
- }
-
- /* Z fastfill. */
- if (r300->z_fastfill) {
- z->zb_bw_cntl |= R300_FAST_FILL_ENABLE; /* | R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE;*/
}
+ /* RV350 and up optimizations. */
+ /* Section 10.4.9 of the docs is a lie. */
+ if (r300->z_compression == RV350_Z_COMPRESS_88)
+ z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
- if (r300->hiz_enable) {
+ if (hiz_in_use) {
bool can_hiz = r300_can_hiz(r300);
if (can_hiz) {
z->zb_bw_cntl |= R300_HIZ_ENABLE;
@@ -163,8 +181,8 @@ static void r300_update_hyperz(struct r300_context* r300)
}
}
+ /* R500-specific features and optimizations. */
if (r300->screen->caps.is_r500) {
- /* XXX Are these bits really available on RV350? */
z->zb_bw_cntl |= R500_HIZ_FP_EXP_BITS_3;
z->zb_bw_cntl |=
R500_HIZ_EQUAL_REJECT_ENABLE |
@@ -333,6 +351,12 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf
tex = r300_texture(surf->base.texture);
+ /* We currently don't handle decompression for 3D textures and cubemaps
+ * correctly. */
+ if (tex->desc.b.b.target != PIPE_TEXTURE_1D &&
+ tex->desc.b.b.target != PIPE_TEXTURE_2D)
+ return;
+
if (tex->zmask_mem[level])
return;
@@ -349,23 +373,36 @@ void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf
return;
}
-void r300_hyperz_init_mm(struct r300_context *r300)
+boolean r300_hyperz_init_mm(struct r300_context *r300)
{
struct r300_screen* r300screen = r300->screen;
int frag_pipes = r300screen->caps.num_frag_pipes;
- if (r300screen->caps.hiz_ram)
+ r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
+ if (!r300->zmask_mm)
+ return FALSE;
+
+ if (r300screen->caps.hiz_ram) {
r300->hiz_mm = u_mmInit(0, r300screen->caps.hiz_ram * frag_pipes);
+ if (!r300->hiz_mm) {
+ u_mmDestroy(r300->zmask_mm);
+ r300->zmask_mm = NULL;
+ return FALSE;
+ }
+ }
- r300->zmask_mm = u_mmInit(0, r300screen->caps.zmask_ram * frag_pipes);
+ return TRUE;
}
void r300_hyperz_destroy_mm(struct r300_context *r300)
{
struct r300_screen* r300screen = r300->screen;
- if (r300screen->caps.hiz_ram)
+ if (r300screen->caps.hiz_ram) {
u_mmDestroy(r300->hiz_mm);
+ r300->hiz_mm = NULL;
+ }
u_mmDestroy(r300->zmask_mm);
+ r300->zmask_mm = NULL;
}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
index 09e1ff6625..30a23ec649 100644
--- a/src/gallium/drivers/r300/r300_hyperz.h
+++ b/src/gallium/drivers/r300/r300_hyperz.h
@@ -30,6 +30,6 @@ void r300_update_hyperz_state(struct r300_context* r300);
void r300_hiz_alloc_block(struct r300_context *r300, struct r300_surface *surf);
void r300_zmask_alloc_block(struct r300_context *r300, struct r300_surface *surf, int compress);
-void r300_hyperz_init_mm(struct r300_context *r300);
+boolean r300_hyperz_init_mm(struct r300_context *r300);
void r300_hyperz_destroy_mm(struct r300_context *r300);
#endif
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 99a9d65055..60d3b600cb 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -496,6 +496,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230
+#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8)
+#define R300_PVS_FC_LAST_INST(x) ((x) << 16)
+#define R300_PVS_FC_RTN_INST(x) ((x) << 24)
+
/* gap */
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
@@ -514,6 +520,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290
+#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8)
+
/* gap */
/* Addresses are relative to the vertex program instruction area of the
@@ -548,6 +558,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x)))
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
* immediate vertices
@@ -564,6 +577,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* write 0 to indicate end of packet? */
#define R300_VAP_VTX_END_OF_PKT 0x24AC
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500
+#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16)
+
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504
+#define R500_PVS_FC_LAST_INST(x) ((x) << 0)
+#define R500_PVS_FC_RTN_INST(x) ((x) << 16)
+
/* gap */
/* These are values from r300_reg/r300_reg.h - they are known to be correct
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 910f5f7113..86b11ca045 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -186,20 +186,14 @@ enum r300_prepare_flags {
* \param cs_dwords The number of dwords to reserve in CS.
* \param aos_offset The offset passed to emit_aos.
* \param index_bias The index bias to emit.
- * \param end_cs_dwords The number of free dwords which must be available
- * at the end of CS after drawing in case the CS space
- * management is performed by a draw_* function manually.
- * The parameter may be NULL.
*/
static void r300_prepare_for_rendering(struct r300_context *r300,
enum r300_prepare_flags flags,
struct pipe_resource *index_buffer,
unsigned cs_dwords,
int aos_offset,
- int index_bias,
- unsigned *end_cs_dwords)
+ int index_bias)
{
- unsigned end_dwords = 0;
boolean flushed = FALSE;
boolean first_draw = flags & PREP_FIRST_DRAW;
boolean emit_aos = flags & PREP_EMIT_AOS;
@@ -221,12 +215,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
cs_dwords += 7; /* emit_aos_swtcl */
}
- /* Emitted in flush. */
- end_dwords += 26; /* emit_query_end */
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- end_dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */
-
- cs_dwords += end_dwords;
+ cs_dwords += r300_get_num_cs_end_dwords(r300);
/* Reserve requested CS space. */
if (cs_dwords > (r300->cs->ndw - r300->cs->cdw)) {
@@ -251,9 +240,6 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
if (emit_aos_swtcl)
r300_emit_aos_swtcl(r300, indexed);
}
-
- if (end_cs_dwords)
- *end_cs_dwords = end_dwords;
}
static boolean immd_is_good_idea(struct r300_context *r300,
@@ -354,7 +340,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
dwords = 9 + count * vertex_size;
- r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0);
BEGIN_CS(dwords);
OUT_CS_REG(R300_GA_COLOR_CONTROL,
@@ -534,7 +520,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
/* 15 dwords for emit_draw_elements */
r300_prepare_for_rendering(r300,
PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
- indexBuffer, 15, buffer_offset, indexBias, NULL);
+ indexBuffer, 15, buffer_offset, indexBias);
if (alt_num_verts || count <= 65535) {
r300_emit_draw_elements(r300, indexBuffer, indexSize,
@@ -553,7 +539,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe,
if (count) {
r300_prepare_for_rendering(r300,
PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
- indexBuffer, 15, buffer_offset, indexBias, NULL);
+ indexBuffer, 15, buffer_offset, indexBias);
}
} while (count);
}
@@ -598,7 +584,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
} else {
/* 9 spare dwords for emit_draw_arrays. */
r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
- NULL, 9, start, 0, NULL);
+ NULL, 9, start, 0);
if (alt_num_verts || count <= 65535) {
r300_emit_draw_arrays(r300, mode, count);
@@ -614,7 +600,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
if (count) {
r300_prepare_for_rendering(r300,
PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
- start, 0, NULL);
+ start, 0);
}
} while (count);
}
@@ -855,7 +841,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
(void) i; (void) ptr;
r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
- NULL, dwords, 0, 0, NULL);
+ NULL, dwords, 0, 0);
DBG(r300, DBG_DRAW, "r300: render_draw_arrays (count: %d)\n", count);
@@ -908,7 +894,8 @@ static void r300_render_draw_elements(struct vbuf_render* render,
* indices than it can fit in CS. */
r300_prepare_for_rendering(r300,
PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
- NULL, 256, 0, 0, &end_cs_dwords);
+ NULL, 256, 0, 0);
+ end_cs_dwords = r300_get_num_cs_end_dwords(r300);
while (count) {
free_dwords = r300->cs->ndw - r300->cs->cdw;
@@ -938,7 +925,8 @@ static void r300_render_draw_elements(struct vbuf_render* render,
if (count) {
r300_prepare_for_rendering(r300,
PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
- NULL, 256, 0, 0, &end_cs_dwords);
+ NULL, 256, 0, 0);
+ end_cs_dwords = r300_get_num_cs_end_dwords(r300);
}
}
}
@@ -1032,7 +1020,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
r300->clip_state.dirty = FALSE;
r300->viewport_state.dirty = FALSE;
- r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0);
DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 6268001054..1e4edcdbc3 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -115,7 +115,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_DEPTH_CLAMP:
return 1;
/* Unsupported features (boolean caps). */
@@ -124,6 +123,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_CONT_SUPPORTED:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 0;
/* Texturing. */
@@ -150,9 +151,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_CONST_BUFFER_SIZE:
return 256;
- case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
- return 1;
-
/* Fragment coordinate conventions. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
index cb7a37033f..4be23e64ce 100644
--- a/src/gallium/drivers/r300/r300_shader_semantics.h
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -38,6 +38,7 @@ struct r300_shader_semantics {
int psize;
int color[ATTR_COLOR_COUNT];
int bcolor[ATTR_COLOR_COUNT];
+ int face;
int generic[ATTR_GENERIC_COUNT];
int fog;
int wpos;
@@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset(
info->pos = ATTR_UNUSED;
info->psize = ATTR_UNUSED;
+ info->face = ATTR_UNUSED;
info->fog = ATTR_UNUSED;
info->wpos = ATTR_UNUSED;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 1e6b81d798..239edd98e3 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -689,8 +689,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
/* What is marked as dirty depends on the enum r300_fb_state_change. */
r300->gpu_flush.dirty = TRUE;
r300->fb_state.dirty = TRUE;
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300->hyperz_state.dirty = TRUE;
+ r300->hyperz_state.dirty = TRUE;
if (change == R300_CHANGED_FB_STATE) {
r300->aa_state.dirty = TRUE;
@@ -753,8 +752,6 @@ static void
r300_mark_fb_state_dirty(r300, R300_CHANGED_FB_STATE);
- r300->hiz_enable = false;
- r300->z_fastfill = false;
r300->z_compression = false;
if (state->zsbuf) {
@@ -781,23 +778,18 @@ static void
/* work out whether we can support zmask features on this buffer */
r300_zmask_alloc_block(r300, zs_surf, compress);
- if (tex->hiz_mem[level]) {
- r300->hiz_enable = 1;
- }
-
if (tex->zmask_mem[level]) {
- r300->z_fastfill = 1;
/* compression causes hangs on 16-bit */
if (zbuffer_bpp == 24)
r300->z_compression = compress;
}
DBG(r300, DBG_HYPERZ,
- "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", r300->hiz_enable,
+ "hyper-z features: hiz: %d @ %08x z-compression: %d z-fastfill: %d @ %08x\n", tex->hiz_mem[level] ? 1 : 0,
tex->hiz_mem[level] ? tex->hiz_mem[level]->ofs : 0xdeadbeef,
- r300->z_compression, r300->z_fastfill,
+ r300->z_compression, tex->zmask_mem[level] ? 1 : 0,
tex->zmask_mem[level] ? tex->zmask_mem[level]->ofs : 0xdeadbeef);
}
-
+
/* Polygon offset depends on the zbuffer bit depth. */
if (r300->zbuffer_bpp != zbuffer_bpp) {
r300->zbuffer_bpp = zbuffer_bpp;
@@ -1759,10 +1751,12 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
if (r300->screen->caps.has_tcl) {
+ unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
r300->vs_state.dirty = TRUE;
r300->vs_state.size =
vs->code.length + 9 +
- (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
+ (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0) +
+ (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0);
if (vs->externals_count) {
r300->vs_constants.dirty = TRUE;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index f3dad4c292..c8de3e1c52 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -35,7 +35,6 @@
#include "r300_state_inlines.h"
#include "r300_texture.h"
#include "r300_vs.h"
-#include "r300_winsys.h"
/* r300_state_derived: Various bits of state which are dependent upon
* currently bound CSO data. */
@@ -47,6 +46,11 @@ enum r300_rs_swizzle {
SWIZ_0001,
};
+enum r300_rs_col_write_type {
+ WRITE_COLOR = 0,
+ WRITE_FACE
+};
+
static void r300_draw_emit_attrib(struct r300_context* r300,
enum attrib_emit emit,
enum interp_mode interp,
@@ -204,8 +208,10 @@ static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
rs->inst[id] |= R300_RS_INST_COL_ID(id);
}
-static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
+ enum r300_rs_col_write_type type)
{
+ assert(type == WRITE_COLOR);
rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
R300_RS_INST_COL_ADDR(fp_offset);
}
@@ -253,10 +259,16 @@ static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
rs->inst[id] |= R500_RS_INST_COL_ID(id);
}
-static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset,
+ enum r300_rs_col_write_type type)
{ /* ORs a color-write flag and the FS input address fp_offset into rs->inst[id]; the flag depends on type. */
- rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
- R500_RS_INST_COL_ADDR(fp_offset);
+ if (type == WRITE_FACE) /* face input: use the BACKFACE write form instead of the plain one */
+ rs->inst[id] |= R500_RS_INST_COL_CN_WRITE_BACKFACE |
+ R500_RS_INST_COL_ADDR(fp_offset);
+ else /* WRITE_COLOR: same bits the function set before this change */
+ rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
+ R500_RS_INST_COL_ADDR(fp_offset);
+
}
static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
@@ -306,7 +318,7 @@ static void r300_update_rs_block(struct r300_context *r300)
struct r300_rs_block rs = {0};
int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0, tex_ptr = 0;
void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
- void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
+ void (*rX00_rs_col_write)(struct r300_rs_block*, int, int, enum r300_rs_col_write_type);
void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
@@ -325,6 +337,11 @@ static void r300_update_rs_block(struct r300_context *r300)
rX00_rs_tex_write = r300_rs_tex_write;
}
+ /* 0x5555 copied from classic, which means:
+ * Select user color 0 for COLOR0 up to COLOR7.
+ * What the hell does that mean? */
+ rs.vap_vtx_state_cntl = 0x5555;
+
/* The position is always present in VAP. */
rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS;
rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
@@ -351,7 +368,7 @@ static void r300_update_rs_block(struct r300_context *r300)
/* Write it to the FS input register if it's needed by the FS. */
if (fs_inputs->color[i] != ATTR_UNUSED) {
- rX00_rs_col_write(&rs, col_count, fp_offset);
+ rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_COLOR);
fp_offset++;
DBG(r300, DBG_RS,
@@ -399,6 +416,24 @@ static void r300_update_rs_block(struct r300_context *r300)
}
}
+ /* gl_FrontFacing.
+ * Note that we can use either the two-sided color selection based on
+ * the front and back vertex shader colors, or gl_FrontFacing,
+ * but not both! It locks up otherwise.
+ *
+ * In Direct3D 9, the two-sided color selection can be used
+ * with shaders 2.0 only, while gl_FrontFacing can be used
+ * with shaders 3.0 only. The hardware apparently hasn't been designed
+ * to support both at the same time. */
+ if (r300->screen->caps.is_r500 && fs_inputs->face != ATTR_UNUSED &&
+ !(any_bcolor_used && r300->two_sided_color)) {
+ rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
+ rX00_rs_col_write(&rs, col_count, fp_offset, WRITE_FACE);
+ fp_offset++;
+ col_count++;
+ DBG(r300, DBG_RS, "r300: Rasterized FACE written to FS.\n");
+ }
+
/* Rasterize texture coordinates. */
for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) {
bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i));
@@ -677,8 +712,44 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300)
}
}
+/* We can't use compressed zbuffers as samplers.
+ *
+ * Walks the bound sampler view/state pairs and calls
+ * r300_flush_depth_stencil() on every mip level of a depth/stencil
+ * texture whose zmask_in_use[level] flag is set. Does nothing when
+ * r300->z_decomp_rd is set. */
+static void r300_flush_depth_textures(struct r300_context *r300)
+{
+ struct r300_textures_state *state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ unsigned i, level;
+ unsigned count = MIN2(state->sampler_view_count,
+ state->sampler_state_count); /* only slots covered by both a view and a state are visited */
+
+ if (r300->z_decomp_rd) /* NOTE(review): presumably set while a decompressing flush is in progress, to avoid re-entry -- confirm */
+ return;
+
+ for (i = 0; i < count; i++)
+ if (state->sampler_views[i] && state->sampler_states[i]) {
+ struct pipe_resource *tex = state->sampler_views[i]->base.texture;
+
+ if (tex->target == PIPE_TEXTURE_3D ||
+ tex->target == PIPE_TEXTURE_CUBE)
+ continue; /* We don't handle 3D textures and cubemaps yet. */
+
+ /* Ignore non-depth textures.
+ * Also ignore reinterpreted depth textures, e.g. resource_copy. */
+ if (!util_format_is_depth_or_stencil(tex->format))
+ continue;
+
+ for (level = 0; level <= tex->last_level; level++)
+ if (r300_texture(tex)->zmask_in_use[level]) {
+ /* Flush this level. NOTE(review): the 0 given to u_subresource() looks like the face/slice index, consistent with 3D/cube being skipped above -- confirm. */
+ r300_flush_depth_stencil(&r300->context, tex,
+ u_subresource(0, level), 0);
+ }
+ }
+}
+
void r300_update_derived_state(struct r300_context* r300)
{
+ r300_flush_depth_textures(r300);
+
if (r300->textures_state.dirty) {
r300_merge_textures_and_samplers(r300);
}
@@ -694,6 +765,5 @@ void r300_update_derived_state(struct r300_context* r300)
}
}
- if (r300->rws->get_value(r300->rws, R300_CAN_HYPERZ))
- r300_update_hyperz_state(r300);
+ r300_update_hyperz_state(r300);
}
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 51b2c55550..a4911b9a2a 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -97,13 +97,13 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */
/* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */
/* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
- /* case TGSI_OPCODE_SSG: return RC_OPCODE_SSG; */
+ case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
/* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
- /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
+ case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
case TGSI_OPCODE_IF: return RC_OPCODE_IF;
@@ -126,7 +126,7 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */
/* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */
/* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */
- /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */
+ case TGSI_OPCODE_CONT: return RC_OPCODE_CONT;
/* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */
/* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */
/* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index b25c786d6b..54c8de1241 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -207,7 +207,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.Base.max_temp_regs = 32;
if (compiler.Base.Debug) {
- debug_printf("r300: Initial vertex program\n");
+ DBG(r300, DBG_VP, "r300: Initial vertex program\n");
tgsi_dump(vs->state.tokens, 0);
}
@@ -227,8 +227,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
/* Invoke the compiler */
r3xx_compile_vertex_program(&compiler);
if (compiler.Base.Error) {
- /* XXX We should fallback using Draw. */
- fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader"
+ DBG(r300, DBG_VP, "r300 VP: Compiler error:\n%sUsing a dummy shader"
" instead.\nIf there's an 'unknown opcode' message, please"
" file a bug report and attach this log.\n", compiler.Base.ErrorMsg);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index f1dc3dc3a9..9ea9d4354d 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -167,8 +167,7 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
struct r600_bc_alu *alu;
if (bc->cf_last == NULL) {
- R600_ERR("no last CF\n");
- return -EINVAL;
+ return 0;
}
if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
return 0;
@@ -179,12 +178,13 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
return -EINVAL;
}
alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
- if (!alu->last || !alu->nliteral) {
+ if (!alu->last || !alu->nliteral || alu->literal_added) {
return 0;
}
memcpy(alu->value, value, 4 * 4);
bc->cf_last->ndw += alu->nliteral;
bc->ndw += alu->nliteral;
+ alu->literal_added = 1;
return 0;
}
@@ -287,7 +287,7 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
return 0;
}
-int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
{
unsigned i;
@@ -331,7 +331,7 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
return 0;
}
-int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
unsigned id = cf->id;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 3fd94dbda0..10d98afaf0 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -48,6 +48,7 @@ struct r600_bc_alu {
unsigned last;
unsigned is_op3;
unsigned nliteral;
+ unsigned literal_added;
u32 value[4];
};
diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c
index ae1780a1d4..edde80c660 100644
--- a/src/gallium/drivers/r600/r600_context.c
+++ b/src/gallium/drivers/r600/r600_context.c
@@ -47,20 +47,25 @@ void r600_flush(struct pipe_context *ctx, unsigned flags,
struct r600_context *rctx = r600_context(ctx);
struct r600_screen *rscreen = rctx->screen;
static int dc = 0;
+ char dname[256];
if (radeon_ctx_pm4(rctx->ctx))
return;
/* FIXME dumping should be removed once shader support instructions
* without throwing bad code
*/
- if (!dc)
- radeon_ctx_dump_bof(rctx->ctx, "gallium.bof");
+ if (!rctx->ctx->cpm4)
+ goto out;
+ sprintf(dname, "gallium-%08d.bof", dc);
+ if (dc < 1)
+ radeon_ctx_dump_bof(rctx->ctx, dname);
#if 1
radeon_ctx_submit(rctx->ctx);
#endif
+ dc++;
+out:
rctx->ctx = radeon_ctx_decref(rctx->ctx);
rctx->ctx = radeon_ctx(rscreen->rw);
- dc++;
}
static void r600_init_config(struct r600_context *rctx)
@@ -202,24 +207,6 @@ static void r600_init_config(struct r600_context *rctx)
num_es_stack_entries = 0;
break;
}
- printf("ps_prio : %d\n", ps_prio);
- printf("vs_prio : %d\n", vs_prio);
- printf("gs_prio : %d\n", gs_prio);
- printf("es_prio : %d\n", es_prio);
- printf("num_ps_gprs : %d\n", num_ps_gprs);
- printf("num_vs_gprs : %d\n", num_vs_gprs);
- printf("num_gs_gprs : %d\n", num_gs_gprs);
- printf("num_es_gprs : %d\n", num_es_gprs);
- printf("num_temp_gprs : %d\n", num_temp_gprs);
- printf("num_ps_threads : %d\n", num_ps_threads);
- printf("num_vs_threads : %d\n", num_vs_threads);
- printf("num_gs_threads : %d\n", num_gs_threads);
- printf("num_es_threads : %d\n", num_es_threads);
- printf("num_ps_stack_entries : %d\n", num_ps_stack_entries);
- printf("num_vs_stack_entries : %d\n", num_vs_stack_entries);
- printf("num_gs_stack_entries : %d\n", num_gs_stack_entries);
- printf("num_es_stack_entries : %d\n", num_es_stack_entries);
-
rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG);
rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000;
diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h
index 431f8951b2..76d5de8653 100644
--- a/src/gallium/drivers/r600/r600_context.h
+++ b/src/gallium/drivers/r600/r600_context.h
@@ -94,7 +94,7 @@ struct r600_context_hw_states {
struct radeon_state *dsa;
struct radeon_state *blend;
struct radeon_state *viewport;
- struct radeon_state *cb[7];
+ struct radeon_state *cb[8];
struct radeon_state *config;
struct radeon_state *cb_cntl;
struct radeon_state *db;
@@ -175,4 +175,7 @@ extern int r600_pipe_shader_update(struct pipe_context *ctx,
#define R600_ERR(fmt, args...) \
fprintf(stderr, "EE %s/%s:%d - "fmt, __FILE__, __func__, __LINE__, ##args)
+uint32_t r600_translate_texformat(enum pipe_format format,
+ const unsigned char *swizzle_view,
+ uint32_t *word4_p, uint32_t *yuv_format_p);
#endif
diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c
index 2420b76318..f058455162 100644
--- a/src/gallium/drivers/r600/r600_draw.c
+++ b/src/gallium/drivers/r600/r600_draw.c
@@ -127,7 +127,7 @@ static int r600_draw_common(struct r600_draw *draw)
draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count;
draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator;
if (draw->index_buffer) {
- rbuffer = (struct r600_buffer*)draw->index_buffer;
+ rbuffer = (struct r600_resource*)draw->index_buffer;
draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT;
draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT;
diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c
index 4b87327a7c..cdaca9ed7d 100644
--- a/src/gallium/drivers/r600/r600_screen.c
+++ b/src/gallium/drivers/r600/r600_screen.c
@@ -53,59 +53,100 @@ static const char* r600_get_name(struct pipe_screen* pscreen)
static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
{
switch (param) {
- case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 16;
+ /* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
- return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
- return 1;
case PIPE_CAP_GLSL:
- return 1;
case PIPE_CAP_DUAL_SOURCE_BLEND:
- return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
- return 1;
case PIPE_CAP_POINT_SPRITE:
- return 1;
- case PIPE_CAP_MAX_RENDER_TARGETS:
- /* FIXME some r6xx are buggy and can only do 4 */
- return 8;
case PIPE_CAP_OCCLUSION_QUERY:
- return 1;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 1;
- case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- /* FIXME not sure here */
- return 13;
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- return 1;
case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
- return 1;
- case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- /* FIXME allow this once infrastructure is there */
- return 0;
- case PIPE_CAP_TGSI_CONT_SUPPORTED:
- return 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- return 1;
case PIPE_CAP_SM3:
- return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
- return 1;
- case PIPE_CAP_INDEP_BLEND_FUNC:
- /* FIXME allow this */
- return 0;
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
return 1;
+
+ /* Unsupported features (boolean caps). */
+ case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_TGSI_CONT_SUPPORTED:
+ case PIPE_CAP_STREAM_OUTPUT:
+ case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
+ case PIPE_CAP_GEOMETRY_SHADER4:
+ case PIPE_CAP_DEPTH_CLAMP: /* FIXME allow this */
+ return 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 14;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ /* FIXME allow this once infrastructure is there */
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 16;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ /* FIXME some r6xx are buggy and can only do 4 */
+ return 8;
+
+ /* Fragment coordinate conventions. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
return 1;
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
return 0;
+
+ /* Shader limits. */
+ case PIPE_CAP_MAX_VS_INSTRUCTIONS:
+ return 16384; //max native instructions, not greater than max instructions
+ case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_CAP_MAX_FS_INSTRUCTIONS:
+ return 16384; //max program native instructions
+ case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
+ return 16384; //max program native ALU instructions
+ case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
+ return 16384; //max program native texture instructions
+ case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
+ return 2048; //max program native texture indirections
+ case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
+ case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
+ return 8; /* FIXME */
+ case PIPE_CAP_MAX_VS_INPUTS:
+ return 16; //max native attributes
+ case PIPE_CAP_MAX_FS_INPUTS:
+ return 10; //max native attributes
+ case PIPE_CAP_MAX_VS_TEMPS:
+ return 256; //max native temporaries
+ case PIPE_CAP_MAX_FS_TEMPS:
+ return 256; //max native temporaries
+ case PIPE_CAP_MAX_VS_ADDRS:
+ case PIPE_CAP_MAX_FS_ADDRS:
+ return 1; //max native address registers/* FIXME Isn't this equal to TEMPS? */
+ case PIPE_CAP_MAX_VS_CONSTS:
+ return 256; //max native parameters
+ case PIPE_CAP_MAX_FS_CONSTS:
+ return 256; //max program native parameters
+ case PIPE_CAP_MAX_CONST_BUFFERS:
+ return 1;
+ case PIPE_CAP_MAX_CONST_BUFFER_SIZE: /* in bytes */
+ return 4096;
+ case PIPE_CAP_MAX_PREDICATE_REGISTERS:
+ case PIPE_CAP_MAX_VS_PREDS:
+ case PIPE_CAP_MAX_FS_PREDS:
+ return 0; /* FIXME */
+
default:
R600_ERR("r600: unknown param %d\n", param);
return 0;
diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h
index 9a452ecfe3..53b560c617 100644
--- a/src/gallium/drivers/r600/r600_screen.h
+++ b/src/gallium/drivers/r600/r600_screen.h
@@ -80,4 +80,6 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
int r600_conv_pipe_format(unsigned pformat, unsigned *format);
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
+void r600_init_screen_texture_functions(struct pipe_screen *screen);
+
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index dc8d4cb315..956c7e7930 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -155,11 +155,14 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
{
+ const struct pipe_rasterizer_state *rasterizer;
struct r600_screen *rscreen = r600_screen(ctx->screen);
struct r600_shader *rshader = &rpshader->shader;
+ struct r600_context *rctx = r600_context(ctx);
struct radeon_state *state;
unsigned i, tmp, exports_ps, num_cout;
+ rasterizer = &rctx->rasterizer->state.rasterizer;
rpshader->rstate = radeon_state_decref(rpshader->rstate);
state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
if (state == NULL)
@@ -171,6 +174,9 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
}
+ if (rasterizer->sprite_coord_enable & (1 << i)) {
+ tmp |= S_028644_PT_SPRITE_TEX(1);
+ }
state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
}
@@ -339,7 +345,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
{
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
- struct r600_bc_output output;
+ struct r600_bc_output output[32];
+ unsigned output_done, noutput;
unsigned opcode;
int i, r = 0, pos0;
@@ -417,34 +424,41 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
}
}
/* export output */
- for (i = 0, pos0 = 0; i < shader->noutput; i++) {
- memset(&output, 0, sizeof(struct r600_bc_output));
- output.gpr = shader->output[i].gpr;
- output.elem_size = 3;
- output.swizzle_x = 0;
- output.swizzle_y = 1;
- output.swizzle_z = 2;
- output.swizzle_w = 3;
- output.barrier = 1;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
- output.array_base = i - pos0;
- output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
- switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
+ noutput = shader->noutput;
+ for (i = 0, pos0 = 0; i < noutput; i++) {
+ memset(&output[i], 0, sizeof(struct r600_bc_output));
+ output[i].gpr = shader->output[i].gpr;
+ output[i].elem_size = 3;
+ output[i].swizzle_x = 0;
+ output[i].swizzle_y = 1;
+ output[i].swizzle_z = 2;
+ output[i].swizzle_w = 3;
+ output[i].barrier = 1;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[i].array_base = i - pos0;
+ output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
+ switch (ctx.type) {
case TGSI_PROCESSOR_VERTEX:
if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
- output.array_base = 60;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ output[i].array_base = 60;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
/* position doesn't count in array_base */
- pos0 = 1;
+ pos0++;
+ }
+ if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
+ output[i].array_base = 61;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ /* point size is exported as POS too, so it doesn't count in array_base either */
+ pos0++;
}
break;
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
- output.array_base = 0;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[i].array_base = shader->output[i].sid;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
- output.array_base = 61;
- output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ output[i].array_base = 61;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
r = -EINVAL;
@@ -456,10 +470,58 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = -EINVAL;
goto out_err;
}
- if (i == (shader->noutput - 1)) {
- output.end_of_program = 1;
+ }
+ /* add fake param output for vertex shader if no param is exported */
+ if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+ for (i = 0, pos0 = 0; i < noutput; i++) {
+ if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
+ pos0 = 1;
+ break;
+ }
+ }
+ if (!pos0) {
+ memset(&output[i], 0, sizeof(struct r600_bc_output));
+ output[i].gpr = 0;
+ output[i].elem_size = 3;
+ output[i].swizzle_x = 0;
+ output[i].swizzle_y = 1;
+ output[i].swizzle_z = 2;
+ output[i].swizzle_w = 3;
+ output[i].barrier = 1;
+ output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[i].array_base = 0;
+ output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
+ noutput++;
}
- r = r600_bc_add_output(ctx.bc, &output);
+ }
+ /* add fake pixel export */
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
+ memset(&output[0], 0, sizeof(struct r600_bc_output));
+ output[0].gpr = 0;
+ output[0].elem_size = 3;
+ output[0].swizzle_x = 7;
+ output[0].swizzle_y = 7;
+ output[0].swizzle_z = 7;
+ output[0].swizzle_w = 7;
+ output[0].barrier = 1;
+ output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[0].array_base = 0;
+ output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
+ noutput++;
+ }
+ /* set export done on last export of each type */
+ for (i = noutput - 1, output_done = 0; i >= 0; i--) {
+ if (i == (noutput - 1)) {
+ output[i].end_of_program = 1;
+ }
+ if (!(output_done & (1 << output[i].type))) {
+ output_done |= (1 << output[i].type);
+ output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
+ }
+ }
+ /* add output to bytecode */
+ for (i = 0; i < noutput; i++) {
+ r = r600_bc_add_output(ctx.bc, &output[i]);
if (r)
goto out_err;
}
@@ -490,6 +552,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
r600_src->sel = 0;
}
+ r600_src->neg = tgsi_src->Register.Negate;
r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
return 0;
}
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index ee0381e8bd..2ee7780ead 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -28,6 +28,7 @@
struct r600_shader_io { /* One shader input or output slot. */
unsigned name; /* compared against TGSI_SEMANTIC_* values when PS inputs and exports are set up */
unsigned gpr; /* copied into the export's gpr field by r600_shader_from_tgsi */
+ unsigned done; /* NOTE(review): added by this change but not referenced in the visible hunks -- confirm its purpose */
int sid; /* used as array_base for fragment COLOR exports */
unsigned interpolate;
};
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index deb9bf3395..3efd409ae0 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -379,6 +379,8 @@ static void r600_set_scissor_state(struct pipe_context *ctx,
rstate = r600_context_state(rctx, pipe_scissor_type, state);
r600_bind_state(ctx, rstate);
+ /* the refcount is taken care of by this call */
+ r600_delete_state(ctx, rstate);
}
static void r600_set_stencil_ref(struct pipe_context *ctx,
@@ -389,6 +391,8 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
rstate = r600_context_state(rctx, pipe_stencil_ref_type, state);
r600_bind_state(ctx, rstate);
+ /* the refcount is taken care of by this call */
+ r600_delete_state(ctx, rstate);
}
static void r600_set_vertex_buffers(struct pipe_context *ctx,
@@ -433,6 +437,7 @@ static void r600_set_viewport_state(struct pipe_context *ctx,
rstate = r600_context_state(rctx, pipe_viewport_type, state);
r600_bind_state(ctx, rstate);
+ r600_delete_state(ctx, rstate);
}
void r600_init_state_functions(struct r600_context *rctx)
@@ -675,9 +680,8 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
unsigned color_info;
unsigned format, swap, ntype;
const struct util_format_description *desc;
- int id = R600_CB0 + cb;
- rstate = radeon_state(rscreen->rw, R600_CB0_TYPE, id);
+ rstate = radeon_state(rscreen->rw, R600_CB0_TYPE + cb, R600_CB0 + cb);
if (rstate == NULL)
return NULL;
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
@@ -728,7 +732,7 @@ static struct radeon_state *r600_db(struct r600_context *rctx)
struct r600_resource *rbuffer;
struct radeon_state *rstate;
const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
- unsigned level = state->cbufs[0]->level;
+ unsigned level;
unsigned pitch, slice, format;
if (state->zsbuf == NULL)
@@ -770,7 +774,8 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
float offset_units = 0, offset_scale = 0;
char depth = 0;
unsigned offset_db_fmt_cntl = 0;
-
+ unsigned tmp;
+ unsigned prov_vtx = 1;
if (fb->zsbuf) {
offset_units = state->offset_units;
offset_scale = state->offset_scale * 12.0f;
@@ -796,23 +801,43 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
}
offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
+ if (state->flatshade_first)
+ prov_vtx = 0;
+
rctx->flat_shade = state->flatshade;
rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER);
if (rstate == NULL)
return NULL;
rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001;
+ if (state->sprite_coord_enable) {
+ rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |=
+ S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(2) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(3) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(1);
+ if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
+ rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |=
+ S_0286D4_PNT_SPRITE_TOP_1(1);
+ }
+ }
rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000;
- rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080000 |
- S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
- S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
- S_028814_FACE(!state->front_ccw) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
- S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri);
- rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] = 0x00000000;
+ rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] =
+ S_028814_PROVOKING_VTX_LAST(prov_vtx) |
+ S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
+ S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
+ S_028814_FACE(!state->front_ccw) |
+ S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
+ S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
+ S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri);
+ rstate->states[R600_RASTERIZER__PA_CL_VS_OUT_CNTL] =
+ S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);
rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000;
- rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = 0x00080008;
- rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x00000000;
+ /* point size 12.4 fixed point */
+ tmp = (unsigned)(state->point_size * 8.0 / 2.0);
+ rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp);
+ rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000;
rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008;
rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005;
rstate->states[R600_RASTERIZER__PA_SC_MPASS_PS_CNTL] = 0x00000000;
@@ -837,12 +862,25 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
static struct radeon_state *r600_scissor(struct r600_context *rctx)
{
const struct pipe_scissor_state *state = &rctx->scissor->state.scissor;
+ const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
struct r600_screen *rscreen = rctx->screen;
struct radeon_state *rstate;
+ unsigned minx, maxx, miny, maxy;
u32 tl, br;
- tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
- br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
+ if (state == NULL) {
+ minx = 0;
+ miny = 0;
+ maxx = fb->cbufs[0]->width;
+ maxy = fb->cbufs[0]->height;
+ } else {
+ minx = state->minx;
+ miny = state->miny;
+ maxx = state->maxx;
+ maxy = state->maxy;
+ }
+ tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
+ br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy);
rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR);
if (rstate == NULL)
return NULL;
@@ -1140,8 +1178,16 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
struct r600_resource *rbuffer;
struct radeon_state *rstate;
unsigned format;
-
- format = r600_translate_colorformat(view->texture->format);
+ uint32_t word4 = 0, yuv_format = 0;
+ unsigned char swizzle[4];
+
+ swizzle[0] = view->swizzle_r;
+ swizzle[1] = view->swizzle_g;
+ swizzle[2] = view->swizzle_b;
+ swizzle[3] = view->swizzle_a;
+ format = r600_translate_texformat(view->texture->format,
+ swizzle,
+ &word4, &yuv_format);
if (format == ~0)
return NULL;
desc = util_format_description(view->texture->format);
@@ -1175,18 +1221,10 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0;
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8;
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] =
- S_038010_FORMAT_COMP_X(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) |
- S_038010_FORMAT_COMP_Y(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) |
- S_038010_FORMAT_COMP_Z(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) |
- S_038010_FORMAT_COMP_W(r600_format_type(UTIL_FORMAT_TYPE_UNSIGNED)) |
+ word4 |
S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_NORM) |
S_038010_SRF_MODE_ALL(V_038010_SFR_MODE_NO_ZERO) |
S_038010_REQUEST_SIZE(1) |
- S_038010_DST_SEL_X(r600_tex_swizzle(view->swizzle_b)) |
- S_038010_DST_SEL_Y(r600_tex_swizzle(view->swizzle_g)) |
- S_038010_DST_SEL_Z(r600_tex_swizzle(view->swizzle_r)) |
- S_038010_DST_SEL_W(r600_tex_swizzle(view->swizzle_a)) |
- S_038010_FORCE_DEGAMMA(desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ? 1 : 0) |
S_038010_BASE_LEVEL(view->first_level);
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD5] =
S_038014_LAST_LEVEL(view->last_level) |
@@ -1206,7 +1244,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
struct r600_screen *rscreen = rctx->screen;
struct radeon_state *rstate;
const struct pipe_blend_state *pbs = &rctx->blend->state.blend;
- int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
+ int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
uint32_t color_control, target_mask, shader_mask;
int i;
@@ -1215,20 +1253,29 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
color_control = S_028808_PER_MRT_BLEND(1);
for (i = 0; i < nr_cbufs; i++) {
- shader_mask |= 0xf << i;
+ shader_mask |= 0xf << (i * 4);
}
if (pbs->logicop_enable) {
color_control |= (pbs->logicop_func) << 16;
- } else
+ } else {
color_control |= (0xcc << 16);
+ }
- for (i = 0; i < 8; i++) {
- if (pbs->rt[i].blend_enable) {
- color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
+ if (pbs->independent_blend_enable) {
+ for (i = 0; i < nr_cbufs; i++) {
+ if (pbs->rt[i].blend_enable) {
+ color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
+ }
+ target_mask |= (pbs->rt[i].colormask << (4 * i));
+ }
+ } else {
+ for (i = 0; i < nr_cbufs; i++) {
+ if (pbs->rt[0].blend_enable) {
+ color_control |= S_028808_TARGET_BLEND_ENABLE(1 << i);
+ }
+ target_mask |= (pbs->rt[0].colormask << (4 * i));
}
- target_mask |= (pbs->rt[i].colormask << (4 * i));
-
}
rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL);
rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask;
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 8271ad19fb..f93c20da35 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -110,7 +110,7 @@ static INLINE uint32_t r600_translate_stencil_op(int s_op)
case PIPE_STENCIL_OP_DECR:
return V_028800_STENCIL_DECR;
case PIPE_STENCIL_OP_INCR_WRAP:
- return V_028800_STENCIL_INVERT;
+ return V_028800_STENCIL_INCR_WRAP;
case PIPE_STENCIL_OP_DECR_WRAP:
return V_028800_STENCIL_DECR_WRAP;
case PIPE_STENCIL_OP_INVERT:
@@ -289,7 +289,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
static INLINE boolean r600_is_sampler_format_supported(enum pipe_format format)
{
- return r600_translate_colorformat(format) != ~0;
+ return r600_translate_texformat(format, NULL, NULL, NULL) != ~0;
}
static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format)
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 1bce911306..30d79ebdd6 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -33,6 +33,7 @@
#include "r600_screen.h"
#include "r600_context.h"
#include "r600_resource.h"
+#include "r600d.h"
extern struct u_resource_vtbl r600_texture_vtbl;
@@ -277,3 +278,250 @@ void r600_init_screen_texture_functions(struct pipe_screen *screen)
screen->get_tex_surface = r600_get_tex_surface;
screen->tex_surface_destroy = r600_tex_surface_destroy;
}
+
+/*
+ * Pack a per-channel swizzle at bits 16 + 3 * i of texture resource word 4
+ * (presumably the DST_SEL_X/Y/Z/W fields - confirm against r600d.h).
+ *
+ * swizzle_view, when not NULL, is applied on top of swizzle_format: X/Y/Z/W
+ * entries index swizzle_format, any other entry is used as is.
+ */
+static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+					  const unsigned char *swizzle_view)
+{
+	unsigned word = 0;
+	unsigned chan;
+
+	for (chan = 0; chan < 4; chan++) {
+		const unsigned shift = 16 + 3 * chan;
+		unsigned char pick;
+		uint32_t select;
+
+		/* Resolve the effective source of this destination channel. */
+		if (!swizzle_view) {
+			pick = swizzle_format[chan];
+		} else if (swizzle_view[chan] > UTIL_FORMAT_SWIZZLE_W) {
+			pick = swizzle_view[chan];
+		} else {
+			pick = swizzle_format[swizzle_view[chan]];
+		}
+
+		/* Translate it to the select value stored in word 4. */
+		if (pick == UTIL_FORMAT_SWIZZLE_Y) {
+			select = 1;
+		} else if (pick == UTIL_FORMAT_SWIZZLE_Z) {
+			select = 2;
+		} else if (pick == UTIL_FORMAT_SWIZZLE_W) {
+			select = 3;
+		} else if (pick == UTIL_FORMAT_SWIZZLE_0) {
+			select = V_038010_SQ_SEL_0;
+		} else if (pick == UTIL_FORMAT_SWIZZLE_1) {
+			select = V_038010_SQ_SEL_1;
+		} else {
+			select = 0; /* UTIL_FORMAT_SWIZZLE_X, and any other value */
+		}
+
+		word |= select << shift;
+	}
+
+	return word;
+}
+
+/*
+ * Translate a pipe_format into the format value of a texture resource.
+ * swizzle_view (may be NULL) is combined with the swizzle of the format.
+ * On success the optional outputs receive the word 4 bits (swizzle, signed
+ * channels, sRGB degamma) and the YUV bits (always 0: YUV is rejected).
+ * Returns ~0 for unsupported formats; the outputs are then left untouched.
+ */
+uint32_t r600_translate_texformat(enum pipe_format format,
+				  const unsigned char *swizzle_view,
+				  uint32_t *word4_p, uint32_t *yuv_format_p)
+{
+	uint32_t result = 0, word4 = 0, yuv_format = 0;
+	const struct util_format_description *desc;
+	boolean uniform = TRUE;
+	int i;
+	const uint32_t sign_bit[4] = {
+		S_038010_FORMAT_COMP_X(V_038010_SQ_FORMAT_COMP_SIGNED),
+		S_038010_FORMAT_COMP_Y(V_038010_SQ_FORMAT_COMP_SIGNED),
+		S_038010_FORMAT_COMP_Z(V_038010_SQ_FORMAT_COMP_SIGNED),
+		S_038010_FORMAT_COMP_W(V_038010_SQ_FORMAT_COMP_SIGNED)
+	};
+	desc = util_format_description(format);
+	if (!desc) {
+		goto out_unknown; /* assumes NULL means "format not known to u_format" - confirm */
+	}
+
+	/* Colorspace (return non-RGB formats directly). */
+	switch (desc->colorspace) {
+	/* Depth stencil. NOTE(review): V_028010_* are DB_DEPTH_INFO values - confirm. */
+	case UTIL_FORMAT_COLORSPACE_ZS:
+		switch (format) {
+		case PIPE_FORMAT_Z16_UNORM:
+			result = V_028010_DEPTH_16;
+			goto out_word4;
+		case PIPE_FORMAT_Z24X8_UNORM:
+			result = V_028010_DEPTH_X8_24;
+			goto out_word4;
+		case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+			result = V_028010_DEPTH_8_24;
+			goto out_word4;
+		default:
+			goto out_unknown;
+		}
+
+	case UTIL_FORMAT_COLORSPACE_YUV:
+		yuv_format |= (1 << 30);
+		goto out_unknown; /* TODO: PIPE_FORMAT_UYVY, PIPE_FORMAT_YUYV */
+
+	case UTIL_FORMAT_COLORSPACE_SRGB:
+		word4 |= S_038010_FORCE_DEGAMMA(1);
+		if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB)
+			goto out_unknown; /* fails for some reason - TODO */
+		break;
+
+	default:
+		break;
+	}
+
+	word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
+
+	/* S3TC formats. TODO */
+	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+		goto out_unknown;
+	}
+
+	/* Flag every signed channel in word 4. */
+	for (i = 0; i < desc->nr_channels; i++) {
+		if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+			word4 |= sign_bit[i];
+		}
+	}
+
+	/* TODO: R8G8Bx_SNORM (CxV8U8) and RGTC are not handled. */
+	/* See whether the components are of the same size. */
+	for (i = 1; i < desc->nr_channels; i++) {
+		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
+	}
+
+	/* Non-uniform formats. */
+	if (!uniform) {
+		switch(desc->nr_channels) {
+		case 3:
+			if (desc->channel[0].size == 5 &&
+			    desc->channel[1].size == 6 &&
+			    desc->channel[2].size == 5) {
+				result |= V_0280A0_COLOR_5_6_5;
+				goto out_word4;
+			}
+			goto out_unknown;
+		case 4:
+			if (desc->channel[0].size == 5 &&
+			    desc->channel[1].size == 5 &&
+			    desc->channel[2].size == 5 &&
+			    desc->channel[3].size == 1) {
+				result |= V_0280A0_COLOR_1_5_5_5;
+				goto out_word4;
+			}
+			if (desc->channel[0].size == 10 &&
+			    desc->channel[1].size == 10 &&
+			    desc->channel[2].size == 10 &&
+			    desc->channel[3].size == 2) {
+				result |= V_0280A0_COLOR_10_10_10_2;
+				goto out_word4;
+			}
+			goto out_unknown;
+		}
+		goto out_unknown;
+	}
+
+	/* Uniform formats: every path below must end in a goto. */
+	switch (desc->channel[0].type) {
+	case UTIL_FORMAT_TYPE_UNSIGNED:
+	case UTIL_FORMAT_TYPE_SIGNED:
+		if (!desc->channel[0].normalized &&
+		    desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
+			goto out_unknown;
+		}
+
+		switch (desc->channel[0].size) {
+		case 4:
+			switch (desc->nr_channels) {
+			case 2:
+				result |= V_0280A0_COLOR_4_4;
+				goto out_word4;
+			case 4:
+				result |= V_0280A0_COLOR_4_4_4_4;
+				goto out_word4;
+			}
+			goto out_unknown;
+		case 8:
+			switch (desc->nr_channels) {
+			case 1:
+				result |= V_0280A0_COLOR_8;
+				goto out_word4;
+			case 2:
+				result |= V_0280A0_COLOR_8_8;
+				goto out_word4;
+			case 4:
+				result |= V_0280A0_COLOR_8_8_8_8;
+				goto out_word4;
+			}
+			goto out_unknown;
+		case 16:
+			switch (desc->nr_channels) {
+			case 1:
+				result |= V_0280A0_COLOR_16;
+				goto out_word4;
+			case 2:
+				result |= V_0280A0_COLOR_16_16;
+				goto out_word4;
+			case 4:
+				result |= V_0280A0_COLOR_16_16_16_16;
+				goto out_word4;
+			}
+		}
+		goto out_unknown;
+
+	case UTIL_FORMAT_TYPE_FLOAT:
+		switch (desc->channel[0].size) {
+		case 16:
+			switch (desc->nr_channels) {
+			case 1:
+				result |= V_0280A0_COLOR_16_FLOAT;
+				goto out_word4;
+			case 2:
+				result |= V_0280A0_COLOR_16_16_FLOAT;
+				goto out_word4;
+			case 4:
+				result |= V_0280A0_COLOR_16_16_16_16_FLOAT;
+				goto out_word4;
+			}
+			goto out_unknown;
+		case 32:
+			switch (desc->nr_channels) {
+			case 1:
+				result |= V_0280A0_COLOR_32_FLOAT;
+				goto out_word4;
+			case 2:
+				result |= V_0280A0_COLOR_32_32_FLOAT;
+				goto out_word4;
+			case 4:
+				result |= V_0280A0_COLOR_32_32_32_32_FLOAT;
+				goto out_word4;
+			}
+		}
+		goto out_unknown; /* unmatched float size/count used to return 0 */
+	default:
+		goto out_unknown; /* other channel types used to return 0 as well */
+	}
+out_word4:
+	if (word4_p)
+		*word4_p = word4;
+	if (yuv_format_p)
+		*yuv_format_p = yuv_format;
+	return result;
+out_unknown:
+	return ~0;
+}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index fb71b1e5d1..53388f822e 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -352,6 +352,61 @@
#define S_028808_ROP3(x) (((x) & 0xFF) << 16)
#define G_028808_ROP3(x) (((x) >> 16) & 0xFF)
#define C_028808_ROP3 0xFF00FFFF
+#define R_028810_PA_CL_CLIP_CNTL 0x028810 /* register offset; for each field below S_ places a value, G_ extracts it, C_ is the AND-mask that clears it */
+#define S_028810_UCP_ENA_0(x) (((x) & 0x1) << 0) /* UCP_ENA_0..5: bits 0-5, one bit each */
+#define G_028810_UCP_ENA_0(x) (((x) >> 0) & 0x1)
+#define C_028810_UCP_ENA_0 0xFFFFFFFE
+#define S_028810_UCP_ENA_1(x) (((x) & 0x1) << 1)
+#define G_028810_UCP_ENA_1(x) (((x) >> 1) & 0x1)
+#define C_028810_UCP_ENA_1 0xFFFFFFFD
+#define S_028810_UCP_ENA_2(x) (((x) & 0x1) << 2)
+#define G_028810_UCP_ENA_2(x) (((x) >> 2) & 0x1)
+#define C_028810_UCP_ENA_2 0xFFFFFFFB
+#define S_028810_UCP_ENA_3(x) (((x) & 0x1) << 3)
+#define G_028810_UCP_ENA_3(x) (((x) >> 3) & 0x1)
+#define C_028810_UCP_ENA_3 0xFFFFFFF7
+#define S_028810_UCP_ENA_4(x) (((x) & 0x1) << 4)
+#define G_028810_UCP_ENA_4(x) (((x) >> 4) & 0x1)
+#define C_028810_UCP_ENA_4 0xFFFFFFEF
+#define S_028810_UCP_ENA_5(x) (((x) & 0x1) << 5)
+#define G_028810_UCP_ENA_5(x) (((x) >> 5) & 0x1)
+#define C_028810_UCP_ENA_5 0xFFFFFFDF
+#define S_028810_PS_UCP_Y_SCALE_NEG(x) (((x) & 0x1) << 13)
+#define G_028810_PS_UCP_Y_SCALE_NEG(x) (((x) >> 13) & 0x1)
+#define C_028810_PS_UCP_Y_SCALE_NEG 0xFFFFDFFF
+#define S_028810_PS_UCP_MODE(x) (((x) & 0x3) << 14) /* the only multi-bit field in this group: bits 14-15 */
+#define G_028810_PS_UCP_MODE(x) (((x) >> 14) & 0x3)
+#define C_028810_PS_UCP_MODE 0xFFFF3FFF
+#define S_028810_CLIP_DISABLE(x) (((x) & 0x1) << 16)
+#define G_028810_CLIP_DISABLE(x) (((x) >> 16) & 0x1)
+#define C_028810_CLIP_DISABLE 0xFFFEFFFF
+#define S_028810_UCP_CULL_ONLY_ENA(x) (((x) & 0x1) << 17)
+#define G_028810_UCP_CULL_ONLY_ENA(x) (((x) >> 17) & 0x1)
+#define C_028810_UCP_CULL_ONLY_ENA 0xFFFDFFFF
+#define S_028810_BOUNDARY_EDGE_FLAG_ENA(x) (((x) & 0x1) << 18)
+#define G_028810_BOUNDARY_EDGE_FLAG_ENA(x) (((x) >> 18) & 0x1)
+#define C_028810_BOUNDARY_EDGE_FLAG_ENA 0xFFFBFFFF
+#define S_028810_DX_CLIP_SPACE_DEF(x) (((x) & 0x1) << 19)
+#define G_028810_DX_CLIP_SPACE_DEF(x) (((x) >> 19) & 0x1)
+#define C_028810_DX_CLIP_SPACE_DEF 0xFFF7FFFF
+#define S_028810_DIS_CLIP_ERR_DETECT(x) (((x) & 0x1) << 20)
+#define G_028810_DIS_CLIP_ERR_DETECT(x) (((x) >> 20) & 0x1)
+#define C_028810_DIS_CLIP_ERR_DETECT 0xFFEFFFFF
+#define S_028810_VTX_KILL_OR(x) (((x) & 0x1) << 21)
+#define G_028810_VTX_KILL_OR(x) (((x) >> 21) & 0x1)
+#define C_028810_VTX_KILL_OR 0xFFDFFFFF
+#define S_028810_DX_LINEAR_ATTR_CLIP_ENA(x) (((x) & 0x1) << 24) /* bits 22-23 have no field defined here */
+#define G_028810_DX_LINEAR_ATTR_CLIP_ENA(x) (((x) >> 24) & 0x1)
+#define C_028810_DX_LINEAR_ATTR_CLIP_ENA 0xFEFFFFFF
+#define S_028810_VTE_VPORT_PROVOKE_DISABLE(x) (((x) & 0x1) << 25)
+#define G_028810_VTE_VPORT_PROVOKE_DISABLE(x) (((x) >> 25) & 0x1)
+#define C_028810_VTE_VPORT_PROVOKE_DISABLE 0xFDFFFFFF
+#define S_028810_ZCLIP_NEAR_DISABLE(x) (((x) & 0x1) << 26)
+#define G_028810_ZCLIP_NEAR_DISABLE(x) (((x) >> 26) & 0x1)
+#define C_028810_ZCLIP_NEAR_DISABLE 0xFBFFFFFF
+#define S_028810_ZCLIP_FAR_DISABLE(x) (((x) & 0x1) << 27)
+#define G_028810_ZCLIP_FAR_DISABLE(x) (((x) >> 27) & 0x1)
+#define C_028810_ZCLIP_FAR_DISABLE 0xF7FFFFFF
#define R_028010_DB_DEPTH_INFO 0x028010
#define S_028010_FORMAT(x) (((x) & 0x7) << 0)
#define G_028010_FORMAT(x) (((x) >> 0) & 0x7)
@@ -599,6 +654,13 @@
#define S_028E0C_OFFSET(x) (((x) & 0xFFFFFFFF) << 0)
#define G_028E0C_OFFSET(x) (((x) >> 0) & 0xFFFFFFFF)
#define C_028E0C_OFFSET 0x00000000
+#define R_028A00_PA_SU_POINT_SIZE 0x028A00 /* register offset; two 16-bit fields follow (S_ places, G_ extracts, C_ clears) */
+#define S_028A00_HEIGHT(x) (((x) & 0xFFFF) << 0) /* HEIGHT: bits 0-15 */
+#define G_028A00_HEIGHT(x) (((x) >> 0) & 0xFFFF)
+#define C_028A00_HEIGHT 0xFFFF0000
+#define S_028A00_WIDTH(x) (((x) & 0xFFFF) << 16) /* WIDTH: bits 16-31 */
+#define G_028A00_WIDTH(x) (((x) >> 16) & 0xFFFF)
+#define C_028A00_WIDTH 0x0000FFFF
#define R_028A40_VGT_GS_MODE 0x028A40
#define S_028A40_MODE(x) (((x) & 0x3) << 0)
#define G_028A40_MODE(x) (((x) >> 0) & 0x3)
@@ -1098,6 +1160,79 @@
#define V_008958_DI_PT_2D_FILL_RECT_LIST 0x0000001A
#define V_008958_DI_PT_2D_LINE_STRIP 0x0000001B
#define V_008958_DI_PT_2D_TRI_STRIP 0x0000001C
+#define R_02881C_PA_CL_VS_OUT_CNTL 0x02881C /* register offset; for each field below S_ places a value, G_ extracts it, C_ is the AND-mask that clears it */
+#define S_02881C_CLIP_DIST_ENA_0(x) (((x) & 0x1) << 0) /* CLIP_DIST_ENA_0..7: bits 0-7, one bit each */
+#define G_02881C_CLIP_DIST_ENA_0(x) (((x) >> 0) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_0 0xFFFFFFFE
+#define S_02881C_CLIP_DIST_ENA_1(x) (((x) & 0x1) << 1)
+#define G_02881C_CLIP_DIST_ENA_1(x) (((x) >> 1) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_1 0xFFFFFFFD
+#define S_02881C_CLIP_DIST_ENA_2(x) (((x) & 0x1) << 2)
+#define G_02881C_CLIP_DIST_ENA_2(x) (((x) >> 2) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_2 0xFFFFFFFB
+#define S_02881C_CLIP_DIST_ENA_3(x) (((x) & 0x1) << 3)
+#define G_02881C_CLIP_DIST_ENA_3(x) (((x) >> 3) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_3 0xFFFFFFF7
+#define S_02881C_CLIP_DIST_ENA_4(x) (((x) & 0x1) << 4)
+#define G_02881C_CLIP_DIST_ENA_4(x) (((x) >> 4) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_4 0xFFFFFFEF
+#define S_02881C_CLIP_DIST_ENA_5(x) (((x) & 0x1) << 5)
+#define G_02881C_CLIP_DIST_ENA_5(x) (((x) >> 5) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_5 0xFFFFFFDF
+#define S_02881C_CLIP_DIST_ENA_6(x) (((x) & 0x1) << 6)
+#define G_02881C_CLIP_DIST_ENA_6(x) (((x) >> 6) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_6 0xFFFFFFBF
+#define S_02881C_CLIP_DIST_ENA_7(x) (((x) & 0x1) << 7)
+#define G_02881C_CLIP_DIST_ENA_7(x) (((x) >> 7) & 0x1)
+#define C_02881C_CLIP_DIST_ENA_7 0xFFFFFF7F
+#define S_02881C_CULL_DIST_ENA_0(x) (((x) & 0x1) << 8) /* CULL_DIST_ENA_0..7: bits 8-15, one bit each */
+#define G_02881C_CULL_DIST_ENA_0(x) (((x) >> 8) & 0x1)
+#define C_02881C_CULL_DIST_ENA_0 0xFFFFFEFF
+#define S_02881C_CULL_DIST_ENA_1(x) (((x) & 0x1) << 9)
+#define G_02881C_CULL_DIST_ENA_1(x) (((x) >> 9) & 0x1)
+#define C_02881C_CULL_DIST_ENA_1 0xFFFFFDFF
+#define S_02881C_CULL_DIST_ENA_2(x) (((x) & 0x1) << 10)
+#define G_02881C_CULL_DIST_ENA_2(x) (((x) >> 10) & 0x1)
+#define C_02881C_CULL_DIST_ENA_2 0xFFFFFBFF
+#define S_02881C_CULL_DIST_ENA_3(x) (((x) & 0x1) << 11)
+#define G_02881C_CULL_DIST_ENA_3(x) (((x) >> 11) & 0x1)
+#define C_02881C_CULL_DIST_ENA_3 0xFFFFF7FF
+#define S_02881C_CULL_DIST_ENA_4(x) (((x) & 0x1) << 12)
+#define G_02881C_CULL_DIST_ENA_4(x) (((x) >> 12) & 0x1)
+#define C_02881C_CULL_DIST_ENA_4 0xFFFFEFFF
+#define S_02881C_CULL_DIST_ENA_5(x) (((x) & 0x1) << 13)
+#define G_02881C_CULL_DIST_ENA_5(x) (((x) >> 13) & 0x1)
+#define C_02881C_CULL_DIST_ENA_5 0xFFFFDFFF
+#define S_02881C_CULL_DIST_ENA_6(x) (((x) & 0x1) << 14)
+#define G_02881C_CULL_DIST_ENA_6(x) (((x) >> 14) & 0x1)
+#define C_02881C_CULL_DIST_ENA_6 0xFFFFBFFF
+#define S_02881C_CULL_DIST_ENA_7(x) (((x) & 0x1) << 15)
+#define G_02881C_CULL_DIST_ENA_7(x) (((x) >> 15) & 0x1)
+#define C_02881C_CULL_DIST_ENA_7 0xFFFF7FFF
+#define S_02881C_USE_VTX_POINT_SIZE(x) (((x) & 0x1) << 16) /* USE_VTX_* and VS_OUT_* flags: bits 16-23, one bit each */
+#define G_02881C_USE_VTX_POINT_SIZE(x) (((x) >> 16) & 0x1)
+#define C_02881C_USE_VTX_POINT_SIZE 0xFFFEFFFF
+#define S_02881C_USE_VTX_EDGE_FLAG(x) (((x) & 0x1) << 17)
+#define G_02881C_USE_VTX_EDGE_FLAG(x) (((x) >> 17) & 0x1)
+#define C_02881C_USE_VTX_EDGE_FLAG 0xFFFDFFFF
+#define S_02881C_USE_VTX_RENDER_TARGET_INDX(x) (((x) & 0x1) << 18)
+#define G_02881C_USE_VTX_RENDER_TARGET_INDX(x) (((x) >> 18) & 0x1)
+#define C_02881C_USE_VTX_RENDER_TARGET_INDX 0xFFFBFFFF
+#define S_02881C_USE_VTX_VIEWPORT_INDX(x) (((x) & 0x1) << 19)
+#define G_02881C_USE_VTX_VIEWPORT_INDX(x) (((x) >> 19) & 0x1)
+#define C_02881C_USE_VTX_VIEWPORT_INDX 0xFFF7FFFF
+#define S_02881C_USE_VTX_KILL_FLAG(x) (((x) & 0x1) << 20)
+#define G_02881C_USE_VTX_KILL_FLAG(x) (((x) >> 20) & 0x1)
+#define C_02881C_USE_VTX_KILL_FLAG 0xFFEFFFFF
+#define S_02881C_VS_OUT_MISC_VEC_ENA(x) (((x) & 0x1) << 21)
+#define G_02881C_VS_OUT_MISC_VEC_ENA(x) (((x) >> 21) & 0x1)
+#define C_02881C_VS_OUT_MISC_VEC_ENA 0xFFDFFFFF
+#define S_02881C_VS_OUT_CCDIST0_VEC_ENA(x) (((x) & 0x1) << 22)
+#define G_02881C_VS_OUT_CCDIST0_VEC_ENA(x) (((x) >> 22) & 0x1)
+#define C_02881C_VS_OUT_CCDIST0_VEC_ENA 0xFFBFFFFF
+#define S_02881C_VS_OUT_CCDIST1_VEC_ENA(x) (((x) & 0x1) << 23)
+#define G_02881C_VS_OUT_CCDIST1_VEC_ENA(x) (((x) >> 23) & 0x1)
+#define C_02881C_VS_OUT_CCDIST1_VEC_ENA 0xFF7FFFFF
#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
#define S_028868_NUM_GPRS(x) (((x) & 0xFF) << 0)
#define G_028868_NUM_GPRS(x) (((x) >> 0) & 0xFF)
diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h
index 3a8405f9b4..8f00a4895a 100644
--- a/src/gallium/drivers/r600/radeon.h
+++ b/src/gallium/drivers/r600/radeon.h
@@ -157,11 +157,42 @@ int radeon_ctx_submit(struct radeon_ctx *ctx);
void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
/*
+ * radeon context functions
+ */
+#pragma pack(1) /* byte-pack the struct below; the default is restored by the "#pragma pack()" after it */
+struct radeon_cs_reloc { /* four 32-bit words; NOTE(review): presumably the relocation record layout shared with the kernel - confirm */
+ uint32_t handle;
+ uint32_t read_domain;
+ uint32_t write_domain;
+ uint32_t flags;
+};
+#pragma pack()
+
+struct radeon_ctx { /* NOTE(review): the field roles noted below are inferred from names only - confirm against the code using this struct */
+ int refcount;
+ struct radeon *radeon;
+ u32 *pm4; /* presumably the PM4 command stream buffer - confirm */
+ u32 cpm4; /* presumably the number of dwords used in pm4 - confirm */
+ u32 draw_cpm4;
+ unsigned id;
+ unsigned next_id;
+ unsigned nreloc; /* presumably the number of entries in reloc - confirm */
+ struct radeon_cs_reloc *reloc;
+ unsigned nbo; /* presumably the number of entries in bo - confirm */
+ struct radeon_bo **bo;
+ unsigned ndraw; /* presumably the number of entries in draw - confirm */
+ struct radeon_draw *cdraw;
+ struct radeon_draw **draw;
+ unsigned nstate; /* presumably the number of entries in state - confirm */
+ struct radeon_state **state;
+};
+
+/*
* R600/R700
*/
-#define R600_NSTATE 1273
-#define R600_NTYPE 25
+#define R600_NSTATE 1280
+#define R600_NTYPE 32
#define R600_CONFIG 0
#define R600_CONFIG_TYPE 0
@@ -207,12 +238,26 @@ void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
#define R600_GS_SAMPLER_BORDER_TYPE 20
#define R600_CB0 1269
#define R600_CB0_TYPE 21
-#define R600_DB 1270
-#define R600_DB_TYPE 22
-#define R600_VGT 1271
-#define R600_VGT_TYPE 23
-#define R600_DRAW 1272
-#define R600_DRAW_TYPE 24
+#define R600_CB1 1270 /* CB1..CB7 take state ids 1270-1276 and type ids 22-28, directly after R600_CB0 (1269 / 21) */
+#define R600_CB1_TYPE 22
+#define R600_CB2 1271
+#define R600_CB2_TYPE 23
+#define R600_CB3 1272
+#define R600_CB3_TYPE 24
+#define R600_CB4 1273
+#define R600_CB4_TYPE 25
+#define R600_CB5 1274
+#define R600_CB5_TYPE 26
+#define R600_CB6 1275
+#define R600_CB6_TYPE 27
+#define R600_CB7 1276
+#define R600_CB7_TYPE 28
+#define R600_DB 1277 /* DB, VGT and DRAW follow with ids 1277-1279 and types 29-31 */
+#define R600_DB_TYPE 29
+#define R600_VGT 1278
+#define R600_VGT_TYPE 30
+#define R600_DRAW 1279 /* highest id: R600_NSTATE is 1280 and R600_NTYPE is 32, one past these */
+#define R600_DRAW_TYPE 31
/* R600_CONFIG */
#define R600_CONFIG__SQ_CONFIG 0
#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1