summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r--src/mesa/drivers/dri/r300/.gitignore13
-rw-r--r--src/mesa/drivers/dri/r300/Makefile10
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.c57
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c13
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h39
-rw-r--r--src/mesa/drivers/dri/r300/r300_draw.c38
-rw-r--r--src/mesa/drivers/dri/r300/r300_emit.c19
-rw-r--r--src/mesa/drivers/dri/r300/r300_emit.h4
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c290
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.h6
-rw-r--r--src/mesa/drivers/dri/r300/r300_ioctl.c57
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h2
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.c24
-rw-r--r--src/mesa/drivers/dri/r300/r300_shader.c64
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c170
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.h1
-rw-r--r--src/mesa/drivers/dri/r300/r300_swtcl.c50
-rw-r--r--src/mesa/drivers/dri/r300/r300_texstate.c45
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c656
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.h4
-rw-r--r--src/mesa/drivers/dri/r300/radeon_nqssadce.c94
-rw-r--r--src/mesa/drivers/dri/r300/radeon_nqssadce.h7
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program.h32
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program_pair.c4
24 files changed, 1056 insertions, 643 deletions
diff --git a/src/mesa/drivers/dri/r300/.gitignore b/src/mesa/drivers/dri/r300/.gitignore
index 3689a6a78e..2f9cd1a987 100644
--- a/src/mesa/drivers/dri/r300/.gitignore
+++ b/src/mesa/drivers/dri/r300/.gitignore
@@ -1,4 +1,15 @@
+radeon_bocs_wrapper.h
+radeon_bo_legacy.[ch]
radeon_chipset.h
+radeon_cmdbuf.h
+radeon_common.[ch]
+radeon_common_context.[ch]
+radeon_cs_legacy.[ch]
+radeon_dma.[ch]
+radeon_fbo.c
+radeon_lock.[ch]
+radeon_mipmap_tree.[ch]
radeon_screen.[ch]
-radeon_span.h
+radeon_span.[ch]
+radeon_texture.[ch]
server
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index bdb09624be..a77209074a 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -13,6 +13,10 @@ ifeq ($(USING_EGL), 1)
EGL_SOURCES = server/radeon_egl.c
endif
+ifeq ($(RADEON_LDFLAGS),)
+CS_SOURCES = radeon_cs_space_drm.c
+endif
+
COMMON_SOURCES = \
../../common/driverfuncs.c \
../common/mm.c \
@@ -59,7 +63,8 @@ DRIVER_SOURCES = \
r300_emit.c \
r300_swtcl.c \
$(RADEON_COMMON_SOURCES) \
- $(EGL_SOURCES)
+ $(EGL_SOURCES) \
+ $(CS_SOURCES)
C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
@@ -100,7 +105,8 @@ COMMON_SYMLINKS = \
radeon_mipmap_tree.h \
radeon_texture.c \
radeon_texture.h \
- radeon_fbo.c
+ radeon_fbo.c \
+ $(CS_SOURCES)
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index 0261a5b1d8..af535037d0 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -232,8 +232,8 @@ void r300_emit_scissor(GLcontext *ctx)
} else {
x1 = 0;
y1 = 0;
- x2 = rrb->width - 1;
- y2 = rrb->height - 1;
+ x2 = rrb->base.Width - 1;
+ y2 = rrb->base.Height - 1;
}
if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
x1 += R300_SCISSORS_OFFSET;
@@ -264,11 +264,22 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
return;
}
+ if (RADEON_DEBUG & DEBUG_STATE)
+ fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height);
cbpitch = (rrb->pitch / rrb->cpp);
if (rrb->cpp == 4)
cbpitch |= R300_COLOR_FORMAT_ARGB8888;
- else
+ else switch (rrb->base._ActualFormat) {
+ case GL_RGB5:
cbpitch |= R300_COLOR_FORMAT_RGB565;
+ break;
+ case GL_RGBA4:
+ cbpitch |= R300_COLOR_FORMAT_ARGB4444;
+ break;
+ case GL_RGB5_A1:
+ cbpitch |= R300_COLOR_FORMAT_ARGB1555;
+ break;
+ }
if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
cbpitch |= R300_COLOR_TILE_ENABLE;
@@ -289,14 +300,14 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
BEGIN_BATCH_NO_AUTOSTATE(3);
OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
OUT_BATCH(0);
- OUT_BATCH(((rrb->width - 1) << R300_SCISSORS_X_SHIFT) |
- ((rrb->height - 1) << R300_SCISSORS_Y_SHIFT));
+ OUT_BATCH(((rrb->base.Width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((rrb->base.Height - 1) << R300_SCISSORS_Y_SHIFT));
END_BATCH();
BEGIN_BATCH_NO_AUTOSTATE(16);
for (i = 0; i < 4; i++) {
OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT));
- OUT_BATCH(((rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->height - 1) << R300_CLIPRECT_Y_SHIFT));
+ OUT_BATCH(((rrb->base.Width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->base.Height - 1) << R300_CLIPRECT_Y_SHIFT));
}
OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
OUT_BATCH(0xAAAA);
@@ -308,15 +319,15 @@ static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) |
(R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT));
- OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) |
- ((rrb->height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT));
+ OUT_BATCH(((rrb->base.Width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) |
+ ((rrb->base.Height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT));
END_BATCH();
BEGIN_BATCH_NO_AUTOSTATE(16);
for (i = 0; i < 4; i++) {
OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT));
- OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->width - 1) << R300_CLIPRECT_X_SHIFT) |
- ((R300_SCISSORS_OFFSET + rrb->height - 1) << R300_CLIPRECT_Y_SHIFT));
+ OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->base.Width - 1) << R300_CLIPRECT_X_SHIFT) |
+ ((R300_SCISSORS_OFFSET + rrb->base.Height - 1) << R300_CLIPRECT_Y_SHIFT));
}
OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
OUT_BATCH(0xAAAA);
@@ -333,23 +344,33 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
BATCH_LOCALS(&r300->radeon);
struct radeon_renderbuffer *rrb;
uint32_t zbpitch;
+ uint32_t dw;
rrb = radeon_get_depthbuffer(&r300->radeon);
if (!rrb)
return;
zbpitch = (rrb->pitch / rrb->cpp);
- if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
- zbpitch |= R300_DEPTHMACROTILE_ENABLE;
- }
- if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
- zbpitch |= R300_DEPTHMICROTILE_TILED;
+ if (!r300->radeon.radeonScreen->kernel_mm) {
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+ zbpitch |= R300_DEPTHMACROTILE_ENABLE;
+ }
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
+ zbpitch |= R300_DEPTHMICROTILE_TILED;
+ }
}
- BEGIN_BATCH_NO_AUTOSTATE(6);
+ dw = 6;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ BEGIN_BATCH_NO_AUTOSTATE(dw);
OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
- OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch);
+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1);
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ OUT_BATCH(zbpitch);
+ else
+ OUT_BATCH_RELOC(cbpitch, rrb->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
}
@@ -503,7 +524,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
- if (is_r500) {
+ if (is_r500 && !r300->radeon.radeonScreen->kernel_mm) {
ALLOC_STATE(vap_index_offset, always, 2, 0);
r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
r300->hw.vap_index_offset.cmd[1] = 0;
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 394521a051..6f3aab986d 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -124,6 +124,7 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_texture_lod_bias", NULL},
{"GL_EXT_texture_mirror_clamp", NULL},
{"GL_EXT_texture_rectangle", NULL},
+ {"GL_EXT_texture_sRGB", NULL},
{"GL_EXT_vertex_array_bgra", NULL},
{"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
{"GL_ATI_texture_env_combine3", NULL},
@@ -269,10 +270,16 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
ctx->Const.MaxTextureLodBias = 16.0;
- if (screen->chip_family >= CHIP_FAMILY_RV515)
+ if (screen->chip_family >= CHIP_FAMILY_RV515) {
ctx->Const.MaxTextureLevels = 13;
- else
+ ctx->Const.MaxCubeTextureLevels = 13;
+ ctx->Const.MaxTextureRectSize = 4096;
+ }
+ else {
ctx->Const.MaxTextureLevels = 12;
+ ctx->Const.MaxCubeTextureLevels = 12;
+ ctx->Const.MaxTextureRectSize = 2048;
+ }
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
@@ -409,6 +416,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
r300InitConstValues(ctx, screen);
+ _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
+
/* Initialize the software rasterizer and helper modules.
*/
_swrast_CreateContext(ctx);
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 026c33c67c..f7af7d4e57 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -405,12 +405,13 @@ struct r300_hw_state {
#undef TAG
struct r300_vertex_program {
+ struct gl_vertex_program *Base;
struct r300_vertex_program *next;
struct r300_vertex_program_key {
- GLuint InputsRead;
- GLuint OutputsWritten;
- GLuint OutputsAdded;
+ GLuint FpReads;
+ GLuint FogAttr;
+ GLuint WPosAttr;
} key;
struct r300_vertex_shader_hw_code {
@@ -426,13 +427,17 @@ struct r300_vertex_program {
int pos_end;
int num_temporaries; /* Number of temp vars used by program */
- int wpos_idx;
int inputs[VERT_ATTRIB_MAX];
int outputs[VERT_RESULT_MAX];
};
struct r300_vertex_program_cont {
- struct gl_vertex_program mesa_program; /* Must be first */
+ /* This is the unmodified vertex program mesa provided us with.
+ * We need to keep it unchanged because we may need to create another
+ * hw specific vertex program based on this.
+ */
+ struct gl_vertex_program mesa_program;
+ /* This is the list of hw specific vertex programs derived from mesa_program */
struct r300_vertex_program *progs;
};
@@ -546,7 +551,7 @@ struct r500_fragment_program_code {
* to render with that program.
*/
struct r300_fragment_program {
- struct gl_fragment_program Base;
+ struct gl_program *Base;
GLboolean translated;
GLboolean error;
@@ -559,6 +564,23 @@ struct r300_fragment_program {
GLboolean writes_depth;
GLuint optimization;
+
+ struct r300_fragment_program *next;
+
+ /* attribute that we are sending the WPOS in */
+ gl_frag_attrib wpos_attr;
+ /* attribute that we are sending the fog coordinate in */
+ gl_frag_attrib fog_attr;
+};
+
+struct r300_fragment_program_cont {
+ /* This is the unmodified fragment program mesa provided us with.
+ * We need to keep it unchanged because we may need to create another
+ * hw specific fragment program based on this.
+ */
+ struct gl_fragment_program Base;
+ /* This is the list of hw specific fragment programs derived from Base */
+ struct r300_fragment_program *progs;
};
struct r300_fragment_program_compiler {
@@ -633,6 +655,7 @@ struct r300_context {
struct r300_hw_state hw;
struct r300_vertex_program *selected_vp;
+ struct r300_fragment_program *selected_fp;
/* Vertex buffers
*/
@@ -664,11 +687,7 @@ extern GLboolean r300CreateContext(const __GLcontextModes * glVisual,
__DRIcontextPrivate * driContextPriv,
void *sharedContextPrivate);
-extern void r300SelectVertexShader(r300ContextPtr r300);
extern void r300InitShaderFuncs(struct dd_function_table *functions);
-extern int r300VertexProgUpdateParams(GLcontext * ctx,
- struct r300_vertex_program_cont *vp,
- float *dst);
extern void r300InitShaderFunctions(r300ContextPtr r300);
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index cc5650fb7c..9769ff5399 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -195,6 +195,11 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
}
GLfloat *dst_ptr, *tmp;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ count = 1;
+
tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count);
switch (input->Type) {
@@ -228,13 +233,17 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st
type = GL_FLOAT;
r300_attr.free_needed = GL_TRUE;
r300_attr.data = tmp;
- r300_attr.stride = sizeof(GLfloat) * input->Size;
+ if (input->StrideB == 0) {
+ r300_attr.stride = 0;
+ } else {
+ r300_attr.stride = sizeof(GLfloat) * input->Size;
+ }
r300_attr.dwords = input->Size;
} else {
type = input->Type;
r300_attr.free_needed = GL_FALSE;
r300_attr.data = (GLvoid *)src_ptr;
- r300_attr.stride = stride;
+ r300_attr.stride = input->StrideB;
r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4;
}
@@ -332,7 +341,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar
{
int i, tmp;
- tmp = r300->selected_vp->key.InputsRead;
+ tmp = r300->selected_vp->Base->Base.InputsRead;
i = 0;
vbuf->num_attribs = 0;
while (tmp) {
@@ -417,12 +426,18 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
r300FixupIndexBuffer(ctx, ib, bo, &nr_bo);
+ /* ensure we have the cmd buf space in advance to cover
+ * the state + DMA AOS pointers */
+ rcommonEnsureCmdBufSpace(&r300->radeon,
+ r300->radeon.hw.max_state_size + (50*sizeof(int)),
+ __FUNCTION__);
+
r300SetVertexFormat(ctx, arrays, max_index + 1, bo, &nr_bo);
if (r300->fallback)
return GL_FALSE;
- r300SetupVAP(ctx, r300->selected_vp->key.InputsRead, r300->selected_vp->key.OutputsWritten);
+ r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten);
r300UpdateShaderStates(r300);
@@ -442,8 +457,6 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx,
return GL_TRUE;
}
-/* TODO: rebase if number of indices in any of primitives is > 8192 for 32bit indices or 16384 for 16bit indices */
-
static void r300DrawPrims(GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
@@ -452,12 +465,25 @@ static void r300DrawPrims(GLcontext *ctx,
GLuint min_index,
GLuint max_index)
{
+ struct split_limits limits;
GLboolean retval;
+ if (ib)
+ limits.max_verts = 0xffffffff;
+ else
+ limits.max_verts = 65535;
+
+ limits.max_indices = 65535;
+ limits.max_vb_size = 1024*1024;
+
if (min_index) {
vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims );
return;
}
+ if ((ib && ib->count > 65535)) {
+ vbo_split_prims (ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims, &limits);
+ return;
+ }
/* Make an attempt at drawing */
retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
index c3817721dc..feb3370f37 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_emit.c
@@ -81,17 +81,17 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
return vic_1;
}
-GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
+GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes)
{
GLuint ret = 0;
if (vp_writes & (1 << VERT_RESULT_HPOS))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
- if (vp_writes & (1 << VERT_RESULT_COL0) && fp_reads & FRAG_BIT_COL0)
+ if (vp_writes & (1 << VERT_RESULT_COL0))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
- if (vp_writes & (1 << VERT_RESULT_COL1) && fp_reads & FRAG_BIT_COL1)
+ if (vp_writes & (1 << VERT_RESULT_COL1))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
/* Two sided lighting works only if all 4 colors are written */
@@ -105,26 +105,17 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
return ret;
}
-GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads)
+GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes)
{
GLuint i, ret = 0, first_free_texcoord = 0;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (vp_writes & (1 << (VERT_RESULT_TEX0 + i)) && fp_reads & FRAG_BIT_TEX(i)) {
+ if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) {
ret |= (4 << (3 * first_free_texcoord));
++first_free_texcoord;
}
}
- if (fp_reads & FRAG_BIT_WPOS) {
- ret |= (4 << (3 * first_free_texcoord));
- ++first_free_texcoord;
- }
-
- if (vp_writes & (1 << VERT_RESULT_FOGC) && fp_reads & FRAG_BIT_FOGC) {
- ret |= 4 << (3 * first_free_texcoord);
- }
-
if (first_free_texcoord > 8) {
fprintf(stderr, "\tout of free texcoords\n");
_mesa_exit(-1);
diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
index 2fb8b82d3a..3f8c60ffae 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.h
+++ b/src/mesa/drivers/dri/r300/r300_emit.h
@@ -225,7 +225,7 @@ extern void r300EmitCacheFlush(r300ContextPtr rmesa);
extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead);
extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
-extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads);
-extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads);
+extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes);
+extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes);
#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index abc8757ba1..f5c4c0f4a0 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -50,13 +50,6 @@
#include "radeon_program.h"
#include "radeon_program_alu.h"
-static void update_params(GLcontext *ctx, struct gl_fragment_program *fp)
-{
- /* Ask Mesa nicely to fill in ParameterValues for us */
- if (fp->Base.Parameters)
- _mesa_load_state_parameters(ctx, fp->Base.Parameters);
-}
-
static void nqssadce_init(struct nqssadce_state* s)
{
s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
@@ -74,10 +67,12 @@ static void nqssadce_init(struct nqssadce_state* s)
*/
static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
{
- GLuint InputsRead = compiler->fp->Base.Base.InputsRead;
+ GLuint InputsRead = compiler->fp->Base->InputsRead;
- if (!(InputsRead & FRAG_BIT_WPOS))
+ if (!(InputsRead & FRAG_BIT_WPOS)) {
+ compiler->fp->wpos_attr = FRAG_ATTRIB_MAX;
return;
+ }
static gl_state_index tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
@@ -85,10 +80,23 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
struct prog_instruction *fpi;
GLuint window_index;
int i = 0;
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(InputsRead & (1 << i))) {
+ InputsRead &= ~(1 << FRAG_ATTRIB_WPOS);
+ InputsRead |= 1 << i;
+ compiler->fp->Base->InputsRead = InputsRead;
+ compiler->fp->wpos_attr = i;
+ break;
+ }
+ }
+
GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
_mesa_insert_instructions(compiler->program, 0, 3);
fpi = compiler->program->Instructions;
+ i = 0;
/* perspective divide */
fpi[i].Opcode = OPCODE_RCP;
@@ -99,7 +107,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
i++;
@@ -111,7 +119,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
- fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ fpi[i].SrcReg[0].Index = compiler->fp->wpos_attr;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
@@ -154,6 +162,57 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
}
}
+
+/**
+ * Rewrite fragment.fogcoord to use a texture coordinate slot.
+ * Note that fogcoord is forced into an X001 pattern, and this enforcement
+ * is done here.
+ *
+ * See also the counterpart rewriting for vertex programs.
+ */
+static void rewriteFog(struct r300_fragment_program_compiler *compiler)
+{
+ struct r300_fragment_program *fp = compiler->fp;
+ GLuint InputsRead;
+ int i;
+
+ InputsRead = fp->Base->InputsRead;
+
+ if (!(InputsRead & FRAG_BIT_FOGC)) {
+ fp->fog_attr = FRAG_ATTRIB_MAX;
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(InputsRead & (1 << i))) {
+ InputsRead &= ~(1 << FRAG_ATTRIB_FOGC);
+ InputsRead |= 1 << i;
+ fp->Base->InputsRead = InputsRead;
+ fp->fog_attr = i;
+ break;
+ }
+ }
+
+ {
+ struct prog_instruction *inst;
+
+ inst = compiler->program->Instructions;
+ while (inst->Opcode != OPCODE_END) {
+ const int src_regs = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < src_regs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_INPUT && inst->SrcReg[i].Index == FRAG_ATTRIB_FOGC) {
+ inst->SrcReg[i].Index = fp->fog_attr;
+ inst->SrcReg[i].Swizzle = combine_swizzles(
+ MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE),
+ inst->SrcReg[i].Swizzle);
+ }
+ }
+ ++inst;
+ }
+ }
+}
+
static GLuint build_dtm(GLuint depthmode)
{
switch(depthmode) {
@@ -175,7 +234,7 @@ static GLuint build_func(GLuint comparefunc)
*/
static void build_state(
r300ContextPtr r300,
- struct r300_fragment_program *fp,
+ struct gl_fragment_program *fp,
struct r300_fragment_program_external_state *state)
{
int unit;
@@ -183,7 +242,7 @@ static void build_state(
_mesa_bzero(state, sizeof(*state));
for(unit = 0; unit < 16; ++unit) {
- if (fp->Base.Base.ShadowSamplers & (1 << unit)) {
+ if (fp->Base.ShadowSamplers & (1 << unit)) {
struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
@@ -192,100 +251,149 @@ static void build_state(
}
}
-void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp)
+static void rewrite_depth_out(struct gl_program *prog)
{
- r300ContextPtr r300 = R300_CONTEXT(ctx);
- struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp;
- struct r300_fragment_program_external_state state;
+ struct prog_instruction *inst;
- build_state(r300, r300_fp, &state);
- if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) {
- /* TODO: cache compiled programs */
- r300_fp->translated = GL_FALSE;
- _mesa_memcpy(&r300_fp->state, &state, sizeof(state));
- }
-
- if (!r300_fp->translated) {
- struct r300_fragment_program_compiler compiler;
+ for (inst = prog->Instructions; inst->Opcode != OPCODE_END; ++inst) {
+ if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != FRAG_RESULT_DEPTH)
+ continue;
- compiler.r300 = r300;
- compiler.fp = r300_fp;
- compiler.code = &r300_fp->code;
- compiler.program = _mesa_clone_program(ctx, &fp->Base);
+ if (inst->DstReg.WriteMask & WRITEMASK_Z) {
+ inst->DstReg.WriteMask = WRITEMASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ continue;
+ }
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- fflush(stdout);
- _mesa_printf("Fragment Program: Initial program:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
+ switch (inst->Opcode) {
+ case OPCODE_FRC:
+ case OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case OPCODE_ADD:
+ case OPCODE_MAX:
+ case OPCODE_MIN:
+ case OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case OPCODE_CMP:
+ case OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
}
+ }
+}
- insert_WPOS_trailer(&compiler);
+void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program_compiler compiler;
+
+ compiler.r300 = r300;
+ compiler.fp = fp;
+ compiler.code = &fp->code;
+ compiler.program = fp->Base;
+
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ fflush(stdout);
+ _mesa_printf("Fragment Program: Initial program:\n");
+ _mesa_print_program(compiler.program);
+ fflush(stdout);
+ }
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct radeon_program_transformation transformations[] = {
- { &r500_transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformDeriv, 0 },
- { &radeonTransformTrigScale, 0 }
- };
- radeonLocalTransform(ctx, compiler.program, 4, transformations);
- } else {
- struct radeon_program_transformation transformations[] = {
- { &r300_transform_TEX, &compiler },
- { &radeonTransformALU, 0 },
- { &radeonTransformTrigSimple, 0 }
- };
- radeonLocalTransform(ctx, compiler.program, 3, transformations);
- }
+ insert_WPOS_trailer(&compiler);
+
+ rewriteFog(&compiler);
+
+ rewrite_depth_out(compiler.program);
+
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ struct radeon_program_transformation transformations[] = {
+ { &r500_transform_TEX, &compiler },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformDeriv, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(ctx, compiler.program, 4, transformations);
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_TEX, &compiler },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(ctx, compiler.program, 3, transformations);
+ }
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Fragment Program: After native rewrite:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Fragment Program: After native rewrite:\n");
+ _mesa_print_program(compiler.program);
+ fflush(stdout);
+ }
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r500FPIsNativeSwizzle,
- .BuildSwizzle = &r500FPBuildSwizzle,
- .RewriteDepthOut = GL_TRUE
- };
- radeonNqssaDce(ctx, compiler.program, &nqssadce);
- } else {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle,
- .RewriteDepthOut = GL_TRUE
- };
- radeonNqssaDce(ctx, compiler.program, &nqssadce);
- }
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r500FPIsNativeSwizzle,
+ .BuildSwizzle = &r500FPBuildSwizzle
+ };
+ radeonNqssaDce(ctx, compiler.program, &nqssadce);
+ } else {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadce_init,
+ .IsNativeSwizzle = &r300FPIsNativeSwizzle,
+ .BuildSwizzle = &r300FPBuildSwizzle
+ };
+ radeonNqssaDce(ctx, compiler.program, &nqssadce);
+ }
- if (RADEON_DEBUG & DEBUG_PIXEL) {
- _mesa_printf("Compiler: after NqSSA-DCE:\n");
- _mesa_print_program(compiler.program);
- fflush(stdout);
- }
+ if (RADEON_DEBUG & DEBUG_PIXEL) {
+ _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ _mesa_print_program(compiler.program);
+ fflush(stdout);
+ }
- if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler))
- r300_fp->error = GL_TRUE;
+ if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler))
+ fp->error = GL_TRUE;
- /* Subtle: Rescue any parameters that have been added during transformations */
- _mesa_free_parameter_list(fp->Base.Parameters);
- fp->Base.Parameters = compiler.program->Parameters;
- compiler.program->Parameters = 0;
+ fp->translated = GL_TRUE;
- _mesa_reference_program(ctx, &compiler.program, NULL);
+ if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
+ r300->vtbl.FragmentProgramDump(&fp->code);
+}
- r300_fp->translated = GL_TRUE;
+struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program_cont *fp_list;
+ struct r300_fragment_program *fp;
+ struct r300_fragment_program_external_state state;
- r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+ fp_list = (struct r300_fragment_program_cont *)ctx->FragmentProgram._Current;
+ build_state(r300, ctx->FragmentProgram._Current, &state);
- if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
- r300->vtbl.FragmentProgramDump(&r300_fp->code);
+ fp = fp_list->progs;
+ while (fp) {
+ if (_mesa_memcmp(&fp->state, &state, sizeof(state)) == 0) {
+ return r300->selected_fp = fp;
+ }
+ fp = fp->next;
}
- update_params(ctx, fp);
+ fp = _mesa_calloc(sizeof(struct r300_fragment_program));
+
+ fp->state = state;
+ fp->translated = GL_FALSE;
+ fp->Base = _mesa_clone_program(ctx, &ctx->FragmentProgram._Current->Base);
+
+ fp->next = fp_list->progs;
+ fp_list->progs = fp;
+
+ return r300->selected_fp = fp;
}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
index 85ea86fecb..5e103ee408 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.h
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
@@ -30,6 +30,10 @@
#include "main/mtypes.h"
-extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp);
+#include "r300_context.h"
+
+extern void r300TranslateFragmentShader(GLcontext *ctx, struct r300_fragment_program *fp);
+
+struct r300_fragment_program *r300SelectFragmentShader(GLcontext *ctx);
#endif
diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
index 104079b4db..ddabd53992 100644
--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
@@ -68,11 +68,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
static void r300EmitClearState(GLcontext * ctx);
-static void r300UserClear(GLcontext *ctx, GLuint mask)
-{
- radeon_clear_tris(ctx, mask);
-}
-
static void r300ClearBuffer(r300ContextPtr r300, int flags,
struct radeon_renderbuffer *rrb,
struct radeon_renderbuffer *rrbd)
@@ -540,19 +535,50 @@ static void r300EmitClearState(GLcontext * ctx)
}
}
-static void r300KernelClear(GLcontext *ctx, GLuint flags)
+static int r300KernelClear(GLcontext *ctx, GLuint flags)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
__DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon);
struct radeon_framebuffer *rfb = dPriv->driverPrivate;
struct radeon_renderbuffer *rrb;
struct radeon_renderbuffer *rrbd;
- int bits = 0;
+ int bits = 0, ret;
/* Make sure it fits there. */
+ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs);
+
+ if (flags & BUFFER_BIT_COLOR0) {
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0);
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+ rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ if (flags & BUFFER_BIT_FRONT_LEFT) {
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT);
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+ rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ if (flags & BUFFER_BIT_BACK_LEFT) {
+ rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT);
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+ rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
+ if (rrbd) {
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs,
+ rrbd->bo, 0, RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ ret = radeon_cs_space_check(r300->radeon.cmdbuf.cs);
+ if (ret)
+ return -1;
+
rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__);
if (flags || bits)
r300EmitClearState(ctx);
+
rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
if (rrbd && (flags & BUFFER_BIT_DEPTH))
bits |= CLEARBUFFER_DEPTH;
@@ -582,6 +608,7 @@ static void r300KernelClear(GLcontext *ctx, GLuint flags)
r300ClearBuffer(r300, bits, NULL, rrbd);
COMMIT_BATCH();
+ return 0;
}
/**
@@ -593,7 +620,7 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
__DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon);
const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);
GLbitfield swrast_mask = 0, tri_mask = 0;
- int i;
+ int i, ret;
struct gl_framebuffer *fb = ctx->DrawBuffer;
if (RADEON_DEBUG & DEBUG_IOCTL)
@@ -614,6 +641,8 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
if (colorMask == ~0)
tri_mask |= (mask & BUFFER_BITS_COLOR);
+ else
+ tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
/* HW stencil */
@@ -643,12 +672,18 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
/* SW fallback clearing */
swrast_mask = mask & ~tri_mask;
+ ret = 0;
if (tri_mask) {
if (r300->radeon.radeonScreen->kernel_mm)
- r300UserClear(ctx, tri_mask);
- else
- r300KernelClear(ctx, tri_mask);
+ radeonUserClear(ctx, tri_mask);
+ else {
+ /* if kernel clear fails due to size restraints fallback */
+ ret = r300KernelClear(ctx, tri_mask);
+ if (ret < 0)
+ swrast_mask |= tri_mask;
+ }
}
+
if (swrast_mask) {
if (RADEON_DEBUG & DEBUG_FALLBACKS)
fprintf(stderr, "%s: swrast clear, mask: %x\n",
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index c22616b95f..357c600af9 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -1467,6 +1467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TX_FORMAT_3D (1 << 25)
# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
+# define R300_TX_FORMAT_GAMMA (1 << 21)
+
/* gap */
/* Floating point formats */
/* Note - hardware supports both 16 and 32 bit floating point */
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index 1356305a21..08d67b73ed 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -243,7 +243,6 @@ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
offset);
-
if (!rmesa->radeon.radeonScreen->kernel_mm) {
BEGIN_BATCH(sz+2+(nr * 2));
OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
@@ -360,31 +359,15 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
if (type < 0 || num_verts <= 0)
return;
- /* Make space for at least 64 dwords.
+ /* Make space for at least 128 dwords.
* This is supposed to ensure that we can get all rendering
* commands into a single command buffer.
*/
rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__);
if (rmesa->ind_buf.ptr) {
- if (num_verts > 65535) {
- /* not implemented yet */
- WARN_ONCE("Too many elts\n");
- return;
- }
- /* Note: The following is incorrect, but it's the best I can do
- * without a major refactoring of how DMA memory is handled.
- * The problem: Ensuring that both vertex arrays *and* index
- * arrays are at the right position, and then ensuring that
- * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
- * at once.
- *
- * So why is the following incorrect? Well, it seems like
- * allocating the index array might actually evict the vertex
- * arrays. *sigh*
- */
r300EmitElts(ctx, num_verts);
- r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
+ r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
if (rmesa->radeon.radeonScreen->kernel_mm) {
BEGIN_BATCH_NO_AUTOSTATE(2);
OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
@@ -469,7 +452,8 @@ void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
if (mode) {
if ((fallback_warn & bit) == 0) {
- _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit));
+ if (RADEON_DEBUG & DEBUG_FALLBACKS)
+ _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit));
fallback_warn |= bit;
}
rmesa->fallback |= bit;
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
index 0133b83796..62228a3786 100644
--- a/src/mesa/drivers/dri/r300/r300_shader.c
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -32,22 +32,45 @@
#include "r300_context.h"
#include "r300_fragprog_common.h"
+static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache)
+{
+ struct r300_fragment_program *tmp, *fp = cache->progs;
+
+ while (fp) {
+ tmp = fp->next;
+ _mesa_reference_program(ctx, &fp->Base, NULL);
+ _mesa_free(fp);
+ fp = tmp;
+ }
+}
+
+static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache)
+{
+ struct r300_vertex_program *tmp, *vp = cache->progs;
+
+ while (vp) {
+ tmp = vp->next;
+ _mesa_reference_vertprog(ctx, &vp->Base, NULL);
+ _mesa_free(vp);
+ vp = tmp;
+ }
+}
+
static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
GLuint id)
{
struct r300_vertex_program_cont *vp;
- struct r300_fragment_program *fp;
+ struct r300_fragment_program_cont *fp;
switch (target) {
case GL_VERTEX_STATE_PROGRAM_NV:
case GL_VERTEX_PROGRAM_ARB:
vp = CALLOC_STRUCT(r300_vertex_program_cont);
- return _mesa_init_vertex_program(ctx, &vp->mesa_program,
- target, id);
+ return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
case GL_FRAGMENT_PROGRAM_NV:
case GL_FRAGMENT_PROGRAM_ARB:
- fp = CALLOC_STRUCT(r300_fragment_program);
+ fp = CALLOC_STRUCT(r300_fragment_program_cont);
return _mesa_init_fragment_program(ctx, &fp->Base, target, id);
default:
@@ -59,21 +82,35 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
+ struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+ struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
+
+ switch (prog->Target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vp);
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ freeFragProgCache(ctx, fp);
+ break;
+ }
+
_mesa_delete_program(ctx, prog);
}
static void
r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
- struct r300_vertex_program_cont *vp = (void *)prog;
- struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog;
+ struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+ struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vp);
vp->progs = NULL;
break;
case GL_FRAGMENT_PROGRAM_ARB:
- r300_fp->translated = GL_FALSE;
+ freeFragProgCache(ctx, fp);
+ fp->progs = NULL;
break;
}
@@ -85,13 +122,18 @@ static GLboolean
r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
- struct r300_fragment_program *fp = (struct r300_fragment_program *)prog;
+ struct r300_fragment_program *fp = r300SelectFragmentShader(ctx);
if (!fp->translated)
- r300TranslateFragmentShader(ctx, &fp->Base);
+ r300TranslateFragmentShader(ctx, fp);
return !fp->error;
- } else
- return GL_TRUE;
+ } else {
+ struct r300_vertex_program *vp = r300SelectVertexShader(ctx);
+ if (!vp->translated)
+ r300TranslateVertexShader(vp);
+
+ return !vp->error;
+ }
}
void r300InitShaderFuncs(struct dd_function_table *functions)
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index c0eda977db..12fbf281d9 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -434,6 +434,10 @@ static void r300UpdateCulling(GLcontext * ctx)
break;
}
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ val ^= R300_FRONT_FACE_CW;
+
R300_STATECHANGE(r300, cul);
r300->hw.cul.cmd[R300_CUL_CULL] = val;
}
@@ -452,9 +456,9 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
{
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
- return (fp && fp->writes_depth);
+ return ctx->FragmentProgram._Current && r300->selected_fp->writes_depth;
}
static void r300SetEarlyZState(GLcontext * ctx)
@@ -1093,24 +1097,25 @@ r300FetchStateParameter(GLcontext * ctx,
* Update R300's own internal state parameters.
* For now just STATE_R300_WINDOW_DIMENSION
*/
-void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
{
- struct r300_fragment_program *fp;
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct gl_program_parameter_list *paramList;
GLuint i;
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
return;
- fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
- if (!fp)
+ if (!ctx->FragmentProgram._Current || !rmesa->selected_fp)
return;
- paramList = fp->Base.Base.Parameters;
+ paramList = rmesa->selected_fp->Base->Parameters;
if (!paramList)
return;
+ _mesa_load_state_parameters(ctx, paramList);
+
for (i = 0; i < paramList->NumParameters; i++) {
if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
r300FetchStateParameter(ctx,
@@ -1225,8 +1230,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
- struct r300_fragment_program_code *code = &fp->code.r300;
+ struct r300_fragment_program_code *code = &r300->selected_fp->code.r300;
R300_STATECHANGE(r300, fpt);
@@ -1266,9 +1270,9 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
int i;
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
- struct r500_fragment_program_code *code = &fp->code.r500;
+ struct r500_fragment_program_code *code = &r300->selected_fp->code.r500;
/* find all the texture instructions and relocate the texture units */
for (i = 0; i < code->inst_end + 1; i++) {
@@ -1316,8 +1320,6 @@ static void r300SetupTextures(GLcontext * ctx)
int hw_tmu = 0;
int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */
int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, };
- struct r300_fragment_program *fp = (struct r300_fragment_program *)
- (char *)ctx->FragmentProgram._Current;
R300_STATECHANGE(r300, txe);
R300_STATECHANGE(r300, tex.filter);
@@ -1420,7 +1422,7 @@ static void r300SetupTextures(GLcontext * ctx)
cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
- if (fp->Base.UsesKill && last_hw_tmu < 0) {
+ if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) {
// The KILL operation requires the first texture unit
// to be enabled.
r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
@@ -1458,11 +1460,11 @@ static void r300SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+ InputsRead = r300->selected_fp->Base->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1521,29 +1523,6 @@ static void r300SetupRSUnit(GLcontext * ctx)
++fp_reg;
}
- if (InputsRead & FRAG_BIT_WPOS) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_WPOS;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- }
-
- if (InputsRead & FRAG_BIT_FOGC) {
- if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0);
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count);
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_FOGC;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- } else {
- WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
- }
- }
-
/* Setup default color if no color or tex was set */
if (rs_tex_count == 0 && col_ip == 0) {
r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0);
@@ -1575,11 +1554,11 @@ static void r500SetupRSUnit(GLcontext * ctx)
hw_tcl_on = r300->options.hw_tcl_enabled;
if (hw_tcl_on)
- OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten;
+ OutputsWritten.vp_outputs = r300->selected_vp->Base->Base.OutputsWritten;
else
RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+ InputsRead = r300->selected_fp->Base->InputsRead;
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
@@ -1642,36 +1621,6 @@ static void r500SetupRSUnit(GLcontext * ctx)
++fp_reg;
}
- if (InputsRead & FRAG_BIT_WPOS) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
- ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
- ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
- ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_WPOS;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- }
-
- if (InputsRead & FRAG_BIT_FOGC) {
- if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
- r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) |
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
- (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
- (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
- r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
- InputsRead &= ~FRAG_BIT_FOGC;
- rs_tex_count += 4;
- ++tex_ip;
- ++fp_reg;
- } else {
- WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
- }
- }
-
/* Setup default color if no color or tex was set */
if (rs_tex_count == 0 && col_ip == 0) {
r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0);
@@ -2036,43 +1985,51 @@ static void r300ResetHwState(r300ContextPtr r300)
void r300UpdateShaders(r300ContextPtr rmesa)
{
- GLcontext *ctx;
- struct r300_fragment_program *fp;
- int i;
-
- ctx = rmesa->radeon.glCtx;
- fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+ GLcontext *ctx = rmesa->radeon.glCtx;
/* should only happenen once, just after context is created */
/* TODO: shouldn't we fallback to sw here? */
- if (!fp) {
+ if (!ctx->FragmentProgram._Current) {
_mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
return;
}
- if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) {
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- rmesa->temp_attrib[i] =
- TNL_CONTEXT(ctx)->vb.AttribPtr[i];
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
- &rmesa->dummy_attrib[i];
- }
+ {
+ struct r300_fragment_program *fp;
- _tnl_UpdateFixedFunctionProgram(ctx);
+ fp = r300SelectFragmentShader(ctx);
+ if (!fp->translated)
+ r300TranslateFragmentShader(ctx, fp);
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
- rmesa->temp_attrib[i];
- }
-
- r300SelectVertexShader(rmesa);
- r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, rmesa->selected_vp->error);
+ r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
}
- if (!fp->translated || rmesa->radeon.NewGLState)
- r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
+ if (rmesa->options.hw_tcl_enabled) {
+ struct r300_vertex_program *vp;
+
+ if (rmesa->radeon.NewGLState) {
+ int i;
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ rmesa->temp_attrib[i] =
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i];
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+ &rmesa->dummy_attrib[i];
+ }
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+
+ for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ TNL_CONTEXT(ctx)->vb.AttribPtr[i] =
+ rmesa->temp_attrib[i];
+ }
+ }
+
+ vp = r300SelectVertexShader(ctx);
+ if (!vp->translated)
+ r300TranslateVertexShader(vp);
- r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
+ r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error);
+ }
r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
rmesa->radeon.NewGLState = 0;
@@ -2102,7 +2059,7 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
static void r300SetupPixelShader(GLcontext *ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+ struct r300_fragment_program *fp = rmesa->selected_fp;
struct r300_fragment_program_code *code;
int i, k;
@@ -2148,8 +2105,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, fpp);
rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx,
- &fp->Base.Base, code->constant[i]);
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
@@ -2174,7 +2130,7 @@ static void r300SetupPixelShader(GLcontext *ctx)
static void r500SetupPixelShader(GLcontext *ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
- struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
+ struct r300_fragment_program *fp = rmesa->selected_fp;
int i;
struct r500_fragment_program_code *code;
@@ -2210,8 +2166,7 @@ static void r500SetupPixelShader(GLcontext *ctx)
R300_STATECHANGE(rmesa, r500fp_const);
for (i = 0; i < code->const_nr; i++) {
- const GLfloat *constant = get_fragmentprogram_constant(ctx,
- &fp->Base.Base, code->constant[i]);
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, fp->Base, code->constant[i]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
@@ -2274,20 +2229,17 @@ void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
- rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead);
- rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
}
void r300UpdateShaderStates(r300ContextPtr rmesa)
{
GLcontext *ctx;
ctx = rmesa->radeon.glCtx;
- struct r300_fragment_program *r300_fp;
-
- r300_fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
/* should only happenen once, just after context is created */
- if (!r300_fp)
+ if (!ctx->FragmentProgram._Current)
return;
r300SetEarlyZState(ctx);
@@ -2323,8 +2275,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
R300_STATECHANGE(r300, cb);
}
- r300UpdateStateParameters(ctx, new_state);
-
r300->radeon.NewGLState |= new_state;
}
diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
index 2328289420..d46bf9f179 100644
--- a/src/mesa/drivers/dri/r300/r300_state.h
+++ b/src/mesa/drivers/dri/r300/r300_state.h
@@ -52,7 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
void r300UpdateViewportOffset (GLcontext * ctx);
void r300UpdateDrawBuffer (GLcontext * ctx);
-void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state);
void r300UpdateShaders (r300ContextPtr rmesa);
void r300UpdateShaderStates (r300ContextPtr rmesa);
void r300InitState (r300ContextPtr r300);
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index ce4179208e..56ed519cf4 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -76,7 +76,7 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
GLuint InputsRead = 0;
GLuint OutputsWritten = 0;
int num_attrs = 0;
- GLuint fp_reads = ctx->FragmentProgram._Current->Base.InputsRead;
+ GLuint fp_reads = rmesa->selected_fp->Base->InputsRead;
struct vertex_attribute *attrs = rmesa->vbuf.attribs;
rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
@@ -150,6 +150,22 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
}
+ if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+
+ VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+ VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+ RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+ }
+
+ if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
+
+ VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+ VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+ RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+ }
+
/**
* Sending only one texcoord component may lead to lock up,
* so for all textures always output 4 texcoord components to RS.
@@ -192,31 +208,9 @@ void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_
}
}
- /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */
- if (fp_reads & FRAG_BIT_WPOS) {
- if (first_free_tex >= ctx->Const.MaxTextureUnits) {
- fprintf(stderr, "\tout of free texcoords to write w pos\n");
- _mesa_exit(-1);
- }
-
- InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex);
- OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex);
- EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
- ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW, 0);
- ++first_free_tex;
- }
-
- if (fp_reads & FRAG_BIT_FOGC) {
- if (first_free_tex >= ctx->Const.MaxTextureUnits) {
- fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
- _mesa_exit(-1);
- }
-
- InputsRead |= 1 << VERT_ATTRIB_FOG;
- OutputsWritten |= 1 << VERT_RESULT_FOGC;
- GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
- EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F );
- ADD_ATTR(VERT_ATTRIB_FOG, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
+ if (first_free_tex >= ctx->Const.MaxTextureUnits) {
+ fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
+ _mesa_exit(-1);
}
R300_NEWPRIM(rmesa);
@@ -497,11 +491,13 @@ void r300RenderStart(GLcontext *ctx)
r300ContextPtr rmesa = R300_CONTEXT( ctx );
r300ChooseRenderState(ctx);
+
+ r300UpdateShaders(rmesa);
+
r300PrepareVertices(ctx);
r300ValidateBuffers(ctx);
- r300UpdateShaders(rmesa);
r300UpdateShaderStates(rmesa);
r300EmitCacheFlush(rmesa);
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 6d6a90aa88..6f489ace7b 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -119,6 +119,10 @@ static const struct tx_table {
_ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)),
_ASSIGN(S8_Z24, R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8)),
_ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)),
+ /* EXT_texture_sRGB */
+ _ASSIGN(SRGBA8, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA),
+ _ASSIGN(SLA8, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA),
+ _ASSIGN(SL8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA),
/* *INDENT-ON* */
};
@@ -277,21 +281,24 @@ GLboolean r300ValidateBuffers(GLcontext * ctx)
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
int i;
+ int ret;
- radeon_validate_reset_bos(&rmesa->radeon);
+ radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
rrb = radeon_get_colorbuffer(&rmesa->radeon);
/* color buffer */
if (rrb && rrb->bo) {
- radeon_validate_bo(&rmesa->radeon, rrb->bo,
- 0, RADEON_GEM_DOMAIN_VRAM);
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
}
/* depth buffer */
rrb = radeon_get_depthbuffer(&rmesa->radeon);
if (rrb && rrb->bo) {
- radeon_validate_bo(&rmesa->radeon, rrb->bo,
- 0, RADEON_GEM_DOMAIN_VRAM);
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
}
for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
@@ -307,17 +314,19 @@ GLboolean r300ValidateBuffers(GLcontext * ctx)
}
t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
if (t->image_override && t->bo)
- radeon_validate_bo(&rmesa->radeon, t->bo,
- RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
-
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
else if (t->mt->bo)
- radeon_validate_bo(&rmesa->radeon, t->mt->bo,
- RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->mt->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
- if (rmesa->radeon.dma.current)
- radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
- return radeon_revalidate_bos(ctx);
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+ return GL_TRUE;
}
void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
@@ -433,7 +442,7 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
rImage->mt = NULL;
}
_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
- rb->width, rb->height, 1, 0, rb->cpp);
+ rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
texImage->RowStride = rb->pitch / rb->cpp;
texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
internalFormat,
@@ -469,15 +478,15 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
break;
}
pitch_val--;
- t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) |
- ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT);
+ t->pp_txsize = ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT) |
+ ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT);
t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
t->pp_txpitch |= pitch_val;
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
- if (rb->width > 2048)
+ if (rb->base.Width > 2048)
t->pp_txpitch |= R500_TXWIDTH_BIT11;
- if (rb->height > 2048)
+ if (rb->base.Height > 2048)
t->pp_txpitch |= R500_TXHEIGHT_BIT11;
}
t->validated = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index c41a8fdd62..de32013032 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -32,12 +32,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
+#include "shader/programopt.h"
#include "shader/prog_instruction.h"
+#include "shader/prog_optimize.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "tnl/tnl.h"
+#include "radeon_nqssadce.h"
#include "r300_context.h"
#include "r300_state.h"
@@ -71,15 +74,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
} while (0)
-int r300VertexProgUpdateParams(GLcontext * ctx,
- struct r300_vertex_program_cont *vp, float *dst)
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst)
{
int pi;
- struct gl_vertex_program *mesa_vp = &vp->mesa_program;
float *dst_o = dst;
struct gl_program_parameter_list *paramList;
- if (mesa_vp->IsNVProgram) {
+ if (vp->IsNVProgram) {
_mesa_load_tracked_matrices(ctx);
for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
@@ -91,16 +92,18 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
return dst - dst_o;
}
- assert(mesa_vp->Base.Parameters);
- _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
+ if (!vp->Base.Parameters)
+ return 0;
- if (mesa_vp->Base.Parameters->NumParameters * 4 >
+ _mesa_load_state_parameters(ctx, vp->Base.Parameters);
+
+ if (vp->Base.Parameters->NumParameters * 4 >
VSF_MAX_FRAGMENT_LENGTH) {
fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
_mesa_exit(-1);
}
- paramList = mesa_vp->Base.Parameters;
+ paramList = vp->Base.Parameters;
for (pi = 0; pi < paramList->NumParameters; pi++) {
switch (paramList->Parameters[pi].Type) {
case PROGRAM_STATE_VAR:
@@ -933,10 +936,14 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
{
int i;
int cur_reg;
+ GLuint OutputsWritten, InputsRead;
+
+ OutputsWritten = vp->Base->Base.OutputsWritten;
+ InputsRead = vp->Base->Base.InputsRead;
cur_reg = -1;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (vp->key.InputsRead & (1 << i))
+ if (InputsRead & (1 << i))
vp->inputs[i] = ++cur_reg;
else
vp->inputs[i] = -1;
@@ -946,13 +953,13 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
for (i = 0; i < VERT_RESULT_MAX; i++)
vp->outputs[i] = -1;
- assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
+ assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
}
@@ -962,46 +969,46 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
* pretend it does by skipping output index reg so the colors
* get written into appropriate output vectors.
*/
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
+ if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
vp->outputs[VERT_RESULT_COL0] = cur_reg++;
- } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
+ if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
vp->outputs[VERT_RESULT_COL1] = cur_reg++;
- } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
- vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
- } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
cur_reg++;
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
- } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
cur_reg++;
}
for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
- if (vp->key.OutputsWritten & (1 << i)) {
+ if (OutputsWritten & (1 << i)) {
vp->outputs[i] = cur_reg++;
}
}
- if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+ if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
}
}
-static void r300TranslateVertexShader(struct r300_vertex_program *vp,
- struct prog_instruction *vpi)
+void r300TranslateVertexShader(struct r300_vertex_program *vp)
{
+ struct prog_instruction *vpi = vp->Base->Base.Instructions;
int i;
GLuint *inst;
unsigned long num_operands;
@@ -1191,313 +1198,463 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
}
}
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
+static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
+{
+ struct prog_instruction *vpi;
+
+ _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
+
+ vpi = &prog->Instructions[prog->NumInstructions - 3];
+
+ vpi->Opcode = OPCODE_MOV;
+
+ vpi->DstReg.File = PROGRAM_OUTPUT;
+ vpi->DstReg.Index = VERT_RESULT_HPOS;
+ vpi->DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi->DstReg.CondMask = COND_TR;
+
+ vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi->SrcReg[0].Index = temp_index;
+ vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++vpi;
+
+ vpi->Opcode = OPCODE_MOV;
-static void position_invariant(struct gl_program *prog)
+ vpi->DstReg.File = PROGRAM_OUTPUT;
+ vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
+ vpi->DstReg.WriteMask = WRITEMASK_XYZW;
+ vpi->DstReg.CondMask = COND_TR;
+
+ vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
+ vpi->SrcReg[0].Index = temp_index;
+ vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++vpi;
+
+ vpi->Opcode = OPCODE_END;
+}
+
+static void pos_as_texcoord(struct gl_program *prog, int tex_id)
{
struct prog_instruction *vpi;
- struct gl_program_parameter_list *paramList;
- int i;
+ GLuint tempregi = prog->NumTemporaries;
- gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
+ prog->NumTemporaries++;
- /* tokens[4] = matrix modifier */
-#ifdef PREFER_DP4
- tokens[4] = 0; /* not transposed or inverted */
-#else
- tokens[4] = STATE_MATRIX_TRANSPOSE;
-#endif
- paramList = prog->Parameters;
-
- vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
- _mesa_init_instructions(vpi, prog->NumInstructions + 4);
-
- for (i = 0; i < 4; i++) {
- GLint idx;
- tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */
- idx = _mesa_add_state_reference(paramList, tokens);
-#ifdef PREFER_DP4
- vpi[i].Opcode = OPCODE_DP4;
- vpi[i].StringPos = 0;
- vpi[i].Data = 0;
-
- vpi[i].DstReg.File = PROGRAM_OUTPUT;
- vpi[i].DstReg.Index = VERT_RESULT_HPOS;
- vpi[i].DstReg.WriteMask = 1 << i;
- vpi[i].DstReg.CondMask = COND_TR;
-
- vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
- vpi[i].SrcReg[0].Index = idx;
- vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- vpi[i].SrcReg[1].File = PROGRAM_INPUT;
- vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
- vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
-#else
- if (i == 0)
- vpi[i].Opcode = OPCODE_MUL;
- else
- vpi[i].Opcode = OPCODE_MAD;
+ for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
+ vpi->DstReg.File = PROGRAM_TEMPORARY;
+ vpi->DstReg.Index = tempregi;
+ }
+ }
- vpi[i].Data = 0;
+ insert_wpos(prog, tempregi, tex_id);
- if (i == 3)
- vpi[i].DstReg.File = PROGRAM_OUTPUT;
- else
- vpi[i].DstReg.File = PROGRAM_TEMPORARY;
- vpi[i].DstReg.Index = 0;
- vpi[i].DstReg.WriteMask = 0xf;
- vpi[i].DstReg.CondMask = COND_TR;
-
- vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
- vpi[i].SrcReg[0].Index = idx;
- vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
- vpi[i].SrcReg[1].File = PROGRAM_INPUT;
- vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
- vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
-
- if (i > 0) {
- vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
- vpi[i].SrcReg[2].Index = 0;
- vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
+ prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+}
+
+/**
+ * The fogcoord attribute is special in that only the first component
+ * is relevant, and the remaining components are always fixed (when read
+ * from by the fragment program) to yield an X001 pattern.
+ *
+ * We need to enforce this either in the vertex program or in the fragment
+ * program, and this code chooses not to enforce it in the vertex program.
+ * This is slightly cheaper, as long as the fragment program does not use
+ * weird swizzles.
+ *
+ * And it seems that usually, weird swizzles are not used, so...
+ *
+ * See also the counterpart rewriting for fragment programs.
+ */
+static void fog_as_texcoord(struct gl_program *prog, int tex_id)
+{
+ struct prog_instruction *vpi;
+
+ vpi = prog->Instructions;
+ while (vpi->Opcode != OPCODE_END) {
+ if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
+ vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
+ vpi->DstReg.WriteMask = WRITEMASK_X;
}
-#endif
+
+ ++vpi;
}
- _mesa_copy_instructions(&vpi[i], prog->Instructions,
- prog->NumInstructions);
+ prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
+ prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+}
- free(prog->Instructions);
+static int translateABS(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
- prog->Instructions = vpi;
+ inst = &prog->Instructions[pos];
- prog->NumInstructions += 4;
- vpi = &prog->Instructions[prog->NumInstructions - 1];
+ inst->Opcode = OPCODE_MAX;
+ inst->SrcReg[1] = inst->SrcReg[0];
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
- assert(vpi->Opcode == OPCODE_END);
+ return 0;
}
-static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
- GLuint temp_index)
+static int translateDP3(struct gl_program *prog, int pos)
{
- struct prog_instruction *vpi;
- struct prog_instruction *vpi_insert;
- int i = 0;
+ struct prog_instruction *inst;
- vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
- _mesa_init_instructions(vpi, prog->NumInstructions + 2);
- /* all but END */
- _mesa_copy_instructions(vpi, prog->Instructions,
- prog->NumInstructions - 1);
- /* END */
- _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
- &prog->Instructions[prog->NumInstructions - 1],
- 1);
- vpi_insert = &vpi[prog->NumInstructions - 1];
+ inst = &prog->Instructions[pos];
- vpi_insert[i].Opcode = OPCODE_MOV;
+ inst->Opcode = OPCODE_DP4;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
- vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
- vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
- vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
- vpi_insert[i].DstReg.CondMask = COND_TR;
+ return 0;
+}
+
+static int translateDPH(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_DP4;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
- vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi_insert[i].SrcReg[0].Index = temp_index;
- vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- i++;
+ return 0;
+}
- vpi_insert[i].Opcode = OPCODE_MOV;
+static int translateFLR(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+ struct prog_dst_register dst;
+ int tmp_idx;
- vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
- vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
- vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
- vpi_insert[i].DstReg.CondMask = COND_TR;
+ tmp_idx = prog->NumTemporaries++;
- vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
- vpi_insert[i].SrcReg[0].Index = temp_index;
- vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
- i++;
+ _mesa_insert_instructions(prog, pos + 1, 1);
- free(prog->Instructions);
+ inst = &prog->Instructions[pos];
+ dst = inst->DstReg;
- prog->Instructions = vpi;
+ inst->Opcode = OPCODE_FRC;
+ inst->DstReg.File = PROGRAM_TEMPORARY;
+ inst->DstReg.Index = tmp_idx;
+ ++inst;
- prog->NumInstructions += i;
- vpi = &prog->Instructions[prog->NumInstructions - 1];
+ inst->Opcode = OPCODE_ADD;
+ inst->DstReg = dst;
+ inst->SrcReg[0] = (inst-1)->SrcReg[0];
+ inst->SrcReg[1].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[1].Index = tmp_idx;
+ inst->SrcReg[1].Negate = NEGATE_XYZW;
- assert(vpi->Opcode == OPCODE_END);
+ return 1;
}
-static void pos_as_texcoord(struct r300_vertex_program *vp,
- struct gl_program *prog)
+static int translateSUB(struct gl_program *prog, int pos)
{
- struct prog_instruction *vpi;
- GLuint tempregi = prog->NumTemporaries;
- /* should do something else if no temps left... */
- prog->NumTemporaries++;
+ struct prog_instruction *inst;
- for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
- if (vpi->DstReg.File == PROGRAM_OUTPUT
- && vpi->DstReg.Index == VERT_RESULT_HPOS) {
- vpi->DstReg.File = PROGRAM_TEMPORARY;
- vpi->DstReg.Index = tempregi;
- }
- }
- insert_wpos(vp, prog, tempregi);
+ inst = &prog->Instructions[pos];
+
+ inst->Opcode = OPCODE_ADD;
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+
+ return 0;
}
-static struct r300_vertex_program *build_program(struct r300_vertex_program_key
- *wanted_key, struct gl_vertex_program
- *mesa_vp, GLint wpos_idx)
+static int translateSWZ(struct gl_program *prog, int pos)
{
- struct r300_vertex_program *vp;
+ prog->Instructions[pos].Opcode = OPCODE_MOV;
- vp = _mesa_calloc(sizeof(*vp));
- _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- vp->wpos_idx = wpos_idx;
+ return 0;
+}
- if (mesa_vp->IsPositionInvariant) {
- position_invariant(&mesa_vp->Base);
- }
+static int translateXPD(struct gl_program *prog, int pos)
+{
+ struct prog_instruction *inst;
+ int tmp_idx;
+
+ tmp_idx = prog->NumTemporaries++;
+
+ _mesa_insert_instructions(prog, pos + 1, 1);
+
+ inst = &prog->Instructions[pos];
+
+ *(inst+1) = *inst;
+
+ inst->Opcode = OPCODE_MUL;
+ inst->DstReg.File = PROGRAM_TEMPORARY;
+ inst->DstReg.Index = tmp_idx;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
+ inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
+ ++inst;
+
+ inst->Opcode = OPCODE_MAD;
+ inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W);
+ inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W);
+ inst->SrcReg[1].Negate ^= NEGATE_XYZW;
+ inst->SrcReg[2].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[2].Index = tmp_idx;
+
+ return 1;
+}
+
+static void translateInsts(struct gl_program *prog)
+{
+ struct prog_instruction *inst;
+ int i;
+
+ for (i = 0; i < prog->NumInstructions; ++i) {
+ inst = &prog->Instructions[i];
- if (wpos_idx > -1) {
- pos_as_texcoord(vp, &mesa_vp->Base);
+ switch (inst->Opcode) {
+ case OPCODE_ABS:
+ i += translateABS(prog, i);
+ break;
+ case OPCODE_DP3:
+ i += translateDP3(prog, i);
+ break;
+ case OPCODE_DPH:
+ i += translateDPH(prog, i);
+ break;
+ case OPCODE_FLR:
+ i += translateFLR(prog, i);
+ break;
+ case OPCODE_SUB:
+ i += translateSUB(prog, i);
+ break;
+ case OPCODE_SWZ:
+ i += translateSWZ(prog, i);
+ break;
+ case OPCODE_XPD:
+ i += translateXPD(prog, i);
+ break;
+ default:
+ break;
+ }
}
+}
- if (RADEON_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "Vertex program after native rewrite:\n");
- _mesa_print_program(&mesa_vp->Base);
- fflush(stdout);
+#define ADD_OUTPUT(fp_attr, vp_result) \
+ do { \
+ if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \
+ OutputsAdded |= 1 << (vp_result); \
+ count++; \
+ } \
+ } while (0)
+
+static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint OutputsAdded, FpReads;
+ int i, count;
+
+ OutputsAdded = 0;
+ count = 0;
+ FpReads = r300->selected_fp->Base->InputsRead;
+
+ ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
+ ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
+
+ for (i = 0; i < 7; ++i) {
+ ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
}
/* Some outputs may be artificially added, to match the inputs of the fragment program.
* Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
* vertex program are undefined, so just use MOV [vertex_result], CONST[0]
*/
- {
- int i, count = 0;
+ if (count > 0) {
+ struct prog_instruction *inst;
+
+ _mesa_insert_instructions(prog, prog->NumInstructions - 1, count);
+ inst = &prog->Instructions[prog->NumInstructions - 1 - count];
+
for (i = 0; i < VERT_RESULT_MAX; ++i) {
- if (vp->key.OutputsAdded & (1 << i)) {
- ++count;
+ if (OutputsAdded & (1 << i)) {
+ inst->Opcode = OPCODE_MOV;
+
+ inst->DstReg.File = PROGRAM_OUTPUT;
+ inst->DstReg.Index = i;
+ inst->DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->DstReg.CondMask = COND_TR;
+
+ inst->SrcReg[0].File = PROGRAM_CONSTANT;
+ inst->SrcReg[0].Index = 0;
+ inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+ ++inst;
}
}
- if (count > 0) {
- struct prog_instruction *inst;
+ prog->OutputsWritten |= OutputsAdded;
+ }
+}
- _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count);
- inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count];
+#undef ADD_OUTPUT
- for (i = 0; i < VERT_RESULT_MAX; ++i) {
- if (vp->key.OutputsAdded & (1 << i)) {
- inst->Opcode = OPCODE_MOV;
+static void nqssadceInit(struct nqssadce_state* s)
+{
+ r300ContextPtr r300 = R300_CONTEXT(s->Ctx);
+ GLuint fp_reads;
- inst->DstReg.File = PROGRAM_OUTPUT;
- inst->DstReg.Index = i;
- inst->DstReg.WriteMask = WRITEMASK_XYZW;
- inst->DstReg.CondMask = COND_TR;
+ fp_reads = r300->selected_fp->Base->InputsRead;
+ {
+ if (fp_reads & FRAG_BIT_COL0) {
+ s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
+ s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
+ }
- inst->SrcReg[0].File = PROGRAM_CONSTANT;
- inst->SrcReg[0].Index = 0;
- inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ if (fp_reads & FRAG_BIT_COL1) {
+ s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
+ s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
+ }
+ }
- ++inst;
- }
+ {
+ int i;
+ for (i = 0; i < 8; ++i) {
+ if (fp_reads & FRAG_BIT_TEX(i)) {
+ s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
}
}
}
- assert(mesa_vp->Base.NumInstructions);
- vp->num_temporaries = mesa_vp->Base.NumTemporaries;
- r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
-
- return vp;
+ s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
+ if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
}
-static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
+static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
{
- if (key->OutputsWritten & (1 << vert))
- return;
+ (void) opcode;
+ (void) reg;
- key->OutputsWritten |= 1 << vert;
- key->OutputsAdded |= 1 << vert;
+ return GL_TRUE;
}
-void r300SelectVertexShader(r300ContextPtr r300)
+static struct r300_vertex_program *build_program(GLcontext *ctx,
+ struct r300_vertex_program_key *wanted_key,
+ const struct gl_vertex_program *mesa_vp)
{
- GLcontext *ctx = ctx = r300->radeon.glCtx;
- GLuint InputsRead;
- struct r300_vertex_program_key wanted_key = { 0 };
- GLint i;
- struct r300_vertex_program_cont *vpc;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
struct r300_vertex_program *vp;
- GLint wpos_idx;
+ struct gl_program *prog;
- vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
- wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
- wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
- InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
-
- wpos_idx = -1;
- if (InputsRead & FRAG_BIT_WPOS) {
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
- if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
- break;
+ vp = _mesa_calloc(sizeof(*vp));
+ vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
+ _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
- if (i == ctx->Const.MaxTextureUnits) {
- fprintf(stderr, "\tno free texcoord found\n");
- _mesa_exit(-1);
- }
+ prog = &vp->Base->Base;
+
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Initial vertex program:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
+ }
- wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
- wpos_idx = i;
+ if (vp->Base->IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, vp->Base);
}
- if (vpc->mesa_program.IsPositionInvariant) {
- wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
- wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
- } else {
- add_outputs(&wanted_key, VERT_RESULT_HPOS);
+ if (r300->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ pos_as_texcoord(&vp->Base->Base, r300->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0);
}
- if (InputsRead & FRAG_BIT_COL0) {
- add_outputs(&wanted_key, VERT_RESULT_COL0);
+ if (r300->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+ fog_as_texcoord(&vp->Base->Base, r300->selected_fp->fog_attr - FRAG_ATTRIB_TEX0);
}
- if (InputsRead & FRAG_BIT_COL1) {
- add_outputs(&wanted_key, VERT_RESULT_COL1);
+ addArtificialOutputs(ctx, prog);
+
+ translateInsts(prog);
+
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Vertex program after native rewrite:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
}
- if (InputsRead & FRAG_BIT_FOGC) {
- add_outputs(&wanted_key, VERT_RESULT_FOGC);
+ {
+ struct radeon_nqssadce_descr nqssadce = {
+ .Init = &nqssadceInit,
+ .IsNativeSwizzle = &swizzleIsNative,
+ .BuildSwizzle = NULL
+ };
+ radeonNqssaDce(ctx, prog, &nqssadce);
+
+ /* We need this step for reusing temporary registers */
+ _mesa_optimize_program(ctx, prog);
+
+ if (RADEON_DEBUG & DEBUG_VERTS) {
+ fprintf(stderr, "Vertex program after NQSSADCE:\n");
+ _mesa_print_program(prog);
+ fflush(stdout);
+ }
}
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (InputsRead & (FRAG_BIT_TEX0 << i)) {
- add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
+ assert(prog->NumInstructions);
+ {
+ struct prog_instruction *inst;
+ int max, i, tmp;
+
+ inst = prog->Instructions;
+ max = -1;
+ while (inst->Opcode != OPCODE_END) {
+ tmp = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < tmp; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
+ if ((int) inst->SrcReg[i].Index > max) {
+ max = inst->SrcReg[i].Index;
+ }
+ }
+ }
+
+ if (_mesa_num_inst_dst_regs(inst->Opcode)) {
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ if ((int) inst->DstReg.Index > max) {
+ max = inst->DstReg.Index;
+ }
+ }
+ }
+ ++inst;
}
+
+ /* We actually want highest index of used temporary register,
+ * not the number of temporaries used.
+ * These values aren't always the same.
+ */
+ vp->num_temporaries = max + 1;
}
- for (vp = vpc->progs; vp; vp = vp->next)
+ return vp;
+}
+
+struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_program_key wanted_key = { 0 };
+ struct r300_vertex_program_cont *vpc;
+ struct r300_vertex_program *vp;
+
+ vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+ wanted_key.FpReads = r300->selected_fp->Base->InputsRead;
+ wanted_key.FogAttr = r300->selected_fp->fog_attr;
+ wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
+
+ for (vp = vpc->progs; vp; vp = vp->next) {
if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
== 0) {
- r300->selected_vp = vp;
- return;
+ return r300->selected_vp = vp;
}
-
- if (RADEON_DEBUG & DEBUG_VERTS) {
- fprintf(stderr, "Initial vertex program:\n");
- _mesa_print_program(&vpc->mesa_program.Base);
- fflush(stdout);
}
- vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
+ vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
vp->next = vpc->progs;
vpc->progs = vp;
- r300->selected_vp = vp;
+
+ return r300->selected_vp = vp;
}
#define bump_vpu_count(ptr, new_count) do { \
@@ -1544,25 +1701,22 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
struct r300_vertex_program *prog = rmesa->selected_vp;
int inst_count = 0;
int param_count = 0;
-
+
/* Reset state, in case we don't use something */
((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
-
+
R300_STATECHANGE(rmesa, vpp);
- param_count = r300VertexProgUpdateParams(ctx,
- (struct r300_vertex_program_cont *)
- ctx->VertexProgram._Current,
- (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+ param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
param_count /= 4;
r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code));
inst_count = (prog->hw_code.length / 4) - 1;
- r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead),
- _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries);
+ r300VapCntl(rmesa, _mesa_bitcount(prog->Base->Base.InputsRead),
+ _mesa_bitcount(prog->Base->Base.OutputsWritten), prog->num_temporaries);
R300_STATECHANGE(rmesa, pvs);
rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h
index b552e3fb1b..2dab11c337 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.h
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.h
@@ -34,4 +34,8 @@
void r300SetupVertexProgram(r300ContextPtr rmesa);
+struct r300_vertex_program * r300SelectVertexShader(GLcontext *ctx);
+
+void r300TranslateVertexShader(struct r300_vertex_program *vp);
+
#endif
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
index 4a2e1cba40..840c9733b1 100644
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c
+++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
@@ -46,6 +46,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil
switch(file) {
case PROGRAM_TEMPORARY: return &s->Temps[index];
case PROGRAM_OUTPUT: return &s->Outputs[index];
+ case PROGRAM_ADDRESS: return &s->Address;
default: return 0;
}
}
@@ -56,7 +57,7 @@ static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint fil
*
* @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
*/
-static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
+struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
{
struct prog_src_register tmp = srcreg;
int i;
@@ -114,47 +115,19 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
deswz_source = sourced;
}
- struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+ struct register_state *regstate;
+
+ if (inst->SrcReg[src].RelAddr)
+ regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
+ else
+ regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+
if (regstate)
regstate->Sourced |= deswz_source & 0xf;
return inst;
}
-
-static void rewrite_depth_out(struct prog_instruction *inst)
-{
- if (inst->DstReg.WriteMask & WRITEMASK_Z) {
- inst->DstReg.WriteMask = WRITEMASK_W;
- } else {
- inst->DstReg.WriteMask = 0;
- return;
- }
-
- switch (inst->Opcode) {
- case OPCODE_FRC:
- case OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
- break;
- default:
- // Scalar instructions needn't be reswizzled
- break;
- }
-}
-
static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
{
int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
@@ -189,11 +162,6 @@ static void process_instruction(struct nqssadce_state* s)
return;
if (inst->Opcode != OPCODE_KIL) {
- if (s->Descr->RewriteDepthOut) {
- if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH)
- rewrite_depth_out(inst);
- }
-
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
_mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
@@ -217,6 +185,7 @@ static void process_instruction(struct nqssadce_state* s)
* might change the instruction stream under us, so we have
* to be careful with the inst pointer. */
switch (inst->Opcode) {
+ case OPCODE_ARL:
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_FRC:
@@ -227,6 +196,8 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MUL:
+ case OPCODE_SGE:
+ case OPCODE_SLT:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
break;
@@ -258,12 +229,51 @@ static void process_instruction(struct nqssadce_state* s)
case OPCODE_TXP:
inst = track_used_srcreg(s, inst, 0, 0xf);
break;
+ case OPCODE_DST:
+ inst = track_used_srcreg(s, inst, 0, 0x6);
+ inst = track_used_srcreg(s, inst, 1, 0xa);
+ break;
+ case OPCODE_EXP:
+ case OPCODE_LOG:
+ case OPCODE_POW:
+ inst = track_used_srcreg(s, inst, 0, 0x3);
+ break;
+ case OPCODE_LIT:
+ inst = track_used_srcreg(s, inst, 0, 0xb);
+ break;
default:
_mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
}
+static void calculateInputsOutputs(struct gl_program *p)
+{
+ struct prog_instruction *inst;
+ GLuint InputsRead, OutputsWritten;
+
+ inst = p->Instructions;
+ InputsRead = 0;
+ OutputsWritten = 0;
+ while (inst->Opcode != OPCODE_END)
+ {
+ int i, num_src_regs;
+
+ num_src_regs = _mesa_num_inst_src_regs(inst->Opcode);
+ for (i = 0; i < num_src_regs; ++i) {
+ if (inst->SrcReg[i].File == PROGRAM_INPUT)
+ InputsRead |= 1 << inst->SrcReg[i].Index;
+ }
+
+ if (inst->DstReg.File == PROGRAM_OUTPUT)
+ OutputsWritten |= 1 << inst->DstReg.Index;
+
+ ++inst;
+ }
+
+ p->InputsRead = InputsRead;
+ p->OutputsWritten = OutputsWritten;
+}
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
{
@@ -280,4 +290,6 @@ void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce
s.IP--;
process_instruction(&s);
}
+
+ calculateInputsOutputs(p);
}
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h
index a4f94abcb6..8626f21c25 100644
--- a/src/mesa/drivers/dri/r300/radeon_nqssadce.h
+++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.h
@@ -58,6 +58,7 @@ struct nqssadce_state {
*/
struct register_state Temps[MAX_PROGRAM_TEMPS];
struct register_state Outputs[VERT_RESULT_MAX];
+ struct register_state Address;
};
@@ -83,14 +84,10 @@ struct radeon_nqssadce_descr {
*/
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
- /**
- * Rewrite instructions that write to DEPR.z to write to DEPR.w
- * instead (rewriting is done *before* the WriteMask test).
- */
- GLboolean RewriteDepthOut;
void *Data;
};
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h
index b411f69bc8..88474d43a2 100644
--- a/src/mesa/drivers/dri/r300/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/radeon_program.h
@@ -52,6 +52,38 @@ enum {
#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
+static inline GLuint get_swz(GLuint swz, GLuint idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w)
+{
+ GLuint ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+static inline GLuint combine_swizzles(GLuint src, GLuint swz)
+{
+ GLuint ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+
/**
* Transformation context that is passed to local transformations.
*
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
index 906d36e522..d6fb474cf2 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
@@ -870,7 +870,7 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
_mesa_bzero(&s, sizeof(s));
s.Ctx = ctx;
- s.Program = program;
+ s.Program = _mesa_clone_program(ctx, program);
s.Handler = handler;
s.UserData = userdata;
s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
@@ -904,6 +904,8 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
_mesa_free(s.ValuePool);
_mesa_free(s.ReaderPool);
+ _mesa_reference_program(ctx, &s.Program, NULL);
+
return !s.Error;
}