summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r300')
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c5
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h2
-rw-r--r--src/gallium/drivers/r300/r300_context.c38
-rw-r--r--src/gallium/drivers/r300/r300_context.h86
-rw-r--r--src/gallium/drivers/r300/r300_cs.h42
-rw-r--r--src/gallium/drivers/r300/r300_debug.c118
-rw-r--r--src/gallium/drivers/r300/r300_debug.h176
-rw-r--r--src/gallium/drivers/r300/r300_emit.c203
-rw-r--r--src/gallium/drivers/r300/r300_emit.h5
-rw-r--r--src/gallium/drivers/r300/r300_flush.c2
-rw-r--r--src/gallium/drivers/r300/r300_reg.h3
-rw-r--r--src/gallium/drivers/r300/r300_render.c77
-rw-r--r--src/gallium/drivers/r300/r300_screen.c41
-rw-r--r--src/gallium/drivers/r300/r300_state.c82
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c357
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h19
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c34
-rw-r--r--src/gallium/drivers/r300/r300_state_shader.c153
-rw-r--r--src/gallium/drivers/r300/r300_state_shader.h24
-rw-r--r--src/gallium/drivers/r300/r300_state_tcl.c171
-rw-r--r--src/gallium/drivers/r300/r300_state_tcl.h24
-rw-r--r--src/gallium/drivers/r300/r300_surface.c150
-rw-r--r--src/gallium/drivers/r300/r300_surface.h4
-rw-r--r--src/gallium/drivers/r300/r300_texture.c2
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h56
25 files changed, 1301 insertions, 573 deletions
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 9d95ad918c..00fae8d26f 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -34,7 +34,6 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->is_r500 = FALSE;
caps->num_vert_fpus = 4;
-
/* Note: These are not ordered by PCI ID. I leave that task to GCC,
* which will perform the ordering while collating jump tables. Instead,
* I've tried to group them according to capabilities and age. */
@@ -150,6 +149,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->num_vert_fpus = 6;
break;
+ case 0x4B48:
case 0x4B49:
case 0x4B4A:
case 0x4B4B:
@@ -349,7 +349,4 @@ void r300_parse_chipset(struct r300_capabilities* caps)
caps->pci_id);
break;
}
-
- /* XXX SW TCL is broken so no forcing it off right now
- caps->has_tcl = FALSE; */
}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index 21eebeae60..5b2e1f0568 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -34,8 +34,6 @@ struct r300_capabilities {
int family;
/* The number of vertex floating-point units */
int num_vert_fpus;
- /* The number of fragment pipes */
- int num_frag_pipes;
/* Whether or not TCL is physically present */
boolean has_tcl;
/* Whether or not this is an RV515 or newer; R500s have many differences
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 31efe91417..233a32b53c 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -34,10 +34,6 @@ static boolean r300_draw_range_elements(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
int i;
- if (r300->dirty_state) {
- r300_emit_dirty_state(r300);
- }
-
for (i = 0; i < r300->vertex_buffer_count; i++) {
void* buf = pipe_buffer_map(pipe->screen,
r300->vertex_buffers[i].buffer,
@@ -102,6 +98,29 @@ static void r300_destroy_context(struct pipe_context* context) {
FREE(r300);
}
+static unsigned int
+r300_is_texture_referenced( struct pipe_context *pipe,
+ struct pipe_texture *texture,
+ unsigned face, unsigned level)
+{
+ /**
+ * FIXME: Optimize.
+ */
+
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
+static unsigned int
+r300_is_buffer_referenced( struct pipe_context *pipe,
+ struct pipe_buffer *buf)
+{
+ /**
+ * FIXME: Optimize.
+ */
+
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
struct pipe_context* r300_create_context(struct pipe_screen* screen,
struct r300_winsys* r300_winsys)
{
@@ -110,7 +129,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
if (!r300)
return NULL;
- /* XXX this could be refactored now? */
r300->winsys = r300_winsys;
r300->context.winsys = (struct pipe_winsys*)r300_winsys;
@@ -124,8 +142,18 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
r300->context.draw_elements = r300_draw_elements;
r300->context.draw_range_elements = r300_draw_range_elements;
+ r300->context.is_texture_referenced = r300_is_texture_referenced;
+ r300->context.is_buffer_referenced = r300_is_buffer_referenced;
+
+ /* Create a Draw. This is used for vert collation and SW TCL. */
r300->draw = draw_create();
+ /* Enable our renderer. */
draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
+ /* Disable Draw's clipping if TCL is present. */
+ draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl);
+ /* Force Draw to never do viewport transform, since (again) we can do
+ * transform in hardware, always. */
+ draw_set_viewport_state(r300->draw, &r300_viewport_identity);
r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state);
r300->rs_block = CALLOC_STRUCT(r300_rs_block);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index fec2bad546..27bc7fd1a9 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -60,9 +60,14 @@ struct r300_dsa_state {
};
struct r300_rs_state {
- /* XXX icky as fucking hell */
+ /* Draw-specific rasterizer state */
struct pipe_rasterizer_state rs;
+ /* Whether or not to enable the VTE. This is referenced at the very
+ * last moment during emission of VTE state, to decide whether or not
+ * the VTE should be used for transformation. */
+ boolean enable_vte;
+
uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */
uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */
@@ -112,23 +117,24 @@ struct r300_viewport_state {
uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */
};
-#define R300_NEW_BLEND 0x0000001
-#define R300_NEW_BLEND_COLOR 0x0000002
-#define R300_NEW_CONSTANTS 0x0000004
-#define R300_NEW_DSA 0x0000008
-#define R300_NEW_FRAMEBUFFERS 0x0000010
-#define R300_NEW_FRAGMENT_SHADER 0x0000020
-#define R300_NEW_RASTERIZER 0x0000040
-#define R300_NEW_RS_BLOCK 0x0000080
-#define R300_NEW_SAMPLER 0x0000100
-#define R300_ANY_NEW_SAMPLERS 0x000ff00
-#define R300_NEW_SCISSOR 0x0010000
-#define R300_NEW_TEXTURE 0x0020000
-#define R300_ANY_NEW_TEXTURES 0x1fe0000
-#define R300_NEW_VERTEX_FORMAT 0x2000000
-#define R300_NEW_VERTEX_SHADER 0x4000000
-#define R300_NEW_VIEWPORT 0x8000000
-#define R300_NEW_KITCHEN_SINK 0xfffffff
+#define R300_NEW_BLEND 0x00000001
+#define R300_NEW_BLEND_COLOR 0x00000002
+#define R300_NEW_CLIP 0x00000004
+#define R300_NEW_CONSTANTS 0x00000008
+#define R300_NEW_DSA 0x00000010
+#define R300_NEW_FRAMEBUFFERS 0x00000020
+#define R300_NEW_FRAGMENT_SHADER 0x00000040
+#define R300_NEW_RASTERIZER 0x00000080
+#define R300_NEW_RS_BLOCK 0x00000100
+#define R300_NEW_SAMPLER 0x00000200
+#define R300_ANY_NEW_SAMPLERS 0x0001fe00
+#define R300_NEW_SCISSOR 0x00020000
+#define R300_NEW_TEXTURE 0x00040000
+#define R300_ANY_NEW_TEXTURES 0x03fc0000
+#define R300_NEW_VERTEX_FORMAT 0x04000000
+#define R300_NEW_VERTEX_SHADER 0x08000000
+#define R300_NEW_VIEWPORT 0x10000000
+#define R300_NEW_KITCHEN_SINK 0x1fffffff
/* The next several objects are not pure Radeon state; they inherit from
* various Gallium classes. */
@@ -136,11 +142,11 @@ struct r300_viewport_state {
struct r300_constant_buffer {
/* Buffer of constants */
/* XXX first number should be raised */
- float constants[8][4];
+ float constants[32][4];
/* Number of user-defined constants */
- int user_count;
+ unsigned user_count;
/* Total number of constants */
- int count;
+ unsigned count;
};
struct r3xx_fragment_shader {
@@ -153,6 +159,10 @@ struct r3xx_fragment_shader {
/* Pixel stack size */
int stack_size;
+
+ /* Are there immediates in this shader?
+ * If not, we can heavily optimize recompilation. */
+ boolean uses_imms;
};
struct r300_fragment_shader {
@@ -225,10 +235,11 @@ struct r300_vertex_format {
uint32_t vap_prog_stream_cntl[8];
/* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */
uint32_t vap_prog_stream_cntl_ext[8];
- /* This is a map of VAP/SW TCL outputs into the GA/RS.
- * tab[i] is the location of input i in GA/RS input memory.
- * Named tab for historical reasons. */
- int tab[16];
+ /* Map of vertex attributes into PVS memory for HW TCL,
+ * or GA memory for SW TCL. */
+ int vs_tab[16];
+ /* Map of rasterizer attributes from GB through RS to US. */
+ int fs_tab[16];
};
struct r300_vertex_shader {
@@ -242,6 +253,10 @@ struct r300_vertex_shader {
/* Has this shader been translated yet? */
boolean translated;
+ /* Are there immediates in this shader?
+ * If not, we can heavily optimize recompilation. */
+ boolean uses_imms;
+
/* Number of used instructions */
int instruction_count;
@@ -254,6 +269,11 @@ struct r300_vertex_shader {
} instructions[128]; /*< XXX magic number */
};
+static struct pipe_viewport_state r300_viewport_identity = {
+ .scale = {1.0, 1.0, 1.0, 1.0},
+ .translate = {0.0, 0.0, 0.0, 0.0},
+};
+
struct r300_context {
/* Parent class */
struct pipe_context context;
@@ -263,11 +283,18 @@ struct r300_context {
/* Draw module. Used mostly for SW TCL. */
struct draw_context* draw;
+ /* Vertex buffer for rendering. */
+ struct pipe_buffer* vbo;
+ /* Offset into the VBO. */
+ size_t vbo_offset;
+
/* Various CSO state objects. */
/* Blend state. */
struct r300_blend_state* blend_state;
/* Blend color state. */
struct r300_blend_color_state* blend_color_state;
+ /* User clip planes. */
+ struct pipe_clip_state clip_state;
/* Shader constants. */
struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES];
/* Depth, stencil, and alpha state. */
@@ -288,7 +315,7 @@ struct r300_context {
/* Texture states. */
struct r300_texture* textures[8];
int texture_count;
- /* Vertex buffers. */
+ /* Vertex buffers for Gallium. */
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
int vertex_buffer_count;
/* Vertex information. */
@@ -313,11 +340,4 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300);
void r300_init_state_functions(struct r300_context* r300);
void r300_init_surface_functions(struct r300_context* r300);
-/* Fun with includes: r300_winsys also declares this prototype.
- * We'll just step out in that case... */
-#ifndef R300_WINSYS_H
-struct pipe_context* r300_create_context(struct pipe_screen* screen,
- struct r300_winsys* r300_winsys);
-#endif
-
#endif /* R300_CONTEXT_H */
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 5d9799dd72..71b142c0db 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -34,6 +34,7 @@
#define MAX_CS_SIZE 64 * 1024 / 4
+#define VERY_VERBOSE_CS 0
#define VERY_VERBOSE_REGISTERS 0
/* XXX stolen from radeon_drm.h */
@@ -49,27 +50,29 @@
#define CS_LOCALS(context) \
struct r300_winsys* cs_winsys = context->winsys; \
- struct radeon_cs* cs = cs_winsys->cs; \
int cs_count = 0;
#define CHECK_CS(size) \
- cs_winsys->check_cs(cs, (size))
+ cs_winsys->check_cs(cs_winsys, (size))
#define BEGIN_CS(size) do { \
CHECK_CS(size); \
- debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \
- size, __FUNCTION__, __FILE__, __LINE__); \
- cs_winsys->begin_cs(cs, (size), __FILE__, __FUNCTION__, __LINE__); \
+ if (VERY_VERBOSE_CS) { \
+ debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \
+ size, __FUNCTION__, __FILE__, __LINE__); \
+ } \
+ cs_winsys->begin_cs(cs_winsys, (size), \
+ __FILE__, __FUNCTION__, __LINE__); \
cs_count = size; \
} while (0)
#define OUT_CS(value) do { \
- cs_winsys->write_cs_dword(cs, (value)); \
+ cs_winsys->write_cs_dword(cs_winsys, (value)); \
cs_count--; \
} while (0)
#define OUT_CS_32F(value) do { \
- cs_winsys->write_cs_dword(cs, fui(value)); \
+ cs_winsys->write_cs_dword(cs_winsys, fui(value)); \
cs_count--; \
} while (0)
@@ -93,26 +96,31 @@
} while (0)
#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \
- debug_printf("r300: writing relocation for buffer %p, offset %d\n", \
- bo, offset); \
+ debug_printf("r300: writing relocation for buffer %p, offset %d, " \
+ "domains (%d, %d, %d)\n", \
+ bo, offset, rd, wd, flags); \
assert(bo); \
OUT_CS(offset); \
- cs_winsys->write_cs_reloc(cs, bo, rd, wd, flags); \
+ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
cs_count -= 2; \
} while (0)
#define END_CS do { \
- debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, __FILE__, \
- __LINE__); \
+ if (VERY_VERBOSE_CS) { \
+ debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \
+ __FILE__, __LINE__); \
+ } \
if (cs_count != 0) \
debug_printf("r300: Warning: cs_count off by %d\n", cs_count); \
- cs_winsys->end_cs(cs, __FILE__, __FUNCTION__, __LINE__); \
+ cs_winsys->end_cs(cs_winsys, __FILE__, __FUNCTION__, __LINE__); \
} while (0)
#define FLUSH_CS do { \
- debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, __FILE__, \
- __LINE__); \
- cs_winsys->flush_cs(cs); \
+ if (VERY_VERBOSE_CS) { \
+ debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \
+ __FILE__, __LINE__); \
+ } \
+ cs_winsys->flush_cs(cs_winsys); \
} while (0)
#define RADEON_ONE_REG_WR (1 << 15)
@@ -138,7 +146,7 @@
assert(bo); \
OUT_CS(offset); \
OUT_CS(count); \
- cs_winsys->write_cs_reloc(cs, bo, rd, wd, flags); \
+ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
cs_count -= 2; \
} while (0)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index dd63136c9d..678cd2b812 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -30,81 +30,6 @@ static void r300_dump_fs(struct r300_fragment_shader* fs)
}
}
-static char* r500_fs_swiz[] = {
- " R",
- " G",
- " B",
- " A",
- " 0",
- ".5",
- " 1",
- " U",
-};
-
-static char* r500_fs_op_rgb[] = {
- "MAD",
- "DP3",
- "DP4",
- "D2A",
- "MIN",
- "MAX",
- "---",
- "CND",
- "CMP",
- "FRC",
- "SOP",
- "MDH",
- "MDV",
-};
-
-static char* r500_fs_op_alpha[] = {
- "MAD",
- " DP",
- "MIN",
- "MAX",
- "---",
- "CND",
- "CMP",
- "FRC",
- "EX2",
- "LN2",
- "RCP",
- "RSQ",
- "SIN",
- "COS",
- "MDH",
- "MDV",
-};
-
-static char* r500_fs_mask[] = {
- "NONE",
- "R ",
- " G ",
- "RG ",
- " B ",
- "R B ",
- " GB ",
- "RGB ",
- " A",
- "R A",
- " G A",
- "RG A",
- " BA",
- "R BA",
- " GBA",
- "RGBA",
-};
-
-static char* r500_fs_tex[] = {
- " NOP",
- " LD",
- "TEXKILL",
- " PROJ",
- "LODBIAS",
- " LOD",
- " DXDY",
-};
-
void r500_fs_dump(struct r500_fragment_shader* fs)
{
int i;
@@ -225,14 +150,49 @@ void r500_fs_dump(struct r500_fragment_shader* fs)
}
}
+static void r300_vs_op_dump(uint32_t op)
+{
+ debug_printf(" dst: %d%s op: ",
+ (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if (op & 0x80) {
+ if (op & 0x1) {
+ debug_printf("PVS_MACRO_OP_2CLK_M2X_ADD\n");
+ } else {
+ debug_printf(" PVS_MACRO_OP_2CLK_MADD\n");
+ }
+ } else if (op & 0x40) {
+ debug_printf("%s\n", r300_vs_me_ops[op & 0x1f]);
+ } else {
+ debug_printf("%s\n", r300_vs_ve_ops[op & 0x1f]);
+ }
+}
+
+void r300_vs_src_dump(uint32_t src)
+{
+ debug_printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+ (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3],
+ src & (1 << 25) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 13) & 0x7],
+ src & (1 << 26) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 16) & 0x7],
+ src & (1 << 27) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 19) & 0x7],
+ src & (1 << 28) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 22) & 0x7]);
+}
+
void r300_vs_dump(struct r300_vertex_shader* vs)
{
int i;
for (i = 0; i < vs->instruction_count; i++) {
- debug_printf("inst0: 0x%x\n", vs->instructions[i].inst0);
- debug_printf("inst1: 0x%x\n", vs->instructions[i].inst1);
- debug_printf("inst2: 0x%x\n", vs->instructions[i].inst2);
- debug_printf("inst3: 0x%x\n", vs->instructions[i].inst3);
+ debug_printf("%d: op: 0x%08x", i, vs->instructions[i].inst0);
+ r300_vs_op_dump(vs->instructions[i].inst0);
+ debug_printf(" src0: 0x%08x", vs->instructions[i].inst1);
+ r300_vs_src_dump(vs->instructions[i].inst1);
+ debug_printf(" src1: 0x%08x", vs->instructions[i].inst2);
+ r300_vs_src_dump(vs->instructions[i].inst2);
+ debug_printf(" src2: 0x%08x", vs->instructions[i].inst3);
+ r300_vs_src_dump(vs->instructions[i].inst3);
}
}
diff --git a/src/gallium/drivers/r300/r300_debug.h b/src/gallium/drivers/r300/r300_debug.h
index a1f873656d..c86410ec0a 100644
--- a/src/gallium/drivers/r300/r300_debug.h
+++ b/src/gallium/drivers/r300/r300_debug.h
@@ -27,6 +27,182 @@
#include "r300_state_shader.h"
#include "r300_state_tcl.h"
+static char* r500_fs_swiz[] = {
+ " R",
+ " G",
+ " B",
+ " A",
+ " 0",
+ ".5",
+ " 1",
+ " U",
+};
+
+static char* r500_fs_op_rgb[] = {
+ "MAD",
+ "DP3",
+ "DP4",
+ "D2A",
+ "MIN",
+ "MAX",
+ "---",
+ "CND",
+ "CMP",
+ "FRC",
+ "SOP",
+ "MDH",
+ "MDV",
+};
+
+static char* r500_fs_op_alpha[] = {
+ "MAD",
+ " DP",
+ "MIN",
+ "MAX",
+ "---",
+ "CND",
+ "CMP",
+ "FRC",
+ "EX2",
+ "LN2",
+ "RCP",
+ "RSQ",
+ "SIN",
+ "COS",
+ "MDH",
+ "MDV",
+};
+
+static char* r500_fs_mask[] = {
+ "NONE",
+ "R ",
+ " G ",
+ "RG ",
+ " B ",
+ "R B ",
+ " GB ",
+ "RGB ",
+ " A",
+ "R A",
+ " G A",
+ "RG A",
+ " BA",
+ "R BA",
+ " GBA",
+ "RGBA",
+};
+
+static char* r500_fs_tex[] = {
+ " NOP",
+ " LD",
+ "TEXKILL",
+ " PROJ",
+ "LODBIAS",
+ " LOD",
+ " DXDY",
+};
+
+static char* r300_vs_ve_ops[] = {
+ /* R300 vector ops */
+ " VE_NO_OP",
+ " VE_DOT_PRODUCT",
+ " VE_MULTIPLY",
+ " VE_ADD",
+ " VE_MULTIPLY_ADD",
+ " VE_DISTANCE_FACTOR",
+ " VE_FRACTION",
+ " VE_MAXIMUM",
+ " VE_MINIMUM",
+ "VE_SET_GREATER_THAN_EQUAL",
+ " VE_SET_LESS_THAN",
+ " VE_MULTIPLYX2_ADD",
+ " VE_MULTIPLY_CLAMP",
+ " VE_FLT2FIX_DX",
+ " VE_FLT2FIX_DX_RND",
+ /* R500 vector ops */
+ " VE_PRED_SET_EQ_PUSH",
+ " VE_PRED_SET_GT_PUSH",
+ " VE_PRED_SET_GTE_PUSH",
+ " VE_PRED_SET_NEQ_PUSH",
+ " VE_COND_WRITE_EQ",
+ " VE_COND_WRITE_GT",
+ " VE_COND_WRITE_GTE",
+ " VE_COND_WRITE_NEQ",
+ " VE_SET_GREATER_THAN",
+ " VE_SET_EQUAL",
+ " VE_SET_NOT_EQUAL",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+static char* r300_vs_me_ops[] = {
+ /* R300 math ops */
+ " ME_NO_OP",
+ " ME_EXP_BASE2_DX",
+ " ME_LOG_BASE2_DX",
+ " ME_EXP_BASEE_FF",
+ " ME_LIGHT_COEFF_DX",
+ " ME_POWER_FUNC_FF",
+ " ME_RECIP_DX",
+ " ME_RECIP_FF",
+ " ME_RECIP_SQRT_DX",
+ " ME_RECIP_SQRT_FF",
+ " ME_MULTIPLY",
+ " ME_EXP_BASE2_FULL_DX",
+ " ME_LOG_BASE2_FULL_DX",
+ " ME_POWER_FUNC_FF_CLAMP_B",
+ "ME_POWER_FUNC_FF_CLAMP_B1",
+ "ME_POWER_FUNC_FF_CLAMP_01",
+ " ME_SIN",
+ " ME_COS",
+ /* R500 math ops */
+ " ME_LOG_BASE2_IEEE",
+ " ME_RECIP_IEEE",
+ " ME_RECIP_SQRT_IEEE",
+ " ME_PRED_SET_EQ",
+ " ME_PRED_SET_GT",
+ " ME_PRED_SET_GTE",
+ " ME_PRED_SET_NEQ",
+ " ME_PRED_SET_CLR",
+ " ME_PRED_SET_INV",
+ " ME_PRED_SET_POP",
+ " ME_PRED_SET_RESTORE",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static char* r300_vs_src_debug[] = {
+ "t",
+ "i",
+ "c",
+ "a",
+};
+
+static char* r300_vs_dst_debug[] = {
+ "t",
+ "a0",
+ "o",
+ "ox",
+ "a",
+ "i",
+ "u",
+ "u",
+};
+
+static char* r300_vs_swiz_debug[] = {
+ "X",
+ "Y",
+ "Z",
+ "W",
+ "0",
+ "1",
+ "U",
+ "U",
+};
+
void r500_fs_dump(struct r500_fragment_shader* fs);
void r300_vs_dump(struct r300_vertex_shader* vs);
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index a3d83376b6..93cf6909a3 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -56,6 +56,36 @@ void r300_emit_blend_color_state(struct r300_context* r300,
}
}
+void r300_emit_clip_state(struct r300_context* r300,
+ struct pipe_clip_state* clip)
+{
+ int i;
+ struct r300_screen* r300screen = r300_screen(r300->context.screen);
+ CS_LOCALS(r300);
+
+ if (r300screen->caps->has_tcl) {
+ BEGIN_CS(5 + (6 * 4));
+ OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300screen->caps->is_r500 ?
+ R500_PVS_UCP_START : R300_PVS_UCP_START));
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
+ for (i = 0; i < 6; i++) {
+ OUT_CS_32F(clip->ucp[i][0]);
+ OUT_CS_32F(clip->ucp[i][1]);
+ OUT_CS_32F(clip->ucp[i][2]);
+ OUT_CS_32F(clip->ucp[i][3]);
+ }
+ OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) |
+ R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
+ END_CS;
+ } else {
+ BEGIN_CS(2);
+ OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+ END_CS;
+ }
+
+}
+
void r300_emit_dsa_state(struct r300_context* r300,
struct r300_dsa_state* dsa)
{
@@ -152,34 +182,41 @@ void r500_emit_fragment_shader(struct r300_context* r300,
END_CS;
}
-/* XXX add pitch, stride, clean up */
void r300_emit_fb_state(struct r300_context* r300,
struct pipe_framebuffer_state* fb)
{
- int i;
struct r300_texture* tex;
+ unsigned pixpitch;
+ int i;
CS_LOCALS(r300);
- BEGIN_CS((6 * fb->nr_cbufs) + (fb->zsbuf ? 6 : 0) + 4);
+ BEGIN_CS((8 * fb->nr_cbufs) + (fb->zsbuf ? 8 : 0) + 4);
for (i = 0; i < fb->nr_cbufs; i++) {
tex = (struct r300_texture*)fb->cbufs[i]->texture;
+ assert(tex && tex->buffer && "cbuf is marked, but NULL!");
+ pixpitch = tex->stride / tex->tex.block.size;
+
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), pixpitch |
+ r300_translate_colorformat(tex->tex.format));
+
OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i),
r300_translate_out_fmt(fb->cbufs[i]->format));
}
if (fb->zsbuf) {
tex = (struct r300_texture*)fb->zsbuf->texture;
+ assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
+ pixpitch = tex->stride / tex->tex.block.size;
+
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
- if (fb->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) {
- OUT_CS_REG(R300_ZB_FORMAT,
- R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL);
- } else {
- OUT_CS_REG(R300_ZB_FORMAT, 0x0);
- }
+
+ OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format));
+
+ OUT_CS_REG(R300_ZB_DEPTHPITCH, pixpitch);
}
OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
@@ -229,7 +266,7 @@ void r300_emit_rs_block_state(struct r300_context* r300,
}
for (i = 0; i < 8; i++) {
OUT_CS(rs->ip[i]);
- debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]);
+ /* debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); */
}
OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
@@ -243,11 +280,11 @@ void r300_emit_rs_block_state(struct r300_context* r300,
}
for (i = 0; i < 8; i++) {
OUT_CS(rs->inst[i]);
- debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]);
+ /* debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); */
}
- debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count,
- rs->inst_count);
+ /* debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count,
+ * rs->inst_count); */
END_CS;
}
@@ -291,6 +328,30 @@ void r300_emit_texture(struct r300_context* r300,
END_CS;
}
+void r300_emit_vertex_buffer(struct r300_context* r300)
+{
+ CS_LOCALS(r300);
+
+ debug_printf("r300: Preparing vertex buffer %p for render, "
+ "vertex size %d\n", r300->vbo,
+ r300->vertex_info.vinfo.size);
+ /* Set the pointer to our vertex buffer. The emitted values are this:
+ * PACKET3 [3D_LOAD_VBPNTR]
+ * COUNT [1]
+ * FORMAT [size | stride << 8]
+ * OFFSET [offset into BO]
+ * VBPNTR [relocated BO]
+ */
+ BEGIN_CS(7);
+ OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
+ OUT_CS(1);
+ OUT_CS(r300->vertex_info.vinfo.size |
+ (r300->vertex_info.vinfo.size << 8));
+ OUT_CS(r300->vbo_offset);
+ OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_CS;
+}
+
void r300_emit_vertex_format_state(struct r300_context* r300)
{
int i;
@@ -305,22 +366,22 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
OUT_CS(r300->vertex_info.vinfo.hwfmt[2]);
OUT_CS(r300->vertex_info.vinfo.hwfmt[3]);
- for (i = 0; i < 4; i++) {
- debug_printf("hwfmt%d: 0x%08x\n", i,
- r300->vertex_info.vinfo.hwfmt[i]);
- }
+ /* for (i = 0; i < 4; i++) {
+ * debug_printf("hwfmt%d: 0x%08x\n", i,
+ * r300->vertex_info.vinfo.hwfmt[i]);
+ * } */
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8);
for (i = 0; i < 8; i++) {
OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]);
- debug_printf("prog_stream_cntl%d: 0x%08x\n", i,
- r300->vertex_info.vap_prog_stream_cntl[i]);
+ /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i,
+ * r300->vertex_info.vap_prog_stream_cntl[i]); */
}
OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8);
for (i = 0; i < 8; i++) {
OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]);
- debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i,
- r300->vertex_info.vap_prog_stream_cntl_ext[i]);
+ /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i,
+ * r300->vertex_info.vap_prog_stream_cntl_ext[i]); */
}
END_CS;
}
@@ -340,14 +401,23 @@ void r300_emit_vertex_shader(struct r300_context* r300,
return;
}
- BEGIN_CS(13 + (vs->instruction_count * 4) + (constants->count * 4));
+ if (constants->count) {
+ BEGIN_CS(14 + (vs->instruction_count * 4) + (constants->count * 4));
+ } else {
+ BEGIN_CS(11 + (vs->instruction_count * 4));
+ }
- OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) |
+ /* R300_VAP_PVS_CODE_CNTL_0
+ * R300_VAP_PVS_CONST_CNTL
+ * R300_VAP_PVS_CODE_CNTL_1
+ * See the r5xx docs for instructions on how to use these.
+ * XXX these could be optimized to select better values... */
+ OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
+ OUT_CS(R300_PVS_FIRST_INST(0) |
+ R300_PVS_XYZW_VALID_INST(vs->instruction_count - 1) |
R300_PVS_LAST_INST(vs->instruction_count - 1));
- OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, vs->instruction_count - 1);
-
- /* XXX */
- OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, 0x0);
+ OUT_CS(R300_PVS_MAX_CONST_ADDR(constants->count - 1));
+ OUT_CS(vs->instruction_count - 1);
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, vs->instruction_count * 4);
@@ -377,7 +447,6 @@ void r300_emit_vertex_shader(struct r300_context* r300,
R300_PVS_VF_MAX_VTX_NUM(12));
OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
END_CS;
-
}
void r300_emit_viewport_state(struct r300_context* r300,
@@ -394,7 +463,11 @@ void r300_emit_viewport_state(struct r300_context* r300,
OUT_CS_32F(viewport->zscale);
OUT_CS_32F(viewport->zoffset);
- OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);
+ if (r300->rs_state->enable_vte) {
+ OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);
+ } else {
+ OUT_CS_REG(R300_VAP_VTE_CNTL, 0);
+ }
END_CS;
}
@@ -412,16 +485,67 @@ void r300_flush_textures(struct r300_context* r300)
void r300_emit_dirty_state(struct r300_context* r300)
{
struct r300_screen* r300screen = r300_screen(r300->context.screen);
- int i;
- int dirty_tex = 0;
+ struct r300_texture* tex;
+ int i, dirty_tex = 0;
+ boolean invalid = FALSE;
- if (!(r300->dirty_state) && !(r300->dirty_hw)) {
+ if (!(r300->dirty_state)) {
return;
}
r300_update_derived_state(r300);
/* XXX check size */
+validate:
+ /* Color buffers... */
+ for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) {
+ tex = (struct r300_texture*)r300->framebuffer_state.cbufs[i]->texture;
+ assert(tex && tex->buffer && "cbuf is marked, but NULL!");
+ if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
+ 0, RADEON_GEM_DOMAIN_VRAM)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ /* ...depth buffer... */
+ if (r300->framebuffer_state.zsbuf) {
+ tex = (struct r300_texture*)r300->framebuffer_state.zsbuf->texture;
+ assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
+ if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
+ 0, RADEON_GEM_DOMAIN_VRAM)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ /* ...textures... */
+ for (i = 0; i < r300->texture_count; i++) {
+ tex = r300->textures[i];
+ assert(tex && tex->buffer && "texture is marked, but NULL!");
+ if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ /* ...and vertex buffer. */
+ if (r300->vbo) {
+ if (!r300->winsys->add_buffer(r300->winsys, r300->vbo,
+ RADEON_GEM_DOMAIN_GTT, 0)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ } else {
+ debug_printf("No VBO while emitting dirty state!\n");
+ }
+ if (r300->winsys->validate(r300->winsys)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ if (invalid) {
+ /* Well, hell. */
+ exit(1);
+ }
+ invalid = TRUE;
+ goto validate;
+ }
if (r300->dirty_state & R300_NEW_BLEND) {
r300_emit_blend_state(r300, r300->blend_state);
@@ -433,6 +557,11 @@ void r300_emit_dirty_state(struct r300_context* r300)
r300->dirty_state &= ~R300_NEW_BLEND_COLOR;
}
+ if (r300->dirty_state & R300_NEW_CLIP) {
+ r300_emit_clip_state(r300, &r300->clip_state);
+ r300->dirty_state &= ~R300_NEW_CLIP;
+ }
+
if (r300->dirty_state & R300_NEW_DSA) {
r300_emit_dsa_state(r300, r300->dsa_state);
r300->dirty_state &= ~R300_NEW_DSA;
@@ -502,4 +631,14 @@ void r300_emit_dirty_state(struct r300_context* r300)
r300_emit_vertex_format_state(r300);
r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT;
}
+
+ if (r300->dirty_state & R300_NEW_VERTEX_SHADER) {
+ r300_emit_vertex_shader(r300, r300->vs);
+ r300->dirty_state &= ~R300_NEW_VERTEX_SHADER;
+ }
+
+ /* Finally, emit the VBO. */
+ r300_emit_vertex_buffer(r300);
+
+ r300->dirty_hw++;
}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 31dbc7ab85..946f625bd8 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -36,6 +36,9 @@ void r300_emit_blend_state(struct r300_context* r300,
void r300_emit_blend_color_state(struct r300_context* r300,
struct r300_blend_color_state* bc);
+void r300_emit_clip_state(struct r300_context* r300,
+ struct pipe_clip_state* clip);
+
void r300_emit_dsa_state(struct r300_context* r300,
struct r300_dsa_state* dsa);
@@ -62,6 +65,8 @@ void r300_emit_scissor_state(struct r300_context* r300,
void r300_emit_texture(struct r300_context* r300,
struct r300_texture* tex, unsigned offset);
+void r300_emit_vertex_buffer(struct r300_context* r300);
+
void r300_emit_vertex_format_state(struct r300_context* r300);
void r300_emit_vertex_shader(struct r300_context* r300,
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 20ca6905ad..89a5f2b20c 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -29,6 +29,8 @@ static void r300_flush(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
CS_LOCALS(r300);
+ draw_flush(r300->draw);
+
if (r300->dirty_hw) {
FLUSH_CS;
r300_emit_invariant_state(r300);
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 660816e1da..3bb9bc47b5 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -511,11 +511,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_PVS_XYZW_VALID_INST_SHIFT 10
# define R300_PVS_LAST_INST_SHIFT 20
# define R300_PVS_FIRST_INST(x) ((x) << 0)
+# define R300_PVS_XYZW_VALID_INST(x) ((x) << 10)
# define R300_PVS_LAST_INST(x) ((x) << 20)
/* Addresses are relative the the vertex program parameters area. */
#define R300_VAP_PVS_CONST_CNTL 0x22D4
# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0
# define R300_PVS_MAX_CONST_ADDR_SHIFT 16
+# define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16)
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
@@ -3040,6 +3042,7 @@ enum {
# define R500_INST_RGB_WMASK_R (1 << 11)
# define R500_INST_RGB_WMASK_G (1 << 12)
# define R500_INST_RGB_WMASK_B (1 << 13)
+# define R500_INST_RGB_WMASK_RGB (7 << 11)
# define R500_INST_ALPHA_WMASK (1 << 14)
# define R500_INST_RGB_OMASK_R (1 << 15)
# define R500_INST_RGB_OMASK_G (1 << 16)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b7ee8fb8a9..cd458d019a 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -45,11 +45,7 @@ struct r300_render {
/* VBO */
struct pipe_buffer* vbo;
- size_t vbo_size;
- size_t vbo_offset;
- void* vbo_map;
size_t vbo_alloc_size;
- size_t vbo_max_used;
};
static INLINE struct r300_render*
@@ -78,24 +74,21 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
struct pipe_screen* screen = r300->context.screen;
size_t size = (size_t)vertex_size * (size_t)count;
- if (r300render->vbo) {
+ if (r300render->vbo && (size > r300render->vbo_alloc_size)) {
pipe_buffer_reference(&r300render->vbo, NULL);
}
+
+ if (!r300render->vbo) {
+ r300render->vbo = pipe_buffer_create(screen,
+ 64,
+ PIPE_BUFFER_USAGE_VERTEX,
+ size);
+ }
- r300render->vbo_size = MAX2(size, r300render->vbo_alloc_size);
- r300render->vbo_offset = 0;
- r300render->vbo = pipe_buffer_create(screen,
- 64,
- PIPE_BUFFER_USAGE_VERTEX,
- r300render->vbo_size);
-
+ r300render->vbo_alloc_size = MAX2(size, r300render->vbo_alloc_size);
r300render->vertex_size = vertex_size;
- if (r300render->vbo) {
- return TRUE;
- } else {
- return FALSE;
- }
+ return (r300render->vbo) ? TRUE : FALSE;
}
static void* r300_render_map_vertices(struct vbuf_render* render)
@@ -103,10 +96,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
struct r300_render* r300render = r300_render(render);
struct pipe_screen* screen = r300render->r300->context.screen;
- r300render->vbo_map = pipe_buffer_map(screen, r300render->vbo,
- PIPE_BUFFER_USAGE_CPU_WRITE);
-
- return (unsigned char*)r300render->vbo_map + r300render->vbo_offset;
+ return (unsigned char*)pipe_buffer_map(screen, r300render->vbo,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
}
static void r300_render_unmap_vertices(struct vbuf_render* render,
@@ -116,9 +107,6 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
struct r300_render* r300render = r300_render(render);
struct pipe_screen* screen = r300render->r300->context.screen;
- r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
- r300render->vertex_size * (max + 1));
-
pipe_buffer_unmap(screen, r300render->vbo);
}
@@ -180,27 +168,9 @@ static void prepare_render(struct r300_render* render, unsigned count)
CS_LOCALS(r300);
- /* Make sure that all possible state is emitted. */
- r300_emit_dirty_state(r300);
+ r300->vbo = render->vbo;
- debug_printf("r300: Preparing vertex buffer %p for render, "
- "vertex size %d, vertex count %d\n", render->vbo,
- r300->vertex_info.vinfo.size, count);
- /* Set the pointer to our vertex buffer. The emitted values are this:
- * PACKET3 [3D_LOAD_VBPNTR]
- * COUNT [1]
- * FORMAT [size | stride << 8]
- * OFFSET [0]
- * VBPNTR [relocated BO]
- */
- BEGIN_CS(7);
- OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
- OUT_CS(1);
- OUT_CS(r300->vertex_info.vinfo.size |
- (r300->vertex_info.vinfo.size << 8));
- OUT_CS(render->vbo_offset);
- OUT_CS_RELOC(render->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_CS;
+ r300_emit_dirty_state(r300);
}
static void r300_render_draw_arrays(struct vbuf_render* render,
@@ -212,8 +182,6 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
CS_LOCALS(r300);
- r300render->vbo_offset = start;
-
prepare_render(r300render, count);
debug_printf("r300: Doing vbuf render, count %d\n", count);
@@ -234,6 +202,8 @@ static void r300_render_draw(struct vbuf_render* render,
struct pipe_screen* screen = r300->context.screen;
struct pipe_buffer* index_buffer;
void* index_map;
+ int i;
+ uint32_t index;
CS_LOCALS(r300);
@@ -246,6 +216,7 @@ static void r300_render_draw(struct vbuf_render* render,
return;
}
+/*
index_map = pipe_buffer_map(screen, index_buffer,
PIPE_BUFFER_USAGE_CPU_WRITE);
memcpy(index_map, indices, count);
@@ -253,13 +224,25 @@ static void r300_render_draw(struct vbuf_render* render,
debug_printf("r300: Doing indexbuf render, count %d\n", count);
- BEGIN_CS(6);
+ BEGIN_CS(8);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
r300render->hwprim);
OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2));
OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_CS; */
+
+ BEGIN_CS(2 + (count+1)/2);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+ r300render->hwprim);
+ for (i = 0; i < count-1; i += 2) {
+ OUT_CS(indices[i+1] << 16 | indices[i]);
+ }
+ if (count % 2) {
+ OUT_CS(indices[count-1]);
+ }
END_CS;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index d2c5998c26..a6f1efe356 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -87,23 +87,25 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
} else {
return 0;
}
- return 0;
case PIPE_CAP_GLSL:
- /* IN THEORY */
- return 0;
+ if (r300screen->caps->is_r500) {
+ return 1;
+ } else {
+ return 0;
+ }
case PIPE_CAP_S3TC:
/* IN THEORY */
return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
- /* IN THEORY */
- return 0;
+ return 1;
case PIPE_CAP_POINT_SPRITE:
/* IN THEORY */
return 0;
case PIPE_CAP_MAX_RENDER_TARGETS:
return 4;
case PIPE_CAP_OCCLUSION_QUERY:
- return 1;
+ /* IN THEORY */
+ return 0;
case PIPE_CAP_TEXTURE_SHADOW_MAP:
/* IN THEORY */
return 0;
@@ -152,17 +154,20 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
static float r300_get_paramf(struct pipe_screen* pscreen, int param)
{
+ struct r300_screen* r300screen = r300_screen(pscreen);
+
switch (param) {
case PIPE_CAP_MAX_LINE_WIDTH:
case PIPE_CAP_MAX_LINE_WIDTH_AA:
- /* XXX this is the biggest thing that will fit in that register.
- * Perhaps the actual rendering limits are less? */
- return 10922.0f;
case PIPE_CAP_MAX_POINT_WIDTH:
case PIPE_CAP_MAX_POINT_WIDTH_AA:
- /* XXX this is the biggest thing that will fit in that register.
- * Perhaps the actual rendering limits are less? */
- return 10922.0f;
+ /* The maximum dimensions of the colorbuffer are our practical
+ * rendering limits. 2048 pixels should be enough for anybody. */
+ if (r300screen->caps->is_r500) {
+ return 4096.0f;
+ } else {
+ return 2048.0f;
+ }
case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
@@ -230,9 +235,16 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen,
case PIPE_TEXTURE_2D:
return check_tex_2d_format(format,
r300_screen(pscreen)->caps->is_r500);
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_3D:
+ case PIPE_TEXTURE_CUBE:
+ debug_printf("r300: Implementation error: Unsupported format "
+ "target: %d\n", target);
+ break;
default:
- debug_printf("r300: Warning: Got unknown format target: %d\n",
- format);
+ debug_printf("r300: Fatal: This is not a format target: %d\n",
+ target);
+ assert(0);
break;
}
@@ -337,7 +349,6 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys)
return NULL;
caps->pci_id = r300_winsys->pci_id;
- caps->num_frag_pipes = r300_winsys->gb_pipes;
r300_parse_chipset(caps);
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 2a77fd1739..01e2b51153 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -62,8 +62,6 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
}
/* PIPE_LOGICOP_* don't need to be translated, fortunately. */
- /* XXX are logicops still allowed if blending's disabled?
- * Does Gallium take care of it for us? */
if (state->logicop_enable) {
blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
(state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
@@ -121,9 +119,14 @@ static void r300_set_clip_state(struct pipe_context* pipe,
const struct pipe_clip_state* state)
{
struct r300_context* r300 = r300_context(pipe);
- /* XXX Draw */
- draw_flush(r300->draw);
- draw_set_clip_state(r300->draw, state);
+
+ if (r300_screen(pipe->screen)->caps->has_tcl) {
+ r300->clip_state = *state;
+ r300->dirty_state |= R300_NEW_CLIP;
+ } else {
+ draw_flush(r300->draw);
+ draw_set_clip_state(r300->draw, state);
+ }
}
static void
@@ -153,10 +156,12 @@ static void
/* If the number of constants have changed, invalidate the shader. */
if (r300->shader_constants[shader].user_count != i) {
- if (shader == PIPE_SHADER_FRAGMENT && r300->fs) {
+ if (shader == PIPE_SHADER_FRAGMENT && r300->fs &&
+ r300->fs->uses_imms) {
r300->fs->translated = FALSE;
r300_translate_fragment_shader(r300, r300->fs);
- } else if (shader == PIPE_SHADER_VERTEX && r300->vs) {
+ } else if (shader == PIPE_SHADER_VERTEX && r300->vs &&
+ r300->vs->uses_imms) {
r300->vs->translated = FALSE;
r300_translate_vertex_shader(r300, r300->vs);
}
@@ -257,6 +262,7 @@ static void r300_set_edgeflags(struct pipe_context* pipe,
const unsigned* bitfield)
{
/* XXX you know it's bad when i915 has this blank too */
+ /* XXX and even worse, I have no idea WTF the bitfield is */
}
static void
@@ -289,6 +295,7 @@ static void* r300_create_fs_state(struct pipe_context* pipe,
/* Copy state directly into shader. */
fs->state = *shader;
+ fs->state.tokens = tgsi_dup_tokens(shader->tokens);
tgsi_scan_shader(shader->tokens, &fs->info);
@@ -317,13 +324,15 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
/* Delete fragment shader state. */
static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
{
+ struct r3xx_fragment_shader* fs = (struct r3xx_fragment_shader*)shader;
+ FREE(fs->state.tokens);
FREE(shader);
}
static void r300_set_polygon_stipple(struct pipe_context* pipe,
const struct pipe_poly_stipple* state)
{
- /* XXX */
+ /* XXX no idea how to set this up, but not terribly important */
}
/* Create a new rasterizer state based on the CSO rasterizer state.
@@ -341,6 +350,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
/* Copy rasterizer state for Draw. */
rs->rs = *state;
+ rs->enable_vte = !state->bypass_vs_clip_and_viewport;
+
/* If bypassing TCL, or if no TCL engine is present, turn off the HW TCL.
* Else, enable HW TCL and force Draw's TCL off. */
if (state->bypass_vs_clip_and_viewport ||
@@ -421,6 +432,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
struct r300_context* r300 = r300_context(pipe);
struct r300_rs_state* rs = (struct r300_rs_state*)state;
+ draw_flush(r300->draw);
draw_set_rasterizer_state(r300->draw, &rs->rs);
r300->rs_state = rs;
@@ -528,7 +540,6 @@ static void r300_set_scissor_state(struct pipe_context* pipe,
const struct pipe_scissor_state* state)
{
struct r300_context* r300 = r300_context(pipe);
- draw_flush(r300->draw);
if (r300_screen(r300->context.screen)->caps->is_r500) {
r300->scissor_state->scissor_top_left =
@@ -555,25 +566,38 @@ static void r300_set_viewport_state(struct pipe_context* pipe,
{
struct r300_context* r300 = r300_context(pipe);
- r300->viewport_state->xscale = state->scale[0];
- r300->viewport_state->yscale = state->scale[1];
- r300->viewport_state->zscale = state->scale[2];
-
- r300->viewport_state->xoffset = state->translate[0];
- r300->viewport_state->yoffset = state->translate[1];
- r300->viewport_state->zoffset = state->translate[2];
-
- r300->viewport_state->vte_control = 0;
- if (r300_screen(r300->context.screen)->caps->has_tcl) {
- /* Do the transform in HW. */
- r300->viewport_state->vte_control |=
- R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA |
- R300_VPORT_Y_SCALE_ENA | R300_VPORT_Y_OFFSET_ENA |
- R300_VPORT_Z_SCALE_ENA | R300_VPORT_Z_OFFSET_ENA;
- } else {
- /* Have Draw do the actual transform. */
- draw_set_viewport_state(r300->draw, state);
+ /* Do the transform in HW. */
+ r300->viewport_state->vte_control = R300_VTX_W0_FMT;
+
+ if (state->scale[0] != 1.0f) {
+ assert(state->scale[0] != 0.0f);
+ r300->viewport_state->xscale = state->scale[0];
+ r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA;
+ }
+ if (state->scale[1] != 1.0f) {
+ assert(state->scale[1] != 0.0f);
+ r300->viewport_state->yscale = state->scale[1];
+ r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA;
+ }
+ if (state->scale[2] != 1.0f) {
+ assert(state->scale[2] != 0.0f);
+ r300->viewport_state->zscale = state->scale[2];
+ r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA;
+ }
+ if (state->translate[0] != 0.0f) {
+ r300->viewport_state->xoffset = state->translate[0];
+ r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA;
+ }
+ if (state->translate[1] != 0.0f) {
+ r300->viewport_state->yoffset = state->translate[1];
+ r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA;
}
+ if (state->translate[2] != 0.0f) {
+ r300->viewport_state->zoffset = state->translate[2];
+ r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA;
+ }
+
+ r300->dirty_state |= R300_NEW_VIEWPORT;
}
static void r300_set_vertex_buffers(struct pipe_context* pipe,
@@ -610,6 +634,7 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
/* Copy state directly into shader. */
vs->state = *shader;
+ vs->state.tokens = tgsi_dup_tokens(shader->tokens);
tgsi_scan_shader(shader->tokens, &vs->info);
@@ -626,6 +651,8 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
{
struct r300_context* r300 = r300_context(pipe);
+ draw_flush(r300->draw);
+
if (r300_screen(pipe->screen)->caps->has_tcl) {
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
@@ -653,6 +680,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
draw_delete_vertex_shader(r300->draw, vs->draw);
+ FREE(vs->state.tokens);
FREE(shader);
} else {
draw_delete_vertex_shader(r300->draw,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index f1feafbcf9..2477b30822 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -25,62 +25,88 @@
/* r300_state_derived: Various bits of state which are dependent upon
* currently bound CSO data. */
-/* Update the vertex_info struct in our r300_context.
- *
- * The vertex_info struct describes the post-TCL format of vertices. It is
- * required for Draw when doing SW TCL, and also for describing the
- * dreaded RS block on R300 chipsets. */
-static void r300_update_vertex_layout(struct r300_context* r300)
+/* Set up the vs_tab and routes. */
+static void r300_vs_tab_routes(struct r300_context* r300,
+ struct r300_vertex_format* vformat)
{
struct r300_screen* r300screen = r300_screen(r300->context.screen);
- struct r300_vertex_format vformat;
- struct vertex_info vinfo;
+ struct vertex_info* vinfo = &vformat->vinfo;
+ int* tab = vformat->vs_tab;
boolean pos = FALSE, psize = FALSE, fog = FALSE;
int i, texs = 0, cols = 0;
- int tab[16];
-
- struct tgsi_shader_info* info = &r300->fs->info;
+ struct tgsi_shader_info* info;
- memset(&vinfo, 0, sizeof(vinfo));
- for (i = 0; i < 16; i++) {
- tab[i] = -1;
+ if (r300screen->caps->has_tcl) {
+ /* Use vertex shader to determine required routes. */
+ info = &r300->vs->info;
+ } else {
+ /* Use fragment shader to determine required routes. */
+ info = &r300->fs->info;
}
assert(info->num_inputs <= 16);
- for (i = 0; i < info->num_inputs; i++) {
- switch (info->input_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
- pos = TRUE;
- tab[i] = 0;
- break;
- case TGSI_SEMANTIC_COLOR:
- tab[i] = 2 + cols++;
- break;
- case TGSI_SEMANTIC_PSIZE:
- psize = TRUE;
- tab[i] = 1;
- break;
- case TGSI_SEMANTIC_FOG:
- fog = TRUE;
- /* Fall through... */
- case TGSI_SEMANTIC_GENERIC:
- tab[i] = 6 + texs++;
- break;
- default:
- debug_printf("r300: Unknown vertex input %d\n",
- info->input_semantic_name[i]);
- break;
- }
- }
-
if (r300screen->caps->has_tcl) {
+ /* Just copy vert attribs over as-is. */
for (i = 0; i < info->num_inputs; i++) {
- /* XXX should probably do real lookup with vert shader */
tab[i] = i;
}
+ for (i = 0; i < info->num_outputs; i++) {
+ switch (info->output_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ pos = TRUE;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ cols++;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ psize = TRUE;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = TRUE;
+ /* Fall through */
+ case TGSI_SEMANTIC_GENERIC:
+ texs++;
+ break;
+ default:
+ debug_printf("r300: Unknown vertex output %d\n",
+ info->output_semantic_name[i]);
+ break;
+ }
+ }
+ } else {
+ for (i = 0; i < info->num_inputs; i++) {
+ switch (info->input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ pos = TRUE;
+ tab[i] = 0;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ tab[i] = 2 + cols;
+ cols++;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ psize = TRUE;
+ tab[i] = 15;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = TRUE;
+ /* Fall through */
+ case TGSI_SEMANTIC_GENERIC:
+ tab[i] = 6 + texs;
+ texs++;
+ break;
+ default:
+ debug_printf("r300: Unknown vertex input %d\n",
+ info->input_semantic_name[i]);
+ break;
+ }
+ }
}
+ /* XXX magic */
+ assert(texs <= 8);
+
/* Do the actual vertex_info setup.
*
* vertex_info has four uints of hardware-specific data in it.
@@ -89,7 +115,7 @@ static void r300_update_vertex_layout(struct r300_context* r300)
* vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0
* vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */
- vinfo.hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
+ vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
if (!pos) {
debug_printf("r300: Forcing vertex position attribute emit...\n");
@@ -99,109 +125,223 @@ static void r300_update_vertex_layout(struct r300_context* r300)
tab[i] = tab[i-1];
}
tab[0] = 0;
-
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_POS,
- draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
- } else {
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE,
- draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
}
- vinfo.hwfmt[1] |= R300_INPUT_CNTL_POS;
- vinfo.hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
+ draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
+ vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS;
+ vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
if (psize) {
- draw_emit_vertex_attr(&vinfo, EMIT_1F_PSIZE, INTERP_POS,
+ draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS,
draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0));
- vinfo.hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+ vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
}
for (i = 0; i < cols; i++) {
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR,
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR,
draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i));
- vinfo.hwfmt[1] |= R300_INPUT_CNTL_COLOR;
- vinfo.hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i);
+ vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR;
+ vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i);
}
- for (i = 0; i < texs; i++) {
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE,
- draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
- vinfo.hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
- vinfo.hwfmt[3] |= (4 << (3 * i));
- }
+ /* Init i right here, increment it if fog is enabled.
+ * This gets around a double-increment problem. */
+ i = 0;
if (fog) {
i++;
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE,
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0));
- vinfo.hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
- vinfo.hwfmt[3] |= (4 << (3 * i));
+ vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
+ vinfo->hwfmt[3] |= (4 << (3 * i));
+ }
+
+ for (i; i < texs; i++) {
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
+ draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
+ vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
+ vinfo->hwfmt[3] |= (4 << (3 * i));
}
- draw_compute_vertex_size(&vinfo);
+ /* Handle the case where the vertex shader will be generating some of
+ * the attribs based on its inputs. */
+ if (r300screen->caps->has_tcl &&
+ info->num_inputs < info->num_outputs) {
+ vinfo->num_attribs = info->num_inputs;
+ }
- if (memcmp(&r300->vertex_info, &vinfo, sizeof(struct vertex_info))) {
- uint32_t temp;
- debug_printf("attrib count: %d, fp input count: %d\n",
- vinfo.num_attribs, info->num_inputs);
- for (i = 0; i < vinfo.num_attribs; i++) {
- debug_printf("attrib: offset %d, interp %d, size %d,"
- " tab %d\n", vinfo.attrib[i].src_index,
- vinfo.attrib[i].interp_mode, vinfo.attrib[i].emit,
+ draw_compute_vertex_size(vinfo);
+}
+
+/* Update the PSC tables. */
+static void r300_vertex_psc(struct r300_context* r300,
+ struct r300_vertex_format* vformat)
+{
+ struct r300_screen* r300screen = r300_screen(r300->context.screen);
+ struct vertex_info* vinfo = &vformat->vinfo;
+ int* tab = vformat->vs_tab;
+ uint32_t temp;
+ int i, attrib_count;
+
+ /* Vertex shaders have no semantics on their inputs,
+ * so PSC should just route stuff based on their info,
+ * and not on attrib information. */
+ if (r300screen->caps->has_tcl) {
+ attrib_count = r300->vs->info.num_inputs;
+ debug_printf("r300: routing %d attribs in psc for vs\n",
+ attrib_count);
+ } else {
+ attrib_count = vinfo->num_attribs;
+ debug_printf("r300: attrib count: %d\n", attrib_count);
+ for (i = 0; i < attrib_count; i++) {
+ debug_printf("r300: attrib: offset %d, interp %d, size %d,"
+ " tab %d\n", vinfo->attrib[i].src_index,
+ vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
tab[i]);
}
+ }
- for (i = 0; i < vinfo.num_attribs; i++) {
- /* Make sure we have a proper destination for our attribute */
- assert(tab[i] != -1);
+ for (i = 0; i < attrib_count; i++) {
+ /* Make sure we have a proper destination for our attribute */
+ assert(tab[i] != -1);
- temp = translate_vertex_data_type(vinfo.attrib[i].emit) |
- (tab[i] << R300_DST_VEC_LOC_SHIFT);
- if (i & 1) {
- r300->vertex_info.vap_prog_stream_cntl[i >> 1] &= 0x0000ffff;
- r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= temp << 16;
- } else {
- r300->vertex_info.vap_prog_stream_cntl[i >> 1] &= 0xffff0000;
- r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= temp;
- }
+ /* Add the attribute to the PSC table. */
+ temp = r300screen->caps->has_tcl ?
+ R300_DATA_TYPE_FLOAT_4 :
+ translate_vertex_data_type(vinfo->attrib[i].emit);
+ temp |= tab[i] << R300_DST_VEC_LOC_SHIFT;
+
+ if (i & 1) {
+ vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff;
+ vformat->vap_prog_stream_cntl[i >> 1] |= temp << 16;
- r300->vertex_info.vap_prog_stream_cntl_ext[i >> 1] |=
- (R300_VAP_SWIZZLE_XYZW << (i & 1 ? 16 : 0));
+ vformat->vap_prog_stream_cntl_ext[i >> 1] |=
+ (R300_VAP_SWIZZLE_XYZW << 16);
+ } else {
+ vformat->vap_prog_stream_cntl[i >> 1] &= 0xffff0000;
+ vformat->vap_prog_stream_cntl[i >> 1] |= temp << 0;
+
+ vformat->vap_prog_stream_cntl_ext[i >> 1] |=
+ (R300_VAP_SWIZZLE_XYZW << 0);
}
- /* Set the last vector. */
- i--;
- r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= (R300_LAST_VEC <<
- (i & 1 ? 16 : 0));
+ }
- memcpy(r300->vertex_info.tab, tab, sizeof(tab));
- memcpy(&r300->vertex_info, &vinfo, sizeof(struct vertex_info));
+ /* Set the last vector in the PSC. */
+ i--;
+ vformat->vap_prog_stream_cntl[i >> 1] |=
+ (R300_LAST_VEC << (i & 1 ? 16 : 0));
+}
+
+/* Update the vertex format. */
+static void r300_update_vertex_format(struct r300_context* r300)
+{
+ struct r300_vertex_format vformat;
+ int i;
+
+ memset(&vformat, 0, sizeof(struct r300_vertex_format));
+ for (i = 0; i < 16; i++) {
+ vformat.vs_tab[i] = -1;
+ vformat.fs_tab[i] = -1;
+ }
+
+ r300_vs_tab_routes(r300, &vformat);
+
+ r300_vertex_psc(r300, &vformat);
+
+ if (memcmp(&r300->vertex_info, &vformat,
+ sizeof(struct r300_vertex_format))) {
+ memcpy(&r300->vertex_info, &vformat,
+ sizeof(struct r300_vertex_format));
r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
}
}
+/* Set up the mappings from GB to US, for RS block. */
+static void r300_update_fs_tab(struct r300_context* r300)
+{
+ struct r300_vertex_format* vformat = &r300->vertex_info;
+ struct tgsi_shader_info* info = &r300->fs->info;
+ int i, cols = 0, texs = 0, cols_emitted = 0;
+ int* tab = vformat->fs_tab;
+
+ for (i = 0; i < 16; i++) {
+ tab[i] = -1;
+ }
+
+ assert(info->num_inputs <= 16);
+ for (i = 0; i < info->num_inputs; i++) {
+ switch (info->input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ tab[i] = INTERP_LINEAR;
+ cols++;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ case TGSI_SEMANTIC_PSIZE:
+ debug_printf("r300: Implementation error: Can't use "
+ "pos attribs in fragshader yet!\n");
+ /* Pass through for now */
+ case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_GENERIC:
+ tab[i] = INTERP_PERSPECTIVE;
+ break;
+ default:
+ debug_printf("r300: Unknown vertex input %d\n",
+ info->input_semantic_name[i]);
+ break;
+ }
+ }
+
+ /* Now that we know where everything is... */
+ debug_printf("r300: fp input count: %d\n", info->num_inputs);
+ for (i = 0; i < info->num_inputs; i++) {
+ switch (tab[i]) {
+ case INTERP_LINEAR:
+ debug_printf("r300: attrib: "
+ "stack offset %d, color, tab %d\n",
+ i, cols_emitted);
+ tab[i] = cols_emitted;
+ cols_emitted++;
+ break;
+ case INTERP_PERSPECTIVE:
+ debug_printf("r300: attrib: "
+ "stack offset %d, texcoord, tab %d\n",
+ i, cols + texs);
+ tab[i] = cols + texs;
+ texs++;
+ break;
+ case -1:
+ debug_printf("r300: Implementation error: Bad fp interp!\n");
+ default:
+ break;
+ }
+ }
+
+}
+
/* Set up the RS block. This is the part of the chipset that actually does
* the rasterization of vertices into fragments. This is also the part of the
* chipset that locks up if any part of it is even slightly wrong. */
static void r300_update_rs_block(struct r300_context* r300)
{
struct r300_rs_block* rs = r300->rs_block;
- struct vertex_info* vinfo = &r300->vertex_info.vinfo;
- int* tab = r300->vertex_info.tab;
+ struct tgsi_shader_info* info = &r300->fs->info;
+ int* tab = r300->vertex_info.fs_tab;
int col_count = 0, fp_offset = 0, i, memory_pos, tex_count = 0;
memset(rs, 0, sizeof(struct r300_rs_block));
if (r300_screen(r300->context.screen)->caps->is_r500) {
- for (i = 0; i < vinfo->num_attribs; i++) {
- assert(tab[vinfo->attrib[i].src_index] != -1);
- memory_pos = tab[vinfo->attrib[i].src_index] * 4;
- switch (vinfo->attrib[i].interp_mode) {
- case INTERP_LINEAR:
+ for (i = 0; i < info->num_inputs; i++) {
+ assert(tab[i] != -1);
+ memory_pos = tab[i] * 4;
+ switch (info->input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
rs->ip[col_count] |=
R500_RS_COL_PTR(memory_pos) |
R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
col_count++;
break;
- case INTERP_PERSPECTIVE:
+ case TGSI_SEMANTIC_GENERIC:
rs->ip[tex_count] |=
R500_RS_SEL_S(memory_pos) |
R500_RS_SEL_T(memory_pos + 1) |
@@ -243,17 +383,17 @@ static void r300_update_rs_block(struct r300_context* r300)
fp_offset++;
}
} else {
- for (i = 0; i < vinfo->num_attribs; i++) {
- memory_pos = tab[vinfo->attrib[i].src_index] * 4;
- assert(tab[vinfo->attrib[i].src_index] != -1);
- switch (vinfo->attrib[i].interp_mode) {
- case INTERP_LINEAR:
+ for (i = 0; i < info->num_inputs; i++) {
+ assert(tab[i] != -1);
+ memory_pos = tab[i] * 4;
+ switch (info->input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
rs->ip[col_count] |=
R300_RS_COL_PTR(memory_pos) |
R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
col_count++;
break;
- case INTERP_PERSPECTIVE:
+ case TGSI_SEMANTIC_GENERIC:
rs->ip[tex_count] |=
R300_RS_TEX_PTR(memory_pos) |
R300_RS_SEL_S(R300_RS_SEL_C0) |
@@ -307,10 +447,11 @@ void r300_update_derived_state(struct r300_context* r300)
{
if (r300->dirty_state &
(R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
- r300_update_vertex_layout(r300);
+ r300_update_vertex_format(r300);
}
if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) {
+ r300_update_fs_tab(r300);
r300_update_rs_block(r300);
}
}
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 91b93fc367..22c8e199ae 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -353,25 +353,6 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format)
/* Non-CSO state. (For now.) */
-static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
-{
- switch (pipe_count) {
- case 1:
- return R300_GB_TILE_PIPE_COUNT_RV300;
- break;
- case 2:
- return R300_GB_TILE_PIPE_COUNT_R300;
- break;
- case 3:
- return R300_GB_TILE_PIPE_COUNT_R420_3P;
- break;
- case 4:
- return R300_GB_TILE_PIPE_COUNT_R420;
- break;
- }
- return 0;
-}
-
static INLINE uint32_t translate_vertex_data_type(int type) {
switch (type) {
case EMIT_1F:
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 8bd9b41bd7..e438114010 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -34,36 +34,19 @@ void r300_emit_invariant_state(struct r300_context* r300)
struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
CS_LOCALS(r300);
- BEGIN_CS(30 + (caps->has_tcl ? 2: 0));
+ BEGIN_CS(26 + (caps->has_tcl ? 2: 0));
/*** Graphics Backend (GB) ***/
/* Various GB enables */
OUT_CS_REG(R300_GB_ENABLE, 0x0);
/* Subpixel multisampling for AA */
OUT_CS_REG(R300_GB_MSPOS0, 0x66666666);
- OUT_CS_REG(R300_GB_MSPOS1, 0x66666666);
- /* GB tile config and pipe setup */
- OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_DISABLE |
- r300_translate_gb_pipes(caps->num_frag_pipes));
+ OUT_CS_REG(R300_GB_MSPOS1, 0x6666666);
/* Source of fog depth */
OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W);
/* AA enable */
OUT_CS_REG(R300_GB_AA_CONFIG, 0x0);
- /*** Geometry Assembly (GA) ***/
- /* GA errata fixes. */
- if (caps->is_r500) {
- OUT_CS_REG(R300_GA_ENHANCE,
- R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL |
- R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE |
- R500_GA_ENHANCE_REG_READWRITE_ENABLE |
- R500_GA_ENHANCE_REG_NOSTALL_ENABLE);
- } else {
- OUT_CS_REG(R300_GA_ENHANCE,
- R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL |
- R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE);
- }
-
/*** Fog (FG) ***/
OUT_CS_REG(R300_FG_FOG_BLEND, 0x0);
OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0);
@@ -86,7 +69,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
END_CS;
/* XXX unsorted stuff from surface_fill */
- BEGIN_CS(79 + (caps->has_tcl ? 7 : 0));
+ BEGIN_CS(77 + (caps->has_tcl ? 5 : 0));
/* Flush PVS. */
OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
@@ -97,8 +80,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
/* XXX endian */
if (caps->has_tcl) {
OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP);
- OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE |
- R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
OUT_CS_32F(1.0);
OUT_CS_32F(1.0);
@@ -113,11 +94,14 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_32F(0.0);
OUT_CS_REG_SEQ(R300_GA_POINT_S1, 1);
OUT_CS_32F(1.0);
+ /* XXX line tex stuffing */
+ OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1);
+ OUT_CS_32F(0.0);
+ OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1);
+ OUT_CS_32F(1.0);
OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 |
(0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT));
/* XXX this big chunk should be refactored into rs_state */
- OUT_CS_REG(R300_GA_LINE_S0, 0x00000000);
- OUT_CS_REG(R300_GA_LINE_S1, 0x3F800000);
OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000);
OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000);
OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000);
@@ -144,8 +128,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1);
OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405);
OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
- /* Vertex size. */
- OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8);
/* XXX */
OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa);
diff --git a/src/gallium/drivers/r300/r300_state_shader.c b/src/gallium/drivers/r300/r300_state_shader.c
index 1b02239ee7..cc7f6a7c4b 100644
--- a/src/gallium/drivers/r300/r300_state_shader.c
+++ b/src/gallium/drivers/r300/r300_state_shader.c
@@ -22,24 +22,6 @@
#include "r300_state_shader.h"
-static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs)
-{
- struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader;
- fs->shader.stack_size = pt->shader.stack_size;
- fs->alu_instruction_count = pt->alu_instruction_count;
- fs->tex_instruction_count = pt->tex_instruction_count;
- fs->indirections = pt->indirections;
- fs->instructions[0] = pt->instructions[0];
-}
-
-static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs)
-{
- struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader;
- fs->shader.stack_size = pt->shader.stack_size;
- fs->instruction_count = pt->instruction_count;
- fs->instructions[0] = pt->instructions[0];
-}
-
static void r300_fs_declare(struct r300_fs_asm* assembler,
struct tgsi_full_declaration* decl)
{
@@ -49,6 +31,7 @@ static void r300_fs_declare(struct r300_fs_asm* assembler,
case TGSI_SEMANTIC_COLOR:
assembler->color_count++;
break;
+ case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
assembler->tex_count++;
break;
@@ -59,6 +42,12 @@ static void r300_fs_declare(struct r300_fs_asm* assembler,
}
break;
case TGSI_FILE_OUTPUT:
+ /* Depth write. Mark the position of the output so we can
+ * identify it later. */
+ if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+ assembler->depth_output = decl->DeclarationRange.First;
+ }
+ break;
case TGSI_FILE_CONSTANT:
break;
case TGSI_FILE_TEMPORARY:
@@ -120,6 +109,14 @@ static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler,
return 0;
}
+static INLINE boolean r300_fs_is_depr(struct r300_fs_asm* assembler,
+ struct tgsi_dst_register* dst)
+{
+ return (assembler->writes_depth &&
+ (dst->File == TGSI_FILE_OUTPUT) &&
+ (dst->Index == assembler->depth_output));
+}
+
static INLINE unsigned r500_fix_swiz(unsigned s)
{
/* For historical reasons, the swizzle values x, y, z, w, and 0 are
@@ -194,11 +191,17 @@ static INLINE uint32_t r300_alpha_op(unsigned op)
static INLINE uint32_t r500_rgba_op(unsigned op)
{
switch (op) {
+ case TGSI_OPCODE_COS:
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_LG2:
case TGSI_OPCODE_RCP:
case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_SIN:
return R500_ALU_RGBA_OP_SOP;
+ case TGSI_OPCODE_DDX:
+ return R500_ALU_RGBA_OP_MDH;
+ case TGSI_OPCODE_DDY:
+ return R500_ALU_RGBA_OP_MDV;
case TGSI_OPCODE_FRC:
return R500_ALU_RGBA_OP_FRC;
case TGSI_OPCODE_DP3:
@@ -224,6 +227,8 @@ static INLINE uint32_t r500_rgba_op(unsigned op)
static INLINE uint32_t r500_alpha_op(unsigned op)
{
switch (op) {
+ case TGSI_OPCODE_COS:
+ return R500_ALPHA_OP_COS;
case TGSI_OPCODE_EX2:
return R500_ALPHA_OP_EX2;
case TGSI_OPCODE_LG2:
@@ -234,6 +239,12 @@ static INLINE uint32_t r500_alpha_op(unsigned op)
return R500_ALPHA_OP_RSQ;
case TGSI_OPCODE_FRC:
return R500_ALPHA_OP_FRC;
+ case TGSI_OPCODE_SIN:
+ return R500_ALPHA_OP_SIN;
+ case TGSI_OPCODE_DDX:
+ return R500_ALPHA_OP_MDH;
+ case TGSI_OPCODE_DDY:
+ return R500_ALPHA_OP_MDV;
case TGSI_OPCODE_DP3:
case TGSI_OPCODE_DP4:
case TGSI_OPCODE_DPH:
@@ -295,38 +306,34 @@ static INLINE void r300_emit_maths(struct r300_fragment_shader* fs,
}
/* Setup an ALU operation. */
-static INLINE void r500_emit_alu(struct r500_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_dst_register* dst)
+static INLINE void r500_emit_maths(struct r500_fragment_shader* fs,
+ struct r300_fs_asm* assembler,
+ struct tgsi_full_src_register* src,
+ struct tgsi_full_dst_register* dst,
+ unsigned op,
+ unsigned count)
{
int i = fs->instruction_count;
if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
- fs->instructions[i].inst0 = R500_INST_TYPE_OUT |
- R500_ALU_OMASK(dst->DstRegister.WriteMask);
+ fs->instructions[i].inst0 = R500_INST_TYPE_OUT;
+ if (r300_fs_is_depr(assembler, dst)) {
+ fs->instructions[i].inst4 = R500_W_OMASK;
+ } else {
+ fs->instructions[i].inst0 |=
+ R500_ALU_OMASK(dst->DstRegister.WriteMask);
+ }
} else {
fs->instructions[i].inst0 = R500_INST_TYPE_ALU |
- R500_ALU_WMASK(dst->DstRegister.WriteMask);
+ R500_ALU_WMASK(dst->DstRegister.WriteMask);
}
fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT;
- fs->instructions[i].inst4 =
+ fs->instructions[i].inst4 |=
R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister));
fs->instructions[i].inst5 =
R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister));
-}
-
-static INLINE void r500_emit_maths(struct r500_fragment_shader* fs,
- struct r300_fs_asm* assembler,
- struct tgsi_full_src_register* src,
- struct tgsi_full_dst_register* dst,
- unsigned op,
- unsigned count)
-{
- int i = fs->instruction_count;
-
- r500_emit_alu(fs, assembler, dst);
switch (count) {
case 3:
@@ -348,8 +355,8 @@ static INLINE void r500_emit_maths(struct r500_fragment_shader* fs,
R500_ALU_RGB_SEL_B_SRC1 |
R500_SWIZ_RGB_B(r500_rgb_swiz(&src[1]));
fs->instructions[i].inst4 |=
- R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1])) |
- R500_ALPHA_SEL_B_SRC1;
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1]));
case 1:
case 0:
default:
@@ -361,8 +368,8 @@ static INLINE void r500_emit_maths(struct r500_fragment_shader* fs,
R500_ALU_RGB_SEL_A_SRC0 |
R500_SWIZ_RGB_A(r500_rgb_swiz(&src[0]));
fs->instructions[i].inst4 |=
- R500_SWIZ_ALPHA_A(r500_alpha_swiz(&src[0])) |
- R500_ALPHA_SEL_A_SRC0;
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_SWIZ_ALPHA_A(r500_alpha_swiz(&src[0]));
break;
}
@@ -441,6 +448,9 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs,
* AMD/ATI names for opcodes, please, as it facilitates using the
* documentation. */
switch (inst->Instruction.Opcode) {
+ /* XXX trig needs extra prep */
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
/* The simple scalar ops. */
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_LG2:
@@ -452,6 +462,8 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs,
inst->FullSrcRegisters[0].SrcRegister.SwizzleW =
inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
/* Fall through */
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
case TGSI_OPCODE_FRC:
r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
&inst->FullDstRegisters[0], inst->Instruction.Opcode, 1);
@@ -527,6 +539,60 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs,
&inst->FullDstRegisters[0], inst->Instruction.Opcode, 3);
break;
+ /* The compound and hybrid insts. */
+ case TGSI_OPCODE_LRP:
+ /* LRP DST A, B, C -> MAD TMP -A, C, C; MAD DST A, B, TMP */
+ inst->FullSrcRegisters[3] = inst->FullSrcRegisters[1];
+ inst->FullSrcRegisters[1] = inst->FullSrcRegisters[2];
+ inst->FullSrcRegisters[0].SrcRegister.Negate =
+ !(inst->FullSrcRegisters[0].SrcRegister.Negate);
+ inst->FullDstRegisters[1] = inst->FullDstRegisters[0];
+ inst->FullDstRegisters[0].DstRegister.Index =
+ assembler->temp_count;
+ inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
+ &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3);
+ inst->FullSrcRegisters[2].SrcRegister.Index =
+ assembler->temp_count;
+ inst->FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ inst->FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst->FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+ inst->FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
+ inst->FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+ inst->FullSrcRegisters[1] = inst->FullSrcRegisters[3];
+ inst->FullSrcRegisters[0].SrcRegister.Negate =
+ !(inst->FullSrcRegisters[0].SrcRegister.Negate);
+ inst->FullDstRegisters[0] = inst->FullDstRegisters[1];
+ r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
+ &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3);
+ break;
+ case TGSI_OPCODE_POW:
+ /* POW DST A, B -> LG2 TMP A; MUL TMP TMP, B; EX2 DST TMP */
+ inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
+ inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX;
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleW =
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
+ inst->FullDstRegisters[1] = inst->FullDstRegisters[0];
+ inst->FullDstRegisters[0].DstRegister.Index =
+ assembler->temp_count;
+ inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+ r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
+ &inst->FullDstRegisters[0], TGSI_OPCODE_LG2, 1);
+ inst->FullSrcRegisters[0].SrcRegister.Index =
+ assembler->temp_count;
+ inst->FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+ inst->FullSrcRegisters[2] = r500_constant_zero;
+ r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
+ &inst->FullDstRegisters[0], TGSI_OPCODE_MUL, 3);
+ inst->FullDstRegisters[0] = inst->FullDstRegisters[1];
+ r500_emit_maths(fs, assembler, inst->FullSrcRegisters,
+ &inst->FullDstRegisters[0], TGSI_OPCODE_EX2, 1);
+ break;
+
/* The texture instruction set. */
case TGSI_OPCODE_KIL:
case TGSI_OPCODE_TEX:
@@ -555,7 +621,7 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs,
static void r300_fs_finalize(struct r3xx_fragment_shader* fs,
struct r300_fs_asm* assembler)
{
- fs->stack_size = assembler->temp_count + assembler->temp_offset;
+ fs->stack_size = assembler->temp_count + assembler->temp_offset + 1;
}
static void r500_fs_finalize(struct r500_fragment_shader* fs,
@@ -581,6 +647,8 @@ void r300_translate_fragment_shader(struct r300_context* r300,
}
/* Setup starting offset for immediates. */
assembler->imm_offset = consts->user_count;
+ /* Enable depth writes, if needed. */
+ assembler->writes_depth = fs->info.writes_z;
/* Make sure we start at the beginning of the shader. */
if (is_r500) {
@@ -630,6 +698,7 @@ void r300_translate_fragment_shader(struct r300_context* r300,
assembler->tex_count + assembler->color_count);
consts->count = consts->user_count + assembler->imm_count;
+ fs->uses_imms = assembler->imm_count;
debug_printf("r300: fs: %d total constants, "
"%d from user and %d from immediates\n", consts->count,
consts->user_count, assembler->imm_count);
diff --git a/src/gallium/drivers/r300/r300_state_shader.h b/src/gallium/drivers/r300/r300_state_shader.h
index 185fdd90f0..b6087404ce 100644
--- a/src/gallium/drivers/r300/r300_state_shader.h
+++ b/src/gallium/drivers/r300/r300_state_shader.h
@@ -57,6 +57,7 @@
#define R500_TEX_WMASK(x) ((x) << 11)
#define R500_ALU_WMASK(x) ((x) << 11)
#define R500_ALU_OMASK(x) ((x) << 15)
+#define R500_W_OMASK (1 << 31)
/* TGSI constants. TGSI is like XML: If it can't solve your problems, you're
* not using enough of it. */
@@ -99,20 +100,17 @@ struct r300_fs_asm {
unsigned imm_offset;
/* Number of immediate constants. */
unsigned imm_count;
+ /* Are depth writes enabled? */
+ boolean writes_depth;
+ /* Depth write offset. This is the TGSI output that corresponds to
+ * depth writes. */
+ unsigned depth_output;
};
void r300_translate_fragment_shader(struct r300_context* r300,
struct r3xx_fragment_shader* fs);
static struct r300_fragment_shader r300_passthrough_fragment_shader = {
- /* XXX This is the emission code. TODO: decode
- OUT_CS_REG(R300_US_CONFIG, 0);
- OUT_CS_REG(R300_US_CODE_OFFSET, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000);
-*/
.alu_instruction_count = 1,
.tex_instruction_count = 0,
.indirections = 0,
@@ -159,14 +157,6 @@ static struct r500_fragment_shader r500_passthrough_fragment_shader = {
};
static struct r300_fragment_shader r300_texture_fragment_shader = {
- /* XXX This is the emission code. TODO: decode
- OUT_CS_REG(R300_US_CONFIG, 0);
- OUT_CS_REG(R300_US_CODE_OFFSET, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0);
- OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000);
-*/
.alu_instruction_count = 1,
.tex_instruction_count = 0,
.indirections = 0,
@@ -191,7 +181,7 @@ static struct r500_fragment_shader r500_texture_fragment_shader = {
.instruction_count = 2,
.instructions[0].inst0 = R500_INST_TYPE_TEX |
R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK |
R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
.instructions[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD |
R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED,
diff --git a/src/gallium/drivers/r300/r300_state_tcl.c b/src/gallium/drivers/r300/r300_state_tcl.c
index 47d6c6dfcd..8cf8250425 100644
--- a/src/gallium/drivers/r300/r300_state_tcl.c
+++ b/src/gallium/drivers/r300/r300_state_tcl.c
@@ -34,11 +34,20 @@ static void r300_vs_declare(struct r300_vs_asm* assembler,
assembler->tab[decl->DeclarationRange.First] = 0;
break;
case TGSI_SEMANTIC_COLOR:
- assembler->tab[decl->DeclarationRange.First] = 2;
+ assembler->tab[decl->DeclarationRange.First] =
+ (assembler->point_size ? 1 : 0) +
+ assembler->out_colors;
break;
+ case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
/* XXX multiple? */
- assembler->tab[decl->DeclarationRange.First] = 6;
+ assembler->tab[decl->DeclarationRange.First] =
+ (assembler->point_size ? 1 : 0) +
+ assembler->out_colors +
+ assembler->out_texcoords;
+ break;
+ case TGSI_SEMANTIC_PSIZE:
+ assembler->tab[decl->DeclarationRange.First] = 1;
break;
default:
debug_printf("r300: vs: Bad semantic declaration %d\n",
@@ -62,16 +71,13 @@ static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler,
{
switch (src->File) {
case TGSI_FILE_NULL:
- /* Probably a zero or one swizzle */
- return R300_PVS_SRC_REG_INPUT;
- break;
case TGSI_FILE_INPUT:
+ /* Probably a zero or one swizzle */
return R300_PVS_SRC_REG_INPUT;
- break;
case TGSI_FILE_TEMPORARY:
return R300_PVS_SRC_REG_TEMPORARY;
- break;
case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_IMMEDIATE:
return R300_PVS_SRC_REG_CONSTANT;
default:
debug_printf("r300: vs: Unimplemented src type %d\n", src->File);
@@ -80,16 +86,32 @@ static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler,
return 0;
}
+static INLINE unsigned r300_vs_src(struct r300_vs_asm* assembler,
+ struct tgsi_src_register* src)
+{
+ switch (src->File) {
+ case TGSI_FILE_NULL:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_CONSTANT:
+ return src->Index;
+ case TGSI_FILE_IMMEDIATE:
+ return src->Index + assembler->imm_offset;
+ default:
+ debug_printf("r300: vs: Unimplemented src type %d\n", src->File);
+ break;
+ }
+ return 0;
+}
+
static INLINE unsigned r300_vs_dst_type(struct r300_vs_asm* assembler,
struct tgsi_dst_register* dst)
{
switch (dst->File) {
case TGSI_FILE_TEMPORARY:
return R300_PVS_DST_REG_TEMPORARY;
- break;
case TGSI_FILE_OUTPUT:
return R300_PVS_DST_REG_OUT;
- break;
default:
debug_printf("r300: vs: Unimplemented dst type %d\n", dst->File);
break;
@@ -103,10 +125,8 @@ static INLINE unsigned r300_vs_dst(struct r300_vs_asm* assembler,
switch (dst->File) {
case TGSI_FILE_TEMPORARY:
return dst->Index;
- break;
case TGSI_FILE_OUTPUT:
return assembler->tab[dst->Index];
- break;
default:
debug_printf("r300: vs: Unimplemented dst %d\n", dst->File);
break;
@@ -117,12 +137,22 @@ static INLINE unsigned r300_vs_dst(struct r300_vs_asm* assembler,
static uint32_t r300_vs_op(unsigned op)
{
switch (op) {
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ return R300_VE_DOT_PRODUCT;
case TGSI_OPCODE_MUL:
return R300_VE_MULTIPLY;
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SUB:
case TGSI_OPCODE_SWZ:
return R300_VE_ADD;
+ case TGSI_OPCODE_MAX:
+ return R300_VE_MAXIMUM;
+ case TGSI_OPCODE_SLT:
+ return R300_VE_SET_LESS_THAN;
+ case TGSI_OPCODE_RSQ:
+ return R300_PVS_DST_MATH_INST | R300_ME_RECIP_DX;
case TGSI_OPCODE_MAD:
return R300_PVS_DST_MACRO_INST | R300_PVS_MACRO_OP_2CLK_MADD;
default:
@@ -134,51 +164,78 @@ static uint32_t r300_vs_op(unsigned op)
static uint32_t r300_vs_swiz(struct tgsi_full_src_register* reg)
{
if (reg->SrcRegister.Extended) {
- return reg->SrcRegisterExtSwz.ExtSwizzleX |
+ return (reg->SrcRegister.Negate ? (0xf << 12) : 0) |
+ reg->SrcRegisterExtSwz.ExtSwizzleX |
(reg->SrcRegisterExtSwz.ExtSwizzleY << 3) |
(reg->SrcRegisterExtSwz.ExtSwizzleZ << 6) |
(reg->SrcRegisterExtSwz.ExtSwizzleW << 9);
} else {
- return reg->SrcRegister.SwizzleX |
+ return (reg->SrcRegister.Negate ? (0xf << 12) : 0) |
+ reg->SrcRegister.SwizzleX |
(reg->SrcRegister.SwizzleY << 3) |
(reg->SrcRegister.SwizzleZ << 6) |
(reg->SrcRegister.SwizzleW << 9);
}
}
+/* XXX icky icky icky icky */
+static uint32_t r300_vs_scalar_swiz(struct tgsi_full_src_register* reg)
+{
+ if (reg->SrcRegister.Extended) {
+ return (reg->SrcRegister.Negate ? (0xf << 12) : 0) |
+ reg->SrcRegisterExtSwz.ExtSwizzleX |
+ (reg->SrcRegisterExtSwz.ExtSwizzleX << 3) |
+ (reg->SrcRegisterExtSwz.ExtSwizzleX << 6) |
+ (reg->SrcRegisterExtSwz.ExtSwizzleX << 9);
+ } else {
+ return (reg->SrcRegister.Negate ? (0xf << 12) : 0) |
+ reg->SrcRegister.SwizzleX |
+ (reg->SrcRegister.SwizzleX << 3) |
+ (reg->SrcRegister.SwizzleX << 6) |
+ (reg->SrcRegister.SwizzleX << 9);
+ }
+}
+
+/* XXX scalar stupidity */
static void r300_vs_emit_inst(struct r300_vertex_shader* vs,
struct r300_vs_asm* assembler,
struct tgsi_full_src_register* src,
struct tgsi_full_dst_register* dst,
unsigned op,
- unsigned count)
+ unsigned count,
+ boolean is_scalar)
{
int i = vs->instruction_count;
vs->instructions[i].inst0 = R300_PVS_DST_OPCODE(r300_vs_op(op)) |
R300_PVS_DST_REG_TYPE(r300_vs_dst_type(assembler, &dst->DstRegister)) |
R300_PVS_DST_OFFSET(r300_vs_dst(assembler, &dst->DstRegister)) |
- R300_PVS_DST_WE_XYZW;
+ R300_PVS_DST_WE(dst->DstRegister.WriteMask);
switch (count) {
case 3:
vs->instructions[i].inst3 =
R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler,
&src[2].SrcRegister)) |
- R300_PVS_SRC_OFFSET(src[2].SrcRegister.Index) |
+ R300_PVS_SRC_OFFSET(r300_vs_src(assembler,
+ &src[2].SrcRegister)) |
R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[2]));
/* Fall through */
case 2:
vs->instructions[i].inst2 =
R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler,
&src[1].SrcRegister)) |
- R300_PVS_SRC_OFFSET(src[1].SrcRegister.Index) |
+ R300_PVS_SRC_OFFSET(r300_vs_src(assembler,
+ &src[1].SrcRegister)) |
R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[1]));
/* Fall through */
case 1:
vs->instructions[i].inst1 =
R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler,
&src[0].SrcRegister)) |
- R300_PVS_SRC_OFFSET(src[0].SrcRegister.Index) |
- R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[0]));
+ R300_PVS_SRC_OFFSET(r300_vs_src(assembler,
+ &src[0].SrcRegister)) |
+ /* XXX the icky, it burns */
+ R300_PVS_SRC_SWIZZLE(is_scalar ? r300_vs_scalar_swiz(&src[0])
+ : r300_vs_swiz(&src[0]));
break;
}
vs->instruction_count++;
@@ -189,23 +246,64 @@ static void r300_vs_instruction(struct r300_vertex_shader* vs,
struct tgsi_full_instruction* inst)
{
switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_RSQ:
+ r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
+ &inst->FullDstRegisters[0], inst->Instruction.Opcode,
+ 1, TRUE);
+ break;
+ case TGSI_OPCODE_SUB:
+ inst->FullSrcRegisters[1].SrcRegister.Negate =
+ !inst->FullSrcRegisters[1].SrcRegister.Negate;
+ /* Fall through */
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_SLT:
+ r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
+ &inst->FullDstRegisters[0], inst->Instruction.Opcode,
+ 2, FALSE);
+ break;
+ case TGSI_OPCODE_DP3:
+ /* Set alpha swizzle to zero for src0 and src1 */
+ if (!inst->FullSrcRegisters[0].SrcRegister.Extended) {
+ inst->FullSrcRegisters[0].SrcRegister.Extended = TRUE;
+ inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX =
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleX;
+ inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleY =
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleY;
+ inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleZ =
+ inst->FullSrcRegisters[0].SrcRegister.SwizzleZ;
+ }
+ inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW =
+ TGSI_EXTSWIZZLE_ZERO;
+ if (!inst->FullSrcRegisters[1].SrcRegister.Extended) {
+ inst->FullSrcRegisters[1].SrcRegister.Extended = TRUE;
+ inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleX =
+ inst->FullSrcRegisters[1].SrcRegister.SwizzleX;
+ inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleY =
+ inst->FullSrcRegisters[1].SrcRegister.SwizzleY;
+ inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleZ =
+ inst->FullSrcRegisters[1].SrcRegister.SwizzleZ;
+ }
+ inst->FullSrcRegisters[1].SrcRegisterExtSwz.ExtSwizzleW =
+ TGSI_EXTSWIZZLE_ZERO;
+ /* Fall through */
+ case TGSI_OPCODE_DP4:
r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
&inst->FullDstRegisters[0], inst->Instruction.Opcode,
- 2);
+ 2, FALSE);
break;
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
inst->FullSrcRegisters[1] = r300_constant_zero;
r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
&inst->FullDstRegisters[0], inst->Instruction.Opcode,
- 2);
+ 2, FALSE);
break;
case TGSI_OPCODE_MAD:
r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters,
&inst->FullDstRegisters[0], inst->Instruction.Opcode,
- 3);
+ 3, FALSE);
break;
case TGSI_OPCODE_END:
break;
@@ -216,6 +314,30 @@ static void r300_vs_instruction(struct r300_vertex_shader* vs,
}
}
+static void r300_vs_init(struct r300_vertex_shader* vs,
+ struct r300_vs_asm* assembler)
+{
+ struct tgsi_shader_info* info = &vs->info;
+ int i;
+
+ for (i = 0; i < info->num_outputs; i++) {
+ switch (info->output_semantic_name[i]) {
+ case TGSI_SEMANTIC_PSIZE:
+ assembler->point_size = TRUE;
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ assembler->out_colors++;
+ break;
+ case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_GENERIC:
+ assembler->out_texcoords++;
+ break;
+ }
+ }
+
+ vs->instruction_count = 0;
+}
+
void r300_translate_vertex_shader(struct r300_context* r300,
struct r300_vertex_shader* vs)
{
@@ -228,6 +350,10 @@ void r300_translate_vertex_shader(struct r300_context* r300,
if (assembler == NULL) {
return;
}
+
+ /* Init assembler. */
+ r300_vs_init(vs, assembler);
+
/* Setup starting offset for immediates. */
assembler->imm_offset = consts->user_count;
@@ -269,6 +395,7 @@ void r300_translate_vertex_shader(struct r300_context* r300,
assembler->tex_count + assembler->color_count);
consts->count = consts->user_count + assembler->imm_count;
+ vs->uses_imms = assembler->imm_count;
debug_printf("r300: vs: %d total constants, "
"%d from user and %d from immediates\n", consts->count,
consts->user_count, assembler->imm_count);
diff --git a/src/gallium/drivers/r300/r300_state_tcl.h b/src/gallium/drivers/r300/r300_state_tcl.h
index 3d10e248e1..2c8b586c2f 100644
--- a/src/gallium/drivers/r300/r300_state_tcl.h
+++ b/src/gallium/drivers/r300/r300_state_tcl.h
@@ -32,8 +32,13 @@
/* XXX get these to r300_reg */
#define R300_PVS_DST_OPCODE(x) ((x) << 0)
+# define R300_VE_DOT_PRODUCT 1
# define R300_VE_MULTIPLY 2
# define R300_VE_ADD 3
+# define R300_VE_MAXIMUM 7
+# define R300_VE_SET_LESS_THAN 10
+#define R300_PVS_DST_MATH_INST (1 << 6)
+# define R300_ME_RECIP_DX 6
#define R300_PVS_DST_MACRO_INST (1 << 7)
# define R300_PVS_MACRO_OP_2CLK_MADD 0
#define R300_PVS_DST_REG_TYPE(x) ((x) << 8)
@@ -71,6 +76,13 @@
((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \
(R300_PVS_SRC_SELECT_FORCE_1 << 6) | \
(R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13)
+#define R300_PVS_MODIFIER_X (1 << 25)
+#define R300_PVS_MODIFIER_Y (1 << 26)
+#define R300_PVS_MODIFIER_Z (1 << 27)
+#define R300_PVS_MODIFIER_W (1 << 28)
+#define R300_PVS_NEGATE_XYZW \
+ (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \
+ R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W)
static const struct tgsi_full_src_register r300_constant_zero = {
.SrcRegister.Extended = TRUE,
@@ -98,7 +110,13 @@ struct r300_vs_asm {
unsigned imm_offset;
/* Number of immediate constants. */
unsigned imm_count;
- /* Offsets into vertex output memory. */
+ /* Number of colors to write. */
+ unsigned out_colors;
+ /* Number of texcoords to write. */
+ unsigned out_texcoords;
+ /* Whether to emit point size. */
+ boolean point_size;
+ /* Tab of declared outputs to OVM outputs. */
unsigned tab[16];
};
@@ -114,7 +132,7 @@ static struct r300_vertex_shader r300_passthrough_vertex_shader = {
.instructions[0].inst3 = 0x0,
.instructions[1].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) |
R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(2) | R300_PVS_DST_WE_XYZW,
+ R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW,
.instructions[1].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW,
.instructions[1].inst2 = R300_PVS_SRC_SWIZZLE_ZERO,
@@ -133,7 +151,7 @@ static struct r300_vertex_shader r300_texture_vertex_shader = {
.instructions[0].inst3 = 0x0,
.instructions[1].inst0 = R300_PVS_DST_OPCODE(R300_VE_ADD) |
R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(6) | R300_PVS_DST_WE_XYZW,
+ R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW,
.instructions[1].inst1 = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW,
.instructions[1].inst2 = R300_PVS_SRC_SWIZZLE_ZERO,
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index 79bed03253..c9e2dff14e 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -23,15 +23,13 @@
#include "r300_surface.h"
-static void r300_surface_setup(struct pipe_context* pipe,
- struct pipe_surface* dest,
+static void r300_surface_setup(struct r300_context* r300,
+ struct r300_texture* dest,
unsigned x, unsigned y,
unsigned w, unsigned h)
{
- struct r300_context* r300 = r300_context(pipe);
- struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
- struct r300_texture* tex = (struct r300_texture*)dest->texture;
- unsigned pixpitch = tex->stride / tex->tex.block.size;
+ struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
+ unsigned pixpitch = dest->stride / dest->tex.block.size;
CS_LOCALS(r300);
r300_emit_blend_state(r300, &blend_clear_state);
@@ -39,7 +37,22 @@ static void r300_surface_setup(struct pipe_context* pipe,
r300_emit_dsa_state(r300, &dsa_clear_state);
r300_emit_rs_state(r300, &rs_clear_state);
- BEGIN_CS(15);
+ BEGIN_CS(24);
+
+ /* Viewport setup */
+ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
+ OUT_CS_32F((float)w);
+ OUT_CS_32F((float)x);
+ OUT_CS_32F((float)h);
+ OUT_CS_32F((float)y);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(0.0);
+
+ OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
+ R300_VPORT_X_OFFSET_ENA |
+ R300_VPORT_Y_SCALE_ENA |
+ R300_VPORT_Y_OFFSET_ENA |
+ R300_VTX_XY_FMT | R300_VTX_Z_FMT);
/* Pixel scissors. */
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
@@ -64,9 +77,9 @@ static void r300_surface_setup(struct pipe_context* pipe,
/* Setup colorbuffer. */
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
- OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch |
- r300_translate_colorformat(tex->tex.format));
+ r300_translate_colorformat(dest->tex.format));
OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf);
END_CS;
@@ -86,6 +99,7 @@ static void r300_surface_fill(struct pipe_context* pipe,
struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
struct r300_texture* tex = (struct r300_texture*)dest->texture;
unsigned pixpitch = tex->stride / tex->tex.block.size;
+ boolean invalid = FALSE;
CS_LOCALS(r300);
a = (float)((color >> 24) & 0xff) / 255.0f;
@@ -98,12 +112,29 @@ static void r300_surface_fill(struct pipe_context* pipe,
/* Fallback? */
if (FALSE) {
+fallback:
debug_printf("r300: Falling back on surface clear...");
util_surface_fill(pipe, dest, x, y, w, h, color);
return;
}
- r300_surface_setup(r300, dest, x, y, w, h);
+ /* Make sure our target BO is okay. */
+validate:
+ if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
+ 0, RADEON_GEM_DOMAIN_VRAM)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ if (r300->winsys->validate(r300->winsys)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ if (invalid) {
+ goto fallback;
+ }
+ invalid = TRUE;
+ goto validate;
+ }
+
+ r300_surface_setup(r300, tex, x, y, w, h);
/* Vertex shader setup */
if (caps->has_tcl) {
@@ -127,7 +158,7 @@ static void r300_surface_fill(struct pipe_context* pipe,
r300_emit_rs_block_state(r300, &r300_rs_block_clear_state);
}
- BEGIN_CS(31);
+ BEGIN_CS(26);
/* VAP stream control, mapping from input memory to PVS/RS memory */
if (caps->has_tcl) {
@@ -154,27 +185,21 @@ static void r300_surface_fill(struct pipe_context* pipe,
/* Disable textures */
OUT_CS_REG(R300_TX_ENABLE, 0x0);
- /* Viewport setup */
- OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
- OUT_CS_32F(1.0);
- OUT_CS_32F((float)x);
- OUT_CS_32F(1.0);
- OUT_CS_32F((float)y);
- OUT_CS_32F(1.0);
- OUT_CS_32F(0.0);
-
/* The size of the point we're about to draw, in sixths of pixels */
OUT_CS_REG(R300_GA_POINT_SIZE,
- ((h * 6) & R300_POINTSIZE_Y_MASK) |
+ ((h * 6) & R300_POINTSIZE_Y_MASK) |
((w * 6) << R300_POINTSIZE_X_SHIFT));
+ /* Vertex size. */
+ OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8);
+
/* Packet3 with our point vertex */
OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8);
OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
(1 << R300_PRIM_NUM_VERTICES_SHIFT));
/* Position */
- OUT_CS_32F(w / 2.0);
- OUT_CS_32F(h / 2.0);
+ OUT_CS_32F(0.5);
+ OUT_CS_32F(0.5);
OUT_CS_32F(1.0);
OUT_CS_32F(1.0);
/* Color */
@@ -183,11 +208,7 @@ static void r300_surface_fill(struct pipe_context* pipe,
OUT_CS_32F(b);
OUT_CS_32F(a);
- /* XXX figure out why this is 0xA and not 0x2 */
OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
- /* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
- R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */
END_CS;
@@ -206,6 +227,7 @@ static void r300_surface_copy(struct pipe_context* pipe,
struct r300_texture* srctex = (struct r300_texture*)src->texture;
struct r300_texture* desttex = (struct r300_texture*)dest->texture;
unsigned pixpitch = srctex->stride / srctex->tex.block.size;
+ boolean invalid = FALSE;
CS_LOCALS(r300);
debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d),"
@@ -215,18 +237,48 @@ static void r300_surface_copy(struct pipe_context* pipe,
if ((srctex == desttex) &&
((destx < srcx + w) || (srcx < destx + w)) &&
((desty < srcy + h) || (srcy < desty + h))) {
+fallback:
debug_printf("r300: Falling back on surface_copy\n");
util_surface_copy(pipe, FALSE, dest, destx, desty, src,
srcx, srcy, w, h);
}
+ /* Add our target BOs to the list. */
+validate:
+ if (!r300->winsys->add_buffer(r300->winsys, srctex->buffer,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ if (!r300->winsys->add_buffer(r300->winsys, desttex->buffer,
+ 0, RADEON_GEM_DOMAIN_VRAM)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ if (r300->winsys->validate(r300->winsys)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ if (invalid) {
+ goto fallback;
+ }
+ invalid = TRUE;
+ goto validate;
+ }
+
+ r300_surface_setup(r300, desttex, destx, desty, w, h);
+
+ /* Setup the texture. */
r300_emit_sampler(r300, &r300_sampler_copy_state, 0);
r300_emit_texture(r300, srctex, 0);
- r300_flush_textures(r300);
+
+ /* Flush and enable. */
+ BEGIN_CS(4);
+ OUT_CS_REG(R300_TX_INVALTAGS, 0);
+ OUT_CS_REG(R300_TX_ENABLE, 0x1);
+ END_CS;
/* Vertex shader setup */
if (caps->has_tcl) {
- r300_emit_vertex_shader(r300, &r300_texture_vertex_shader);
+ r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader);
} else {
BEGIN_CS(4);
OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS);
@@ -246,6 +298,7 @@ static void r300_surface_copy(struct pipe_context* pipe,
r300_emit_rs_block_state(r300, &r300_rs_block_copy_state);
}
+ BEGIN_CS(30);
/* VAP stream control, mapping from input memory to PVS/RS memory */
if (caps->has_tcl) {
OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
@@ -268,33 +321,38 @@ static void r300_surface_copy(struct pipe_context* pipe,
/* Two components of texture 0 */
OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x2);
+ /* Vertex size. */
+ OUT_CS_REG(R300_VAP_VTX_SIZE, 0x4);
+
/* Packet3 with our texcoords */
- OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8);
+ OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 16);
OUT_CS(R300_PRIM_TYPE_QUADS | R300_PRIM_WALK_RING |
(4 << R300_PRIM_NUM_VERTICES_SHIFT));
/* (x , y ) */
- OUT_CS_32F((float)destx);
- OUT_CS_32F((float)desty);
- OUT_CS_32F((float)srcx);
- OUT_CS_32F((float)srcy);
+ OUT_CS_32F((float)(destx / dest->width));
+ OUT_CS_32F((float)(desty / dest->height));
+ OUT_CS_32F((float)(srcx / dest->width));
+ OUT_CS_32F((float)(srcy / dest->height));
/* (x , y + h) */
- OUT_CS_32F((float)destx);
- OUT_CS_32F((float)(desty + h));
- OUT_CS_32F((float)srcx);
- OUT_CS_32F((float)(srcy + h));
+ OUT_CS_32F((float)(destx / dest->width));
+ OUT_CS_32F((float)((desty + h) / dest->height));
+ OUT_CS_32F((float)(srcx / dest->width));
+ OUT_CS_32F((float)((srcy + h) / dest->height));
/* (x + w, y + h) */
- OUT_CS_32F((float)(destx + w));
- OUT_CS_32F((float)(desty + h));
- OUT_CS_32F((float)(srcx + w));
- OUT_CS_32F((float)(srcy + h));
+ OUT_CS_32F((float)((destx + w) / dest->width));
+ OUT_CS_32F((float)((desty + h) / dest->height));
+ OUT_CS_32F((float)((srcx + w) / dest->width));
+ OUT_CS_32F((float)((srcy + h) / dest->height));
/* (x + w, y ) */
- OUT_CS_32F((float)(destx + w));
- OUT_CS_32F((float)desty);
- OUT_CS_32F((float)(srcx + w));
- OUT_CS_32F((float)srcy);
+ OUT_CS_32F((float)((destx + w) / dest->width));
+ OUT_CS_32F((float)(desty / dest->height));
+ OUT_CS_32F((float)((srcx + w) / dest->width));
+ OUT_CS_32F((float)(srcy / dest->height));
OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
+ END_CS;
+
r300->dirty_hw++;
}
diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h
index 894def07aa..9a4c39f58b 100644
--- a/src/gallium/drivers/r300/r300_surface.h
+++ b/src/gallium/drivers/r300/r300_surface.h
@@ -101,7 +101,7 @@ static struct r300_rs_block r300_rs_block_copy_state = {
R500_RS_SEL_Q(R300_RS_SEL_K1),
.inst[0] = R300_RS_INST_COL_CN_WRITE,
.count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN,
- .inst_count = R300_RS_TX_OFFSET(6),
+ .inst_count = R300_RS_TX_OFFSET(0),
};
static struct r300_rs_block r500_rs_block_copy_state = {
@@ -111,7 +111,7 @@ static struct r300_rs_block r500_rs_block_copy_state = {
R500_RS_SEL_Q(R500_RS_IP_PTR_K1),
.inst[0] = R500_RS_INST_TEX_CN_WRITE,
.count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN,
- .inst_count = R300_RS_TX_OFFSET(6),
+ .inst_count = R300_RS_TX_OFFSET(0),
};
static struct r300_sampler_state r300_sampler_copy_state = {
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index fe91f4e184..5ea9f56247 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -86,8 +86,6 @@ static struct pipe_texture*
r300_texture_create(struct pipe_screen* screen,
const struct pipe_texture* template)
{
- /* XXX struct r300_screen* r300screen = r300_screen(screen); */
-
struct r300_texture* tex = CALLOC_STRUCT(r300_texture);
if (!tex) {
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index baa95282c3..d2893c3b9d 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -35,8 +35,6 @@ extern "C" {
#include "pipe/p_state.h"
#include "pipe/internal/p_winsys_screen.h"
-struct radeon_cs;
-
struct r300_winsys {
/* Parent class */
struct pipe_winsys base;
@@ -47,42 +45,50 @@ struct r300_winsys {
/* PCI ID */
uint32_t pci_id;
- /* GB pipe count */
- uint32_t gb_pipes;
+ /* GART size. */
+ uint32_t gart_size;
+
+ /* VRAM size. */
+ uint32_t vram_size;
+
+ /* Add a pipe_buffer to the list of buffer objects to validate. */
+ boolean (*add_buffer)(struct r300_winsys* winsys,
+ struct pipe_buffer* pbuffer,
+ uint32_t rd,
+ uint32_t wd);
- /* CS object. This is very much like Intel's batchbuffer.
- * Fill it full of dwords and relocs and then submit.
- * Repeat as needed. */
- struct radeon_cs* cs;
+ /* Revalidate all currently setup pipe_buffers.
+ * Returns TRUE if a flush is required. */
+ boolean (*validate)(struct r300_winsys* winsys);
/* Check to see if there's room for commands. */
- boolean (*check_cs)(struct radeon_cs* cs, int size);
+ boolean (*check_cs)(struct r300_winsys* winsys, int size);
/* Start a command emit. */
- void (*begin_cs)(struct radeon_cs* cs,
- int size,
- const char* file,
- const char* function,
- int line);
+ void (*begin_cs)(struct r300_winsys* winsys,
+ int size,
+ const char* file,
+ const char* function,
+ int line);
/* Write a dword to the command buffer. */
- void (*write_cs_dword)(struct radeon_cs* cs, uint32_t dword);
+ void (*write_cs_dword)(struct r300_winsys* winsys, uint32_t dword);
/* Write a relocated dword to the command buffer. */
- void (*write_cs_reloc)(struct radeon_cs* cs,
- struct pipe_buffer* bo,
- uint32_t rd,
- uint32_t wd,
- uint32_t flags);
+ void (*write_cs_reloc)(struct r300_winsys* winsys,
+ struct pipe_buffer* bo,
+ uint32_t rd,
+ uint32_t wd,
+ uint32_t flags);
/* Finish a command emit. */
- void (*end_cs)(struct radeon_cs* cs,
- const char* file,
- const char* function,
- int line);
+ void (*end_cs)(struct r300_winsys* winsys,
+ const char* file,
+ const char* function,
+ int line);
/* Flush the CS. */
- void (*flush_cs)(struct radeon_cs* cs);
+ void (*flush_cs)(struct r300_winsys* winsys);
};
struct pipe_context* r300_create_context(struct pipe_screen* screen,