diff options
Diffstat (limited to 'src/gallium/drivers/r300')
37 files changed, 1781 insertions, 1026 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index e44f9b9dfc..faceec9842 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -4,20 +4,22 @@ include $(TOP)/configs/current LIBNAME = r300 C_SOURCES = \ + r3xx_fs.c \ + r5xx_fs.c \ r300_chipset.c \ r300_clear.c \ r300_context.c \ r300_debug.c \ r300_emit.c \ r300_flush.c \ + r300_fs.c \ r300_query.c \ r300_render.c \ r300_screen.c \ r300_state.c \ r300_state_derived.c \ r300_state_invariant.c \ - r300_state_shader.c \ - r300_state_tcl.c \ + r300_vs.c \ r300_surface.c \ r300_texture.c diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 182ed2d459..493d7b28bc 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -5,20 +5,22 @@ env = env.Clone() r300 = env.ConvenienceLibrary( target = 'r300', source = [ + 'r3xx_fs.c', + 'r5xx_fs.c', 'r300_chipset.c', 'r300_clear.c', 'r300_context.c', 'r300_debug.c', 'r300_emit.c', 'r300_flush.c', + 'r300_fs.c', 'r300_query.c', 'r300_render.c', 'r300_screen.c', 'r300_state.c', 'r300_state_derived.c', 'r300_state_invariant.c', - 'r300_state_shader.c', - 'r300_state_tcl.c', + 'r300_vs.c', 'r300_surface.c', 'r300_texture.c', ]) diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 9d95ad918c..00fae8d26f 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -34,7 +34,6 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->is_r500 = FALSE; caps->num_vert_fpus = 4; - /* Note: These are not ordered by PCI ID. I leave that task to GCC, * which will perform the ordering while collating jump tables. Instead, * I've tried to group them according to capabilities and age. */ @@ -150,6 +149,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 6; break; + case 0x4B48: case 0x4B49: case 0x4B4A: case 0x4B4B: @@ -349,7 +349,4 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->pci_id); break; } - - /* XXX SW TCL is broken so no forcing it off right now - caps->has_tcl = FALSE; */ } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 21eebeae60..5b2e1f0568 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -34,8 +34,6 @@ struct r300_capabilities { int family; /* The number of vertex floating-point units */ int num_vert_fpus; - /* The number of fragment pipes */ - int num_frag_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; /* Whether or not this is an RV515 or newer; R500s have many differences diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 6bdf544a05..233a32b53c 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -34,10 +34,6 @@ static boolean r300_draw_range_elements(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); int i; - if (r300->dirty_state) { - r300_emit_dirty_state(r300); - } - for (i = 0; i < r300->vertex_buffer_count; i++) { void* buf = pipe_buffer_map(pipe->screen, r300->vertex_buffers[i].buffer, @@ -133,7 +129,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300) return NULL; - /* XXX this could be refactored now? */ r300->winsys = r300_winsys; r300->context.winsys = (struct pipe_winsys*)r300_winsys; @@ -150,8 +145,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->context.is_texture_referenced = r300_is_texture_referenced; r300->context.is_buffer_referenced = r300_is_buffer_referenced; + /* Create a Draw. This is used for vert collation and SW TCL. */ r300->draw = draw_create(); + /* Enable our renderer. */ draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300)); + /* Disable Draw's clipping if TCL is present. */ + draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl); + /* Force Draw to never do viewport transform, since (again) we can do + * transform in hardware, always. */ + draw_set_viewport_state(r300->draw, &r300_viewport_identity); r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 6f62998b35..d891fd6265 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -63,6 +63,11 @@ struct r300_rs_state { /* Draw-specific rasterizer state */ struct pipe_rasterizer_state rs; + /* Whether or not to enable the VTE. This is referenced at the very + * last moment during emission of VTE state, to decide whether or not + * the VTE should be used for transformation. */ + boolean enable_vte; + uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ @@ -112,23 +117,24 @@ struct r300_viewport_state { uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */ }; -#define R300_NEW_BLEND 0x0000001 -#define R300_NEW_BLEND_COLOR 0x0000002 -#define R300_NEW_CONSTANTS 0x0000004 -#define R300_NEW_DSA 0x0000008 -#define R300_NEW_FRAMEBUFFERS 0x0000010 -#define R300_NEW_FRAGMENT_SHADER 0x0000020 -#define R300_NEW_RASTERIZER 0x0000040 -#define R300_NEW_RS_BLOCK 0x0000080 -#define R300_NEW_SAMPLER 0x0000100 -#define R300_ANY_NEW_SAMPLERS 0x000ff00 -#define R300_NEW_SCISSOR 0x0010000 -#define R300_NEW_TEXTURE 0x0020000 -#define R300_ANY_NEW_TEXTURES 0x1fe0000 -#define R300_NEW_VERTEX_FORMAT 0x2000000 -#define R300_NEW_VERTEX_SHADER 0x4000000 -#define R300_NEW_VIEWPORT 0x8000000 -#define R300_NEW_KITCHEN_SINK 0xfffffff +#define R300_NEW_BLEND 0x00000001 +#define R300_NEW_BLEND_COLOR 0x00000002 +#define R300_NEW_CLIP 0x00000004 +#define R300_NEW_CONSTANTS 0x00000008 +#define R300_NEW_DSA 0x00000010 +#define R300_NEW_FRAMEBUFFERS 0x00000020 +#define R300_NEW_FRAGMENT_SHADER 0x00000040 +#define R300_NEW_RASTERIZER 0x00000080 +#define R300_NEW_RS_BLOCK 0x00000100 +#define R300_NEW_SAMPLER 0x00000200 +#define R300_ANY_NEW_SAMPLERS 0x0001fe00 +#define R300_NEW_SCISSOR 0x00020000 +#define R300_NEW_TEXTURE 0x00040000 +#define R300_ANY_NEW_TEXTURES 0x03fc0000 +#define R300_NEW_VERTEX_FORMAT 0x04000000 +#define R300_NEW_VERTEX_SHADER 0x08000000 +#define R300_NEW_VIEWPORT 0x10000000 +#define R300_NEW_KITCHEN_SINK 0x1fffffff /* The next several objects are not pure Radeon state; they inherit from * various Gallium classes. */ @@ -136,14 +142,14 @@ struct r300_viewport_state { struct r300_constant_buffer { /* Buffer of constants */ /* XXX first number should be raised */ - float constants[8][4]; + float constants[32][4]; /* Number of user-defined constants */ - int user_count; + unsigned user_count; /* Total number of constants */ - int count; + unsigned count; }; -struct r3xx_fragment_shader { +struct r300_fragment_shader { /* Parent class */ struct pipe_shader_state state; struct tgsi_shader_info info; @@ -153,11 +159,15 @@ struct r3xx_fragment_shader { /* Pixel stack size */ int stack_size; + + /* Are there immediates in this shader? + * If not, we can heavily optimize recompilation. */ + boolean uses_imms; }; -struct r300_fragment_shader { +struct r3xx_fragment_shader { /* Parent class */ - struct r3xx_fragment_shader shader; + struct r300_fragment_shader shader; /* Number of ALU instructions */ int alu_instruction_count; @@ -180,9 +190,9 @@ struct r300_fragment_shader { } instructions[64]; /* XXX magic num */ }; -struct r500_fragment_shader { +struct r5xx_fragment_shader { /* Parent class */ - struct r3xx_fragment_shader shader; + struct r300_fragment_shader shader; /* Number of used instructions */ int instruction_count; @@ -207,7 +217,7 @@ struct r300_texture { /* Stride (pitch?) of this texture in bytes */ unsigned stride; - + /* Total size of this texture, in bytes. */ unsigned size; @@ -243,6 +253,10 @@ struct r300_vertex_shader { /* Has this shader been translated yet? */ boolean translated; + /* Are there immediates in this shader? + * If not, we can heavily optimize recompilation. */ + boolean uses_imms; + /* Number of used instructions */ int instruction_count; @@ -255,6 +269,11 @@ struct r300_vertex_shader { } instructions[128]; /*< XXX magic number */ }; +static struct pipe_viewport_state r300_viewport_identity = { + .scale = {1.0, 1.0, 1.0, 1.0}, + .translate = {0.0, 0.0, 0.0, 0.0}, +}; + struct r300_context { /* Parent class */ struct pipe_context context; @@ -264,17 +283,24 @@ struct r300_context { /* Draw module. Used mostly for SW TCL. */ struct draw_context* draw; + /* Vertex buffer for rendering. */ + struct pipe_buffer* vbo; + /* Offset into the VBO. */ + size_t vbo_offset; + /* Various CSO state objects. */ /* Blend state. */ struct r300_blend_state* blend_state; /* Blend color state. */ struct r300_blend_color_state* blend_color_state; + /* User clip planes. */ + struct pipe_clip_state clip_state; /* Shader constants. */ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ struct r300_dsa_state* dsa_state; /* Fragment shader. */ - struct r3xx_fragment_shader* fs; + struct r300_fragment_shader* fs; /* Framebuffer state. We currently don't need our own version of this. */ struct pipe_framebuffer_state framebuffer_state; /* Rasterizer state. */ @@ -289,7 +315,7 @@ struct r300_context { /* Texture states. */ struct r300_texture* textures[8]; int texture_count; - /* Vertex buffers. */ + /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; /* Vertex information. */ @@ -305,7 +331,8 @@ struct r300_context { }; /* Convenience cast wrapper. */ -static struct r300_context* r300_context(struct pipe_context* context) { +static INLINE struct r300_context* r300_context(struct pipe_context* context) +{ return (struct r300_context*)context; } diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 82a3942248..71b142c0db 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -34,6 +34,7 @@ #define MAX_CS_SIZE 64 * 1024 / 4 +#define VERY_VERBOSE_CS 0 #define VERY_VERBOSE_REGISTERS 0 /* XXX stolen from radeon_drm.h */ @@ -56,8 +57,10 @@ #define BEGIN_CS(size) do { \ CHECK_CS(size); \ - debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ - size, __FUNCTION__, __FILE__, __LINE__); \ + if (VERY_VERBOSE_CS) { \ + debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ + size, __FUNCTION__, __FILE__, __LINE__); \ + } \ cs_winsys->begin_cs(cs_winsys, (size), \ __FILE__, __FUNCTION__, __LINE__); \ cs_count = size; \ @@ -93,8 +96,9 @@ } while (0) #define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ - debug_printf("r300: writing relocation for buffer %p, offset %d\n", \ - bo, offset); \ + debug_printf("r300: writing relocation for buffer %p, offset %d, " \ + "domains (%d, %d, %d)\n", \ + bo, offset, rd, wd, flags); \ assert(bo); \ OUT_CS(offset); \ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ @@ -102,16 +106,20 @@ } while (0) #define END_CS do { \ - debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, __FILE__, \ - __LINE__); \ + if (VERY_VERBOSE_CS) { \ + debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ + __FILE__, __LINE__); \ + } \ if (cs_count != 0) \ debug_printf("r300: Warning: cs_count off by %d\n", cs_count); \ cs_winsys->end_cs(cs_winsys, __FILE__, __FUNCTION__, __LINE__); \ } while (0) #define FLUSH_CS do { \ - debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, __FILE__, \ - __LINE__); \ + if (VERY_VERBOSE_CS) { \ + debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ + __FILE__, __LINE__); \ + } \ cs_winsys->flush_cs(cs_winsys); \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index dd63136c9d..c83e8526cf 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -22,7 +22,7 @@ #include "r300_debug.h" -static void r300_dump_fs(struct r300_fragment_shader* fs) +void r3xx_dump_fs(struct r3xx_fragment_shader* fs) { int i; @@ -30,82 +30,7 @@ static void r300_dump_fs(struct r300_fragment_shader* fs) } } -static char* r500_fs_swiz[] = { - " R", - " G", - " B", - " A", - " 0", - ".5", - " 1", - " U", -}; - -static char* r500_fs_op_rgb[] = { - "MAD", - "DP3", - "DP4", - "D2A", - "MIN", - "MAX", - "---", - "CND", - "CMP", - "FRC", - "SOP", - "MDH", - "MDV", -}; - -static char* r500_fs_op_alpha[] = { - "MAD", - " DP", - "MIN", - "MAX", - "---", - "CND", - "CMP", - "FRC", - "EX2", - "LN2", - "RCP", - "RSQ", - "SIN", - "COS", - "MDH", - "MDV", -}; - -static char* r500_fs_mask[] = { - "NONE", - "R ", - " G ", - "RG ", - " B ", - "R B ", - " GB ", - "RGB ", - " A", - "R A", - " G A", - "RG A", - " BA", - "R BA", - " GBA", - "RGBA", -}; - -static char* r500_fs_tex[] = { - " NOP", - " LD", - "TEXKILL", - " PROJ", - "LODBIAS", - " LOD", - " DXDY", -}; - -void r500_fs_dump(struct r500_fragment_shader* fs) +void r5xx_fs_dump(struct r5xx_fragment_shader* fs) { int i; uint32_t inst; @@ -133,8 +58,8 @@ void r500_fs_dump(struct r500_fragment_shader* fs) inst & R500_INST_NOP ? "NOP" : "", inst & R500_INST_ALU_WAIT ? "ALU_WAIT" : ""); debug_printf("wmask: %s omask: %s\n", - r500_fs_mask[(inst >> 11) & 0xf], - r500_fs_mask[(inst >> 15) & 0xf]); + r5xx_fs_mask[(inst >> 11) & 0xf], + r5xx_fs_mask[(inst >> 15) & 0xf]); switch (inst & 0x3) { case R500_INST_TYPE_ALU: case R500_INST_TYPE_OUT: @@ -160,36 +85,36 @@ void r500_fs_dump(struct r500_fragment_shader* fs) debug_printf(" 3: RGB_INST 0x%08x:", inst); debug_printf("rgb_A_src:%d %s/%s/%s %d " "rgb_B_src:%d %s/%s/%s %d\n", - inst & 0x3, r500_fs_swiz[(inst >> 2) & 0x7], - r500_fs_swiz[(inst >> 5) & 0x7], - r500_fs_swiz[(inst >> 8) & 0x7], + inst & 0x3, r5xx_fs_swiz[(inst >> 2) & 0x7], + r5xx_fs_swiz[(inst >> 5) & 0x7], + r5xx_fs_swiz[(inst >> 8) & 0x7], (inst >> 11) & 0x3, (inst >> 13) & 0x3, - r500_fs_swiz[(inst >> 15) & 0x7], - r500_fs_swiz[(inst >> 18) & 0x7], - r500_fs_swiz[(inst >> 21) & 0x7], + r5xx_fs_swiz[(inst >> 15) & 0x7], + r5xx_fs_swiz[(inst >> 18) & 0x7], + r5xx_fs_swiz[(inst >> 21) & 0x7], (inst >> 24) & 0x3); inst = fs->instructions[i].inst4; debug_printf(" 4: ALPHA_INST 0x%08x:", inst); debug_printf("%s dest:%d%s alp_A_src:%d %s %d " "alp_B_src:%d %s %d w:%d\n", - r500_fs_op_alpha[inst & 0xf], (inst >> 4) & 0x7f, + r5xx_fs_op_alpha[inst & 0xf], (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, - r500_fs_swiz[(inst >> 14) & 0x7], (inst >> 17) & 0x3, - (inst >> 19) & 0x3, r500_fs_swiz[(inst >> 21) & 0x7], + r5xx_fs_swiz[(inst >> 14) & 0x7], (inst >> 17) & 0x3, + (inst >> 19) & 0x3, r5xx_fs_swiz[(inst >> 21) & 0x7], (inst >> 24) & 0x3, (inst >> 31) & 0x1); inst = fs->instructions[i].inst5; debug_printf(" 5: RGBA_INST 0x%08x:", inst); debug_printf("%s dest:%d%s rgb_C_src:%d %s/%s/%s %d " "alp_C_src:%d %s %d\n", - r500_fs_op_rgb[inst & 0xf], (inst >> 4) & 0x7f, + r5xx_fs_op_rgb[inst & 0xf], (inst >> 4) & 0x7f, inst & (1 << 11) ? "(rel)":"", (inst >> 12) & 0x3, - r500_fs_swiz[(inst >> 14) & 0x7], - r500_fs_swiz[(inst >> 17) & 0x7], - r500_fs_swiz[(inst >> 20) & 0x7], + r5xx_fs_swiz[(inst >> 14) & 0x7], + r5xx_fs_swiz[(inst >> 17) & 0x7], + r5xx_fs_swiz[(inst >> 20) & 0x7], (inst >> 23) & 0x3, (inst >> 25) & 0x3, - r500_fs_swiz[(inst >> 27) & 0x7], (inst >> 30) & 0x3); + r5xx_fs_swiz[(inst >> 27) & 0x7], (inst >> 30) & 0x3); break; case R500_INST_TYPE_FC: /* XXX don't even bother yet */ @@ -199,7 +124,7 @@ void r500_fs_dump(struct r500_fragment_shader* fs) debug_printf(" 1: TEX_INST 0x%08x: id: %d " "op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, - r500_fs_tex[(inst >> 22) & 0x7], + r5xx_fs_tex[(inst >> 22) & 0x7], (inst & (1 << 25)) ? "ACQ" : "", (inst & (1 << 26)) ? "IGNUNC" : "", (inst & (1 << 27)) ? "UNSCALED" : "SCALED"); @@ -208,15 +133,15 @@ void r500_fs_dump(struct r500_fragment_shader* fs) debug_printf(" 2: TEX_ADDR 0x%08x: " "src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, inst & 0x7f, inst & (1 << 7) ? "(rel)" : "", - r500_fs_swiz[(inst >> 8) & 0x3], - r500_fs_swiz[(inst >> 10) & 0x3], - r500_fs_swiz[(inst >> 12) & 0x3], - r500_fs_swiz[(inst >> 14) & 0x3], + r5xx_fs_swiz[(inst >> 8) & 0x3], + r5xx_fs_swiz[(inst >> 10) & 0x3], + r5xx_fs_swiz[(inst >> 12) & 0x3], + r5xx_fs_swiz[(inst >> 14) & 0x3], (inst >> 16) & 0x7f, inst & (1 << 23) ? "(rel)" : "", - r500_fs_swiz[(inst >> 24) & 0x3], - r500_fs_swiz[(inst >> 26) & 0x3], - r500_fs_swiz[(inst >> 28) & 0x3], - r500_fs_swiz[(inst >> 30) & 0x3]); + r5xx_fs_swiz[(inst >> 24) & 0x3], + r5xx_fs_swiz[(inst >> 26) & 0x3], + r5xx_fs_swiz[(inst >> 28) & 0x3], + r5xx_fs_swiz[(inst >> 30) & 0x3]); inst = fs->instructions[i].inst3; debug_printf(" 3: TEX_DXDY 0x%08x\n", inst); @@ -225,14 +150,49 @@ void r500_fs_dump(struct r500_fragment_shader* fs) } } +static void r300_vs_op_dump(uint32_t op) +{ + debug_printf(" dst: %d%s op: ", + (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if (op & 0x80) { + if (op & 0x1) { + debug_printf("PVS_MACRO_OP_2CLK_M2X_ADD\n"); + } else { + debug_printf(" PVS_MACRO_OP_2CLK_MADD\n"); + } + } else if (op & 0x40) { + debug_printf("%s\n", r300_vs_me_ops[op & 0x1f]); + } else { + debug_printf("%s\n", r300_vs_ve_ops[op & 0x1f]); + } +} + +void r300_vs_src_dump(uint32_t src) +{ + debug_printf(" reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", + (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], + src & (1 << 25) ? "-" : " ", + r300_vs_swiz_debug[(src >> 13) & 0x7], + src & (1 << 26) ? "-" : " ", + r300_vs_swiz_debug[(src >> 16) & 0x7], + src & (1 << 27) ? "-" : " ", + r300_vs_swiz_debug[(src >> 19) & 0x7], + src & (1 << 28) ? "-" : " ", + r300_vs_swiz_debug[(src >> 22) & 0x7]); +} + void r300_vs_dump(struct r300_vertex_shader* vs) { int i; for (i = 0; i < vs->instruction_count; i++) { - debug_printf("inst0: 0x%x\n", vs->instructions[i].inst0); - debug_printf("inst1: 0x%x\n", vs->instructions[i].inst1); - debug_printf("inst2: 0x%x\n", vs->instructions[i].inst2); - debug_printf("inst3: 0x%x\n", vs->instructions[i].inst3); + debug_printf("%d: op: 0x%08x", i, vs->instructions[i].inst0); + r300_vs_op_dump(vs->instructions[i].inst0); + debug_printf(" src0: 0x%08x", vs->instructions[i].inst1); + r300_vs_src_dump(vs->instructions[i].inst1); + debug_printf(" src1: 0x%08x", vs->instructions[i].inst2); + r300_vs_src_dump(vs->instructions[i].inst2); + debug_printf(" src2: 0x%08x", vs->instructions[i].inst3); + r300_vs_src_dump(vs->instructions[i].inst3); } } diff --git a/src/gallium/drivers/r300/r300_debug.h b/src/gallium/drivers/r300/r300_debug.h index a1f873656d..6b58c1e250 100644 --- a/src/gallium/drivers/r300/r300_debug.h +++ b/src/gallium/drivers/r300/r300_debug.h @@ -24,10 +24,187 @@ #define R300_DEBUG_H #include "r300_reg.h" -#include "r300_state_shader.h" -#include "r300_state_tcl.h" +#include "r300_fs.h" +#include "r300_vs.h" -void r500_fs_dump(struct r500_fragment_shader* fs); +static char* r5xx_fs_swiz[] = { + " R", + " G", + " B", + " A", + " 0", + ".5", + " 1", + " U", +}; + +static char* r5xx_fs_op_rgb[] = { + "MAD", + "DP3", + "DP4", + "D2A", + "MIN", + "MAX", + "---", + "CND", + "CMP", + "FRC", + "SOP", + "MDH", + "MDV", +}; + +static char* r5xx_fs_op_alpha[] = { + "MAD", + " DP", + "MIN", + "MAX", + "---", + "CND", + "CMP", + "FRC", + "EX2", + "LN2", + "RCP", + "RSQ", + "SIN", + "COS", + "MDH", + "MDV", +}; + +static char* r5xx_fs_mask[] = { + "NONE", + "R ", + " G ", + "RG ", + " B ", + "R B ", + " GB ", + "RGB ", + " A", + "R A", + " G A", + "RG A", + " BA", + "R BA", + " GBA", + "RGBA", +}; + +static char* r5xx_fs_tex[] = { + " NOP", + " LD", + "TEXKILL", + " PROJ", + "LODBIAS", + " LOD", + " DXDY", +}; + +static char* r300_vs_ve_ops[] = { + /* R300 vector ops */ + " VE_NO_OP", + " VE_DOT_PRODUCT", + " VE_MULTIPLY", + " VE_ADD", + " VE_MULTIPLY_ADD", + " VE_DISTANCE_FACTOR", + " VE_FRACTION", + " VE_MAXIMUM", + " VE_MINIMUM", + "VE_SET_GREATER_THAN_EQUAL", + " VE_SET_LESS_THAN", + " VE_MULTIPLYX2_ADD", + " VE_MULTIPLY_CLAMP", + " VE_FLT2FIX_DX", + " VE_FLT2FIX_DX_RND", + /* R500 vector ops */ + " VE_PRED_SET_EQ_PUSH", + " VE_PRED_SET_GT_PUSH", + " VE_PRED_SET_GTE_PUSH", + " VE_PRED_SET_NEQ_PUSH", + " VE_COND_WRITE_EQ", + " VE_COND_WRITE_GT", + " VE_COND_WRITE_GTE", + " VE_COND_WRITE_NEQ", + " VE_SET_GREATER_THAN", + " VE_SET_EQUAL", + " VE_SET_NOT_EQUAL", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +static char* r300_vs_me_ops[] = { + /* R300 math ops */ + " ME_NO_OP", + " ME_EXP_BASE2_DX", + " ME_LOG_BASE2_DX", + " ME_EXP_BASEE_FF", + " ME_LIGHT_COEFF_DX", + " ME_POWER_FUNC_FF", + " ME_RECIP_DX", + " ME_RECIP_FF", + " ME_RECIP_SQRT_DX", + " ME_RECIP_SQRT_FF", + " ME_MULTIPLY", + " ME_EXP_BASE2_FULL_DX", + " ME_LOG_BASE2_FULL_DX", + " ME_POWER_FUNC_FF_CLAMP_B", + "ME_POWER_FUNC_FF_CLAMP_B1", + "ME_POWER_FUNC_FF_CLAMP_01", + " ME_SIN", + " ME_COS", + /* R500 math ops */ + " ME_LOG_BASE2_IEEE", + " ME_RECIP_IEEE", + " ME_RECIP_SQRT_IEEE", + " ME_PRED_SET_EQ", + " ME_PRED_SET_GT", + " ME_PRED_SET_GTE", + " ME_PRED_SET_NEQ", + " ME_PRED_SET_CLR", + " ME_PRED_SET_INV", + " ME_PRED_SET_POP", + " ME_PRED_SET_RESTORE", + " (reserved)", + " (reserved)", + " (reserved)", +}; + +/* XXX refactor to avoid clashing symbols */ +static char* r300_vs_src_debug[] = { + "t", + "i", + "c", + "a", +}; + +static char* r300_vs_dst_debug[] = { + "t", + "a0", + "o", + "ox", + "a", + "i", + "u", + "u", +}; + +static char* r300_vs_swiz_debug[] = { + "X", + "Y", + "Z", + "W", + "0", + "1", + "U", + "U", +}; + +void r5xx_fs_dump(struct r5xx_fragment_shader* fs); +void r3xx_dump_fs(struct r3xx_fragment_shader* fs); void r300_vs_dump(struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 01bac5f759..ac510ffc2e 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -56,6 +56,36 @@ void r300_emit_blend_color_state(struct r300_context* r300, } } +void r300_emit_clip_state(struct r300_context* r300, + struct pipe_clip_state* clip) +{ + int i; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + CS_LOCALS(r300); + + if (r300screen->caps->has_tcl) { + BEGIN_CS(5 + (6 * 4)); + OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300screen->caps->is_r500 ? + R500_PVS_UCP_START : R300_PVS_UCP_START)); + OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); + for (i = 0; i < 6; i++) { + OUT_CS_32F(clip->ucp[i][0]); + OUT_CS_32F(clip->ucp[i][1]); + OUT_CS_32F(clip->ucp[i][2]); + OUT_CS_32F(clip->ucp[i][3]); + } + OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) | + R300_PS_UCP_MODE_CLIP_AS_TRIFAN); + END_CS; + } else { + BEGIN_CS(2); + OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + END_CS; + } + +} + void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa) { @@ -80,7 +110,7 @@ void r300_emit_dsa_state(struct r300_context* r300, } void r300_emit_fragment_shader(struct r300_context* r300, - struct r300_fragment_shader* fs) + struct r3xx_fragment_shader* fs) { int i; CS_LOCALS(r300); @@ -112,7 +142,7 @@ void r300_emit_fragment_shader(struct r300_context* r300, } void r500_emit_fragment_shader(struct r300_context* r300, - struct r500_fragment_shader* fs) + struct r5xx_fragment_shader* fs) { int i; struct r300_constant_buffer* constants = @@ -163,6 +193,7 @@ void r300_emit_fb_state(struct r300_context* r300, BEGIN_CS((8 * fb->nr_cbufs) + (fb->zsbuf ? 8 : 0) + 4); for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; + assert(tex && tex->buffer && "cbuf is marked, but NULL!"); pixpitch = tex->stride / tex->tex.block.size; OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); @@ -177,7 +208,8 @@ void r300_emit_fb_state(struct r300_context* r300, if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; - pixpitch = (tex->stride / tex->tex.block.size); + assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + pixpitch = tex->stride / tex->tex.block.size; OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); @@ -234,7 +266,7 @@ void r300_emit_rs_block_state(struct r300_context* r300, } for (i = 0; i < 8; i++) { OUT_CS(rs->ip[i]); - debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); + /* debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); */ } OUT_CS_REG_SEQ(R300_RS_COUNT, 2); @@ -248,27 +280,15 @@ void r300_emit_rs_block_state(struct r300_context* r300, } for (i = 0; i < 8; i++) { OUT_CS(rs->inst[i]); - debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); + /* debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); */ } - debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count, - rs->inst_count); + /* debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count, + * rs->inst_count); */ END_CS; } -void r300_emit_sampler(struct r300_context* r300, - struct r300_sampler_state* sampler, unsigned offset) -{ - CS_LOCALS(r300); - - BEGIN_CS(6); - OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0); - OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); - OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); - END_CS; -} - void r300_emit_scissor_state(struct r300_context* r300, struct r300_scissor_state* scissor) { @@ -282,11 +302,17 @@ void r300_emit_scissor_state(struct r300_context* r300, } void r300_emit_texture(struct r300_context* r300, - struct r300_texture* tex, unsigned offset) + struct r300_sampler_state* sampler, + struct r300_texture* tex, + unsigned offset) { CS_LOCALS(r300); - BEGIN_CS(10); + BEGIN_CS(16); + OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0); + OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); + OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); + OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0); OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); @@ -296,6 +322,30 @@ void r300_emit_texture(struct r300_context* r300, END_CS; } +void r300_emit_vertex_buffer(struct r300_context* r300) +{ + CS_LOCALS(r300); + + debug_printf("r300: Preparing vertex buffer %p for render, " + "vertex size %d\n", r300->vbo, + r300->vertex_info.vinfo.size); + /* Set the pointer to our vertex buffer. The emitted values are this: + * PACKET3 [3D_LOAD_VBPNTR] + * COUNT [1] + * FORMAT [size | stride << 8] + * OFFSET [offset into BO] + * VBPNTR [relocated BO] + */ + BEGIN_CS(7); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); + OUT_CS(1); + OUT_CS(r300->vertex_info.vinfo.size | + (r300->vertex_info.vinfo.size << 8)); + OUT_CS(r300->vbo_offset); + OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_CS; +} + void r300_emit_vertex_format_state(struct r300_context* r300) { int i; @@ -310,22 +360,22 @@ void r300_emit_vertex_format_state(struct r300_context* r300) OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); OUT_CS(r300->vertex_info.vinfo.hwfmt[2]); OUT_CS(r300->vertex_info.vinfo.hwfmt[3]); - for (i = 0; i < 4; i++) { - debug_printf("hwfmt%d: 0x%08x\n", i, - r300->vertex_info.vinfo.hwfmt[i]); - } + /* for (i = 0; i < 4; i++) { + * debug_printf("hwfmt%d: 0x%08x\n", i, + * r300->vertex_info.vinfo.hwfmt[i]); + * } */ OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); for (i = 0; i < 8; i++) { OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]); - debug_printf("prog_stream_cntl%d: 0x%08x\n", i, - r300->vertex_info.vap_prog_stream_cntl[i]); + /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i, + * r300->vertex_info.vap_prog_stream_cntl[i]); */ } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); for (i = 0; i < 8; i++) { OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]); - debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, - r300->vertex_info.vap_prog_stream_cntl_ext[i]); + /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i, + * r300->vertex_info.vap_prog_stream_cntl_ext[i]); */ } END_CS; } @@ -346,17 +396,22 @@ void r300_emit_vertex_shader(struct r300_context* r300, } if (constants->count) { - BEGIN_CS(16 + (vs->instruction_count * 4) + (constants->count * 4)); + BEGIN_CS(14 + (vs->instruction_count * 4) + (constants->count * 4)); } else { - BEGIN_CS(13 + (vs->instruction_count * 4) + (constants->count * 4)); + BEGIN_CS(11 + (vs->instruction_count * 4)); } - OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) | + /* R300_VAP_PVS_CODE_CNTL_0 + * R300_VAP_PVS_CONST_CNTL + * R300_VAP_PVS_CODE_CNTL_1 + * See the r5xx docs for instructions on how to use these. + * XXX these could be optimized to select better values... */ + OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3); + OUT_CS(R300_PVS_FIRST_INST(0) | + R300_PVS_XYZW_VALID_INST(vs->instruction_count - 1) | R300_PVS_LAST_INST(vs->instruction_count - 1)); - OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, vs->instruction_count - 1); - - /* XXX */ - OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, 0x0); + OUT_CS(R300_PVS_MAX_CONST_ADDR(constants->count - 1)); + OUT_CS(vs->instruction_count - 1); OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, vs->instruction_count * 4); @@ -386,7 +441,6 @@ void r300_emit_vertex_shader(struct r300_context* r300, R300_PVS_VF_MAX_VTX_NUM(12)); OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); END_CS; - } void r300_emit_viewport_state(struct r300_context* r300, @@ -403,7 +457,11 @@ void r300_emit_viewport_state(struct r300_context* r300, OUT_CS_32F(viewport->zscale); OUT_CS_32F(viewport->zoffset); - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + if (r300->rs_state->enable_vte) { + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + } else { + OUT_CS_REG(R300_VAP_VTE_CNTL, 0); + } END_CS; } @@ -421,23 +479,67 @@ void r300_flush_textures(struct r300_context* r300) void r300_emit_dirty_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); - int i; - int dirty_tex = 0; + struct r300_texture* tex; + int i, dirty_tex = 0; + boolean invalid = FALSE; - if (!(r300->dirty_hw)) { + if (!(r300->dirty_state)) { return; } r300_update_derived_state(r300); /* XXX check size */ - struct r300_texture* fb_tex = - (struct r300_texture*)r300->framebuffer_state.cbufs[0]; - r300->winsys->add_buffer(r300->winsys, fb_tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM); - if (r300->winsys->validate(r300->winsys)) { - /* XXX */ +validate: + /* Color buffers... */ + for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) { + tex = (struct r300_texture*)r300->framebuffer_state.cbufs[i]->texture; + assert(tex && tex->buffer && "cbuf is marked, but NULL!"); + if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, + 0, RADEON_GEM_DOMAIN_VRAM)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + /* ...depth buffer... */ + if (r300->framebuffer_state.zsbuf) { + tex = (struct r300_texture*)r300->framebuffer_state.zsbuf->texture; + assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, + 0, RADEON_GEM_DOMAIN_VRAM)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + /* ...textures... */ + for (i = 0; i < r300->texture_count; i++) { + tex = r300->textures[i]; + assert(tex && tex->buffer && "texture is marked, but NULL!"); + if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + /* ...and vertex buffer. */ + if (r300->vbo) { + if (!r300->winsys->add_buffer(r300->winsys, r300->vbo, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } else { + debug_printf("No VBO while emitting dirty state!\n"); + } + if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); + if (invalid) { + /* Well, hell. */ + debug_printf("r300: Stuck in validation loop, gonna quit now."); + exit(1); + } + invalid = TRUE; + goto validate; } if (r300->dirty_state & R300_NEW_BLEND) { @@ -450,6 +552,11 @@ void r300_emit_dirty_state(struct r300_context* r300) r300->dirty_state &= ~R300_NEW_BLEND_COLOR; } + if (r300->dirty_state & R300_NEW_CLIP) { + r300_emit_clip_state(r300, &r300->clip_state); + r300->dirty_state &= ~R300_NEW_CLIP; + } + if (r300->dirty_state & R300_NEW_DSA) { r300_emit_dsa_state(r300, r300->dsa_state); r300->dirty_state &= ~R300_NEW_DSA; @@ -458,10 +565,10 @@ void r300_emit_dirty_state(struct r300_context* r300) if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { if (r300screen->caps->is_r500) { r500_emit_fragment_shader(r300, - (struct r500_fragment_shader*)r300->fs); + (struct r5xx_fragment_shader*)r300->fs); } else { r300_emit_fragment_shader(r300, - (struct r300_fragment_shader*)r300->fs); + (struct r3xx_fragment_shader*)r300->fs); } r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER; } @@ -481,29 +588,27 @@ void r300_emit_dirty_state(struct r300_context* r300) r300->dirty_state &= ~R300_NEW_RS_BLOCK; } - if (r300->dirty_state & R300_ANY_NEW_SAMPLERS) { - for (i = 0; i < r300->sampler_count; i++) { - if (r300->dirty_state & (R300_NEW_SAMPLER << i)) { - r300_emit_sampler(r300, r300->sampler_states[i], i); - r300->dirty_state &= ~(R300_NEW_SAMPLER << i); - dirty_tex++; - } - } - } - if (r300->dirty_state & R300_NEW_SCISSOR) { r300_emit_scissor_state(r300, r300->scissor_state); r300->dirty_state &= ~R300_NEW_SCISSOR; } - if (r300->dirty_state & R300_ANY_NEW_TEXTURES) { - for (i = 0; i < r300->texture_count; i++) { - if (r300->dirty_state & (R300_NEW_TEXTURE << i)) { - r300_emit_texture(r300, r300->textures[i], i); - r300->dirty_state &= ~(R300_NEW_TEXTURE << i); + /* Samplers and textures are tracked separately but emitted together. */ + if (r300->dirty_state & + (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { + for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { + if (r300->dirty_state & + ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { + r300_emit_texture(r300, + r300->sampler_states[i], + r300->textures[i], + i); + r300->dirty_state &= + ~((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i)); dirty_tex++; } } + r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); } if (r300->dirty_state & R300_NEW_VIEWPORT) { @@ -519,4 +624,18 @@ void r300_emit_dirty_state(struct r300_context* r300) r300_emit_vertex_format_state(r300); r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT; } + + if (r300->dirty_state & R300_NEW_VERTEX_SHADER) { + r300_emit_vertex_shader(r300, r300->vs); + r300->dirty_state &= ~R300_NEW_VERTEX_SHADER; + } + + /* XXX + assert(r300->dirty_state == 0); + */ + + /* Finally, emit the VBO. */ + r300_emit_vertex_buffer(r300); + + r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 31dbc7ab85..fda26f3948 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -36,14 +36,17 @@ void r300_emit_blend_state(struct r300_context* r300, void r300_emit_blend_color_state(struct r300_context* r300, struct r300_blend_color_state* bc); +void r300_emit_clip_state(struct r300_context* r300, + struct pipe_clip_state* clip); + void r300_emit_dsa_state(struct r300_context* r300, struct r300_dsa_state* dsa); void r300_emit_fragment_shader(struct r300_context* r300, - struct r300_fragment_shader* fs); + struct r3xx_fragment_shader* fs); void r500_emit_fragment_shader(struct r300_context* r300, - struct r500_fragment_shader* fs); + struct r5xx_fragment_shader* fs); void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb); @@ -53,14 +56,15 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); -void r300_emit_sampler(struct r300_context* r300, - struct r300_sampler_state* sampler, unsigned offset); - void r300_emit_scissor_state(struct r300_context* r300, struct r300_scissor_state* scissor); void r300_emit_texture(struct r300_context* r300, - struct r300_texture* tex, unsigned offset); + struct r300_sampler_state* sampler, + struct r300_texture* tex, + unsigned offset); + +void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_format_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 89a5f2b20c..0dff1c6f4f 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -29,7 +29,11 @@ static void r300_flush(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); CS_LOCALS(r300); - draw_flush(r300->draw); + /* We probably need to flush Draw, but we may have been called from + * within Draw. This feels kludgy, but it might be the best thing. */ + if (!r300->draw->flushing) { + draw_flush(r300->draw); + } if (r300->dirty_hw) { FLUSH_CS; diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h index a1b224b39c..9a83d89daa 100644 --- a/src/gallium/drivers/r300/r300_flush.h +++ b/src/gallium/drivers/r300/r300_flush.h @@ -23,6 +23,8 @@ #ifndef R300_FLUSH_H #define R300_FLUSH_H +#include "draw/draw_private.h" + #include "pipe/p_context.h" #include "r300_context.h" diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c new file mode 100644 index 0000000000..ca8ef99902 --- /dev/null +++ b/src/gallium/drivers/r300/r300_fs.c @@ -0,0 +1,111 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_fs.h" + +void r300_translate_fragment_shader(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + struct tgsi_parse_context parser; + int i; + boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; + struct r300_constant_buffer* consts = + &r300->shader_constants[PIPE_SHADER_FRAGMENT]; + + struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm); + if (assembler == NULL) { + return; + } + /* Setup starting offset for immediates. */ + assembler->imm_offset = consts->user_count; + /* Enable depth writes, if needed. */ + assembler->writes_depth = fs->info.writes_z; + + /* Make sure we start at the beginning of the shader. */ + if (is_r500) { + ((struct r5xx_fragment_shader*)fs)->instruction_count = 0; + } + + tgsi_parse_init(&parser, fs->state.tokens); + + while (!tgsi_parse_end_of_tokens(&parser)) { + tgsi_parse_token(&parser); + + /* This is seriously the lamest way to create fragment programs ever. + * I blame TGSI. */ + switch (parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Allocated registers sitting at the beginning + * of the program. */ + r300_fs_declare(assembler, &parser.FullToken.FullDeclaration); + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + debug_printf("r300: Emitting immediate to constant buffer, " + "position %d\n", + assembler->imm_offset + assembler->imm_count); + /* I am not amused by the length of these. */ + for (i = 0; i < 4; i++) { + consts->constants[assembler->imm_offset + + assembler->imm_count][i] = + parser.FullToken.FullImmediate.u[i].Float; + } + assembler->imm_count++; + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (is_r500) { + r5xx_fs_instruction((struct r5xx_fragment_shader*)fs, + assembler, &parser.FullToken.FullInstruction); + } else { + r3xx_fs_instruction((struct r3xx_fragment_shader*)fs, + assembler, &parser.FullToken.FullInstruction); + } + break; + } + } + + debug_printf("r300: fs: %d texs and %d colors, first free reg is %d\n", + assembler->tex_count, assembler->color_count, + assembler->tex_count + assembler->color_count); + + consts->count = consts->user_count + assembler->imm_count; + fs->uses_imms = assembler->imm_count; + debug_printf("r300: fs: %d total constants, " + "%d from user and %d from immediates\n", consts->count, + consts->user_count, assembler->imm_count); + r3xx_fs_finalize(fs, assembler); + if (is_r500) { + r5xx_fs_finalize((struct r5xx_fragment_shader*)fs, assembler); + } + + tgsi_dump(fs->state.tokens, 0); + /* XXX finish r300 dumper too */ + if (is_r500) { + r5xx_fs_dump((struct r5xx_fragment_shader*)fs); + } + + tgsi_parse_free(&parser); + FREE(assembler); + + /* And, finally... */ + fs->translated = TRUE; +} diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h new file mode 100644 index 0000000000..18deb7a05e --- /dev/null +++ b/src/gallium/drivers/r300/r300_fs.h @@ -0,0 +1,36 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_FS_H +#define R300_FS_H + +#include "tgsi/tgsi_dump.h" + +#include "r300_context.h" +#include "r3xx_fs.h" +#include "r5xx_fs.h" + +void r300_translate_fragment_shader(struct r300_context* r300, + struct r300_fragment_shader* fs); + + #endif /* R300_FS_H */ diff --git a/src/gallium/drivers/r300/r300_fs_inlines.h b/src/gallium/drivers/r300/r300_fs_inlines.h new file mode 100644 index 0000000000..be4be9465e --- /dev/null +++ b/src/gallium/drivers/r300/r300_fs_inlines.h @@ -0,0 +1,158 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_FS_INLINES_H +#define R300_FS_INLINES_H + +#include "tgsi/tgsi_parse.h" + +#include "r300_context.h" +#include "r300_debug.h" +#include "r300_reg.h" +#include "r300_screen.h" +#include "r300_shader_inlines.h" + +/* Temporary struct used to hold assembly state while putting together + * fragment programs. */ +struct r300_fs_asm { + /* Pipe context. */ + struct r300_context* r300; + /* Number of colors. */ + unsigned color_count; + /* Number of texcoords. */ + unsigned tex_count; + /* Offset for temporary registers. Inputs and temporaries have no + * distinguishing markings, so inputs start at 0 and the first usable + * temporary register is after all inputs. */ + unsigned temp_offset; + /* Number of requested temporary registers. */ + unsigned temp_count; + /* Offset for immediate constants. Neither R300 nor R500 can do four + * inline constants per source, so instead we copy immediates into the + * constant buffer. */ + unsigned imm_offset; + /* Number of immediate constants. */ + unsigned imm_count; + /* Are depth writes enabled? */ + boolean writes_depth; + /* Depth write offset. This is the TGSI output that corresponds to + * depth writes. */ + unsigned depth_output; +}; + +static INLINE void r300_fs_declare(struct r300_fs_asm* assembler, + struct tgsi_full_declaration* decl) +{ + switch (decl->Declaration.File) { + case TGSI_FILE_INPUT: + switch (decl->Semantic.SemanticName) { + case TGSI_SEMANTIC_COLOR: + assembler->color_count++; + break; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + assembler->tex_count++; + break; + default: + debug_printf("r300: fs: Bad semantic declaration %d\n", + decl->Semantic.SemanticName); + break; + } + break; + case TGSI_FILE_OUTPUT: + /* Depth write. Mark the position of the output so we can + * identify it later. */ + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + assembler->depth_output = decl->DeclarationRange.First; + } + break; + case TGSI_FILE_CONSTANT: + break; + case TGSI_FILE_TEMPORARY: + assembler->temp_count++; + break; + default: + debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File); + break; + } + + assembler->temp_offset = assembler->color_count + assembler->tex_count; +} + +static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler, + struct tgsi_src_register* src) +{ + switch (src->File) { + case TGSI_FILE_NULL: + return 0; + case TGSI_FILE_INPUT: + /* XXX may be wrong */ + return src->Index; + break; + case TGSI_FILE_TEMPORARY: + return src->Index + assembler->temp_offset; + break; + case TGSI_FILE_IMMEDIATE: + return (src->Index + assembler->imm_offset) | (1 << 8); + break; + case TGSI_FILE_CONSTANT: + /* XXX magic */ + return src->Index | (1 << 8); + break; + default: + debug_printf("r300: fs: Unimplemented src %d\n", src->File); + break; + } + return 0; +} + +static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler, + struct tgsi_dst_register* dst) +{ + switch (dst->File) { + case TGSI_FILE_NULL: + /* This happens during KIL instructions. */ + return 0; + break; + case TGSI_FILE_OUTPUT: + return 0; + break; + case TGSI_FILE_TEMPORARY: + return dst->Index + assembler->temp_offset; + break; + default: + debug_printf("r300: fs: Unimplemented dst %d\n", dst->File); + break; + } + return 0; +} + +static INLINE boolean r300_fs_is_depr(struct r300_fs_asm* assembler, + struct tgsi_dst_register* dst) +{ + return (assembler->writes_depth && + (dst->File == TGSI_FILE_OUTPUT) && + (dst->Index == assembler->depth_output)); +} + +#endif /* R300_FS_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_query.h b/src/gallium/drivers/r300/r300_query.h index 4f447ea45b..6a7646087a 100644 --- a/src/gallium/drivers/r300/r300_query.h +++ b/src/gallium/drivers/r300/r300_query.h @@ -27,6 +27,8 @@ #include "r300_cs.h" #include "r300_reg.h" +struct r300_context; + struct r300_query { /* The kind of query. Currently only OQ is supported. */ unsigned type; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 660816e1da..6825d99870 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -511,11 +511,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_XYZW_VALID_INST_SHIFT 10 # define R300_PVS_LAST_INST_SHIFT 20 # define R300_PVS_FIRST_INST(x) ((x) << 0) +# define R300_PVS_XYZW_VALID_INST(x) ((x) << 10) # define R300_PVS_LAST_INST(x) ((x) << 20) /* Addresses are relative the the vertex program parameters area. */ #define R300_VAP_PVS_CONST_CNTL 0x22D4 # define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 +# define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16) #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC @@ -1062,8 +1064,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | \ R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | \ - R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT ) # define R300_SHADE_MODEL_SMOOTH ( \ R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | \ @@ -1073,8 +1074,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ - R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD ) /* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ #define R300_GA_SOLID_RG 0x427c @@ -1478,6 +1478,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_PITCH_EN (1 << 31) # define R300_TX_WIDTH(x) ((x) << 0) # define R300_TX_HEIGHT(x) ((x) << 11) +# define R300_TX_NUM_LEVELS(x) ((x) << 26) #define R300_TX_FORMAT1_0 0x44C0 /* The interpretation of the format word by Wladimir van der Laan */ @@ -1485,9 +1486,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. They are given meanings as R, G, B and Alpha by the swizzle specification */ # define R300_TX_FORMAT_X8 0x0 -# define R500_TX_FORMAT_X1 0x0 // bit set in format 2 # define R300_TX_FORMAT_X16 0x1 -# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2 # define R300_TX_FORMAT_Y4X4 0x2 # define R300_TX_FORMAT_Y8X8 0x3 # define R300_TX_FORMAT_Y16X16 0x4 @@ -1504,31 +1503,29 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_FORMAT_DXT1 0xF # define R300_TX_FORMAT_DXT3 0x10 # define R300_TX_FORMAT_DXT5 0x11 -# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */ -# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ -# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ -# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ - - /* These two values are wrong, but they're the only values that - * produce any even vaguely correct results. Can r300 only do 16-bit - * depth textures? - */ -# define R300_TX_FORMAT_X24_Y8 0x1e -# define R300_TX_FORMAT_X32 0x1e - - /* 0x16 - some 16 bit green format.. ?? */ +# define R300_TX_FORMAT_Y8 0x12 +# define R300_TX_FORMAT_AVYU444 0x13 +# define R300_TX_FORMAT_VYUY422 0x14 +# define R300_TX_FORMAT_YVYU422 0x15 +# define R300_TX_FORMAT_16_MPEG 0x16 +# define R300_TX_FORMAT_16_16_MPEG 0x17 +# define R300_TX_FORMAT_16F 0x18 +# define R300_TX_FORMAT_16F_16F 0x19 +# define R300_TX_FORMAT_16F_16F_16F_16F 0x1A +# define R300_TX_FORMAT_32F 0x1B +# define R300_TX_FORMAT_32F_32F 0x1C +# define R300_TX_FORMAT_32F_32F_32F_32F 0x1D +# define R300_TX_FORMAT_W24_FP 0x1E + +# define R300_TX_FORMAT_SIGNED_W (1 << 5) +# define R300_TX_FORMAT_SIGNED_Z (1 << 6) +# define R300_TX_FORMAT_SIGNED_Y (1 << 7) +# define R300_TX_FORMAT_SIGNED_X (1 << 8) +# define R300_TX_FORMAT_SIGNED (0xf << 5) + # define R300_TX_FORMAT_3D (1 << 25) # define R300_TX_FORMAT_CUBIC_MAP (2 << 25) - /* gap */ - /* Floating point formats */ - /* Note - hardware supports both 16 and 32 bit floating point */ -# define R300_TX_FORMAT_FL_I16 0x18 -# define R300_TX_FORMAT_FL_I16A16 0x19 -# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A -# define R300_TX_FORMAT_FL_I32 0x1B -# define R300_TX_FORMAT_FL_I32A32 0x1C -# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D /* alpha modes, convenience mostly */ /* if you have alpha, pick constant appropriate to the number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ @@ -1569,7 +1566,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_FORMAT_CONST_Z (4<<5) # define R300_TX_FORMAT_CONST_W (8<<5) -# define R300_TX_FORMAT_YUV_MODE 0x00800000 +# define R300_TX_FORMAT_GAMMA (1 << 21) +# define R300_TX_FORMAT_YUV_TO_RGB (1 << 22) #define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */ # define R300_TX_PITCHMASK_SHIFT 0 @@ -3040,6 +3038,7 @@ enum { # define R500_INST_RGB_WMASK_R (1 << 11) # define R500_INST_RGB_WMASK_G (1 << 12) # define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_RGB_WMASK_RGB (7 << 11) # define R500_INST_ALPHA_WMASK (1 << 14) # define R500_INST_RGB_OMASK_R (1 << 15) # define R500_INST_RGB_OMASK_G (1 << 16) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index cbd84d7c56..cd458d019a 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -45,11 +45,7 @@ struct r300_render { /* VBO */ struct pipe_buffer* vbo; - size_t vbo_size; - size_t vbo_offset; - void* vbo_map; size_t vbo_alloc_size; - size_t vbo_max_used; }; static INLINE struct r300_render* @@ -78,24 +74,21 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, struct pipe_screen* screen = r300->context.screen; size_t size = (size_t)vertex_size * (size_t)count; - if (r300render->vbo) { + if (r300render->vbo && (size > r300render->vbo_alloc_size)) { pipe_buffer_reference(&r300render->vbo, NULL); } + + if (!r300render->vbo) { + r300render->vbo = pipe_buffer_create(screen, + 64, + PIPE_BUFFER_USAGE_VERTEX, + size); + } - r300render->vbo_size = MAX2(size, r300render->vbo_alloc_size); - r300render->vbo_offset = 0; - r300render->vbo = pipe_buffer_create(screen, - 64, - PIPE_BUFFER_USAGE_VERTEX, - r300render->vbo_size); - + r300render->vbo_alloc_size = MAX2(size, r300render->vbo_alloc_size); r300render->vertex_size = vertex_size; - if (r300render->vbo) { - return TRUE; - } else { - return FALSE; - } + return (r300render->vbo) ? TRUE : FALSE; } static void* r300_render_map_vertices(struct vbuf_render* render) @@ -103,10 +96,8 @@ static void* r300_render_map_vertices(struct vbuf_render* render) struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; - r300render->vbo_map = pipe_buffer_map(screen, r300render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - - return (unsigned char*)r300render->vbo_map + r300render->vbo_offset; + return (unsigned char*)pipe_buffer_map(screen, r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); } static void r300_render_unmap_vertices(struct vbuf_render* render, @@ -116,9 +107,6 @@ static void r300_render_unmap_vertices(struct vbuf_render* render, struct r300_render* r300render = r300_render(render); struct pipe_screen* screen = r300render->r300->context.screen; - r300render->vbo_max_used = MAX2(r300render->vbo_max_used, - r300render->vertex_size * (max + 1)); - pipe_buffer_unmap(screen, r300render->vbo); } @@ -180,27 +168,9 @@ static void prepare_render(struct r300_render* render, unsigned count) CS_LOCALS(r300); - /* Make sure that all possible state is emitted. */ - r300_emit_dirty_state(r300); + r300->vbo = render->vbo; - debug_printf("r300: Preparing vertex buffer %p for render, " - "vertex size %d, vertex count %d\n", render->vbo, - r300->vertex_info.vinfo.size, count); - /* Set the pointer to our vertex buffer. The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [0] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info.vinfo.size | - (r300->vertex_info.vinfo.size << 8)); - OUT_CS(render->vbo_offset); - OUT_CS_RELOC(render->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; + r300_emit_dirty_state(r300); } static void r300_render_draw_arrays(struct vbuf_render* render, @@ -212,8 +182,6 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); - r300render->vbo_offset = start; - prepare_render(r300render, count); debug_printf("r300: Doing vbuf render, count %d\n", count); @@ -248,13 +216,14 @@ static void r300_render_draw(struct vbuf_render* render, return; } +/* index_map = pipe_buffer_map(screen, index_buffer, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(index_map, indices, count); pipe_buffer_unmap(screen, index_buffer); debug_printf("r300: Doing indexbuf render, count %d\n", count); -/* + BEGIN_CS(8); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | @@ -264,13 +233,15 @@ static void r300_render_draw(struct vbuf_render* render, OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; */ - BEGIN_CS(2 + count); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, count); + BEGIN_CS(2 + (count+1)/2); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | - r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - for (i = 0; i < count; i++) { - index = indices[i]; - OUT_CS(index); + r300render->hwprim); + for (i = 0; i < count-1; i += 2) { + OUT_CS(indices[i+1] << 16 | indices[i]); + } + if (count % 2) { + OUT_CS(indices[count-1]); } END_CS; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index d2c5998c26..258e4ac7b2 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -87,23 +87,24 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) } else { return 0; } - return 0; case PIPE_CAP_GLSL: - /* IN THEORY */ - return 0; + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } case PIPE_CAP_S3TC: - /* IN THEORY */ - return 0; + return 1; case PIPE_CAP_ANISOTROPIC_FILTER: - /* IN THEORY */ - return 0; + return 1; case PIPE_CAP_POINT_SPRITE: /* IN THEORY */ return 0; case PIPE_CAP_MAX_RENDER_TARGETS: return 4; case PIPE_CAP_OCCLUSION_QUERY: - return 1; + /* IN THEORY */ + return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: /* IN THEORY */ return 0; @@ -143,6 +144,11 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: /* XXX guessing (what a terrible guess) */ return 2; + case PIPE_CAP_TGSI_CONT_SUPPORTED: + /* XXX */ + return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return 1; default: debug_printf("r300: Implementation error: Bad param %d\n", param); @@ -152,17 +158,20 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) static float r300_get_paramf(struct pipe_screen* pscreen, int param) { + struct r300_screen* r300screen = r300_screen(pscreen); + switch (param) { case PIPE_CAP_MAX_LINE_WIDTH: case PIPE_CAP_MAX_LINE_WIDTH_AA: - /* XXX this is the biggest thing that will fit in that register. - * Perhaps the actual rendering limits are less? */ - return 10922.0f; case PIPE_CAP_MAX_POINT_WIDTH: case PIPE_CAP_MAX_POINT_WIDTH_AA: - /* XXX this is the biggest thing that will fit in that register. - * Perhaps the actual rendering limits are less? */ - return 10922.0f; + /* The maximum dimensions of the colorbuffer are our practical + * rendering limits. 2048 pixels should be enough for anybody. */ + if (r300screen->caps->is_r500) { + return 4096.0f; + } else { + return 2048.0f; + } case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: return 16.0f; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: @@ -174,21 +183,58 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param) } } -static boolean check_tex_2d_format(enum pipe_format format, boolean is_r500) +static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage, + boolean is_r500) { switch (format) { + /* Supported formats. */ /* Colorbuffer */ case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: + return usage & + (PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY); + + /* Texture */ + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_R8G8B8A8_SRGB: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_YCBCR: + return usage & PIPE_TEXTURE_USAGE_SAMPLER; + /* Colorbuffer or texture */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_I8_UNORM: + return usage & + (PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY | + PIPE_TEXTURE_USAGE_SAMPLER); + /* Z buffer */ case PIPE_FORMAT_Z16_UNORM: - /* Z buffer with stencil */ + return usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL; + + /* Z buffer with stencil or texture */ case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; + return usage & + (PIPE_TEXTURE_USAGE_DEPTH_STENCIL | + PIPE_TEXTURE_USAGE_SAMPLER); + + /* Definitely unsupported formats. */ + /* Non-usable Z buffer/stencil formats. */ + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + debug_printf("r300: Note: Got unsupported format: %s in %s\n", + pf_name(format), __FUNCTION__); + return FALSE; /* XXX These don't even exist case PIPE_FORMAT_A32R32G32B32: @@ -211,7 +257,8 @@ static boolean check_tex_2d_format(enum pipe_format format, boolean is_r500) return FALSE; */ default: - debug_printf("r300: Warning: Got unsupported format: %s in %s\n", + /* Unknown format... */ + debug_printf("r300: Warning: Got unknown format: %s in %s\n", pf_name(format), __FUNCTION__); break; } @@ -228,11 +275,18 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen, { switch (target) { case PIPE_TEXTURE_2D: - return check_tex_2d_format(format, + return check_tex_2d_format(format, tex_usage, r300_screen(pscreen)->caps->is_r500); + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: + debug_printf("r300: Implementation error: Unsupported format " + "target: %d\n", target); + break; default: - debug_printf("r300: Warning: Got unknown format target: %d\n", - format); + debug_printf("r300: Fatal: This is not a format target: %d\n", + target); + assert(0); break; } @@ -337,7 +391,6 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys) return NULL; caps->pci_id = r300_winsys->pci_id; - caps->num_frag_pipes = r300_winsys->gb_pipes; r300_parse_chipset(caps); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 3f52dbc3be..2a0e41fbc3 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -49,7 +49,7 @@ struct r300_transfer { }; /* Convenience cast wrapper. */ -static struct r300_screen* r300_screen(struct pipe_screen* screen) { +static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { return (struct r300_screen*)screen; } diff --git a/src/gallium/drivers/r300/r300_shader_inlines.h b/src/gallium/drivers/r300/r300_shader_inlines.h new file mode 100644 index 0000000000..a04f45b03e --- /dev/null +++ b/src/gallium/drivers/r300/r300_shader_inlines.h @@ -0,0 +1,47 @@ +/* + * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SHADER_INLINES_H +#define R300_SHADER_INLINES_H + +/* TGSI constants. TGSI is like XML: If it can't solve your problems, you're + * not using enough of it. */ +static const struct tgsi_full_src_register r300_constant_zero = { + .SrcRegister.Extended = TRUE, + .SrcRegister.File = TGSI_FILE_NULL, + .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ZERO, + .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ZERO, + .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ZERO, + .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ZERO, +}; + +static const struct tgsi_full_src_register r300_constant_one = { + .SrcRegister.Extended = TRUE, + .SrcRegister.File = TGSI_FILE_NULL, + .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ONE, + .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ONE, + .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ONE, + .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ONE, +}; + +#endif /* R300_SHADER_INLINES_H */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 184a23c9e6..162740f594 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -24,12 +24,14 @@ #include "util/u_pack_color.h" #include "util/u_debug.h" + +#include "pipe/p_config.h" #include "pipe/internal/p_winsys_screen.h" #include "r300_context.h" #include "r300_reg.h" #include "r300_state_inlines.h" -#include "r300_state_shader.h" +#include "r300_fs.h" /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ @@ -62,8 +64,6 @@ static void* r300_create_blend_state(struct pipe_context* pipe, } /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ - /* XXX are logicops still allowed if blending's disabled? - * Does Gallium take care of it for us? */ if (state->logicop_enable) { blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE | (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; @@ -121,9 +121,14 @@ static void r300_set_clip_state(struct pipe_context* pipe, const struct pipe_clip_state* state) { struct r300_context* r300 = r300_context(pipe); - /* XXX Draw */ - draw_flush(r300->draw); - draw_set_clip_state(r300->draw, state); + + if (r300_screen(pipe->screen)->caps->has_tcl) { + r300->clip_state = *state; + r300->dirty_state |= R300_NEW_CLIP; + } else { + draw_flush(r300->draw); + draw_set_clip_state(r300->draw, state); + } } static void @@ -150,17 +155,20 @@ static void } r300->dirty_state |= R300_NEW_CONSTANTS; - +#if 0 /* If the number of constants have changed, invalidate the shader. */ if (r300->shader_constants[shader].user_count != i) { - if (shader == PIPE_SHADER_FRAGMENT && r300->fs) { + if (shader == PIPE_SHADER_FRAGMENT && r300->fs && + r300->fs->uses_imms) { r300->fs->translated = FALSE; r300_translate_fragment_shader(r300, r300->fs); - } else if (shader == PIPE_SHADER_VERTEX && r300->vs) { + } else if (shader == PIPE_SHADER_VERTEX && r300->vs && + r300->vs->uses_imms) { r300->vs->translated = FALSE; r300_translate_vertex_shader(r300, r300->vs); } } +#endif } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -257,6 +265,7 @@ static void r300_set_edgeflags(struct pipe_context* pipe, const unsigned* bitfield) { /* XXX you know it's bad when i915 has this blank too */ + /* XXX and even worse, I have no idea WTF the bitfield is */ } static void @@ -277,18 +286,17 @@ static void* r300_create_fs_state(struct pipe_context* pipe, const struct pipe_shader_state* shader) { struct r300_context* r300 = r300_context(pipe); - struct r3xx_fragment_shader* fs = NULL; + struct r300_fragment_shader* fs = NULL; if (r300_screen(r300->context.screen)->caps->is_r500) { - fs = - (struct r3xx_fragment_shader*)CALLOC_STRUCT(r500_fragment_shader); + fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r5xx_fragment_shader); } else { - fs = - (struct r3xx_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader); + fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r3xx_fragment_shader); } /* Copy state directly into shader. */ fs->state = *shader; + fs->state.tokens = tgsi_dup_tokens(shader->tokens); tgsi_scan_shader(shader->tokens, &fs->info); @@ -299,7 +307,7 @@ static void* r300_create_fs_state(struct pipe_context* pipe, static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); - struct r3xx_fragment_shader* fs = (struct r3xx_fragment_shader*)shader; + struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; if (fs == NULL) { r300->fs = NULL; @@ -308,7 +316,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_translate_fragment_shader(r300, fs); } - fs->translated = TRUE; r300->fs = fs; r300->dirty_state |= R300_NEW_FRAGMENT_SHADER; @@ -317,13 +324,15 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) /* Delete fragment shader state. */ static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) { + struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader; + FREE(fs->state.tokens); FREE(shader); } static void r300_set_polygon_stipple(struct pipe_context* pipe, const struct pipe_poly_stipple* state) { - /* XXX */ + /* XXX no idea how to set this up, but not terribly important */ } /* Create a new rasterizer state based on the CSO rasterizer state. @@ -341,14 +350,21 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Copy rasterizer state for Draw. */ rs->rs = *state; + rs->enable_vte = !state->bypass_vs_clip_and_viewport; + +#ifdef PIPE_ARCH_LITTLE_ENDIAN + rs->vap_control_status = R300_VC_NO_SWAP; +#else + rs->vap_control_status = R300_VC_32BIT_SWAP; +#endif + /* If bypassing TCL, or if no TCL engine is present, turn off the HW TCL. * Else, enable HW TCL and force Draw's TCL off. */ if (state->bypass_vs_clip_and_viewport || !r300_screen(pipe->screen)->caps->has_tcl) { - rs->vap_control_status = R300_VAP_TCL_BYPASS; + rs->vap_control_status |= R300_VAP_TCL_BYPASS; } else { rs->rs.bypass_vs_clip_and_viewport = TRUE; - rs->vap_control_status = 0; } rs->point_size = pack_float_16_6x(state->point_size) | @@ -412,6 +428,10 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->color_control = R300_SHADE_MODEL_SMOOTH; } + if (!state->flatshade_first) { + rs->color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; + } + return (void*)rs; } @@ -555,40 +575,35 @@ static void r300_set_viewport_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - draw_flush(r300->draw); - - if (r300_screen(r300->context.screen)->caps->has_tcl) { - /* Do the transform in HW. */ - r300->viewport_state->vte_control = R300_VTX_W0_FMT; + /* Do the transform in HW. */ + r300->viewport_state->vte_control = R300_VTX_W0_FMT; - if (state->scale[0] != 1.0f) { - r300->viewport_state->xscale = state->scale[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; - } - if (state->scale[1] != 1.0f) { - r300->viewport_state->yscale = state->scale[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; - } - if (state->scale[2] != 1.0f) { - r300->viewport_state->zscale = state->scale[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; - } - if (state->translate[0] != 0.0f) { - r300->viewport_state->xoffset = state->translate[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; - } - if (state->translate[1] != 0.0f) { - r300->viewport_state->yoffset = state->translate[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; - } - if (state->translate[2] != 0.0f) { - r300->viewport_state->zoffset = state->translate[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; - } - } else { - r300->viewport_state->vte_control = 0; - /* Have Draw do the actual transform. */ - draw_set_viewport_state(r300->draw, state); + if (state->scale[0] != 1.0f) { + assert(state->scale[0] != 0.0f); + r300->viewport_state->xscale = state->scale[0]; + r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; + } + if (state->scale[1] != 1.0f) { + assert(state->scale[1] != 0.0f); + r300->viewport_state->yscale = state->scale[1]; + r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; + } + if (state->scale[2] != 1.0f) { + assert(state->scale[2] != 0.0f); + r300->viewport_state->zscale = state->scale[2]; + r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; + } + if (state->translate[0] != 0.0f) { + r300->viewport_state->xoffset = state->translate[0]; + r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; + } + if (state->translate[1] != 0.0f) { + r300->viewport_state->yoffset = state->translate[1]; + r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; + } + if (state->translate[2] != 0.0f) { + r300->viewport_state->zoffset = state->translate[2]; + r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; } r300->dirty_state |= R300_NEW_VIEWPORT; @@ -628,6 +643,7 @@ static void* r300_create_vs_state(struct pipe_context* pipe, struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); /* Copy state directly into shader. */ vs->state = *shader; + vs->state.tokens = tgsi_dup_tokens(shader->tokens); tgsi_scan_shader(shader->tokens, &vs->info); @@ -673,6 +689,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; draw_delete_vertex_shader(r300->draw, vs->draw); + FREE(vs->state.tokens); FREE(shader); } else { draw_delete_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index c4c9784a00..2477b30822 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -64,6 +64,7 @@ static void r300_vs_tab_routes(struct r300_context* r300, break; case TGSI_SEMANTIC_FOG: fog = TRUE; + /* Fall through */ case TGSI_SEMANTIC_GENERIC: texs++; break; @@ -103,6 +104,9 @@ static void r300_vs_tab_routes(struct r300_context* r300, } } + /* XXX magic */ + assert(texs <= 8); + /* Do the actual vertex_info setup. * * vertex_info has four uints of hardware-specific data in it. @@ -140,21 +144,32 @@ static void r300_vs_tab_routes(struct r300_context* r300, vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); } - for (i = 0; i < texs; i++) { + /* Init i right here, increment it if fog is enabled. + * This gets around a double-increment problem. */ + i = 0; + + if (fog) { + i++; draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); vinfo->hwfmt[3] |= (4 << (3 * i)); } - if (fog) { - i++; + for (i; i < texs; i++) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, - draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i); vinfo->hwfmt[3] |= (4 << (3 * i)); } + /* Handle the case where the vertex shader will be generating some of + * the attribs based on its inputs. */ + if (r300screen->caps->has_tcl && + info->num_inputs < info->num_outputs) { + vinfo->num_attribs = info->num_inputs; + } + draw_compute_vertex_size(vinfo); } @@ -162,26 +177,40 @@ static void r300_vs_tab_routes(struct r300_context* r300, static void r300_vertex_psc(struct r300_context* r300, struct r300_vertex_format* vformat) { + struct r300_screen* r300screen = r300_screen(r300->context.screen); struct vertex_info* vinfo = &vformat->vinfo; int* tab = vformat->vs_tab; uint32_t temp; - int i; + int i, attrib_count; - debug_printf("r300: attrib count: %d\n", vinfo->num_attribs); - for (i = 0; i < vinfo->num_attribs; i++) { - debug_printf("r300: attrib: offset %d, interp %d, size %d," - " tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - tab[i]); + /* Vertex shaders have no semantics on their inputs, + * so PSC should just route stuff based on their info, + * and not on attrib information. */ + if (r300screen->caps->has_tcl) { + attrib_count = r300->vs->info.num_inputs; + debug_printf("r300: routing %d attribs in psc for vs\n", + attrib_count); + } else { + attrib_count = vinfo->num_attribs; + debug_printf("r300: attrib count: %d\n", attrib_count); + for (i = 0; i < attrib_count; i++) { + debug_printf("r300: attrib: offset %d, interp %d, size %d," + " tab %d\n", vinfo->attrib[i].src_index, + vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, + tab[i]); + } } - for (i = 0; i < vinfo->num_attribs; i++) { + for (i = 0; i < attrib_count; i++) { /* Make sure we have a proper destination for our attribute */ assert(tab[i] != -1); /* Add the attribute to the PSC table. */ - temp = translate_vertex_data_type(vinfo->attrib[i].emit) | - (tab[i] << R300_DST_VEC_LOC_SHIFT); + temp = r300screen->caps->has_tcl ? + R300_DATA_TYPE_FLOAT_4 : + translate_vertex_data_type(vinfo->attrib[i].emit); + temp |= tab[i] << R300_DST_VEC_LOC_SHIFT; + if (i & 1) { vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff; vformat->vap_prog_stream_cntl[i >> 1] |= temp << 16; @@ -206,7 +235,6 @@ static void r300_vertex_psc(struct r300_context* r300, /* Update the vertex format. */ static void r300_update_vertex_format(struct r300_context* r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_vertex_format vformat; int i; diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 91b93fc367..22c8e199ae 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -353,25 +353,6 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) /* Non-CSO state. (For now.) */ -static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) -{ - switch (pipe_count) { - case 1: - return R300_GB_TILE_PIPE_COUNT_RV300; - break; - case 2: - return R300_GB_TILE_PIPE_COUNT_R300; - break; - case 3: - return R300_GB_TILE_PIPE_COUNT_R420_3P; - break; - case 4: - return R300_GB_TILE_PIPE_COUNT_R420; - break; - } - return 0; -} - static INLINE uint32_t translate_vertex_data_type(int type) { switch (type) { case EMIT_1F: diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 8bd9b41bd7..430129d5bd 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -34,36 +34,22 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(30 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(22 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ OUT_CS_REG(R300_GB_ENABLE, 0x0); - /* Subpixel multisampling for AA */ - OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); - OUT_CS_REG(R300_GB_MSPOS1, 0x66666666); - /* GB tile config and pipe setup */ - OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_DISABLE | - r300_translate_gb_pipes(caps->num_frag_pipes)); + /* Subpixel multisampling for AA + * These are commented out because glisse's CS checker doesn't like them. + * I presume these will be re-enabled later. + * OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); + * OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); + */ /* Source of fog depth */ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); /* AA enable */ OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); - /*** Geometry Assembly (GA) ***/ - /* GA errata fixes. */ - if (caps->is_r500) { - OUT_CS_REG(R300_GA_ENHANCE, - R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL | - R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE | - R500_GA_ENHANCE_REG_READWRITE_ENABLE | - R500_GA_ENHANCE_REG_NOSTALL_ENABLE); - } else { - OUT_CS_REG(R300_GA_ENHANCE, - R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL | - R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE); - } - /*** Fog (FG) ***/ OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); @@ -86,7 +72,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(79 + (caps->has_tcl ? 7 : 0)); + BEGIN_CS(71 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -94,30 +80,26 @@ void r300_emit_invariant_state(struct r300_context* r300) R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT); - /* XXX endian */ if (caps->has_tcl) { - OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP); - OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN); OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); OUT_CS_32F(1.0); OUT_CS_32F(1.0); OUT_CS_32F(1.0); OUT_CS_32F(1.0); - } else { - OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP | - R300_VAP_TCL_BYPASS); } /* XXX point tex stuffing */ OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1); OUT_CS_32F(0.0); OUT_CS_REG_SEQ(R300_GA_POINT_S1, 1); OUT_CS_32F(1.0); + /* XXX line tex stuffing */ + OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1); + OUT_CS_32F(0.0); + OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1); + OUT_CS_32F(1.0); OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); /* XXX this big chunk should be refactored into rs_state */ - OUT_CS_REG(R300_GA_LINE_S0, 0x00000000); - OUT_CS_REG(R300_GA_LINE_S1, 0x3F800000); OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000); @@ -133,8 +115,10 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_RB3D_CCTL, 0x00000000); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); + if (caps->is_r500) { + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); + } OUT_CS_REG(R300_ZB_FORMAT, 0x00000002); OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003); OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); @@ -144,8 +128,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); - /* Vertex size. */ - OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); /* XXX */ OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa); diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 4dd5b8af99..25168ce5e9 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -23,30 +23,36 @@ #include "r300_surface.h" -static void r300_surface_setup(struct pipe_context* pipe, - struct pipe_surface* dest, +static void r300_surface_setup(struct r300_context* r300, + struct r300_texture* dest, unsigned x, unsigned y, unsigned w, unsigned h) { - struct r300_context* r300 = r300_context(pipe); - struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; - struct r300_texture* tex = (struct r300_texture*)dest->texture; - unsigned pixpitch = tex->stride / tex->tex.block.size; + struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; + unsigned pixpitch = dest->stride / dest->tex.block.size; CS_LOCALS(r300); - /* Make sure our target BO is okay. */ - r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM); - if (r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - } - r300_emit_blend_state(r300, &blend_clear_state); r300_emit_blend_color_state(r300, &blend_color_clear_state); r300_emit_dsa_state(r300, &dsa_clear_state); r300_emit_rs_state(r300, &rs_clear_state); - BEGIN_CS(15); + BEGIN_CS(24); + + /* Viewport setup */ + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F((float)w); + OUT_CS_32F((float)x); + OUT_CS_32F((float)h); + OUT_CS_32F((float)y); + OUT_CS_32F(1.0); + OUT_CS_32F(0.0); + + OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | + R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | + R300_VTX_XY_FMT | R300_VTX_Z_FMT); /* Pixel scissors. */ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); @@ -71,9 +77,9 @@ static void r300_surface_setup(struct pipe_context* pipe, /* Setup colorbuffer. */ OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | - r300_translate_colorformat(tex->tex.format)); + r300_translate_colorformat(dest->tex.format)); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); END_CS; @@ -93,6 +99,7 @@ static void r300_surface_fill(struct pipe_context* pipe, struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; struct r300_texture* tex = (struct r300_texture*)dest->texture; unsigned pixpitch = tex->stride / tex->tex.block.size; + boolean invalid = FALSE; CS_LOCALS(r300); a = (float)((color >> 24) & 0xff) / 255.0f; @@ -105,12 +112,30 @@ static void r300_surface_fill(struct pipe_context* pipe, /* Fallback? */ if (FALSE) { +fallback: debug_printf("r300: Falling back on surface clear..."); util_surface_fill(pipe, dest, x, y, w, h, color); return; } - r300_surface_setup(r300, dest, x, y, w, h); + /* Make sure our target BO is okay. */ +validate: + if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, + 0, RADEON_GEM_DOMAIN_VRAM)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + if (!r300->winsys->validate(r300->winsys)) { + r300->context.flush(&r300->context, 0, NULL); + if (invalid) { + debug_printf("r300: Stuck in validation loop, gonna fallback."); + goto fallback; + } + invalid = TRUE; + goto validate; + } + + r300_surface_setup(r300, tex, x, y, w, h); /* Vertex shader setup */ if (caps->has_tcl) { @@ -127,14 +152,14 @@ static void r300_surface_fill(struct pipe_context* pipe, /* Fragment shader setup */ if (caps->is_r500) { - r500_emit_fragment_shader(r300, &r500_passthrough_fragment_shader); - r300_emit_rs_block_state(r300, &r500_rs_block_clear_state); + r500_emit_fragment_shader(r300, &r5xx_passthrough_fragment_shader); + r300_emit_rs_block_state(r300, &r5xx_rs_block_clear_state); } else { - r300_emit_fragment_shader(r300, &r300_passthrough_fragment_shader); - r300_emit_rs_block_state(r300, &r300_rs_block_clear_state); + r300_emit_fragment_shader(r300, &r3xx_passthrough_fragment_shader); + r300_emit_rs_block_state(r300, &r3xx_rs_block_clear_state); } - BEGIN_CS(31); + BEGIN_CS(26); /* VAP stream control, mapping from input memory to PVS/RS memory */ if (caps->has_tcl) { @@ -161,27 +186,21 @@ static void r300_surface_fill(struct pipe_context* pipe, /* Disable textures */ OUT_CS_REG(R300_TX_ENABLE, 0x0); - /* Viewport setup */ - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(1.0); - OUT_CS_32F((float)x); - OUT_CS_32F(1.0); - OUT_CS_32F((float)y); - OUT_CS_32F(1.0); - OUT_CS_32F(0.0); - /* The size of the point we're about to draw, in sixths of pixels */ OUT_CS_REG(R300_GA_POINT_SIZE, - ((h * 6) & R300_POINTSIZE_Y_MASK) | + ((h * 6) & R300_POINTSIZE_Y_MASK) | ((w * 6) << R300_POINTSIZE_X_SHIFT)); + /* Vertex size. */ + OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); + /* Packet3 with our point vertex */ OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | (1 << R300_PRIM_NUM_VERTICES_SHIFT)); /* Position */ - OUT_CS_32F(w / 2.0); - OUT_CS_32F(h / 2.0); + OUT_CS_32F(0.5); + OUT_CS_32F(0.5); OUT_CS_32F(1.0); OUT_CS_32F(1.0); /* Color */ @@ -190,11 +209,7 @@ static void r300_surface_fill(struct pipe_context* pipe, OUT_CS_32F(b); OUT_CS_32F(a); - /* XXX figure out why this is 0xA and not 0x2 */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); - /* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */ END_CS; @@ -213,27 +228,56 @@ static void r300_surface_copy(struct pipe_context* pipe, struct r300_texture* srctex = (struct r300_texture*)src->texture; struct r300_texture* desttex = (struct r300_texture*)dest->texture; unsigned pixpitch = srctex->stride / srctex->tex.block.size; + boolean invalid = FALSE; + float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty; CS_LOCALS(r300); debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d)," " dimensions %dx%d (pixel pitch %d)\n", src, srcx, srcy, dest, destx, desty, w, h, pixpitch); - if ((srctex == desttex) && + if ((srctex->buffer == desttex->buffer) && ((destx < srcx + w) || (srcx < destx + w)) && ((desty < srcy + h) || (srcy < desty + h))) { +fallback: debug_printf("r300: Falling back on surface_copy\n"); util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, w, h); } - r300_emit_sampler(r300, &r300_sampler_copy_state, 0); - r300_emit_texture(r300, srctex, 0); + /* Add our target BOs to the list. */ +validate: + if (!r300->winsys->add_buffer(r300->winsys, srctex->buffer, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + if (!r300->winsys->add_buffer(r300->winsys, desttex->buffer, + 0, RADEON_GEM_DOMAIN_VRAM)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + if (!r300->winsys->validate(r300->winsys)) { + r300->context.flush(&r300->context, 0, NULL); + if (invalid) { + debug_printf("r300: Stuck in validation loop, gonna fallback."); + goto fallback; + } + invalid = TRUE; + goto validate; + } + + r300_surface_setup(r300, desttex, destx, desty, w, h); + + /* Setup the texture. */ + r300_emit_texture(r300, &r300_sampler_copy_state, srctex, 0); + + /* Flush and enable. */ r300_flush_textures(r300); /* Vertex shader setup */ if (caps->has_tcl) { - r300_emit_vertex_shader(r300, &r300_texture_vertex_shader); + r300_emit_vertex_shader(r300, &r300_passthrough_vertex_shader); } else { BEGIN_CS(4); OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VAP_TCL_BYPASS); @@ -246,13 +290,14 @@ static void r300_surface_copy(struct pipe_context* pipe, /* Fragment shader setup */ if (caps->is_r500) { - r500_emit_fragment_shader(r300, &r500_texture_fragment_shader); - r300_emit_rs_block_state(r300, &r500_rs_block_copy_state); + r500_emit_fragment_shader(r300, &r5xx_texture_fragment_shader); + r300_emit_rs_block_state(r300, &r5xx_rs_block_copy_state); } else { - r300_emit_fragment_shader(r300, &r300_texture_fragment_shader); - r300_emit_rs_block_state(r300, &r300_rs_block_copy_state); + r300_emit_fragment_shader(r300, &r3xx_texture_fragment_shader); + r300_emit_rs_block_state(r300, &r3xx_rs_block_copy_state); } + BEGIN_CS(30); /* VAP stream control, mapping from input memory to PVS/RS memory */ if (caps->has_tcl) { OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, @@ -275,33 +320,38 @@ static void r300_surface_copy(struct pipe_context* pipe, /* Two components of texture 0 */ OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x2); + /* Vertex size. */ + OUT_CS_REG(R300_VAP_VTX_SIZE, 0x4); + /* Packet3 with our texcoords */ - OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); + OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 16); OUT_CS(R300_PRIM_TYPE_QUADS | R300_PRIM_WALK_RING | (4 << R300_PRIM_NUM_VERTICES_SHIFT)); /* (x , y ) */ - OUT_CS_32F((float)destx); - OUT_CS_32F((float)desty); - OUT_CS_32F((float)srcx); - OUT_CS_32F((float)srcy); + OUT_CS_32F(fdestx / dest->width); + OUT_CS_32F(fdesty / dest->height); + OUT_CS_32F(fsrcx / src->width); + OUT_CS_32F(fsrcy / src->height); /* (x , y + h) */ - OUT_CS_32F((float)destx); - OUT_CS_32F((float)(desty + h)); - OUT_CS_32F((float)srcx); - OUT_CS_32F((float)(srcy + h)); + OUT_CS_32F(fdestx / dest->width); + OUT_CS_32F((fdesty + h) / dest->height); + OUT_CS_32F(fsrcx / src->width); + OUT_CS_32F((fsrcy + h) / src->height); /* (x + w, y + h) */ - OUT_CS_32F((float)(destx + w)); - OUT_CS_32F((float)(desty + h)); - OUT_CS_32F((float)(srcx + w)); - OUT_CS_32F((float)(srcy + h)); + OUT_CS_32F((fdestx + w) / dest->width); + OUT_CS_32F((fdesty + h) / dest->height); + OUT_CS_32F((fsrcx + w) / src->width); + OUT_CS_32F((fsrcy + h) / src->height); /* (x + w, y ) */ - OUT_CS_32F((float)(destx + w)); - OUT_CS_32F((float)desty); - OUT_CS_32F((float)(srcx + w)); - OUT_CS_32F((float)srcy); + OUT_CS_32F((fdestx + w) / dest->width); + OUT_CS_32F(fdesty / dest->height); + OUT_CS_32F((fsrcx + w) / src->width); + OUT_CS_32F(fsrcy / src->height); OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); + END_CS; + r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h index 894def07aa..d01f0b143f 100644 --- a/src/gallium/drivers/r300/r300_surface.h +++ b/src/gallium/drivers/r300/r300_surface.h @@ -31,8 +31,8 @@ #include "r300_context.h" #include "r300_cs.h" #include "r300_emit.h" -#include "r300_state_shader.h" -#include "r300_state_tcl.h" +#include "r300_fs.h" +#include "r300_vs.h" #include "r300_state_inlines.h" static struct r300_blend_state blend_clear_state = { @@ -72,7 +72,7 @@ static struct r300_rs_state rs_clear_state = { .color_control = R300_SHADE_MODEL_FLAT, }; -static struct r300_rs_block r300_rs_block_clear_state = { +static struct r300_rs_block r3xx_rs_block_clear_state = { .ip[0] = R500_RS_SEL_S(R300_RS_SEL_K0) | R500_RS_SEL_T(R300_RS_SEL_K0) | R500_RS_SEL_R(R300_RS_SEL_K0) | @@ -82,7 +82,7 @@ static struct r300_rs_block r300_rs_block_clear_state = { .inst_count = 0, }; -static struct r300_rs_block r500_rs_block_clear_state = { +static struct r300_rs_block r5xx_rs_block_clear_state = { .ip[0] = R500_RS_SEL_S(R500_RS_IP_PTR_K0) | R500_RS_SEL_T(R500_RS_IP_PTR_K0) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | @@ -94,24 +94,24 @@ static struct r300_rs_block r500_rs_block_clear_state = { /* The following state is used for surface_copy only. */ -static struct r300_rs_block r300_rs_block_copy_state = { +static struct r300_rs_block r3xx_rs_block_copy_state = { .ip[0] = R500_RS_SEL_S(R300_RS_SEL_K0) | R500_RS_SEL_T(R300_RS_SEL_K0) | R500_RS_SEL_R(R300_RS_SEL_K0) | R500_RS_SEL_Q(R300_RS_SEL_K1), .inst[0] = R300_RS_INST_COL_CN_WRITE, .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN, - .inst_count = R300_RS_TX_OFFSET(6), + .inst_count = R300_RS_TX_OFFSET(0), }; -static struct r300_rs_block r500_rs_block_copy_state = { +static struct r300_rs_block r5xx_rs_block_copy_state = { .ip[0] = R500_RS_SEL_S(0) | R500_RS_SEL_T(1) | R500_RS_SEL_R(R500_RS_IP_PTR_K0) | R500_RS_SEL_Q(R500_RS_IP_PTR_K1), .inst[0] = R500_RS_INST_TEX_CN_WRITE, .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN, - .inst_count = R300_RS_TX_OFFSET(6), + .inst_count = R300_RS_TX_OFFSET(0), }; static struct r300_sampler_state r300_sampler_copy_state = { diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index fe91f4e184..daf1647bee 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -22,6 +22,8 @@ #include "r300_texture.h" +/* XXX maths need to go to util */ + static int minify(int i) { return MAX2(1, i >> 1); @@ -30,25 +32,32 @@ static int minify(int i) static void r300_setup_texture_state(struct r300_texture* tex, unsigned width, unsigned height, - unsigned pitch) + unsigned pitch, + unsigned levels) { struct r300_texture_state* state = &tex->state; state->format0 = R300_TX_WIDTH((width - 1) & 0x7ff) | - R300_TX_HEIGHT((height - 1) & 0x7ff) | R300_TX_PITCH_EN; + R300_TX_HEIGHT((height - 1) & 0x7ff) | + R300_TX_NUM_LEVELS(levels) | + R300_TX_PITCH_EN; /* XXX */ - state->format1 = R300_TX_FORMAT_A8R8G8B8; + state->format1 = r300_translate_texformat(tex->tex.format); state->format2 = pitch - 1; - /* XXX + /* Assume (somewhat foolishly) that oversized textures will + * not be permitted by the state tracker. */ if (width > 2048) { - state->pitch |= R300_TXWIDTH_11; + state->format2 |= R500_TXWIDTH_BIT11; } if (height > 2048) { - state->pitch |= R300_TXHEIGHT_11; - } */ + state->format2 |= R500_TXHEIGHT_BIT11; + } + + debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n", + width, height, pitch, levels); } static void r300_setup_miptree(struct r300_texture* tex) @@ -65,16 +74,26 @@ static void r300_setup_miptree(struct r300_texture* tex) } base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); - base->nblocksy[i] = pf_get_nblocksy(&base->block, base->width[i]); - - /* Radeons enjoy things in multiples of 32. */ - /* XXX this can be 32 when POT */ - stride = (base->nblocksx[i] * base->block.size + 63) & ~63; + base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]); + + /* Radeons enjoy things in multiples of 64. + * + * XXX + * POT, uncompressed, unmippmapped textures can be aligned to 32, + * instead of 64. */ + stride = align( + (base->nblocksx[i] * base->block.size) / base->block.width, + 32); size = stride * base->nblocksy[i] * base->depth[i]; - tex->offset[i] = (tex->size + 63) & ~63; - tex->size = tex->offset[i] + size; + tex->offset[i] = align(tex->size, 32); + tex->size += tex->offset[i] + size; + debug_printf("r300: Texture miptree: Level %d " + "(%dx%dx%d px, pitch %d bytes)\n", + i, base->width[i], base->height[i], base->depth[i], + stride); + /* Save stride of first level to the texture. */ if (i == 0) { tex->stride = stride; } @@ -86,8 +105,6 @@ static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, const struct pipe_texture* template) { - /* XXX struct r300_screen* r300screen = r300_screen(screen); */ - struct r300_texture* tex = CALLOC_STRUCT(r300_texture); if (!tex) { @@ -100,11 +117,10 @@ static struct pipe_texture* r300_setup_miptree(tex); - /* XXX */ - r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], - tex->tex.width[0]); + r300_setup_texture_state(tex, template->width[0], template->height[0], + template->width[0], template->last_level); - tex->buffer = screen->buffer_create(screen, 64, + tex->buffer = screen->buffer_create(screen, 1024, PIPE_BUFFER_USAGE_PIXEL, tex->size); @@ -166,6 +182,7 @@ static struct pipe_texture* { struct r300_texture* tex; + /* XXX we should start doing mips now... */ if (base->target != PIPE_TEXTURE_2D || base->last_level != 0 || base->depth[0] != 1) { @@ -183,8 +200,9 @@ static struct pipe_texture* tex->stride = *stride; + /* XXX */ r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], - tex->stride); + tex->stride, 0); pipe_buffer_reference(&tex->buffer, buffer); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 98fb5c9a08..3b56f0307c 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -32,6 +32,52 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen); +/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */ +static INLINE uint32_t r300_translate_texformat(enum pipe_format format) +{ + switch (format) { + /* X8 */ + case PIPE_FORMAT_I8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, X8); + /* W8Z8Y8X8 */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + case PIPE_FORMAT_R8G8B8A8_UNORM: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8); + case PIPE_FORMAT_A8R8G8B8_SRGB: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8) | + R300_TX_FORMAT_GAMMA; + case PIPE_FORMAT_R8G8B8A8_SRGB: + return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | + R300_TX_FORMAT_GAMMA; + /* DXT1 */ + case PIPE_FORMAT_DXT1_RGB: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1); + case PIPE_FORMAT_DXT1_RGBA: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1); + /* DXT3 */ + case PIPE_FORMAT_DXT3_RGBA: + return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3); + /* DXT5 */ + case PIPE_FORMAT_DXT5_RGBA: + return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5); + /* YVYU422 */ + case PIPE_FORMAT_YCBCR: + return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | + R300_TX_FORMAT_YUV_TO_RGB; + /* W24_FP */ + case PIPE_FORMAT_Z24S8_UNORM: + return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); + default: + debug_printf("r300: Implementation error: " + "Got unsupported texture format %s in %s\n", + pf_name(format), __FUNCTION__); + assert(0); + break; + } + return 0; +} + #ifndef R300_WINSYS_H boolean r300_get_texture_buffer(struct pipe_texture* texture, diff --git a/src/gallium/drivers/r300/r300_state_tcl.c b/src/gallium/drivers/r300/r300_vs.c index d84912de48..741a1b6989 100644 --- a/src/gallium/drivers/r300/r300_state_tcl.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -20,7 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_state_tcl.h" +#include "r300_vs.h" static void r300_vs_declare(struct r300_vs_asm* assembler, struct tgsi_full_declaration* decl) @@ -71,16 +71,13 @@ static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler, { switch (src->File) { case TGSI_FILE_NULL: - /* Probably a zero or one swizzle */ - return R300_PVS_SRC_REG_INPUT; - break; case TGSI_FILE_INPUT: + /* Probably a zero or one swizzle */ return R300_PVS_SRC_REG_INPUT; - break; case TGSI_FILE_TEMPORARY: return R300_PVS_SRC_REG_TEMPORARY; - break; case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: return R300_PVS_SRC_REG_CONSTANT; default: debug_printf("r300: vs: Unimplemented src type %d\n", src->File); @@ -89,16 +86,32 @@ static INLINE unsigned r300_vs_src_type(struct r300_vs_asm* assembler, return 0; } +static INLINE unsigned r300_vs_src(struct r300_vs_asm* assembler, + struct tgsi_src_register* src) +{ + switch (src->File) { + case TGSI_FILE_NULL: + case TGSI_FILE_INPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_CONSTANT: + return src->Index; + case TGSI_FILE_IMMEDIATE: + return src->Index + assembler->imm_offset; + default: + debug_printf("r300: vs: Unimplemented src type %d\n", src->File); + break; + } + return 0; +} + static INLINE unsigned r300_vs_dst_type(struct r300_vs_asm* assembler, struct tgsi_dst_register* dst) { switch (dst->File) { case TGSI_FILE_TEMPORARY: return R300_PVS_DST_REG_TEMPORARY; - break; case TGSI_FILE_OUTPUT: return R300_PVS_DST_REG_OUT; - break; default: debug_printf("r300: vs: Unimplemented dst type %d\n", dst->File); break; @@ -112,10 +125,8 @@ static INLINE unsigned r300_vs_dst(struct r300_vs_asm* assembler, switch (dst->File) { case TGSI_FILE_TEMPORARY: return dst->Index; - break; case TGSI_FILE_OUTPUT: return assembler->tab[dst->Index]; - break; default: debug_printf("r300: vs: Unimplemented dst %d\n", dst->File); break; @@ -133,8 +144,15 @@ static uint32_t r300_vs_op(unsigned op) return R300_VE_MULTIPLY; case TGSI_OPCODE_ADD: case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SUB: case TGSI_OPCODE_SWZ: return R300_VE_ADD; + case TGSI_OPCODE_MAX: + return R300_VE_MAXIMUM; + case TGSI_OPCODE_SLT: + return R300_VE_SET_LESS_THAN; + case TGSI_OPCODE_RSQ: + return R300_PVS_DST_MATH_INST | R300_ME_RECIP_DX; case TGSI_OPCODE_MAD: return R300_PVS_DST_MACRO_INST | R300_PVS_MACRO_OP_2CLK_MADD; default: @@ -146,51 +164,78 @@ static uint32_t r300_vs_op(unsigned op) static uint32_t r300_vs_swiz(struct tgsi_full_src_register* reg) { if (reg->SrcRegister.Extended) { - return reg->SrcRegisterExtSwz.ExtSwizzleX | + return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | + reg->SrcRegisterExtSwz.ExtSwizzleX | (reg->SrcRegisterExtSwz.ExtSwizzleY << 3) | (reg->SrcRegisterExtSwz.ExtSwizzleZ << 6) | (reg->SrcRegisterExtSwz.ExtSwizzleW << 9); } else { - return reg->SrcRegister.SwizzleX | + return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | + reg->SrcRegister.SwizzleX | (reg->SrcRegister.SwizzleY << 3) | (reg->SrcRegister.SwizzleZ << 6) | (reg->SrcRegister.SwizzleW << 9); } } +/* XXX icky icky icky icky */ +static uint32_t r300_vs_scalar_swiz(struct tgsi_full_src_register* reg) +{ + if (reg->SrcRegister.Extended) { + return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | + reg->SrcRegisterExtSwz.ExtSwizzleX | + (reg->SrcRegisterExtSwz.ExtSwizzleX << 3) | + (reg->SrcRegisterExtSwz.ExtSwizzleX << 6) | + (reg->SrcRegisterExtSwz.ExtSwizzleX << 9); + } else { + return (reg->SrcRegister.Negate ? (0xf << 12) : 0) | + reg->SrcRegister.SwizzleX | + (reg->SrcRegister.SwizzleX << 3) | + (reg->SrcRegister.SwizzleX << 6) | + (reg->SrcRegister.SwizzleX << 9); + } +} + +/* XXX scalar stupidity */ static void r300_vs_emit_inst(struct r300_vertex_shader* vs, struct r300_vs_asm* assembler, struct tgsi_full_src_register* src, struct tgsi_full_dst_register* dst, unsigned op, - unsigned count) + unsigned count, + boolean is_scalar) { int i = vs->instruction_count; vs->instructions[i].inst0 = R300_PVS_DST_OPCODE(r300_vs_op(op)) | R300_PVS_DST_REG_TYPE(r300_vs_dst_type(assembler, &dst->DstRegister)) | R300_PVS_DST_OFFSET(r300_vs_dst(assembler, &dst->DstRegister)) | - R300_PVS_DST_WE_XYZW; + R300_PVS_DST_WE(dst->DstRegister.WriteMask); switch (count) { case 3: vs->instructions[i].inst3 = R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, &src[2].SrcRegister)) | - R300_PVS_SRC_OFFSET(src[2].SrcRegister.Index) | + R300_PVS_SRC_OFFSET(r300_vs_src(assembler, + &src[2].SrcRegister)) | R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[2])); /* Fall through */ case 2: vs->instructions[i].inst2 = R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, &src[1].SrcRegister)) | - R300_PVS_SRC_OFFSET(src[1].SrcRegister.Index) | + R300_PVS_SRC_OFFSET(r300_vs_src(assembler, + &src[1].SrcRegister)) | R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[1])); /* Fall through */ case 1: vs->instructions[i].inst1 = R300_PVS_SRC_REG_TYPE(r300_vs_src_type(assembler, &src[0].SrcRegister)) | - R300_PVS_SRC_OFFSET(src[0].SrcRegister.Index) | - R300_PVS_SRC_SWIZZLE(r300_vs_swiz(&src[0])); + R300_PVS_SRC_OFFSET(r300_vs_src(assembler, + &src[0].SrcRegister)) | + /* XXX the icky, it burns */ + R300_PVS_SRC_SWIZZLE(is_scalar ? r300_vs_scalar_swiz(&src[0]) + : r300_vs_swiz(&src[0])); break; } vs->instruction_count++; @@ -201,11 +246,22 @@ static void r300_vs_instruction(struct r300_vertex_shader* vs, struct tgsi_full_instruction* inst) { switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, + 1, TRUE); + break; + case TGSI_OPCODE_SUB: + inst->FullSrcRegisters[1].SrcRegister.Negate = + !inst->FullSrcRegisters[1].SrcRegister.Negate; + /* Fall through */ case TGSI_OPCODE_ADD: case TGSI_OPCODE_MUL: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_SLT: r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2); + 2, FALSE); break; case TGSI_OPCODE_DP3: /* Set alpha swizzle to zero for src0 and src1 */ @@ -235,19 +291,19 @@ static void r300_vs_instruction(struct r300_vertex_shader* vs, case TGSI_OPCODE_DP4: r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2); + 2, FALSE); break; case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: inst->FullSrcRegisters[1] = r300_constant_zero; r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 2); + 2, FALSE); break; case TGSI_OPCODE_MAD: r300_vs_emit_inst(vs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, - 3); + 3, FALSE); break; case TGSI_OPCODE_END: break; @@ -278,6 +334,8 @@ static void r300_vs_init(struct r300_vertex_shader* vs, break; } } + + vs->instruction_count = 0; } void r300_translate_vertex_shader(struct r300_context* r300, @@ -320,8 +378,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, for (i = 0; i < 4; i++) { consts->constants[assembler->imm_offset + assembler->imm_count][i] = - parser.FullToken.FullImmediate.u.ImmediateFloat32[i] - .Float; + parser.FullToken.FullImmediate.u[i].Float; } assembler->imm_count++; break; @@ -337,6 +394,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, assembler->tex_count + assembler->color_count); consts->count = consts->user_count + assembler->imm_count; + vs->uses_imms = assembler->imm_count; debug_printf("r300: vs: %d total constants, " "%d from user and %d from immediates\n", consts->count, consts->user_count, assembler->imm_count); @@ -344,10 +402,13 @@ void r300_translate_vertex_shader(struct r300_context* r300, debug_printf("r300: vs: tab: %d %d %d %d\n", assembler->tab[0], assembler->tab[1], assembler->tab[2], assembler->tab[3]); - tgsi_dump(vs->state.tokens); + tgsi_dump(vs->state.tokens, 0); /* XXX finish r300 vertex shader dumper */ r300_vs_dump(vs); tgsi_parse_free(&parser); FREE(assembler); + + /* And, finally... */ + vs->translated = TRUE; } diff --git a/src/gallium/drivers/r300/r300_state_tcl.h b/src/gallium/drivers/r300/r300_vs.h index e2e1357d43..165d717812 100644 --- a/src/gallium/drivers/r300/r300_state_tcl.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -20,21 +20,27 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef R300_STATE_TCL_H -#define R300_STATE_TCL_H +#ifndef R300_VS_H +#define R300_VS_H #include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" #include "r300_context.h" #include "r300_debug.h" #include "r300_reg.h" #include "r300_screen.h" +#include "r300_shader_inlines.h" /* XXX get these to r300_reg */ #define R300_PVS_DST_OPCODE(x) ((x) << 0) # define R300_VE_DOT_PRODUCT 1 # define R300_VE_MULTIPLY 2 # define R300_VE_ADD 3 +# define R300_VE_MAXIMUM 7 +# define R300_VE_SET_LESS_THAN 10 +#define R300_PVS_DST_MATH_INST (1 << 6) +# define R300_ME_RECIP_DX 6 #define R300_PVS_DST_MACRO_INST (1 << 7) # define R300_PVS_MACRO_OP_2CLK_MADD 0 #define R300_PVS_DST_REG_TYPE(x) ((x) << 8) @@ -72,15 +78,13 @@ ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \ (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \ (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13) - -static const struct tgsi_full_src_register r300_constant_zero = { - .SrcRegister.Extended = TRUE, - .SrcRegister.File = TGSI_FILE_NULL, - .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ZERO, -}; +#define R300_PVS_MODIFIER_X (1 << 25) +#define R300_PVS_MODIFIER_Y (1 << 26) +#define R300_PVS_MODIFIER_Z (1 << 27) +#define R300_PVS_MODIFIER_W (1 << 28) +#define R300_PVS_NEGATE_XYZW \ + (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \ + R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W) /* Temporary struct used to hold assembly state while putting together * fragment programs. */ @@ -150,4 +154,4 @@ static struct r300_vertex_shader r300_texture_vertex_shader = { void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); -#endif /* R300_STATE_TCL_H */ +#endif /* R300_VS_H */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index a833bb0399..d2893c3b9d 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -45,9 +45,6 @@ struct r300_winsys { /* PCI ID */ uint32_t pci_id; - /* GB pipe count */ - uint32_t gb_pipes; - /* GART size. */ uint32_t gart_size; @@ -55,10 +52,10 @@ struct r300_winsys { uint32_t vram_size; /* Add a pipe_buffer to the list of buffer objects to validate. */ - void (*add_buffer)(struct r300_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd); + boolean (*add_buffer)(struct r300_winsys* winsys, + struct pipe_buffer* pbuffer, + uint32_t rd, + uint32_t wd); /* Revalidate all currently setup pipe_buffers. * Returns TRUE if a flush is required. */ diff --git a/src/gallium/drivers/r300/r3xx_fs.c b/src/gallium/drivers/r300/r3xx_fs.c new file mode 100644 index 0000000000..6e05d76977 --- /dev/null +++ b/src/gallium/drivers/r300/r3xx_fs.c @@ -0,0 +1,96 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r3xx_fs.h" + +static INLINE uint32_t r3xx_rgb_op(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_MOV: + return R300_ALU_OUTC_CMP; + default: + return 0; + } +} + +static INLINE uint32_t r3xx_alpha_op(unsigned op) +{ + switch (op) { + case TGSI_OPCODE_MOV: + return R300_ALU_OUTA_CMP; + default: + return 0; + } +} + +static INLINE void r3xx_emit_maths(struct r3xx_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_src_register* src, + struct tgsi_full_dst_register* dst, + unsigned op, + unsigned count) +{ + int i = fs->alu_instruction_count; + + fs->instructions[i].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | + r3xx_rgb_op(op); + fs->instructions[i].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | + R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ; + fs->instructions[i].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | + r3xx_alpha_op(op); + fs->instructions[i].alu_alpha_addr = R300_ALPHA_ADDR0(0) | + R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT; + + fs->alu_instruction_count++; +} + +void r3xx_fs_finalize(struct r300_fragment_shader* fs, + struct r300_fs_asm* assembler) +{ + fs->stack_size = assembler->temp_count + assembler->temp_offset + 1; +} + +void r3xx_fs_instruction(struct r3xx_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_instruction* inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + /* src0 -> src1 and src2 forced to zero */ + inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + inst->FullSrcRegisters[2] = r300_constant_zero; + r3xx_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); + break; + case TGSI_OPCODE_END: + break; + default: + debug_printf("r300: fs: Bad opcode %d\n", + inst->Instruction.Opcode); + break; + } +} diff --git a/src/gallium/drivers/r300/r3xx_fs.h b/src/gallium/drivers/r300/r3xx_fs.h new file mode 100644 index 0000000000..3da39ec252 --- /dev/null +++ b/src/gallium/drivers/r300/r3xx_fs.h @@ -0,0 +1,76 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R3XX_FS_H +#define R3XX_FS_H + +#include "r300_fs_inlines.h" + +static struct r3xx_fragment_shader r3xx_passthrough_fragment_shader = { + .alu_instruction_count = 1, + .tex_instruction_count = 0, + .indirections = 0, + .shader.stack_size = 1, + + .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | + R300_ALU_OUTC_CMP, + .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | + R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, + .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | + R300_ALU_OUTA_CMP, + .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | + R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, +}; + +static struct r3xx_fragment_shader r3xx_texture_fragment_shader = { + .alu_instruction_count = 1, + .tex_instruction_count = 0, + .indirections = 0, + .shader.stack_size = 1, + + .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | + R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | + R300_ALU_OUTC_CMP, + .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | + R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, + .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | + R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | + R300_ALU_OUTA_CMP, + .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | + R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, +}; + +void r3xx_fs_finalize(struct r300_fragment_shader* fs, + struct r300_fs_asm* assembler); + +void r3xx_fs_instruction(struct r3xx_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_instruction* inst); + +#endif /* R3XX_FS_H */ diff --git a/src/gallium/drivers/r300/r300_state_shader.c b/src/gallium/drivers/r300/r5xx_fs.c index 1b02239ee7..99d826278c 100644 --- a/src/gallium/drivers/r300/r300_state_shader.c +++ b/src/gallium/drivers/r300/r5xx_fs.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,107 +21,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r300_state_shader.h" +#include "r5xx_fs.h" -static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs) -{ - struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader; - fs->shader.stack_size = pt->shader.stack_size; - fs->alu_instruction_count = pt->alu_instruction_count; - fs->tex_instruction_count = pt->tex_instruction_count; - fs->indirections = pt->indirections; - fs->instructions[0] = pt->instructions[0]; -} - -static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs) -{ - struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader; - fs->shader.stack_size = pt->shader.stack_size; - fs->instruction_count = pt->instruction_count; - fs->instructions[0] = pt->instructions[0]; -} - -static void r300_fs_declare(struct r300_fs_asm* assembler, - struct tgsi_full_declaration* decl) -{ - switch (decl->Declaration.File) { - case TGSI_FILE_INPUT: - switch (decl->Semantic.SemanticName) { - case TGSI_SEMANTIC_COLOR: - assembler->color_count++; - break; - case TGSI_SEMANTIC_GENERIC: - assembler->tex_count++; - break; - default: - debug_printf("r300: fs: Bad semantic declaration %d\n", - decl->Semantic.SemanticName); - break; - } - break; - case TGSI_FILE_OUTPUT: - case TGSI_FILE_CONSTANT: - break; - case TGSI_FILE_TEMPORARY: - assembler->temp_count++; - break; - default: - debug_printf("r300: fs: Bad file %d\n", decl->Declaration.File); - break; - } - - assembler->temp_offset = assembler->color_count + assembler->tex_count; -} - -static INLINE unsigned r300_fs_src(struct r300_fs_asm* assembler, - struct tgsi_src_register* src) -{ - switch (src->File) { - case TGSI_FILE_NULL: - return 0; - case TGSI_FILE_INPUT: - /* XXX may be wrong */ - return src->Index; - break; - case TGSI_FILE_TEMPORARY: - return src->Index + assembler->temp_offset; - break; - case TGSI_FILE_IMMEDIATE: - return (src->Index + assembler->imm_offset) | (1 << 8); - break; - case TGSI_FILE_CONSTANT: - /* XXX magic */ - return src->Index | (1 << 8); - break; - default: - debug_printf("r300: fs: Unimplemented src %d\n", src->File); - break; - } - return 0; -} - -static INLINE unsigned r300_fs_dst(struct r300_fs_asm* assembler, - struct tgsi_dst_register* dst) -{ - switch (dst->File) { - case TGSI_FILE_NULL: - /* This happens during KIL instructions. */ - return 0; - break; - case TGSI_FILE_OUTPUT: - return 0; - break; - case TGSI_FILE_TEMPORARY: - return dst->Index + assembler->temp_offset; - break; - default: - debug_printf("r300: fs: Unimplemented dst %d\n", dst->File); - break; - } - return 0; -} - -static INLINE unsigned r500_fix_swiz(unsigned s) +static INLINE unsigned r5xx_fix_swiz(unsigned s) { /* For historical reasons, the swizzle values x, y, z, w, and 0 are * equivalent to the actual machine code, but 1 is not. Thus, we just @@ -132,13 +35,13 @@ static INLINE unsigned r500_fix_swiz(unsigned s) } } -static uint32_t r500_rgba_swiz(struct tgsi_full_src_register* reg) +static uint32_t r5xx_rgba_swiz(struct tgsi_full_src_register* reg) { if (reg->SrcRegister.Extended) { - return r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) | - (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) | - (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) | - (r500_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9); + return r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleX) | + (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleY) << 3) | + (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleZ) << 6) | + (r5xx_fix_swiz(reg->SrcRegisterExtSwz.ExtSwizzleW) << 9); } else { return reg->SrcRegister.SwizzleX | (reg->SrcRegister.SwizzleY << 3) | @@ -147,7 +50,7 @@ static uint32_t r500_rgba_swiz(struct tgsi_full_src_register* reg) } } -static uint32_t r500_strq_swiz(struct tgsi_full_src_register* reg) +static uint32_t r5xx_strq_swiz(struct tgsi_full_src_register* reg) { return reg->SrcRegister.SwizzleX | (reg->SrcRegister.SwizzleY << 2) | @@ -155,50 +58,36 @@ static uint32_t r500_strq_swiz(struct tgsi_full_src_register* reg) (reg->SrcRegister.SwizzleW << 6); } -static INLINE uint32_t r500_rgb_swiz(struct tgsi_full_src_register* reg) +static INLINE uint32_t r5xx_rgb_swiz(struct tgsi_full_src_register* reg) { /* Only the first 9 bits... */ - return (r500_rgba_swiz(reg) & 0x1ff) | + return (r5xx_rgba_swiz(reg) & 0x1ff) | (reg->SrcRegister.Negate ? (1 << 9) : 0) | (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0); } -static INLINE uint32_t r500_alpha_swiz(struct tgsi_full_src_register* reg) +static INLINE uint32_t r5xx_alpha_swiz(struct tgsi_full_src_register* reg) { /* Only the last 3 bits... */ - return (r500_rgba_swiz(reg) >> 9) | + return (r5xx_rgba_swiz(reg) >> 9) | (reg->SrcRegister.Negate ? (1 << 9) : 0) | (reg->SrcRegisterExtMod.Absolute ? (1 << 10) : 0); } -static INLINE uint32_t r300_rgb_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_MOV: - return R300_ALU_OUTC_CMP; - default: - return 0; - } -} - -static INLINE uint32_t r300_alpha_op(unsigned op) -{ - switch (op) { - case TGSI_OPCODE_MOV: - return R300_ALU_OUTA_CMP; - default: - return 0; - } -} - -static INLINE uint32_t r500_rgba_op(unsigned op) +static INLINE uint32_t r5xx_rgba_op(unsigned op) { switch (op) { + case TGSI_OPCODE_COS: case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SIN: return R500_ALU_RGBA_OP_SOP; + case TGSI_OPCODE_DDX: + return R500_ALU_RGBA_OP_MDH; + case TGSI_OPCODE_DDY: + return R500_ALU_RGBA_OP_MDV; case TGSI_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; case TGSI_OPCODE_DP3: @@ -221,9 +110,11 @@ static INLINE uint32_t r500_rgba_op(unsigned op) } } -static INLINE uint32_t r500_alpha_op(unsigned op) +static INLINE uint32_t r5xx_alpha_op(unsigned op) { switch (op) { + case TGSI_OPCODE_COS: + return R500_ALPHA_OP_COS; case TGSI_OPCODE_EX2: return R500_ALPHA_OP_EX2; case TGSI_OPCODE_LG2: @@ -234,6 +125,12 @@ static INLINE uint32_t r500_alpha_op(unsigned op) return R500_ALPHA_OP_RSQ; case TGSI_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case TGSI_OPCODE_SIN: + return R500_ALPHA_OP_SIN; + case TGSI_OPCODE_DDX: + return R500_ALPHA_OP_MDH; + case TGSI_OPCODE_DDY: + return R500_ALPHA_OP_MDV; case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: case TGSI_OPCODE_DPH: @@ -253,7 +150,7 @@ static INLINE uint32_t r500_alpha_op(unsigned op) } } -static INLINE uint32_t r500_tex_op(unsigned op) +static INLINE uint32_t r5xx_tex_op(unsigned op) { switch (op) { case TGSI_OPCODE_KIL: @@ -269,64 +166,35 @@ static INLINE uint32_t r500_tex_op(unsigned op) } } -static INLINE void r300_emit_maths(struct r300_fragment_shader* fs, +/* Setup an ALU operation. */ +static INLINE void r5xx_emit_maths(struct r5xx_fragment_shader* fs, struct r300_fs_asm* assembler, struct tgsi_full_src_register* src, struct tgsi_full_dst_register* dst, unsigned op, unsigned count) { - int i = fs->alu_instruction_count; - - fs->instructions[i].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - r300_rgb_op(op); - fs->instructions[i].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ; - fs->instructions[i].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - r300_alpha_op(op); - fs->instructions[i].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT; - - fs->alu_instruction_count++; -} - -/* Setup an ALU operation. */ -static INLINE void r500_emit_alu(struct r500_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_dst_register* dst) -{ int i = fs->instruction_count; if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - fs->instructions[i].inst0 = R500_INST_TYPE_OUT | - R500_ALU_OMASK(dst->DstRegister.WriteMask); + fs->instructions[i].inst0 = R500_INST_TYPE_OUT; + if (r300_fs_is_depr(assembler, dst)) { + fs->instructions[i].inst4 = R500_W_OMASK; + } else { + fs->instructions[i].inst0 |= + R500_ALU_OMASK(dst->DstRegister.WriteMask); + } } else { fs->instructions[i].inst0 = R500_INST_TYPE_ALU | - R500_ALU_WMASK(dst->DstRegister.WriteMask); + R500_ALU_WMASK(dst->DstRegister.WriteMask); } fs->instructions[i].inst0 |= R500_INST_TEX_SEM_WAIT; - fs->instructions[i].inst4 = + fs->instructions[i].inst4 |= R500_ALPHA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); fs->instructions[i].inst5 = R500_ALU_RGBA_ADDRD(r300_fs_dst(assembler, &dst->DstRegister)); -} - -static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_src_register* src, - struct tgsi_full_dst_register* dst, - unsigned op, - unsigned count) -{ - int i = fs->instruction_count; - - r500_emit_alu(fs, assembler, dst); switch (count) { case 3: @@ -336,9 +204,9 @@ static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, R500_ALPHA_ADDR2(r300_fs_src(assembler, &src[2].SrcRegister)); fs->instructions[i].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 | - R500_SWIZ_RGBA_C(r500_rgb_swiz(&src[2])) | + R500_SWIZ_RGBA_C(r5xx_rgb_swiz(&src[2])) | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 | - R500_SWIZ_ALPHA_C(r500_alpha_swiz(&src[2])); + R500_SWIZ_ALPHA_C(r5xx_alpha_swiz(&src[2])); case 2: fs->instructions[i].inst1 |= R500_RGB_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); @@ -346,10 +214,10 @@ static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, R500_ALPHA_ADDR1(r300_fs_src(assembler, &src[1].SrcRegister)); fs->instructions[i].inst3 = R500_ALU_RGB_SEL_B_SRC1 | - R500_SWIZ_RGB_B(r500_rgb_swiz(&src[1])); + R500_SWIZ_RGB_B(r5xx_rgb_swiz(&src[1])); fs->instructions[i].inst4 |= - R500_SWIZ_ALPHA_B(r500_alpha_swiz(&src[1])) | - R500_ALPHA_SEL_B_SRC1; + R500_ALPHA_SEL_B_SRC1 | + R500_SWIZ_ALPHA_B(r5xx_alpha_swiz(&src[1])); case 1: case 0: default: @@ -359,20 +227,20 @@ static INLINE void r500_emit_maths(struct r500_fragment_shader* fs, R500_ALPHA_ADDR0(r300_fs_src(assembler, &src[0].SrcRegister)); fs->instructions[i].inst3 |= R500_ALU_RGB_SEL_A_SRC0 | - R500_SWIZ_RGB_A(r500_rgb_swiz(&src[0])); + R500_SWIZ_RGB_A(r5xx_rgb_swiz(&src[0])); fs->instructions[i].inst4 |= - R500_SWIZ_ALPHA_A(r500_alpha_swiz(&src[0])) | - R500_ALPHA_SEL_A_SRC0; + R500_ALPHA_SEL_A_SRC0 | + R500_SWIZ_ALPHA_A(r5xx_alpha_swiz(&src[0])); break; } - fs->instructions[i].inst4 |= r500_alpha_op(op); - fs->instructions[i].inst5 |= r500_rgba_op(op); + fs->instructions[i].inst4 |= r5xx_alpha_op(op); + fs->instructions[i].inst5 |= r5xx_rgba_op(op); fs->instruction_count++; } -static INLINE void r500_emit_tex(struct r500_fragment_shader* fs, +static INLINE void r5xx_emit_tex(struct r5xx_fragment_shader* fs, struct r300_fs_asm* assembler, struct tgsi_full_src_register* src, struct tgsi_full_dst_register* dst, @@ -385,10 +253,10 @@ static INLINE void r500_emit_tex(struct r500_fragment_shader* fs, R500_INST_TEX_SEM_WAIT; fs->instructions[i].inst1 = R500_TEX_ID(0) | R500_TEX_SEM_ACQUIRE | //R500_TEX_IGNORE_UNCOVERED | - r500_tex_op(op); + r5xx_tex_op(op); fs->instructions[i].inst2 = R500_TEX_SRC_ADDR(r300_fs_src(assembler, &src->SrcRegister)) | - R500_SWIZ_TEX_STRQ(r500_strq_swiz(src)) | + R500_SWIZ_TEX_STRQ(r5xx_strq_swiz(src)) | R500_TEX_DST_ADDR(r300_fs_dst(assembler, &dst->DstRegister)) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; @@ -405,42 +273,32 @@ static INLINE void r500_emit_tex(struct r500_fragment_shader* fs, src[0].SrcRegister.File = TGSI_FILE_TEMPORARY; src[1] = src[0]; - src[2] = r500_constant_zero; - r500_emit_maths(fs, assembler, src, dst, TGSI_OPCODE_MOV, 3); + src[2] = r300_constant_zero; + r5xx_emit_maths(fs, assembler, src, dst, TGSI_OPCODE_MOV, 3); } else { fs->instruction_count++; } } -static void r300_fs_instruction(struct r300_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst) +void r5xx_fs_finalize(struct r5xx_fragment_shader* fs, + struct r300_fs_asm* assembler) { - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_MOV: - /* src0 -> src1 and src2 forced to zero */ - inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[2] = r500_constant_zero; - r300_emit_maths(fs, assembler, inst->FullSrcRegisters, - &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); - break; - case TGSI_OPCODE_END: - break; - default: - debug_printf("r300: fs: Bad opcode %d\n", - inst->Instruction.Opcode); - break; - } + /* XXX should this just go with OPCODE_END? */ + fs->instructions[fs->instruction_count - 1].inst0 |= + R500_INST_LAST; } -static void r500_fs_instruction(struct r500_fragment_shader* fs, - struct r300_fs_asm* assembler, - struct tgsi_full_instruction* inst) +void r5xx_fs_instruction(struct r5xx_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_instruction* inst) { /* Switch between opcodes. When possible, prefer using the official * AMD/ATI names for opcodes, please, as it facilitates using the * documentation. */ switch (inst->Instruction.Opcode) { + /* XXX trig needs extra prep */ + case TGSI_OPCODE_COS: + case TGSI_OPCODE_SIN: /* The simple scalar ops. */ case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: @@ -452,8 +310,10 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, inst->FullSrcRegisters[0].SrcRegister.SwizzleW = inst->FullSrcRegisters[0].SrcRegister.SwizzleX; /* Fall through */ + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: case TGSI_OPCODE_FRC: - r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 1); break; @@ -474,7 +334,7 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, /* Fall through */ case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: - r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 2); break; @@ -484,7 +344,7 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, inst->FullSrcRegisters[3] = inst->FullSrcRegisters[2]; inst->FullSrcRegisters[2] = inst->FullSrcRegisters[0]; inst->FullSrcRegisters[0] = inst->FullSrcRegisters[3]; - r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; @@ -498,18 +358,18 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, /* Force src0 to one, move all registers over */ inst->FullSrcRegisters[2] = inst->FullSrcRegisters[1]; inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[0] = r500_constant_one; - r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + inst->FullSrcRegisters[0] = r300_constant_one; + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; case TGSI_OPCODE_MUL: /* Force our src2 to zero */ - inst->FullSrcRegisters[2] = r500_constant_zero; - r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + inst->FullSrcRegisters[2] = r300_constant_zero; + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; case TGSI_OPCODE_MAD: - r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; @@ -522,17 +382,71 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, case TGSI_OPCODE_SWZ: /* src0 -> src1 and src2 forced to zero */ inst->FullSrcRegisters[1] = inst->FullSrcRegisters[0]; - inst->FullSrcRegisters[2] = r500_constant_zero; - r500_emit_maths(fs, assembler, inst->FullSrcRegisters, + inst->FullSrcRegisters[2] = r300_constant_zero; + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, &inst->FullDstRegisters[0], inst->Instruction.Opcode, 3); break; + /* The compound and hybrid insts. */ + case TGSI_OPCODE_LRP: + /* LRP DST A, B, C -> MAD TMP -A, C, C; MAD DST A, B, TMP */ + inst->FullSrcRegisters[3] = inst->FullSrcRegisters[1]; + inst->FullSrcRegisters[1] = inst->FullSrcRegisters[2]; + inst->FullSrcRegisters[0].SrcRegister.Negate = + !(inst->FullSrcRegisters[0].SrcRegister.Negate); + inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; + inst->FullDstRegisters[0].DstRegister.Index = + assembler->temp_count; + inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); + inst->FullSrcRegisters[2].SrcRegister.Index = + assembler->temp_count; + inst->FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst->FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst->FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst->FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + inst->FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + inst->FullSrcRegisters[1] = inst->FullSrcRegisters[3]; + inst->FullSrcRegisters[0].SrcRegister.Negate = + !(inst->FullSrcRegisters[0].SrcRegister.Negate); + inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_MAD, 3); + break; + case TGSI_OPCODE_POW: + /* POW DST A, B -> LG2 TMP A; MUL TMP TMP, B; EX2 DST TMP */ + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleW = + inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtSwizzleX; + inst->FullSrcRegisters[0].SrcRegister.SwizzleW = + inst->FullSrcRegisters[0].SrcRegister.SwizzleX; + inst->FullDstRegisters[1] = inst->FullDstRegisters[0]; + inst->FullDstRegisters[0].DstRegister.Index = + assembler->temp_count; + inst->FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_LG2, 1); + inst->FullSrcRegisters[0].SrcRegister.Index = + assembler->temp_count; + inst->FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst->FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst->FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst->FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + inst->FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + inst->FullSrcRegisters[2] = r300_constant_zero; + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_MUL, 3); + inst->FullDstRegisters[0] = inst->FullDstRegisters[1]; + r5xx_emit_maths(fs, assembler, inst->FullSrcRegisters, + &inst->FullDstRegisters[0], TGSI_OPCODE_EX2, 1); + break; + /* The texture instruction set. */ case TGSI_OPCODE_KIL: case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXP: - r500_emit_tex(fs, assembler, &inst->FullSrcRegisters[0], + r5xx_emit_tex(fs, assembler, &inst->FullSrcRegisters[0], &inst->FullDstRegisters[0], inst->Instruction.Opcode); break; @@ -551,99 +465,3 @@ static void r500_fs_instruction(struct r500_fragment_shader* fs, R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; } } - -static void r300_fs_finalize(struct r3xx_fragment_shader* fs, - struct r300_fs_asm* assembler) -{ - fs->stack_size = assembler->temp_count + assembler->temp_offset; -} - -static void r500_fs_finalize(struct r500_fragment_shader* fs, - struct r300_fs_asm* assembler) -{ - /* XXX should this just go with OPCODE_END? */ - fs->instructions[fs->instruction_count - 1].inst0 |= - R500_INST_LAST; -} - -void r300_translate_fragment_shader(struct r300_context* r300, - struct r3xx_fragment_shader* fs) -{ - struct tgsi_parse_context parser; - int i; - boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; - struct r300_constant_buffer* consts = - &r300->shader_constants[PIPE_SHADER_FRAGMENT]; - - struct r300_fs_asm* assembler = CALLOC_STRUCT(r300_fs_asm); - if (assembler == NULL) { - return; - } - /* Setup starting offset for immediates. */ - assembler->imm_offset = consts->user_count; - - /* Make sure we start at the beginning of the shader. */ - if (is_r500) { - ((struct r500_fragment_shader*)fs)->instruction_count = 0; - } - - tgsi_parse_init(&parser, fs->state.tokens); - - while (!tgsi_parse_end_of_tokens(&parser)) { - tgsi_parse_token(&parser); - - /* This is seriously the lamest way to create fragment programs ever. - * I blame TGSI. */ - switch (parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Allocated registers sitting at the beginning - * of the program. */ - r300_fs_declare(assembler, &parser.FullToken.FullDeclaration); - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - debug_printf("r300: Emitting immediate to constant buffer, " - "position %d\n", - assembler->imm_offset + assembler->imm_count); - /* I am not amused by the length of these. */ - for (i = 0; i < 4; i++) { - consts->constants[assembler->imm_offset + - assembler->imm_count][i] = - parser.FullToken.FullImmediate.u.ImmediateFloat32[i] - .Float; - } - assembler->imm_count++; - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (is_r500) { - r500_fs_instruction((struct r500_fragment_shader*)fs, - assembler, &parser.FullToken.FullInstruction); - } else { - r300_fs_instruction((struct r300_fragment_shader*)fs, - assembler, &parser.FullToken.FullInstruction); - } - break; - } - } - - debug_printf("r300: fs: %d texs and %d colors, first free reg is %d\n", - assembler->tex_count, assembler->color_count, - assembler->tex_count + assembler->color_count); - - consts->count = consts->user_count + assembler->imm_count; - debug_printf("r300: fs: %d total constants, " - "%d from user and %d from immediates\n", consts->count, - consts->user_count, assembler->imm_count); - r300_fs_finalize(fs, assembler); - if (is_r500) { - r500_fs_finalize((struct r500_fragment_shader*)fs, assembler); - } - - tgsi_dump(fs->state.tokens); - /* XXX finish r300 dumper too */ - if (is_r500) { - r500_fs_dump((struct r500_fragment_shader*)fs); - } - - tgsi_parse_free(&parser); - FREE(assembler); -} diff --git a/src/gallium/drivers/r300/r300_state_shader.h b/src/gallium/drivers/r300/r5xx_fs.h index 185fdd90f0..629e587be4 100644 --- a/src/gallium/drivers/r300/r300_state_shader.h +++ b/src/gallium/drivers/r300/r5xx_fs.h @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Joakim Sindholt <opensource@zhasha.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,15 +21,10 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef R300_STATE_SHADER_H -#define R300_STATE_SHADER_H +#ifndef R5XX_FS_H +#define R5XX_FS_H -#include "tgsi/tgsi_parse.h" - -#include "r300_context.h" -#include "r300_debug.h" -#include "r300_reg.h" -#include "r300_screen.h" +#include "r300_fs_inlines.h" /* XXX this all should find its way back to r300_reg */ /* Swizzle tools */ @@ -57,82 +53,9 @@ #define R500_TEX_WMASK(x) ((x) << 11) #define R500_ALU_WMASK(x) ((x) << 11) #define R500_ALU_OMASK(x) ((x) << 15) +#define R500_W_OMASK (1 << 31) -/* TGSI constants. TGSI is like XML: If it can't solve your problems, you're - * not using enough of it. */ -static const struct tgsi_full_src_register r500_constant_zero = { - .SrcRegister.Extended = TRUE, - .SrcRegister.File = TGSI_FILE_NULL, - .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ZERO, - .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ZERO, -}; - -static const struct tgsi_full_src_register r500_constant_one = { - .SrcRegister.Extended = TRUE, - .SrcRegister.File = TGSI_FILE_NULL, - .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ONE, - .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ONE, -}; - -/* Temporary struct used to hold assembly state while putting together - * fragment programs. */ -struct r300_fs_asm { - /* Pipe context. */ - struct r300_context* r300; - /* Number of colors. */ - unsigned color_count; - /* Number of texcoords. */ - unsigned tex_count; - /* Offset for temporary registers. Inputs and temporaries have no - * distinguishing markings, so inputs start at 0 and the first usable - * temporary register is after all inputs. */ - unsigned temp_offset; - /* Number of requested temporary registers. */ - unsigned temp_count; - /* Offset for immediate constants. Neither R300 nor R500 can do four - * inline constants per source, so instead we copy immediates into the - * constant buffer. */ - unsigned imm_offset; - /* Number of immediate constants. */ - unsigned imm_count; -}; - -void r300_translate_fragment_shader(struct r300_context* r300, - struct r3xx_fragment_shader* fs); - -static struct r300_fragment_shader r300_passthrough_fragment_shader = { - /* XXX This is the emission code. TODO: decode - OUT_CS_REG(R300_US_CONFIG, 0); - OUT_CS_REG(R300_US_CODE_OFFSET, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000); -*/ - .alu_instruction_count = 1, - .tex_instruction_count = 0, - .indirections = 0, - .shader.stack_size = 1, - - .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; - -static struct r500_fragment_shader r500_passthrough_fragment_shader = { +static struct r5xx_fragment_shader r5xx_passthrough_fragment_shader = { .shader.stack_size = 0, .instruction_count = 1, .instructions[0].inst0 = R500_INST_TYPE_OUT | @@ -158,40 +81,12 @@ static struct r500_fragment_shader r500_passthrough_fragment_shader = { R500_ALU_RGBA_A_SWIZ_0, }; -static struct r300_fragment_shader r300_texture_fragment_shader = { - /* XXX This is the emission code. TODO: decode - OUT_CS_REG(R300_US_CONFIG, 0); - OUT_CS_REG(R300_US_CODE_OFFSET, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); - OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000); -*/ - .alu_instruction_count = 1, - .tex_instruction_count = 0, - .indirections = 0, - .shader.stack_size = 1, - - .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) | - R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) | - R300_ALU_OUTC_CMP, - .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) | - R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ, - .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) | - R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) | - R300_ALU_OUTA_CMP, - .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) | - R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT, -}; - -static struct r500_fragment_shader r500_texture_fragment_shader = { +static struct r5xx_fragment_shader r5xx_texture_fragment_shader = { .shader.stack_size = 1, .instruction_count = 2, .instructions[0].inst0 = R500_INST_TYPE_TEX | R500_INST_TEX_SEM_WAIT | - R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK | + R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK | R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP, .instructions[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED, @@ -227,4 +122,11 @@ static struct r500_fragment_shader r500_texture_fragment_shader = { R500_ALU_RGBA_A_SWIZ_0, }; -#endif /* R300_STATE_SHADER_H */ +void r5xx_fs_finalize(struct r5xx_fragment_shader* fs, + struct r300_fs_asm* assembler); + +void r5xx_fs_instruction(struct r5xx_fragment_shader* fs, + struct r300_fs_asm* assembler, + struct tgsi_full_instruction* inst); + +#endif /* R5XX_FS_H */ |