Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r--  src/gallium/drivers/r600/r600_asm.c            |  138
-rw-r--r--  src/gallium/drivers/r600/r600_asm.h            |   39
-rw-r--r--  src/gallium/drivers/r600/r600_blit.c           |  484
-rw-r--r--  src/gallium/drivers/r600/r600_context.c        |  182
-rw-r--r--  src/gallium/drivers/r600/r600_context.h        |   79
-rw-r--r--  src/gallium/drivers/r600/r600_draw.c           |  105
-rw-r--r--  src/gallium/drivers/r600/r600_query.c          |  200
-rw-r--r--  src/gallium/drivers/r600/r600_resource.h       |   12
-rw-r--r--  src/gallium/drivers/r600/r600_screen.c         |   25
-rw-r--r--  src/gallium/drivers/r600/r600_screen.h         |   18
-rw-r--r--  src/gallium/drivers/r600/r600_shader.c         | 1602
-rw-r--r--  src/gallium/drivers/r600/r600_shader.h         |    1
-rw-r--r--  src/gallium/drivers/r600/r600_sq.h             |   27
-rw-r--r--  src/gallium/drivers/r600/r600_state.c          |  476
-rw-r--r--  src/gallium/drivers/r600/r600_state_inlines.h  |    7
-rw-r--r--  src/gallium/drivers/r600/r600_texture.c        |  461
-rw-r--r--  src/gallium/drivers/r600/r600d.h               |    8
-rw-r--r--  src/gallium/drivers/r600/radeon.h              |  198
18 files changed, 3248 insertions, 814 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 9ea9d4354d..6483dac703 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -76,6 +76,27 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
{
LIST_INITHEAD(&bc->cf);
bc->family = family;
+ switch (bc->family) {
+ case CHIP_R600:
+ case CHIP_RV610:
+ case CHIP_RV630:
+ case CHIP_RV670:
+ case CHIP_RV620:
+ case CHIP_RV635:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ bc->chiprev = 0;
+ break;
+ case CHIP_RV770:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_RV740:
+ bc->chiprev = 1;
+ break;
+ default:
+ R600_ERR("unknown family %d\n", bc->family);
+ return -EINVAL;
+ }
return 0;
}
@@ -107,7 +128,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
return 0;
}
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
{
struct r600_bc_alu *nalu = r600_bc_alu();
struct r600_bc_alu *lalu;
@@ -119,7 +140,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
nalu->nliteral = 0;
/* a cf can contain only alu or only vtx or only tex */
- if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+ if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
bc->force_add_cf) {
/* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */
r = r600_bc_add_cf(bc);
@@ -127,7 +148,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
free(nalu);
return r;
}
- bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3;
+ bc->cf_last->inst = (type << 3);
}
if (alu->last && (bc->cf_last->ndw >> 1) >= 124) {
bc->force_add_cf = 1;
@@ -162,6 +183,11 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
return 0;
}
+int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+{
+ return r600_bc_add_alu_type(bc, alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
+}
+
int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
{
struct r600_bc_alu *alu;
@@ -172,7 +198,17 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
return 0;
}
- if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+ if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
+ bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
+ return 0;
+ }
+ if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
+ (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
LIST_IS_EMPTY(&bc->cf_last->alu)) {
R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
return -EINVAL;
@@ -241,6 +277,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
return 0;
}
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
+{
+ int r;
+ r = r600_bc_add_cf(bc);
+ if (r)
+ return r;
+
+ bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
+ bc->cf_last->inst = inst;
+ return 0;
+}
+
static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
{
bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
@@ -292,38 +340,44 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
unsigned i;
/* don't replace gpr by pv or ps for destination register */
+ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+ S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
+ S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+ S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
+ S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+ S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
+ S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+ S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
+ S_SQ_ALU_WORD0_LAST(alu->last);
+
if (alu->is_op3) {
- bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
- S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
- S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
- S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
- S_SQ_ALU_WORD0_LAST(alu->last);
bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+ S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+ S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) |
S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
} else {
- bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
- S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
- S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
- S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
- S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
- S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
- S_SQ_ALU_WORD0_LAST(alu->last);
bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+ S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
- S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
+ S_SQ_ALU_WORD1_BANK_SWIZZLE(0) |
+ S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
+ S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
}
if (alu->last) {
+ if (alu->nliteral && !alu->literal_added) {
+ R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n");
+ }
for (i = 0; i < alu->nliteral; i++) {
bc->bytecode[id++] = alu->value[i];
}
@@ -337,6 +391,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1);
bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
S_SQ_CF_ALU_WORD1_BARRIER(1) |
@@ -364,6 +419,20 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
+ bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COND(cf->cond) |
+ S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+
+ break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
return -EINVAL;
@@ -380,6 +449,8 @@ int r600_bc_build(struct r600_bc *bc)
unsigned addr;
int r;
+ if (bc->callstack[0].max > 0)
+ bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
/* first path compute addr of each CF block */
/* addr start after all the CF instructions */
@@ -387,6 +458,7 @@ int r600_bc_build(struct r600_bc *bc)
LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
break;
case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -398,6 +470,14 @@ int r600_bc_build(struct r600_bc *bc)
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
break;
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
return -EINVAL;
@@ -417,22 +497,13 @@ int r600_bc_build(struct r600_bc *bc)
return r;
switch (cf->inst) {
case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+ case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
- switch (bc->family) {
- case CHIP_R600:
- case CHIP_RV610:
- case CHIP_RV630:
- case CHIP_RV670:
- case CHIP_RV620:
- case CHIP_RV635:
- case CHIP_RS780:
- case CHIP_RS880:
+ switch(bc->chiprev) {
+ case 0:
r = r600_bc_alu_build(bc, alu, addr);
break;
- case CHIP_RV770:
- case CHIP_RV730:
- case CHIP_RV710:
- case CHIP_RV740:
+ case 1:
r = r700_bc_alu_build(bc, alu, addr);
break;
default:
@@ -466,6 +537,13 @@ int r600_bc_build(struct r600_bc *bc)
break;
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+ case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+ case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+ case V_SQ_CF_WORD1_SQ_CF_INST_POP:
break;
default:
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 10d98afaf0..9e65fcdd4f 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -31,6 +31,7 @@ struct r600_bc_alu_src {
unsigned chan;
unsigned neg;
unsigned abs;
+ unsigned rel;
};
struct r600_bc_alu_dst {
@@ -38,6 +39,7 @@ struct r600_bc_alu_dst {
unsigned chan;
unsigned clamp;
unsigned write;
+ unsigned rel;
};
struct r600_bc_alu {
@@ -47,6 +49,7 @@ struct r600_bc_alu {
unsigned inst;
unsigned last;
unsigned is_op3;
+ unsigned predicate;
unsigned nliteral;
unsigned literal_added;
u32 value[4];
@@ -114,22 +117,55 @@ struct r600_bc_cf {
unsigned addr;
unsigned ndw;
unsigned id;
+ unsigned cond;
+ unsigned pop_count;
+ unsigned cf_addr; /* control flow addr */
struct list_head alu;
struct list_head tex;
struct list_head vtx;
struct r600_bc_output output;
};
+#define FC_NONE 0
+#define FC_IF 1
+#define FC_LOOP 2
+#define FC_REP 3
+#define FC_PUSH_VPM 4
+#define FC_PUSH_WQM 5
+
+struct r600_cf_stack_entry {
+ int type;
+ struct r600_bc_cf *start;
+ struct r600_bc_cf **mid; /* used to store the else point */
+ int num_mid;
+};
+
+#define SQ_MAX_CALL_DEPTH 0x00000020
+struct r600_cf_callstack {
+ unsigned fc_sp_before_entry;
+ int sub_desc_index;
+ int current;
+ int max;
+};
+
struct r600_bc {
enum radeon_family family;
+ int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */
struct list_head cf;
struct r600_bc_cf *cf_last;
unsigned ndw;
unsigned ncf;
unsigned ngpr;
+ unsigned nstack;
unsigned nresource;
unsigned force_add_cf;
u32 *bytecode;
+
+ u32 fc_sp;
+ struct r600_cf_stack_entry fc_stack[32];
+
+ unsigned call_sp;
+ struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
};
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
@@ -139,5 +175,6 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
int r600_bc_build(struct r600_bc *bc);
-
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
#endif
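The flow-control stack added to struct r600_bc (fc_stack/fc_sp and the callstack array) is only declared here; its user is the translator in r600_shader.c. A rough sketch of the bookkeeping it enables, with the address-patching step left as an assumption:

	/* Sketch only: push an entry when a JUMP is emitted for an IF, pop it when
	 * the construct closes.  How the start/mid pointers are patched is up to
	 * the real translator and is only assumed here. */
	static void fc_push_if_sketch(struct r600_bc *bc, struct r600_bc_cf *jump_cf)
	{
		struct r600_cf_stack_entry *sp = &bc->fc_stack[bc->fc_sp++];

		sp->type = FC_IF;
		sp->start = jump_cf;	/* the JUMP emitted before the THEN block */
		sp->mid = NULL;		/* set when the matching ELSE is emitted */
		sp->num_mid = 0;
	}

	static void fc_pop_sketch(struct r600_bc *bc)
	{
		struct r600_cf_stack_entry *sp = &bc->fc_stack[--bc->fc_sp];

		/* assumed: sp->start->cf_addr (and any mid entries) are patched here
		 * to point at the CF instruction that follows the construct */
		sp->type = FC_NONE;
	}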
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index f4eedfe4cb..e6ded342e5 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -24,6 +24,7 @@
* Jerome Glisse
* Marek Olšák
*/
+#include <errno.h>
#include <pipe/p_screen.h>
#include <util/u_blitter.h>
#include <util/u_inlines.h>
@@ -31,9 +32,12 @@
#include "util/u_surface.h"
#include "r600_screen.h"
#include "r600_context.h"
+#include "r600d.h"
-static void r600_blitter_save_states(struct r600_context *rctx)
+static void r600_blitter_save_states(struct pipe_context *ctx)
{
+ struct r600_context *rctx = r600_context(ctx);
+
util_blitter_save_blend(rctx->blitter, rctx->blend);
util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa);
if (rctx->stencil_ref) {
@@ -47,48 +51,58 @@ static void r600_blitter_save_states(struct r600_context *rctx)
if (rctx->viewport) {
util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport);
}
- /* XXX util_blitter_save_clip(rctx->blitter, &rctx->clip); */
+ if (rctx->clip) {
+ util_blitter_save_clip(rctx->blitter, &rctx->clip->state.clip);
+ }
util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer,
rctx->vertex_buffer);
/* remove the ptrs so they don't get deleted */
rctx->blend = NULL;
+ rctx->clip = NULL;
rctx->vs_shader = NULL;
rctx->ps_shader = NULL;
rctx->rasterizer = NULL;
rctx->dsa = NULL;
rctx->vertex_elements = NULL;
+
+ /* suspend queries */
+ r600_queries_suspend(ctx);
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
- const float *rgba, double depth, unsigned stencil)
+ const float *rgba, double depth, unsigned stencil)
{
struct r600_context *rctx = r600_context(ctx);
struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- r600_blitter_save_states(rctx);
+ r600_blitter_save_states(ctx);
util_blitter_clear(rctx->blitter, fb->width, fb->height,
fb->nr_cbufs, buffers, rgba, depth,
stencil);
+ /* resume queries */
+ r600_queries_resume(ctx);
}
-static void r600_clear_render_target(struct pipe_context *pipe,
+static void r600_clear_render_target(struct pipe_context *ctx,
struct pipe_surface *dst,
const float *rgba,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct r600_context *rctx = r600_context(pipe);
+ struct r600_context *rctx = r600_context(ctx);
struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- r600_blitter_save_states(rctx);
+ r600_blitter_save_states(ctx);
util_blitter_save_framebuffer(rctx->blitter, fb);
util_blitter_clear_render_target(rctx->blitter, dst, rgba,
dstx, dsty, width, height);
+ /* resume queries */
+ r600_queries_resume(ctx);
}
-static void r600_clear_depth_stencil(struct pipe_context *pipe,
+static void r600_clear_depth_stencil(struct pipe_context *ctx,
struct pipe_surface *dst,
unsigned clear_flags,
double depth,
@@ -96,17 +110,20 @@ static void r600_clear_depth_stencil(struct pipe_context *pipe,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{
- struct r600_context *rctx = r600_context(pipe);
+ struct r600_context *rctx = r600_context(ctx);
struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
- r600_blitter_save_states(rctx);
+ r600_blitter_save_states(ctx);
util_blitter_save_framebuffer(rctx->blitter, fb);
util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil,
dstx, dsty, width, height);
+ /* resume queries */
+ r600_queries_resume(ctx);
}
-static void r600_resource_copy_region(struct pipe_context *pipe,
+
+static void r600_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_subresource subdst,
unsigned dstx, unsigned dsty, unsigned dstz,
@@ -115,7 +132,7 @@ static void r600_resource_copy_region(struct pipe_context *pipe,
unsigned srcx, unsigned srcy, unsigned srcz,
unsigned width, unsigned height)
{
- util_resource_copy_region(pipe, dst, subdst, dstx, dsty, dstz,
+ util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz,
src, subsrc, srcx, srcy, srcz, width, height);
}
@@ -126,3 +143,446 @@ void r600_init_blit_functions(struct r600_context *rctx)
rctx->context.clear_depth_stencil = r600_clear_depth_stencil;
rctx->context.resource_copy_region = r600_resource_copy_region;
}
+
+
+struct r600_blit_states {
+ struct radeon_state rasterizer;
+ struct radeon_state dsa;
+ struct radeon_state blend;
+ struct radeon_state cb_cntl;
+ struct radeon_state vgt;
+ struct radeon_state draw;
+ struct radeon_state vs_constant0;
+ struct radeon_state vs_constant1;
+ struct radeon_state vs_constant2;
+ struct radeon_state vs_constant3;
+ struct radeon_state ps_shader;
+ struct radeon_state vs_shader;
+ struct radeon_state vs_resource0;
+ struct radeon_state vs_resource1;
+};
+
+static int r600_blit_state_vs_resources(struct r600_screen *rscreen, struct r600_blit_states *bstates)
+{
+ struct radeon_state *rstate;
+ struct radeon_bo *bo;
+ u32 vbo[] = {
+ 0xBF800000, 0xBF800000, 0x3F800000, 0x3F800000,
+ 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000,
+ 0x3F800000, 0xBF800000, 0x3F800000, 0x3F800000,
+ 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000,
+ 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000,
+ 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000,
+ 0xBF800000, 0x3F800000, 0x3F800000, 0x3F800000,
+ 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000
+ };
+
+ /* simple shader */
+ bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL);
+ if (bo == NULL) {
+ return -ENOMEM;
+ }
+ if (radeon_bo_map(rscreen->rw, bo)) {
+ radeon_bo_decref(rscreen->rw, bo);
+ return -ENOMEM;
+ }
+ memcpy(bo->data, vbo, 128);
+ radeon_bo_unmap(rscreen->rw, bo);
+
+ rstate = &bstates->vs_resource0;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 0, R600_SHADER_VS);
+
+ /* set states (most default values are 0 and the struct is already
+ * zero-initialized, so avoid resetting them)
+ */
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000080;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000;
+ rstate->bo[0] = bo;
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ if (radeon_state_pm4(rstate)) {
+ radeon_state_fini(rstate);
+ return -ENOMEM;
+ }
+
+ rstate = &bstates->vs_resource1;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 1, R600_SHADER_VS);
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000010;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000070;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000;
+ rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000;
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, bo);
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ if (radeon_state_pm4(rstate)) {
+ radeon_state_fini(rstate);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void r600_blit_state_vs_shader(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ struct radeon_bo *bo;
+ u32 shader_bc_r600[] = {
+ 0x00000004, 0x81000400,
+ 0x00000008, 0xA01C0000,
+ 0xC001A03C, 0x94000688,
+ 0xC0024000, 0x94200688,
+ 0x7C000000, 0x002D1001,
+ 0x00080000, 0x00000000,
+ 0x7C000100, 0x002D1002,
+ 0x00080000, 0x00000000,
+ 0x00000001, 0x00601910,
+ 0x00000401, 0x20601910,
+ 0x00000801, 0x40601910,
+ 0x80000C01, 0x60601910,
+ 0x00000002, 0x00801910,
+ 0x00000402, 0x20801910,
+ 0x00000802, 0x40801910,
+ 0x80000C02, 0x60801910
+ };
+ u32 shader_bc_r700[] = {
+ 0x00000004, 0x81000400,
+ 0x00000008, 0xA01C0000,
+ 0xC001A03C, 0x94000688,
+ 0xC0024000, 0x94200688,
+ 0x7C000000, 0x002D1001,
+ 0x00080000, 0x00000000,
+ 0x7C000100, 0x002D1002,
+ 0x00080000, 0x00000000,
+ 0x00000001, 0x00600C90,
+ 0x00000401, 0x20600C90,
+ 0x00000801, 0x40600C90,
+ 0x80000C01, 0x60600C90,
+ 0x00000002, 0x00800C90,
+ 0x00000402, 0x20800C90,
+ 0x00000802, 0x40800C90,
+ 0x80000C02, 0x60800C90
+ };
+
+ /* simple shader */
+ bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL);
+ if (bo == NULL) {
+ return;
+ }
+ if (radeon_bo_map(rscreen->rw, bo)) {
+ radeon_bo_decref(rscreen->rw, bo);
+ return;
+ }
+ switch (rscreen->chip_class) {
+ case R600:
+ memcpy(bo->data, shader_bc_r600, 128);
+ break;
+ case R700:
+ memcpy(bo->data, shader_bc_r700, 128);
+ break;
+ default:
+ R600_ERR("unsupported chip family\n");
+ radeon_bo_unmap(rscreen->rw, bo);
+ radeon_bo_decref(rscreen->rw, bo);
+ return;
+ }
+ radeon_bo_unmap(rscreen->rw, bo);
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
+
+ /* set states (most default values are 0 and the struct is already
+ * zero-initialized, so avoid resetting them)
+ */
+ rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_0] = 0x03020100;
+ rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_1] = 0x07060504;
+ rstate->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = 0x00000005;
+
+ rstate->bo[0] = bo;
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, bo);
+ rstate->nbo = 2;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_ps_shader(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ struct radeon_bo *bo;
+ u32 shader_bc_r600[] = {
+ 0x00000002, 0xA00C0000,
+ 0xC0008000, 0x94200688,
+ 0x00000000, 0x00201910,
+ 0x00000400, 0x20201910,
+ 0x00000800, 0x40201910,
+ 0x80000C00, 0x60201910
+ };
+ u32 shader_bc_r700[] = {
+ 0x00000002, 0xA00C0000,
+ 0xC0008000, 0x94200688,
+ 0x00000000, 0x00200C90,
+ 0x00000400, 0x20200C90,
+ 0x00000800, 0x40200C90,
+ 0x80000C00, 0x60200C90
+ };
+
+ /* simple shader */
+ bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL);
+ if (bo == NULL) {
+ return;
+ }
+ if (radeon_bo_map(rscreen->rw, bo)) {
+ radeon_bo_decref(rscreen->rw, bo);
+ return;
+ }
+ switch (rscreen->chip_class) {
+ case R600:
+ memcpy(bo->data, shader_bc_r600, 48);
+ break;
+ case R700:
+ memcpy(bo->data, shader_bc_r700, 48);
+ break;
+ default:
+ R600_ERR("unsupported chip family\n");
+ radeon_bo_unmap(rscreen->rw, bo);
+ radeon_bo_decref(rscreen->rw, bo);
+ return;
+ }
+ radeon_bo_unmap(rscreen->rw, bo);
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
+
+ /* set states (most default values are 0 and the struct is already
+ * zero-initialized, so avoid resetting them)
+ */
+ rstate->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0] = 0x00000C00;
+ rstate->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = 0x10000001;
+ rstate->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
+ rstate->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = 0x00000002;
+
+ rstate->bo[0] = bo;
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_vgt(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_VGT, 0, 0);
+
+ /* set states (most default values are 0 and the struct is already
+ * zero-initialized, so avoid resetting them)
+ */
+ rstate->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001;
+ rstate->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF;
+ rstate->states[R600_VGT__VGT_PRIMITIVE_TYPE] = 0x00000005;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_draw(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DRAW, 0, 0);
+
+ /* set states (most default values are 0 and the struct is already
+ * zero-initialized, so avoid resetting them)
+ */
+ rstate->states[R600_DRAW__VGT_DRAW_INITIATOR] = 0x00000002;
+ rstate->states[R600_DRAW__VGT_NUM_INDICES] = 0x00000004;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_vs_constant(struct r600_screen *rscreen, struct radeon_state *rstate,
+ unsigned id, float c0, float c1, float c2, float c3)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CONSTANT, id, R600_SHADER_VS);
+
+ /* set states (most default values are 0 and the struct is already
+ * zero-initialized, so avoid resetting them)
+ */
+ rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256] = fui(c0);
+ rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256] = fui(c1);
+ rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256] = fui(c2);
+ rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256] = fui(c3);
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_rasterizer(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0);
+
+ /* set states (most default values are 0 and the struct is already
+ * zero-initialized, so avoid resetting them)
+ */
+ rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000;
+ rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000;
+ rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000;
+ rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000;
+ rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400;
+ rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005;
+ rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008;
+ rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000;
+ rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080004;
+ rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_dsa(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0);
+
+ /* set states (most default values are 0 and the struct is already
+ * zero-initialized, so avoid resetting them)
+ */
+ rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00;
+ rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000;
+ rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060;
+ rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A;
+ rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_blend(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0);
+ radeon_state_pm4(rstate);
+}
+
+static void r600_blit_state_cb_cntl(struct r600_screen *rscreen, struct radeon_state *rstate)
+{
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0);
+ rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000;
+ rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF;
+ rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF;
+ rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0080;
+ rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F;
+ rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F;
+ rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF;
+ radeon_state_pm4(rstate);
+}
+
+static int r600_blit_states_init(struct pipe_context *ctx, struct r600_blit_states *bstates)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ r600_blit_state_ps_shader(rscreen, &bstates->ps_shader);
+ r600_blit_state_vs_shader(rscreen, &bstates->vs_shader);
+ r600_blit_state_vgt(rscreen, &bstates->vgt);
+ r600_blit_state_draw(rscreen, &bstates->draw);
+ r600_blit_state_vs_constant(rscreen, &bstates->vs_constant0, 0, 1.0, 0.0, 0.0, 0.0);
+ r600_blit_state_vs_constant(rscreen, &bstates->vs_constant1, 1, 0.0, 1.0, 0.0, 0.0);
+ r600_blit_state_vs_constant(rscreen, &bstates->vs_constant2, 2, 0.0, 0.0, -0.00199900055, 0.0);
+ r600_blit_state_vs_constant(rscreen, &bstates->vs_constant3, 3, 0.0, 0.0, -0.99900049, 1.0);
+ r600_blit_state_rasterizer(rscreen, &bstates->rasterizer);
+ r600_blit_state_dsa(rscreen, &bstates->dsa);
+ r600_blit_state_blend(rscreen, &bstates->blend);
+ r600_blit_state_cb_cntl(rscreen, &bstates->cb_cntl);
+ r600_blit_state_vs_resources(rscreen, bstates);
+ return 0;
+}
+
+static void r600_blit_states_destroy(struct pipe_context *ctx, struct r600_blit_states *bstates)
+{
+ radeon_state_fini(&bstates->ps_shader);
+ radeon_state_fini(&bstates->vs_shader);
+ radeon_state_fini(&bstates->vs_resource0);
+ radeon_state_fini(&bstates->vs_resource1);
+}
+
+int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_context *rctx = r600_context(ctx);
+ struct radeon_draw draw;
+ struct r600_blit_states bstates;
+ int r;
+
+ r = r600_texture_scissor(ctx, rtexture, level);
+ if (r) {
+ return r;
+ }
+ r = r600_texture_cb(ctx, rtexture, 0, level);
+ if (r) {
+ return r;
+ }
+ r = r600_texture_db(ctx, rtexture, level);
+ if (r) {
+ return r;
+ }
+ r = r600_texture_viewport(ctx, rtexture, level);
+ if (r) {
+ return r;
+ }
+
+ r = r600_blit_states_init(ctx, &bstates);
+ if (r) {
+ return r;
+ }
+ bstates.dsa.states[R600_DSA__DB_RENDER_CONTROL] = 0x0000008C;
+ bstates.cb_cntl.states[R600_CB_CNTL__CB_TARGET_MASK] = 0x00000001;
+ /* force rebuild */
+ bstates.dsa.cpm4 = bstates.cb_cntl.cpm4 = 0;
+ if (radeon_state_pm4(&bstates.dsa)) {
+ goto out;
+ }
+ if (radeon_state_pm4(&bstates.cb_cntl)) {
+ goto out;
+ }
+
+ r = radeon_draw_init(&draw, rscreen->rw);
+ if (r) {
+ R600_ERR("failed creating draw for uncompressing textures\n");
+ goto out;
+ }
+
+ radeon_draw_bind(&draw, &bstates.vs_shader);
+ radeon_draw_bind(&draw, &bstates.ps_shader);
+ radeon_draw_bind(&draw, &bstates.rasterizer);
+ radeon_draw_bind(&draw, &bstates.dsa);
+ radeon_draw_bind(&draw, &bstates.blend);
+ radeon_draw_bind(&draw, &bstates.cb_cntl);
+ radeon_draw_bind(&draw, &rctx->config);
+ radeon_draw_bind(&draw, &bstates.vgt);
+ radeon_draw_bind(&draw, &bstates.draw);
+ radeon_draw_bind(&draw, &bstates.vs_resource0);
+ radeon_draw_bind(&draw, &bstates.vs_resource1);
+ radeon_draw_bind(&draw, &bstates.vs_constant0);
+ radeon_draw_bind(&draw, &bstates.vs_constant1);
+ radeon_draw_bind(&draw, &bstates.vs_constant2);
+ radeon_draw_bind(&draw, &bstates.vs_constant3);
+ radeon_draw_bind(&draw, &rtexture->viewport[level]);
+ radeon_draw_bind(&draw, &rtexture->scissor[level]);
+ radeon_draw_bind(&draw, &rtexture->cb[0][level]);
+ radeon_draw_bind(&draw, &rtexture->db[level]);
+
+ /* suspend queries */
+ r600_queries_suspend(ctx);
+
+ /* schedule draw */
+ r = radeon_ctx_set_draw(&rctx->ctx, &draw);
+ if (r == -EBUSY) {
+ r600_flush(ctx, 0, NULL);
+ r = radeon_ctx_set_draw(&rctx->ctx, &draw);
+ }
+ if (r) {
+ goto out;
+ }
+
+ /* resume queries */
+ r600_queries_resume(ctx);
+
+out:
+ r600_blit_states_destroy(ctx, &bstates);
+ return r;
+}
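r600_blit_uncompress_depth() is exported here but no caller appears in this part of the diff. A hypothetical call site (the flag fields below are placeholders, not fields from this patch) would be wherever a compressed depth buffer is about to be sampled:

	/* Hypothetical call site, not part of this patch: resolve one mip level of
	 * a depth texture before binding it as a sampler view.  "is_depth" and
	 * "dirty_db" are assumed bookkeeping fields, not real ones from this diff. */
	if (rtexture->is_depth && rtexture->dirty_db) {
		r = r600_blit_uncompress_depth(ctx, rtexture, level);
		if (r)
			return r;
		rtexture->dirty_db = 0;
	}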
diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c
index edde80c660..7a0e5b4049 100644
--- a/src/gallium/drivers/r600/r600_context.c
+++ b/src/gallium/drivers/r600/r600_context.c
@@ -34,10 +34,26 @@
#include "r600_resource.h"
#include "r600d.h"
+
static void r600_destroy_context(struct pipe_context *context)
{
struct r600_context *rctx = r600_context(context);
+ rctx->rasterizer = r600_context_state_decref(rctx->rasterizer);
+ rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple);
+ rctx->scissor = r600_context_state_decref(rctx->scissor);
+ rctx->clip = r600_context_state_decref(rctx->clip);
+ rctx->ps_shader = r600_context_state_decref(rctx->ps_shader);
+ rctx->vs_shader = r600_context_state_decref(rctx->vs_shader);
+ rctx->depth = r600_context_state_decref(rctx->depth);
+ rctx->stencil = r600_context_state_decref(rctx->stencil);
+ rctx->alpha = r600_context_state_decref(rctx->alpha);
+ rctx->dsa = r600_context_state_decref(rctx->dsa);
+ rctx->blend = r600_context_state_decref(rctx->blend);
+ rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref);
+ rctx->viewport = r600_context_state_decref(rctx->viewport);
+ rctx->framebuffer = r600_context_state_decref(rctx->framebuffer);
+ radeon_ctx_fini(&rctx->ctx);
FREE(rctx);
}
@@ -45,27 +61,35 @@ void r600_flush(struct pipe_context *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_context *rctx = r600_context(ctx);
- struct r600_screen *rscreen = rctx->screen;
+ struct r600_query *rquery;
static int dc = 0;
char dname[256];
- if (radeon_ctx_pm4(rctx->ctx))
- return;
+ /* suspend queries */
+ r600_queries_suspend(ctx);
/* FIXME dumping should be removed once the shader compiler supports
* instructions without emitting bad code
*/
- if (!rctx->ctx->cpm4)
+ if (!rctx->ctx.cdwords)
goto out;
+#if 0
sprintf(dname, "gallium-%08d.bof", dc);
- if (dc < 1)
- radeon_ctx_dump_bof(rctx->ctx, dname);
+ if (dc < 2) {
+ radeon_ctx_dump_bof(&rctx->ctx, dname);
+ R600_ERR("dumped %s\n", dname);
+ }
+#endif
#if 1
- radeon_ctx_submit(rctx->ctx);
+ radeon_ctx_submit(&rctx->ctx);
#endif
+ LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
+ rquery->flushed = true;
+ }
dc++;
out:
- rctx->ctx = radeon_ctx_decref(rctx->ctx);
- rctx->ctx = radeon_ctx(rscreen->rw);
+ radeon_ctx_clear(&rctx->ctx);
+ /* resume queries */
+ r600_queries_resume(ctx);
}
static void r600_init_config(struct r600_context *rctx)
@@ -207,9 +231,9 @@ static void r600_init_config(struct r600_context *rctx)
num_es_stack_entries = 0;
break;
}
- rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG);
+ radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000;
switch (family) {
case CHIP_RV610:
case CHIP_RV620:
@@ -218,75 +242,85 @@ static void r600_init_config(struct r600_context *rctx)
case CHIP_RV710:
break;
default:
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1);
break;
}
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio);
- rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio);
+ rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio);
+
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0;
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0;
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
+ rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
- rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs);
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0;
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads);
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
+ rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads);
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads);
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
- rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads);
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0;
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0;
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
+ rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0;
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
- rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
+ rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000;
- rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002;
- rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204;
- rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001;
- rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003;
- rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001;
- rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000;
- rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000;
- radeon_state_pm4(rctx->hw_states.config);
+ if (family >= CHIP_RV770) {
+ rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000;
+ rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002;
+ rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000;
+ rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204;
+ rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000;
+ rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000;
+ } else {
+ rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000;
+ rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000003;
+ rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000;
+ rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x01020204;
+ rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001;
+ rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00004010;
+ }
+ rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003;
+ rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001;
+ rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000;
+ rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000;
+ radeon_state_pm4(&rctx->config);
}
struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
@@ -320,7 +354,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
r600_init_config(rctx);
- rctx->ctx = radeon_ctx(rscreen->rw);
- rctx->draw = radeon_draw(rscreen->rw);
+ radeon_ctx_init(&rctx->ctx, rscreen->rw);
+ radeon_draw_init(&rctx->draw, rscreen->rw);
return &rctx->context;
}
diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h
index 76d5de8653..cea0813054 100644
--- a/src/gallium/drivers/r600/r600_context.h
+++ b/src/gallium/drivers/r600/r600_context.h
@@ -30,9 +30,32 @@
#include <tgsi/tgsi_parse.h>
#include <tgsi/tgsi_util.h>
#include <util/u_blitter.h>
+#include <util/u_double_list.h>
#include "radeon.h"
#include "r600_shader.h"
+#define R600_QUERY_STATE_STARTED (1 << 0)
+#define R600_QUERY_STATE_ENDED (1 << 1)
+#define R600_QUERY_STATE_SUSPENDED (1 << 2)
+
+struct r600_query {
+ u64 result;
+ /* The kind of query. Currently only OQ is supported. */
+ unsigned type;
+ /* How many results have been written, in dwords. It's incremented
+ * after end_query and flush. */
+ unsigned num_results;
+ /* if we've flushed the query */
+ boolean flushed;
+ unsigned state;
+ /* The buffer where query results are stored. */
+ struct radeon_bo *buffer;
+ unsigned buffer_size;
+ /* linked list of queries */
+ struct list_head list;
+ struct radeon_state rstate;
+};
+
/* XXX move this to a more appropriate place */
union pipe_states {
struct pipe_rasterizer_state rasterizer;
@@ -72,13 +95,16 @@ enum pipe_state_type {
pipe_type_count
};
+#define R600_MAX_RSTATE 16
+
struct r600_context_state {
union pipe_states state;
unsigned refcount;
unsigned type;
- struct radeon_state *rstate;
+ struct radeon_state rstate[R600_MAX_RSTATE];
struct r600_shader shader;
struct radeon_bo *bo;
+ unsigned nrstate;
};
struct r600_vertex_element
@@ -89,28 +115,25 @@ struct r600_vertex_element
};
struct r600_context_hw_states {
- struct radeon_state *rasterizer;
- struct radeon_state *scissor;
- struct radeon_state *dsa;
- struct radeon_state *blend;
- struct radeon_state *viewport;
- struct radeon_state *cb[8];
- struct radeon_state *config;
- struct radeon_state *cb_cntl;
- struct radeon_state *db;
- unsigned ps_nresource;
- unsigned ps_nsampler;
- struct radeon_state *ps_resource[160];
- struct radeon_state *ps_sampler[16];
+ struct radeon_state rasterizer;
+ struct radeon_state scissor;
+ struct radeon_state dsa;
+ struct radeon_state cb_cntl;
};
struct r600_context {
struct pipe_context context;
struct r600_screen *screen;
struct radeon *rw;
- struct radeon_ctx *ctx;
+ struct radeon_ctx ctx;
struct blitter_context *blitter;
- struct radeon_draw *draw;
+ struct radeon_draw draw;
+ struct radeon_state config;
+ /* FIXME get rid of those vs_resource,vs/ps_constant */
+ struct radeon_state vs_resource[160];
+ unsigned vs_nresource;
+ struct radeon_state vs_constant[256];
+ struct radeon_state ps_constant[256];
/* hw states */
struct r600_context_hw_states hw_states;
/* pipe states */
@@ -134,14 +157,15 @@ struct r600_context {
struct r600_context_state *stencil_ref;
struct r600_context_state *viewport;
struct r600_context_state *framebuffer;
- struct r600_context_state *ps_sampler[PIPE_MAX_ATTRIBS];
- struct r600_context_state *vs_sampler[PIPE_MAX_ATTRIBS];
- struct r600_context_state *ps_sampler_view[PIPE_MAX_ATTRIBS];
- struct r600_context_state *vs_sampler_view[PIPE_MAX_ATTRIBS];
+ struct radeon_state *ps_sampler[PIPE_MAX_ATTRIBS];
+ struct radeon_state *vs_sampler[PIPE_MAX_ATTRIBS];
+ struct radeon_state *ps_sampler_view[PIPE_MAX_ATTRIBS];
+ struct radeon_state *vs_sampler_view[PIPE_MAX_ATTRIBS];
struct r600_vertex_element *vertex_elements;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_index_buffer index_buffer;
- struct pipe_blend_color blend_color;
+ struct pipe_blend_color blend_color;
+ struct list_head query_list;
};
/* Convenience cast wrapper. */
@@ -150,13 +174,18 @@ static INLINE struct r600_context *r600_context(struct pipe_context *pipe)
return (struct r600_context*)pipe;
}
+static INLINE struct r600_query* r600_query(struct pipe_query* q)
+{
+ return (struct r600_query*)q;
+}
+
struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state);
struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate);
struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate);
void r600_flush(struct pipe_context *ctx, unsigned flags,
struct pipe_fence_handle **fence);
-int r600_context_hw_states(struct r600_context *rctx);
+int r600_context_hw_states(struct pipe_context *ctx);
void r600_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info);
@@ -178,4 +207,10 @@ extern int r600_pipe_shader_update(struct pipe_context *ctx,
uint32_t r600_translate_texformat(enum pipe_format format,
const unsigned char *swizzle_view,
uint32_t *word4_p, uint32_t *yuv_format_p);
+
+/* query */
+extern void r600_queries_resume(struct pipe_context *ctx);
+extern void r600_queries_suspend(struct pipe_context *ctx);
+
+
#endif
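r600_queries_suspend()/r600_queries_resume() are declared here and called from the blit and flush paths earlier in this diff. A minimal sketch of what suspend is expected to do with the new query_list; the real body lives in r600_query.c and presumably also emits the query-end event via its r600_query_end() helper:

	/* Sketch only, assuming the flag semantics suggested by the defines above. */
	static void r600_queries_suspend_sketch(struct pipe_context *ctx)
	{
		struct r600_context *rctx = r600_context(ctx);
		struct r600_query *rquery;

		LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
			if (rquery->state & R600_QUERY_STATE_STARTED) {
				/* stop counting across the interrupting blit/flush */
				rquery->state &= ~R600_QUERY_STATE_STARTED;
				rquery->state |= R600_QUERY_STATE_SUSPENDED;
			}
		}
	}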
diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c
index f058455162..fabd337d23 100644
--- a/src/gallium/drivers/r600/r600_draw.c
+++ b/src/gallium/drivers/r600/r600_draw.c
@@ -31,6 +31,7 @@
#include <util/u_math.h>
#include <util/u_inlines.h>
#include <util/u_memory.h>
+#include "radeon.h"
#include "r600_screen.h"
#include "r600_context.h"
#include "r600_resource.h"
@@ -38,8 +39,8 @@
struct r600_draw {
struct pipe_context *ctx;
- struct radeon_state *draw;
- struct radeon_state *vgt;
+ struct radeon_state draw;
+ struct radeon_state vgt;
unsigned mode;
unsigned start;
unsigned count;
@@ -51,6 +52,7 @@ static int r600_draw_common(struct r600_draw *draw)
{
struct r600_context *rctx = r600_context(draw->ctx);
struct r600_screen *rscreen = rctx->screen;
+ /* FIXME vs_resource */
struct radeon_state *vs_resource;
struct r600_resource *rbuffer;
unsigned i, j, offset, format, prim;
@@ -58,7 +60,7 @@ static int r600_draw_common(struct r600_draw *draw)
struct pipe_vertex_buffer *vertex_buffer;
int r;
- r = r600_context_hw_states(rctx);
+ r = r600_context_hw_states(draw->ctx);
if (r)
return r;
switch (draw->index_size) {
@@ -81,6 +83,7 @@ static int r600_draw_common(struct r600_draw *draw)
r = r600_conv_pipe_prim(draw->mode, &prim);
if (r)
return r;
+
/* rebuild vertex shader if input format changed */
r = r600_pipe_shader_update(draw->ctx, rctx->vs_shader);
if (r)
@@ -88,26 +91,24 @@ static int r600_draw_common(struct r600_draw *draw)
r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader);
if (r)
return r;
- r = radeon_draw_set(rctx->draw, rctx->vs_shader->rstate);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate);
- if (r)
- return r;
+ radeon_draw_bind(&rctx->draw, &rctx->vs_shader->rstate[0]);
+ radeon_draw_bind(&rctx->draw, &rctx->ps_shader->rstate[0]);
+ for (i = 0 ; i < rctx->vs_nresource; i++) {
+ radeon_state_fini(&rctx->vs_resource[i]);
+ }
for (i = 0 ; i < rctx->vertex_elements->count; i++) {
+ vs_resource = &rctx->vs_resource[i];
j = rctx->vertex_elements->elements[i].vertex_buffer_index;
vertex_buffer = &rctx->vertex_buffer[j];
rbuffer = (struct r600_resource*)vertex_buffer->buffer;
offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset;
format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format);
- vs_resource = radeon_state(rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i);
- if (vs_resource == NULL)
- return -ENOMEM;
+ radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, i, R600_SHADER_VS);
vs_resource->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
vs_resource->nbo = 1;
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset;
- vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset;
+ vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset - 1;
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) |
S_038008_DATA_FORMAT(format);
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000;
@@ -116,59 +117,61 @@ static int r600_draw_common(struct r600_draw *draw)
vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000;
vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT;
vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT;
- r = radeon_draw_set_new(rctx->draw, vs_resource);
- if (r)
+ r = radeon_state_pm4(vs_resource);
+ if (r) {
return r;
+ }
+ radeon_draw_bind(&rctx->draw, vs_resource);
}
+ rctx->vs_nresource = rctx->vertex_elements->count;
/* FIXME start need to change winsys */
- draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW);
- if (draw->draw == NULL)
- return -ENOMEM;
- draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count;
- draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator;
+ radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0);
+ draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count;
+ draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator;
if (draw->index_buffer) {
rbuffer = (struct r600_resource*)draw->index_buffer;
- draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
- draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT;
- draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT;
- draw->draw->nbo = 1;
+ draw->draw.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT;
+ draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT;
+ draw->draw.nbo = 1;
}
- r = radeon_draw_set_new(rctx->draw, draw->draw);
- if (r)
+ r = radeon_state_pm4(&draw->draw);
+ if (r) {
return r;
- draw->vgt = radeon_state(rscreen->rw, R600_VGT_TYPE, R600_VGT);
- if (draw->vgt == NULL)
- return -ENOMEM;
- draw->vgt->states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim;
- draw->vgt->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF;
- draw->vgt->states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_INDX_OFFSET] = draw->start;
- draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type;
- draw->vgt->states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001;
- draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000;
- draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000;
- r = radeon_draw_set_new(rctx->draw, draw->vgt);
- if (r)
+ }
+ radeon_draw_bind(&rctx->draw, &draw->draw);
+
+ radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0);
+ draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim;
+ draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF;
+ draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start;
+ draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type;
+ draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001;
+ draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000;
+ draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000;
+ r = radeon_state_pm4(&draw->vgt);
+ if (r) {
return r;
- /* FIXME */
- r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw);
+ }
+ radeon_draw_bind(&rctx->draw, &draw->vgt);
+
+ r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw);
if (r == -EBUSY) {
r600_flush(draw->ctx, 0, NULL);
- r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw);
+ r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw);
}
- if (r)
- return r;
- rctx->draw = radeon_draw_duplicate(rctx->draw);
- return 0;
+ return r;
}
void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
struct r600_context *rctx = r600_context(ctx);
struct r600_draw draw;
+ int r;
assert(info->index_bias == 0);
@@ -189,5 +192,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
draw.index_size = 0;
draw.index_buffer = NULL;
}
- r600_draw_common(&draw);
+ r = r600_draw_common(&draw);
+ if (r)
+ fprintf(stderr,"draw common failed %d\n", r);
}
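
For reference, a standalone CPU sketch of the retry discipline the new draw path uses: submit once, and on -EBUSY flush the queued work and retry a single time. The submit_cs()/flush_cs() helpers below are hypothetical stand-ins for radeon_ctx_set_draw() and r600_flush(); this is an illustration of the pattern, not winsys code.

#include <errno.h>
#include <stdio.h>

/* Dummy command stream that reports -EBUSY once to exercise the retry path. */
static int cs_full = 1;

static int submit_cs(void)
{
	return cs_full ? -EBUSY : 0;
}

static void flush_cs(void)
{
	cs_full = 0;	/* flushing makes room in the command stream */
}

/* Same shape as the tail of r600_draw_common(): try, flush on -EBUSY, retry once. */
static int emit_draw(void)
{
	int r = submit_cs();
	if (r == -EBUSY) {
		flush_cs();
		r = submit_cs();
	}
	return r;
}

int main(void)
{
	if (emit_draw())
		fprintf(stderr, "draw common failed\n");
	return 0;
}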
diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c
index 9b02ae680e..530940ed84 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -24,39 +24,225 @@
* Jerome Glisse
* Corbin Simpson
*/
+#include <errno.h>
#include <util/u_inlines.h>
#include <util/u_format.h>
#include <util/u_memory.h>
#include "r600_screen.h"
#include "r600_context.h"
-static struct pipe_query *r600_create_query(struct pipe_context *pipe, unsigned query_type)
+static void r600_query_begin(struct r600_context *rctx, struct r600_query *rquery)
{
- return NULL;
+ struct r600_screen *rscreen = rctx->screen;
+ struct radeon_state *rstate = &rquery->rstate;
+
+ radeon_state_fini(rstate);
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_BEGIN, 0, 0);
+ rstate->states[R600_QUERY__OFFSET] = rquery->num_results;
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rquery->buffer);
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ if (radeon_state_pm4(rstate)) {
+ radeon_state_fini(rstate);
+ }
+}
+
+static void r600_query_end(struct r600_context *rctx, struct r600_query *rquery)
+{
+ struct r600_screen *rscreen = rctx->screen;
+ struct radeon_state *rstate = &rquery->rstate;
+
+ radeon_state_fini(rstate);
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_END, 0, 0);
+ rstate->states[R600_QUERY__OFFSET] = rquery->num_results + 8;
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rquery->buffer);
+ rstate->nbo = 1;
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ if (radeon_state_pm4(rstate)) {
+ radeon_state_fini(rstate);
+ }
}
-static void r600_destroy_query(struct pipe_context *pipe, struct pipe_query *query)
+static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *q;
+
+ if (query_type != PIPE_QUERY_OCCLUSION_COUNTER)
+ return NULL;
+
+ q = CALLOC_STRUCT(r600_query);
+ if (!q)
+ return NULL;
+
+ q->type = query_type;
+ LIST_ADDTAIL(&q->list, &rctx->query_list);
+ q->buffer_size = 4096;
+
+ q->buffer = radeon_bo(rscreen->rw, 0, q->buffer_size, 1, NULL);
+ if (!q->buffer) {
+ FREE(q);
+ return NULL;
+ }
+ return (struct pipe_query *)q;
+}
+
+static void r600_destroy_query(struct pipe_context *ctx,
+ struct pipe_query *query)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_query *q = r600_query(query);
+
+ radeon_bo_decref(rscreen->rw, q->buffer);
+ LIST_DEL(&q->list);
FREE(query);
}
-static void r600_begin_query(struct pipe_context *pipe, struct pipe_query *query)
+static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquery)
{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ u64 start, end;
+ u32 *results;
+ int i;
+
+ radeon_bo_wait(rscreen->rw, rquery->buffer);
+ radeon_bo_map(rscreen->rw, rquery->buffer);
+ results = rquery->buffer->data;
+ for (i = 0; i < rquery->num_results; i += 4) {
+ start = (u64)results[i] | (u64)results[i + 1] << 32;
+ end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
+ if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) {
+ rquery->result += end - start;
+ }
+ }
+ radeon_bo_unmap(rscreen->rw, rquery->buffer);
+ rquery->num_results = 0;
}
-static void r600_end_query(struct pipe_context *pipe, struct pipe_query *query)
+static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquery)
{
+ struct r600_context *rctx = r600_context(ctx);
+
+ if (rquery->num_results >= ((rquery->buffer_size >> 2) - 2)) {
+ /* running out of space */
+ if (!rquery->flushed) {
+ ctx->flush(ctx, 0, NULL);
+ }
+ r600_query_result(ctx, rquery);
+ }
+ r600_query_begin(rctx, rquery);
+ rquery->flushed = false;
+}
+
+static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery)
+{
+ struct r600_context *rctx = r600_context(ctx);
+
+ r600_query_end(rctx, rquery);
+ rquery->num_results += 16;
}
-static boolean r600_get_query_result(struct pipe_context *pipe,
+static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *rquery = r600_query(query);
+ int r;
+
+ rquery->state = R600_QUERY_STATE_STARTED;
+ rquery->num_results = 0;
+ rquery->flushed = false;
+ r600_query_resume(ctx, rquery);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ if (r == -EBUSY) {
+ /* this shouldn't happen */
+ R600_ERR("had to flush while emitting end query\n");
+ ctx->flush(ctx, 0, NULL);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ }
+}
+
+static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *rquery = r600_query(query);
+ int r;
+
+ rquery->state &= ~R600_QUERY_STATE_STARTED;
+ rquery->state |= R600_QUERY_STATE_ENDED;
+ r600_query_suspend(ctx, rquery);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ if (r == -EBUSY) {
+ /* this shouldn't happen */
+ R600_ERR("had to flush while emitting end query\n");
+ ctx->flush(ctx, 0, NULL);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ }
+}
+
+void r600_queries_suspend(struct pipe_context *ctx)
+{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *rquery;
+ int r;
+
+ LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
+ if (rquery->state & R600_QUERY_STATE_STARTED) {
+ r600_query_suspend(ctx, rquery);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ if (r == -EBUSY) {
+ /* this shouldn't happen */
+ R600_ERR("had to flush while emitting end query\n");
+ ctx->flush(ctx, 0, NULL);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ }
+ }
+ rquery->state |= R600_QUERY_STATE_SUSPENDED;
+ }
+}
+
+void r600_queries_resume(struct pipe_context *ctx)
+{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_query *rquery;
+ int r;
+
+ LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) {
+ if (rquery->state & R600_QUERY_STATE_STARTED) {
+ r600_query_resume(ctx, rquery);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ if (r == -EBUSY) {
+ /* this shouldn't happen */
+ R600_ERR("had to flush while emitting end query\n");
+ ctx->flush(ctx, 0, NULL);
+ r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate);
+ }
+ }
+ rquery->state &= ~R600_QUERY_STATE_SUSPENDED;
+ }
+}
+
+static boolean r600_get_query_result(struct pipe_context *ctx,
struct pipe_query *query,
- boolean wait, void *result)
+ boolean wait, void *vresult)
{
+ struct r600_query *rquery = r600_query(query);
+ uint64_t *result = (uint64_t*)vresult;
+
+ if (!rquery->flushed) {
+ ctx->flush(ctx, 0, NULL);
+ rquery->flushed = true;
+ }
+ r600_query_result(ctx, rquery);
+ *result = rquery->result;
+ rquery->result = 0;
return TRUE;
}
void r600_init_query_functions(struct r600_context* rctx)
{
+ LIST_INITHEAD(&rctx->query_list);
+
rctx->context.create_query = r600_create_query;
rctx->context.destroy_query = r600_destroy_query;
rctx->context.begin_query = r600_begin_query;
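
For reference, a CPU-side sketch of the accumulation r600_query_result() performs above: the query buffer holds 64-bit (start, end) ZPASS counter pairs written by the GPU, bit 63 of each value marks it as valid, and only pairs with both values valid contribute end - start to the result. The loop stride and flag bit follow the hunk above; the sample data is made up.

#include <stdint.h>
#include <stdio.h>

/* Sum valid (start, end) pairs the same way r600_query_result() does. */
static uint64_t sum_zpass_pairs(const uint32_t *results, unsigned num_results)
{
	uint64_t count = 0;
	for (unsigned i = 0; i < num_results; i += 4) {
		uint64_t start = (uint64_t)results[i] | (uint64_t)results[i + 1] << 32;
		uint64_t end   = (uint64_t)results[i + 2] | (uint64_t)results[i + 3] << 32;
		/* bit 63 set on both values means the GPU wrote the pair */
		if ((start & 0x8000000000000000ULL) && (end & 0x8000000000000000ULL))
			count += end - start;
	}
	return count;
}

int main(void)
{
	/* one fabricated pair: begin = 100, end = 250, both flagged valid */
	uint32_t buf[4] = {
		100, 0x80000000u,	/* start: low, high dwords */
		250, 0x80000000u,	/* end:   low, high dwords */
	};
	printf("%llu samples passed\n",
	       (unsigned long long)sum_zpass_pairs(buf, 4));
	return 0;
}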
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index bb90e76fb7..129667ad20 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -44,10 +44,22 @@ struct r600_resource_texture {
struct r600_resource resource;
unsigned long offset[PIPE_MAX_TEXTURE_LEVELS];
unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned long width[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned long height[PIPE_MAX_TEXTURE_LEVELS];
unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS];
unsigned long pitch_override;
unsigned long bpt;
unsigned long size;
+ unsigned tilled;
+ unsigned array_mode;
+ unsigned tile_type;
+ unsigned depth;
+ unsigned dirty;
+ struct radeon_bo *uncompressed;
+ struct radeon_state scissor[PIPE_MAX_TEXTURE_LEVELS];
+ struct radeon_state cb[8][PIPE_MAX_TEXTURE_LEVELS];
+ struct radeon_state db[PIPE_MAX_TEXTURE_LEVELS];
+ struct radeon_state viewport[PIPE_MAX_TEXTURE_LEVELS];
};
void r600_init_context_resource_functions(struct r600_context *r600);
diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c
index cdaca9ed7d..a047a49a6c 100644
--- a/src/gallium/drivers/r600/r600_screen.c
+++ b/src/gallium/drivers/r600/r600_screen.c
@@ -69,6 +69,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ case PIPE_CAP_DEPTH_CLAMP:
return 1;
/* Unsupported features (boolean caps). */
@@ -77,7 +78,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_STREAM_OUTPUT:
case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */
case PIPE_CAP_GEOMETRY_SHADER4:
- case PIPE_CAP_DEPTH_CLAMP: /* FIXME allow this */
return 0;
/* Texturing. */
@@ -234,11 +234,34 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
struct pipe_screen *r600_screen_create(struct radeon *rw)
{
struct r600_screen* rscreen;
+ enum radeon_family family = radeon_get_family(rw);
rscreen = CALLOC_STRUCT(r600_screen);
if (rscreen == NULL) {
return NULL;
}
+
+ switch (family) {
+ case CHIP_R600:
+ case CHIP_RV610:
+ case CHIP_RV630:
+ case CHIP_RV670:
+ case CHIP_RV620:
+ case CHIP_RV635:
+ case CHIP_RS780:
+ case CHIP_RS880:
+ rscreen->chip_class = R600;
+ break;
+ case CHIP_RV770:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_RV740:
+ rscreen->chip_class = R700;
+ break;
+ default:
+ FREE(rscreen);
+ return NULL;
+ }
rscreen->rw = rw;
rscreen->screen.winsys = (struct pipe_winsys*)rw;
rscreen->screen.destroy = r600_destroy_screen;
diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h
index 53b560c617..b9938f117a 100644
--- a/src/gallium/drivers/r600/r600_screen.h
+++ b/src/gallium/drivers/r600/r600_screen.h
@@ -30,6 +30,7 @@
#include <radeon_drm.h>
#include "radeon.h"
#include "util/u_transfer.h"
+#include "r600_resource.h"
/* Texture transfer. */
struct r600_transfer {
@@ -38,11 +39,19 @@ struct r600_transfer {
/* Buffer transfer. */
struct pipe_transfer *buffer_transfer;
unsigned offset;
+ struct pipe_resource *linear_texture;
+};
+
+enum chip_class {
+ R600,
+ R700,
+ EVERGREEN,
};
struct r600_screen {
struct pipe_screen screen;
struct radeon *rw;
+ enum chip_class chip_class;
};
static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen)
@@ -62,7 +71,7 @@ unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context,
struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle);
-/* Texture transfer functions. */
+/* r600_texture.c texture transfer functions. */
struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
struct pipe_resource *texture,
struct pipe_subresource sr,
@@ -74,7 +83,14 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_transfer* transfer);
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer);
+int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level);
+int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
+/* r600_blit.c */
+int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level);
/* helpers */
int r600_conv_pipe_format(unsigned pformat, unsigned *format);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 956c7e7930..0ba26a2311 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -48,6 +48,9 @@ struct r600_shader_ctx {
struct r600_bc *bc;
struct r600_shader *shader;
u32 value[4];
+ u32 *literals;
+ u32 nliterals;
+ u32 max_driver_temp_used;
};
struct r600_shader_tgsi_instruction {
@@ -105,8 +108,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_screen *rscreen = r600_screen(ctx->screen);
int r;
-fprintf(stderr, "--------------------------------------------------------------\n");
-tgsi_dump(tokens, 0);
+//fprintf(stderr, "--------------------------------------------------------------\n");
+//tgsi_dump(tokens, 0);
if (rpshader == NULL)
return -ENOMEM;
rpshader->shader.family = radeon_get_family(rscreen->rw);
@@ -120,7 +123,7 @@ tgsi_dump(tokens, 0);
R600_ERR("building bytecode failed !\n");
return r;
}
-fprintf(stderr, "______________________________________________________________\n");
+//fprintf(stderr, "______________________________________________________________\n");
return 0;
}
@@ -131,10 +134,9 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
struct radeon_state *state;
unsigned i, tmp;
- rpshader->rstate = radeon_state_decref(rpshader->rstate);
- state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
- if (state == NULL)
- return -ENOMEM;
+ state = &rpshader->rstate[0];
+ radeon_state_fini(&rpshader->rstate[0]);
+ radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
for (i = 0; i < 10; i++) {
state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
}
@@ -144,12 +146,13 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
}
state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
- state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
- rpshader->rstate = state;
- rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
- rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
- rpshader->rstate->nbo = 2;
- rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack);
+ state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+ state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+ state->nbo = 2;
+ state->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ state->placement[2] = RADEON_GEM_DOMAIN_GTT;
return radeon_state_pm4(state);
}
@@ -161,17 +164,20 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
struct r600_context *rctx = r600_context(ctx);
struct radeon_state *state;
unsigned i, tmp, exports_ps, num_cout;
+ boolean have_pos = FALSE;
+ state = &rpshader->rstate[0];
rasterizer = &rctx->rasterizer->state.rasterizer;
- rpshader->rstate = radeon_state_decref(rpshader->rstate);
- state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
- if (state == NULL)
- return -ENOMEM;
+ radeon_state_fini(state);
+ radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
for (i = 0; i < rshader->ninput; i++) {
tmp = S_028644_SEMANTIC(i);
tmp |= S_028644_SEL_CENTROID(1);
+ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+ have_pos = TRUE;
if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
+ rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+ rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
}
if (rasterizer->sprite_coord_enable & (1 << i)) {
@@ -190,15 +196,24 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
num_cout++;
}
}
+ if (!exports_ps) {
+ /* always at least export 1 component per pixel */
+ exports_ps = 2;
+ }
state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
S_0286CC_PERSP_GRADIENT_ENA(1);
+ if (have_pos) {
+ state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1) |
+ S_0286CC_BARYC_SAMPLE_CNTL(1);
+ state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1;
+ }
state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
- state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
+ state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
+ S_028868_STACK_SIZE(rshader->bc.nstack);
state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
- rpshader->rstate = state;
- rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
- rpshader->rstate->nbo = 1;
- rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+ state->nbo = 1;
+ state->placement[0] = RADEON_GEM_DOMAIN_GTT;
return radeon_state_pm4(state);
}
@@ -268,21 +283,24 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
R600_ERR("predicate unsupported\n");
return -EINVAL;
}
+#if 0
if (i->Instruction.Label) {
R600_ERR("label unsupported\n");
return -EINVAL;
}
+#endif
for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
- if (i->Src[j].Register.Indirect ||
- i->Src[j].Register.Dimension ||
+ if (i->Src[j].Register.Dimension ||
i->Src[j].Register.Absolute) {
- R600_ERR("unsupported src (indirect|dimension|absolute)\n");
+ R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j,
+ i->Src[j].Register.Dimension,
+ i->Src[j].Register.Absolute);
return -EINVAL;
}
}
for (j = 0; j < i->Instruction.NumDstRegs; j++) {
- if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) {
- R600_ERR("unsupported dst (indirect|dimension)\n");
+ if (i->Dst[j].Register.Dimension) {
+ R600_ERR("unsupported dst (dimension)\n");
return -EINVAL;
}
}
@@ -333,6 +351,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_CONSTANT:
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_ADDRESS:
break;
default:
R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
@@ -341,6 +360,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
return 0;
}
+static int r600_get_temp(struct r600_shader_ctx *ctx)
+{
+ return ctx->temp_reg + ctx->max_driver_temp_used++;
+}
+
int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
@@ -362,9 +386,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
shader->processor_type = ctx.type;
/* register allocations */
- /* Values [0,127] correspond to GPR[0..127].
- * Values [256,511] correspond to cfile constants c[0..255].
+ /* Values [0,127] correspond to GPR[0..127].
+ * Values [128,159] correspond to constant buffer bank 0
+ * Values [160,191] correspond to constant buffer bank 1
+ * Values [256,511] correspond to cfile constants c[0..255].
* Other special values are shown in the list below.
+ * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
+ * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
+ * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
+ * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
* 248 SQ_ALU_SRC_0: special constant 0.0.
* 249 SQ_ALU_SRC_1: special constant 1.0 float.
* 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
@@ -389,15 +419,24 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
+ ctx.nliterals = 0;
+ ctx.literals = NULL;
+
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
immediate = &ctx.parse.FullToken.FullImmediate;
- ctx.value[0] = immediate->u[0].Uint;
- ctx.value[1] = immediate->u[1].Uint;
- ctx.value[2] = immediate->u[2].Uint;
- ctx.value[3] = immediate->u[3].Uint;
+ ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
+ if(ctx.literals == NULL) {
+ r = -ENOMEM;
+ goto out_err;
+ }
+ ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
+ ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
+ ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
+ ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
+ ctx.nliterals++;
break;
case TGSI_TOKEN_TYPE_DECLARATION:
r = tgsi_declaration(&ctx);
@@ -408,6 +447,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
r = tgsi_is_supported(&ctx);
if (r)
goto out_err;
+ ctx.max_driver_temp_used = 0;
+ /* reserve first tmp for everyone */
+ r600_get_temp(&ctx);
opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
r = ctx.inst_info->process(&ctx);
@@ -458,6 +500,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
output[i].array_base = 61;
+ output[i].swizzle_x = 2;
+ output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
@@ -504,7 +548,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
output[0].barrier = 1;
- output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;
output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
noutput++;
@@ -525,9 +569,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
if (r)
goto out_err;
}
+ free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return 0;
out_err:
+ free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return r;
}
@@ -547,11 +593,19 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
const struct tgsi_full_src_register *tgsi_src,
struct r600_bc_alu_src *r600_src)
{
+ int index;
memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
r600_src->sel = tgsi_src->Register.Index;
if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
r600_src->sel = 0;
+ index = tgsi_src->Register.Index;
+ ctx->value[0] = ctx->literals[index * 4 + 0];
+ ctx->value[1] = ctx->literals[index * 4 + 1];
+ ctx->value[2] = ctx->literals[index * 4 + 2];
+ ctx->value[3] = ctx->literals[index * 4 + 3];
}
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
r600_src->neg = tgsi_src->Register.Negate;
r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
return 0;
@@ -568,6 +622,8 @@ static int tgsi_dst(struct r600_shader_ctx *ctx,
r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
r600_dst->chan = swizzle;
r600_dst->write = 1;
+ if (tgsi_dst->Register.Indirect)
+ r600_dst->rel = V_SQ_REL_RELATIVE;
if (inst->Instruction.Saturate) {
r600_dst->clamp = 1;
}
@@ -607,12 +663,13 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
}
for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
+ int treg = r600_get_temp(ctx);
for (k = 0; k < 4; k++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
- alu.src[0].sel = r600_src[0].sel;
+ alu.src[0].sel = r600_src[j].sel;
alu.src[0].chan = k;
- alu.dst.sel = ctx->temp_reg + j;
+ alu.dst.sel = treg;
alu.dst.chan = k;
alu.dst.write = 1;
if (k == 3)
@@ -621,37 +678,90 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
if (r)
return r;
}
- r600_src[0].sel = ctx->temp_reg + j;
+ r600_src[j].sel = treg;
j--;
}
}
return 0;
}
-static int tgsi_op2(struct r600_shader_ctx *ctx)
+/* need to move any immediate into a temp - the trig lowering uses the literal slots for its own PI constants */
+static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nliteral, r;
+
+ for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+ nliteral++;
+ }
+ }
+ for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.src[0].sel = r600_src[j].sel;
+ alu.src[0].chan = k;
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ r600_src[j].sel = treg;
+ j++;
+ }
+ }
+ return 0;
+}
+
+static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
+ int lasti = 0;
+
+ for (i = 0; i < 4; i++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ lasti = i;
+ }
+ }
r = tgsi_split_constant(ctx, r600_src);
if (r)
return r;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
- if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu.dst.chan = i;
- } else {
- alu.inst = ctx->inst_info->r600_opcode;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
+ alu.inst = ctx->inst_info->r600_opcode;
+ if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
alu.src[j] = r600_src[j];
alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
}
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ } else {
+ alu.src[0] = r600_src[1];
+ alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
+
+ alu.src[1] = r600_src[0];
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
}
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
@@ -664,7 +774,7 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
default:
break;
}
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -674,24 +784,154 @@ static int tgsi_op2(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_kill(struct r600_shader_ctx *ctx)
+static int tgsi_op2(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 0);
+}
+
+static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
+{
+ return tgsi_op2_s(ctx, 1);
+}
+
+/*
+ * r600 - reduce the angle into the -PI..PI range
+ * r700 - normalize the angle by dividing by 2*PI
+ * see fdo bug 27901
+ */
+static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
+ struct r600_bc_alu_src r600_src[3])
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int r;
+ uint32_t lit_vals[4];
+ struct r600_bc_alu alu;
+
+ memset(lit_vals, 0, 4*4);
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
+ lit_vals[1] = fui(0.5f);
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+ alu.is_op3 = 1;
+
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+
+ alu.src[0] = r600_src[0];
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].chan = 0;
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].chan = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, lit_vals);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
+
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ if (ctx->bc->chiprev == 0) {
+ lit_vals[0] = fui(3.1415926535897f * 2.0f);
+ lit_vals[1] = fui(-3.1415926535897f);
+ } else {
+ lit_vals[0] = fui(1.0f);
+ lit_vals[1] = fui(-0.5f);
+ }
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+ alu.is_op3 = 1;
+
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].chan = 0;
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].chan = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, lit_vals);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int tgsi_trig(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
+ int lasti = 0;
+
+ r = tgsi_setup_trig(ctx, r600_src);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = ctx->inst_info->r600_opcode;
+ alu.dst.chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ /* replicate result */
for (i = 0; i < 4; i++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i))
+ lasti = i;
+ }
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = ctx->inst_info->r600_opcode;
- alu.dst.chan = i;
- alu.src[0].sel = 248;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+
+ alu.src[0].sel = ctx->temp_reg;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (r)
return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
- if (i == 3) {
+ if (i == lasti)
alu.last = 1;
- }
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
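
A standalone sketch of the argument reduction tgsi_setup_trig() emits (MULADD by 1/(2*PI) with a 0.5 bias, FRACT, then a chip-dependent MULADD): on R600-class parts (chiprev 0) the result is mapped back to [-PI, PI), on R700 (chiprev 1) it stays normalized in [-0.5, 0.5) revolutions. The chiprev values follow r600_bc_init() from this commit; this is just the same math on the CPU.

#include <math.h>
#include <stdio.h>

#define R600_PI 3.1415926535897f

/* Mirror of the MULADD / FRACT / MULADD sequence in tgsi_setup_trig(). */
static float reduce_trig_arg(float x, int chiprev)
{
	float t = x * (1.0f / (2.0f * R600_PI)) + 0.5f;	/* MULADD */
	t = t - floorf(t);				/* FRACT -> [0, 1) */
	if (chiprev == 0)
		return t * (2.0f * R600_PI) - R600_PI;	/* r600: radians in [-pi, pi) */
	return t - 0.5f;				/* r700: revolutions in [-0.5, 0.5) */
}

int main(void)
{
	float x = 10.0f;
	/* the reduced angle feeds the hardware SIN/COS; on r600 it agrees with libm */
	printf("%f %f\n", sinf(reduce_trig_arg(x, 0)), sinf(x));
	return 0;
}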
@@ -699,30 +939,70 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_slt(struct r600_shader_ctx *ctx)
+static int tgsi_scs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
- int i, r;
+ int r;
- r = tgsi_split_constant(ctx, r600_src);
+ r = tgsi_setup_trig(ctx, r600_src);
+ if (r)
+ return r;
+
+
+ /* dst.x = COS */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS;
+ r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
if (r)
return r;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ /* dst.y = SIN */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN;
+ r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+ if (r)
+ return r;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int tgsi_kill(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, r;
+
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
- if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu.dst.chan = i;
+ alu.inst = ctx->inst_info->r600_opcode;
+
+ alu.dst.chan = i;
+
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+
+ if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[1].neg = 1;
} else {
- alu.inst = ctx->inst_info->r600_opcode;
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
if (r)
return r;
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
}
if (i == 3) {
alu.last = 1;
@@ -731,6 +1011,13 @@ static int tgsi_slt(struct r600_shader_ctx *ctx)
if (r)
return r;
}
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
+ /* kill must be last in ALU */
+ ctx->bc->force_add_cf = 1;
+ ctx->shader->uses_kill = TRUE;
return 0;
}
@@ -738,12 +1025,20 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
+ struct r600_bc_alu_src r600_src[3];
int r;
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+ r = tgsi_split_literal_constant(ctx, r600_src);
+ if (r)
+ return r;
+
/* dst.x, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
- alu.src[0].sel = 249; /*1.0*/
+ alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
alu.src[0].chan = 0;
r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
if (r)
@@ -756,11 +1051,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.y = max(src.x, 0.0) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[1].sel = 248; /*0.0*/
- alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
+ alu.src[0] = r600_src[0];
+ alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
+ alu.src[1].chan = 0;
r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
if (r)
return r;
@@ -769,18 +1062,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
- /* dst.z = NOP - fill Z slot */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
- alu.dst.chan = 2;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
-
/* dst.w, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
- alu.src[0].sel = 249;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
if (r)
@@ -791,6 +1076,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
if (inst->Dst[0].Register.WriteMask & (1 << 2))
{
int chan;
@@ -799,9 +1088,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.z = log(src.y) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ alu.src[0] = r600_src[0];
alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
if (r)
@@ -811,21 +1098,22 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
chan = alu.dst.chan;
sel = alu.dst.sel;
/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ alu.src[0] = r600_src[0];
alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
alu.src[1].sel = sel;
alu.src[1].chan = chan;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
- if (r)
- return r;
+
+ alu.src[2] = r600_src[0];
alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -836,6 +1124,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
if (r)
return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
/* dst.z = exp(tmp.x) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
@@ -880,19 +1171,43 @@ static int tgsi_trans(struct r600_shader_ctx *ctx)
return 0;
}
+static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, r;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.src[0].sel = ctx->temp_reg;
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.dst.chan = i;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- int i, j, r;
+ int i, r;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
- for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]);
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
if (r)
return r;
- alu.src[j].chan = tgsi_chan(&inst->Src[j], 0);
+ alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
}
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -900,16 +1215,124 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
/* replicate result */
+ return tgsi_helper_tempx_replicate(ctx);
+}
+
+static int tgsi_pow(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int r;
+
+ /* LOG2(a) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc,ctx->value);
+ if (r)
+ return r;
+ /* b * LOG2(a) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE;
+ r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
+ alu.src[1].sel = ctx->temp_reg;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc,ctx->value);
+ if (r)
+ return r;
+ /* POW(a,b) = EXP2(b * LOG2(a))*/
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc,ctx->value);
+ if (r)
+ return r;
+ return tgsi_helper_tempx_replicate(ctx);
+}
+
+static int tgsi_ssg(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ struct r600_bc_alu_src r600_src[3];
+ int i, r;
+
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ /* tmp = (src > 0 ? 1 : src) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.src[0].sel = ctx->temp_reg;
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
+ alu.is_op3 = 1;
+
+ alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
+
+ alu.src[0] = r600_src[0];
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+
+ alu.src[2] = r600_src[0];
+ alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
+ /* dst = (-tmp > 0 ? -1 : tmp) */
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT;
+ alu.is_op3 = 1;
r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (r)
return r;
- alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = i;
+ alu.src[0].neg = 1;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[1].neg = 1;
+
+ alu.src[2].sel = ctx->temp_reg;
+ alu.src[2].chan = i;
+
if (i == 3)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
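
CPU sketches of the two ALU expansions introduced in this hunk: tgsi_pow() uses the identity pow(a, b) = exp2(b * log2(a)), and tgsi_ssg() builds sign() from two conditional selects, where CNDGT(a, b, c) picks b when a > 0 and c otherwise.

#include <math.h>
#include <stdio.h>

/* pow(a, b) = exp2(b * log2(a)), the LOG_IEEE / MUL_IEEE / EXP_IEEE chain. */
static float pow_expand(float a, float b)
{
	return exp2f(b * log2f(a));
}

/* CNDGT(a, b, c) as the ALU evaluates it. */
static float cndgt(float a, float b, float c)
{
	return (a > 0.0f) ? b : c;
}

/* sign(src) via the two CNDGT steps in tgsi_ssg(). */
static float ssg_expand(float src)
{
	float tmp = cndgt(src, 1.0f, src);	/* src > 0  -> 1, else src */
	return cndgt(-tmp, -1.0f, tmp);		/* src < 0  -> -1, else tmp (0 or 1) */
}

int main(void)
{
	printf("%f %f\n", pow_expand(2.0f, 10.0f), ssg_expand(-3.5f));
	return 0;
}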
@@ -1006,16 +1429,23 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
switch (ctx->inst_info->tgsi_opcode) {
case TGSI_OPCODE_DP2:
if (i > 1) {
- alu.src[0].sel = alu.src[1].sel = 248;
+ alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = alu.src[1].chan = 0;
}
break;
case TGSI_OPCODE_DP3:
if (i > 2) {
- alu.src[0].sel = alu.src[1].sel = 248;
+ alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = alu.src[1].chan = 0;
}
break;
+ case TGSI_OPCODE_DPH:
+ if (i == 3) {
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+ alu.src[0].neg = 0;
+ }
+ break;
default:
break;
}
@@ -1035,75 +1465,197 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
struct r600_bc_tex tex;
struct r600_bc_alu alu;
unsigned src_gpr;
- int r;
+ int r, i;
+ int opcode;
+ boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
+ uint32_t lit_vals[4];
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
- /* Add perspective divide */
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
- alu.src[0].sel = src_gpr;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 3;
- alu.last = 1;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+ /* Add perspective divide */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 3;
- alu.src[1].sel = src_gpr;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 0;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 3;
- alu.src[1].sel = src_gpr;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], 1);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 1;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
- alu.src[0].sel = ctx->temp_reg;
- alu.src[0].chan = 3;
- alu.src[1].sel = src_gpr;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], 2);
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 2;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
- alu.src[0].sel = 249;
- alu.src[0].chan = 0;
- alu.dst.sel = ctx->temp_reg;
- alu.dst.chan = 3;
- alu.last = 1;
- alu.dst.write = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- src_gpr = ctx->temp_reg;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 3;
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 3;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
+ if (r)
+ return r;
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 3;
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ src_not_temp = false;
+ src_gpr = ctx->temp_reg;
+ }
+
+ if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
+ int src_chan, src2_chan;
+
+ /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+ switch (i) {
+ case 0:
+ src_chan = 2;
+ src2_chan = 1;
+ break;
+ case 1:
+ src_chan = 2;
+ src2_chan = 0;
+ break;
+ case 2:
+ src_chan = 0;
+ src2_chan = 2;
+ break;
+ case 3:
+ src_chan = 1;
+ src2_chan = 2;
+ break;
+ }
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
+ if (r)
+ return r;
+ alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* tmp1.z = RCP_e(|tmp1.z|) */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 2;
+ alu.src[0].abs = 1;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
+ * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
+ * muladd has no writemask, have to use another temp
+ */
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+ alu.is_op3 = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+ alu.src[1].sel = ctx->temp_reg;
+ alu.src[1].chan = 2;
+
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+ alu.is_op3 = 1;
+
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = ctx->temp_reg;
+ alu.src[1].chan = 2;
+
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ lit_vals[0] = fui(1.5f);
+
+ r = r600_bc_add_literal(ctx->bc, lit_vals);
+ if (r)
+ return r;
+ src_not_temp = false;
+ src_gpr = ctx->temp_reg;
+ }
+
+ if (src_not_temp) {
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = i;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ src_gpr = ctx->temp_reg;
+ }
+
+ opcode = ctx->inst_info->r600_opcode;
+ if (opcode == SQ_TEX_INST_SAMPLE &&
+ (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
+ opcode = SQ_TEX_INST_SAMPLE_C;
- /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
memset(&tex, 0, sizeof(struct r600_bc_tex));
- tex.inst = ctx->inst_info->r600_opcode;
+ tex.inst = opcode;
tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
tex.sampler_id = tex.resource_id;
tex.src_gpr = src_gpr;
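
The TXP branch above performs the projective divide with one RECIP_IEEE and three MULs before forcing w to 1.0. A plain C equivalent of that per-fragment math (the cube-map branch that follows is hardware specific and not sketched here):

#include <stdio.h>

/* coord.xyz /= coord.w, coord.w = 1.0 -- what the RECIP_IEEE + three MULs
 * + MOV sequence in the TXP branch computes into ctx->temp_reg. */
static void txp_project(float coord[4])
{
	float inv_w = 1.0f / coord[3];	/* RECIP_IEEE on the w channel */
	coord[0] *= inv_w;		/* MUL per x/y/z channel */
	coord[1] *= inv_w;
	coord[2] *= inv_w;
	coord[3] = 1.0f;		/* MOV of SQ_ALU_SRC_1 */
}

int main(void)
{
	float c[4] = { 2.0f, 4.0f, 6.0f, 2.0f };
	txp_project(c);
	printf("%f %f %f %f\n", c[0], c[1], c[2], c[3]);
	return 0;
}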
@@ -1117,13 +1669,30 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.src_sel_z = 2;
tex.src_sel_w = 3;
+ if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
+ tex.src_sel_x = 1;
+ tex.src_sel_y = 0;
+ tex.src_sel_z = 3;
+ tex.src_sel_w = 1;
+ }
+
if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
tex.coord_type_x = 1;
tex.coord_type_y = 1;
tex.coord_type_z = 1;
tex.coord_type_w = 1;
}
- return r600_bc_add_tex(ctx->bc, &tex);
+
+ if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
+ tex.src_sel_w = 2;
+
+ r = r600_bc_add_tex(ctx->bc, &tex);
+ if (r)
+ return r;
+
+ /* add shadow ambient support - gallium doesn't do it yet */
+ return 0;
+
}
static int tgsi_lrp(struct r600_shader_ctx *ctx)
@@ -1141,7 +1710,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD;
- alu.src[0].sel = 249;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
alu.src[1] = r600_src[0];
alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
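
The visible part of tgsi_lrp() computes (1 - src0) per channel with an ADD of SQ_ALU_SRC_1 and a negated src0; the rest of the function lies outside this hunk, but the identity being lowered is the usual LRP(a, b, c) = a*b + (1 - a)*c. A one-line reference version, purely for comparison:

/* Reference LRP: a*b + (1 - a)*c, evaluated per channel. */
static float lrp(float a, float b, float c)
{
	return a * b + (1.0f - a) * c;
}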
@@ -1205,23 +1774,654 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
return tgsi_helper_copy(ctx, inst);
}
+static int tgsi_cmp(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu alu;
+ int use_temp = 0;
+ int i, r;
+
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ if (inst->Dst[0].Register.WriteMask != 0xf)
+ use_temp = 1;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE;
+ alu.src[0] = r600_src[0];
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+
+ alu.src[1] = r600_src[2];
+ alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+
+ alu.src[2] = r600_src[1];
+ alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
+
+ if (use_temp)
+ alu.dst.sel = ctx->temp_reg;
+ else {
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+ }
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.is_op3 = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ if (use_temp)
+ return tgsi_helper_copy(ctx, inst);
+ return 0;
+}
+
+static int tgsi_xpd(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu alu;
+ uint32_t use_temp = 0;
+ int i, r;
+
+ if (inst->Dst[0].Register.WriteMask != 0xf)
+ use_temp = 1;
+
+ r = tgsi_split_constant(ctx, r600_src);
+ if (r)
+ return r;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+
+ alu.src[0] = r600_src[0];
+ switch (i) {
+ case 0:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
+ break;
+ case 1:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ break;
+ case 2:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
+ break;
+ case 3:
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+ alu.src[0].chan = i;
+ }
+
+ alu.src[1] = r600_src[1];
+ switch (i) {
+ case 0:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
+ break;
+ case 1:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
+ break;
+ case 2:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
+ break;
+ case 3:
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.src[1].chan = i;
+ }
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+
+ alu.src[0] = r600_src[0];
+ switch (i) {
+ case 0:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
+ break;
+ case 1:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
+ break;
+ case 2:
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ break;
+ case 3:
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+ alu.src[0].chan = i;
+ }
+
+ alu.src[1] = r600_src[1];
+ switch (i) {
+ case 0:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
+ break;
+ case 1:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
+ break;
+ case 2:
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
+ break;
+ case 3:
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.src[1].chan = i;
+ }
+
+ alu.src[2].sel = ctx->temp_reg;
+ alu.src[2].neg = 1;
+ alu.src[2].chan = i;
+
+ if (use_temp)
+ alu.dst.sel = ctx->temp_reg;
+ else {
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+ }
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.is_op3 = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ if (use_temp)
+ return tgsi_helper_copy(ctx, inst);
+ return 0;
+}
+
+static int tgsi_exp(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu_src r600_src[3];
+ struct r600_bc_alu alu;
+ int r;
+
+ /* result.x = 2^floor(src); */
+ if (inst->Dst[0].Register.WriteMask & 1) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ }
+
+ /* result.y = tmp - floor(tmp); */
+ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT;
+ alu.src[0] = r600_src[0];
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+// if (r)
+// return r;
+ alu.dst.write = 1;
+ alu.dst.chan = 1;
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ }
+
+ /* result.z = RoughApprox2ToX(tmp);*/
+ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 2;
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ }
+
+ /* result.w = 1.0;*/
+ if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ r = r600_bc_add_literal(ctx->bc, ctx->value);
+ if (r)
+ return r;
+ }
+ return tgsi_helper_copy(ctx, inst);
+}
+
+static int tgsi_arl(struct r600_shader_ctx *ctx)
+{
+ /* TODO: per r600c, AR values don't persist between clauses */
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int r;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int tgsi_opdst(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, r = 0;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+
+ if (i == 0 || i == 3) {
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ } else {
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ }
+
+ if (i == 0 || i == 2) {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ } else {
+ r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
+ if (r)
+ return r;
+ alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ }
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
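For reference, the per-channel source selection in tgsi_opdst above implements the usual DST semantics: every channel is a multiply once the unused operand is replaced by the constant 1.0. A minimal standalone sketch of what the emitted MULs compute (plain C, not driver code):

#include <stdio.h>

/* Reference semantics of the TGSI DST opcode, matching the i == 0/2/3
 * special cases above where a source is forced to V_SQ_ALU_SRC_1. */
static void dst_reference(const float src0[4], const float src1[4], float dst[4])
{
	dst[0] = 1.0f;              /* i == 0: both sources forced to 1.0 */
	dst[1] = src0[1] * src1[1]; /* i == 1: plain multiply             */
	dst[2] = src0[2];           /* i == 2: src1 forced to 1.0         */
	dst[3] = src1[3];           /* i == 3: src0 forced to 1.0         */
}

int main(void)
{
	const float a[4] = {2.0f, 3.0f, 4.0f, 5.0f};
	const float b[4] = {6.0f, 7.0f, 8.0f, 9.0f};
	float d[4];

	dst_reference(a, b, d);
	printf("DST = {%g, %g, %g, %g}\n", d[0], d[1], d[2], d[3]);
	return 0;
}
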
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = opcode;
+ alu.predicate = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 0;
+
+ r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+ if (r)
+ return r;
+ alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.src[1].chan = 0;
+
+ alu.last = 1;
+
+ r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int pops(struct r600_shader_ctx *ctx, int pops)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
+ ctx->bc->cf_last->pop_count = pops;
+ return 0;
+}
+
+static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
+{
+ switch(reason) {
+ case FC_PUSH_VPM:
+ ctx->bc->callstack[ctx->bc->call_sp].current--;
+ break;
+ case FC_PUSH_WQM:
+ case FC_LOOP:
+ ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+ break;
+ case FC_REP:
+ /* TODO: for a 16 vp asic this should be -= 2; */
+ ctx->bc->callstack[ctx->bc->call_sp].current--;
+ break;
+ }
+}
+
+static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
+{
+ if (check_max_only) {
+ int diff;
+ switch (reason) {
+ case FC_PUSH_VPM:
+ diff = 1;
+ break;
+ case FC_PUSH_WQM:
+ diff = 4;
+ break;
+ default:
+ /* only VPM/WQM pushes reach this check; keep diff defined */
+ diff = 0;
+ break;
+ }
+ if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
+ ctx->bc->callstack[ctx->bc->call_sp].max) {
+ ctx->bc->callstack[ctx->bc->call_sp].max =
+ ctx->bc->callstack[ctx->bc->call_sp].current + diff;
+ }
+ return;
+ }
+ switch (reason) {
+ case FC_PUSH_VPM:
+ ctx->bc->callstack[ctx->bc->call_sp].current++;
+ break;
+ case FC_PUSH_WQM:
+ case FC_LOOP:
+ ctx->bc->callstack[ctx->bc->call_sp].current += 4;
+ break;
+ case FC_REP:
+ ctx->bc->callstack[ctx->bc->call_sp].current++;
+ break;
+ }
+
+ if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
+ ctx->bc->callstack[ctx->bc->call_sp].max) {
+ ctx->bc->callstack[ctx->bc->call_sp].max =
+ ctx->bc->callstack[ctx->bc->call_sp].current;
+ }
+}
+
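The two helpers above track how many stack entries each control-flow push consumes (1 for a VPM push, 4 for WQM/loop pushes here) and record the worst case per call level. A compact standalone model of that bookkeeping, with illustrative names:

#include <stdio.h>

/* Illustrative model of the callstack accounting above: each push costs a
 * number of stack entries that depends on why it was pushed, and the
 * shader must report the high-water mark (max). */
enum push_reason { PUSH_VPM, PUSH_WQM, PUSH_LOOP };

struct stack_level { int current, max; };

static int entry_cost(enum push_reason reason)
{
	return (reason == PUSH_VPM) ? 1 : 4;
}

static void push(struct stack_level *lvl, enum push_reason reason)
{
	lvl->current += entry_cost(reason);
	if (lvl->current > lvl->max)
		lvl->max = lvl->current;
}

static void pop(struct stack_level *lvl, enum push_reason reason)
{
	lvl->current -= entry_cost(reason);
}

int main(void)
{
	struct stack_level lvl = {0, 0};
	push(&lvl, PUSH_VPM);  /* IF      */
	push(&lvl, PUSH_LOOP); /* BGNLOOP */
	pop(&lvl, PUSH_LOOP);  /* ENDLOOP */
	pop(&lvl, PUSH_VPM);   /* ENDIF   */
	printf("max stack entries used: %d\n", lvl.max); /* prints 5 */
	return 0;
}
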
+static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
+{
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
+
+ sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
+ sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
+ sp->mid[sp->num_mid] = ctx->bc->cf_last;
+ sp->num_mid++;
+}
+
+static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
+{
+ ctx->bc->fc_sp++;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+}
+
+static void fc_poplevel(struct r600_shader_ctx *ctx)
+{
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+ if (sp->mid) {
+ free(sp->mid);
+ sp->mid = NULL;
+ }
+ sp->num_mid = 0;
+ sp->start = NULL;
+ sp->type = 0;
+ ctx->bc->fc_sp--;
+}
+
+#if 0
+static int emit_return(struct r600_shader_ctx *ctx)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
+ return 0;
+}
+
+static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
+{
+
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+ ctx->bc->cf_last->pop_count = pops;
+ /* TODO work out offset */
+ return 0;
+}
+
+static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
+{
+ return 0;
+}
+
+static void emit_testflag(struct r600_shader_ctx *ctx)
+{
+
+}
+
+static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
+{
+ emit_testflag(ctx);
+ emit_jump_to_offset(ctx, 1, 4);
+ emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
+ pops(ctx, ifidx + 1);
+ emit_return(ctx);
+}
+
+static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
+{
+ emit_testflag(ctx);
+
+ r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+ ctx->bc->cf_last->pop_count = 1;
+
+ fc_set_mid(ctx, fc_sp);
+
+ pops(ctx, 1);
+}
+#endif
+
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+ emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+
+ fc_pushlevel(ctx, FC_IF);
+
+ callstack_check_depth(ctx, FC_PUSH_VPM, 0);
+ return 0;
+}
+
+static int tgsi_else(struct r600_shader_ctx *ctx)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
+ ctx->bc->cf_last->pop_count = 1;
+
+ fc_set_mid(ctx, ctx->bc->fc_sp);
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
+ return 0;
+}
+
+static int tgsi_endif(struct r600_shader_ctx *ctx)
+{
+ pops(ctx, 1);
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
+ R600_ERR("if/endif unbalanced in shader\n");
+ return -1;
+ }
+
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
+ } else {
+ ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
+ }
+ fc_poplevel(ctx);
+
+ callstack_decrease_current(ctx, FC_PUSH_VPM);
+ return 0;
+}
+
+static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
+{
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
+
+ fc_pushlevel(ctx, FC_LOOP);
+
+ /* check stack depth */
+ callstack_check_depth(ctx, FC_LOOP, 0);
+ return 0;
+}
+
+static int tgsi_endloop(struct r600_shader_ctx *ctx)
+{
+ int i;
+
+ r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);
+
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
+ R600_ERR("loop/endloop in shader code are not paired.\n");
+ return -EINVAL;
+ }
+
+ /* fixup loop pointers - from r600isa
+ LOOP END points to CF after LOOP START,
+ LOOP START points to CF after LOOP END,
+ BRK/CONT point to LOOP END CF
+ */
+ ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
+
+ ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+
+ for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
+ }
+ /* TODO add LOOPRET support */
+ fc_poplevel(ctx);
+ callstack_decrease_current(ctx, FC_LOOP);
+ return 0;
+}
+
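A toy model of the loop-pointer fixup described in the comment above, using plain integers for CF ids; the "+ 2" step to the following CF entry mirrors the code above and is otherwise an assumption of this sketch:

#include <stdio.h>

/* LOOP_END points back to the CF entry right after LOOP_START, LOOP_START
 * points to the CF entry right after LOOP_END, and BRK/CONT point at
 * LOOP_END itself.  Ids are assumed to advance by 2 per CF instruction. */
struct cf { int id; int cf_addr; };

static void fixup_loop(struct cf *start, struct cf *end,
		       struct cf **mid, int num_mid)
{
	end->cf_addr = start->id + 2;  /* jump back to first CF inside the loop */
	start->cf_addr = end->id + 2;  /* skip the whole loop when not taken    */
	for (int i = 0; i < num_mid; i++)
		mid[i]->cf_addr = end->id; /* BRK/CONT land on LOOP_END         */
}

int main(void)
{
	struct cf start = {10, 0}, brk = {14, 0}, end = {20, 0};
	struct cf *mid[] = { &brk };

	fixup_loop(&start, &end, mid, 1);
	printf("start->%d end->%d brk->%d\n", start.cf_addr, end.cf_addr, brk.cf_addr);
	return 0;
}
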
+static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
+{
+ unsigned int fscp;
+
+ for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+ {
+ if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ break;
+ }
+
+ if (fscp == 0) {
+ R600_ERR("Break not inside loop/endloop pair\n");
+ return -EINVAL;
+ }
+
+ r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+ ctx->bc->cf_last->pop_count = 1;
+
+ fc_set_mid(ctx, fscp);
+
+ pops(ctx, 1);
+ callstack_check_depth(ctx, FC_PUSH_VPM, 1);
+ return 0;
+}
+
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
- {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl},
{TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
{TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
{TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
{TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
{TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
{TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
- {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
{TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
{TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
- {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt},
- {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
+ {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
{TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
@@ -1232,38 +2432,38 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
/* gap */
{22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
{TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
{TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
- {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
+ {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
/* gap */
{32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
{TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */
+ {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
+ {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
+ {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
+ {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
+ {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
{TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
{TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
+ {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
+ {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
+ {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
{TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex},
+ {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex},
+ {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
{TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1274,21 +2474,21 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */
- {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
+ {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
+ {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
+ {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
{TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
+ {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
/* gap */
{75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
+ {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
/* gap */
{79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1297,7 +2497,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1308,12 +2508,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
{TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
{TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 2ee7780ead..7c722c07cb 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -42,6 +42,7 @@ struct r600_shader {
struct r600_shader_io input[32];
struct r600_shader_io output[32];
enum radeon_family family;
+ boolean uses_kill;
};
#endif
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 002660c654..fa7a31742a 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -206,6 +206,26 @@
#define S_SQ_ALU_WORD0_SRC0_SEL(x) (((x) & 0x1FF) << 0)
#define G_SQ_ALU_WORD0_SRC0_SEL(x) (((x) >> 0) & 0x1FF)
#define C_SQ_ALU_WORD0_SRC0_SEL 0xFFFFFE00
+/*
+ * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
+ * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
+ * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
+ * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
+ * 248 SQ_ALU_SRC_0: special constant 0.0.
+ * 249 SQ_ALU_SRC_1: special constant 1.0 float.
+ * 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
+ * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
+ * 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
+ * 253 SQ_ALU_SRC_LITERAL: literal constant.
+ * 254 SQ_ALU_SRC_PV: previous vector result.
+ * 255 SQ_ALU_SRC_PS: previous scalar result.
+ */
+#define V_SQ_ALU_SRC_0 0x000000F8
+#define V_SQ_ALU_SRC_1 0x000000F9
+#define V_SQ_ALU_SRC_1_INT 0x000000FA
+#define V_SQ_ALU_SRC_M_1_INT 0x000000FB
+#define V_SQ_ALU_SRC_0_5 0x000000FC
+#define V_SQ_ALU_SRC_LITERAL 0x000000FD
#define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9)
#define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1)
#define C_SQ_ALU_WORD0_SRC0_REL 0xFFFFFDFF
@@ -583,4 +603,11 @@
#define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7)
#define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF
+#define V_SQ_CF_COND_ACTIVE 0x00
+#define V_SQ_CF_COND_FALSE 0x01
+#define V_SQ_CF_COND_BOOL 0x02
+#define V_SQ_CF_COND_NOT_BOOL 0x03
+
+#define V_SQ_REL_ABSOLUTE 0
+#define V_SQ_REL_RELATIVE 1
#endif
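These selects are what the shader translator feeds into ALU src.sel when it needs an inline 0.0/1.0 or a literal instead of a GPR. A small standalone sketch of classifying a source-select value; the "below 128 is a GPR" range is assumed from the R600 ISA documentation and is not part of this header:

#include <stdio.h>

#define V_SQ_ALU_SRC_0       0x000000F8
#define V_SQ_ALU_SRC_1       0x000000F9
#define V_SQ_ALU_SRC_LITERAL 0x000000FD

/* Classify a source select: low values address GPRs (assumption), the
 * 0xF8..0xFD range are the special constants documented above. */
static const char *classify_src_sel(unsigned sel)
{
	if (sel < 128)
		return "GPR";
	switch (sel) {
	case V_SQ_ALU_SRC_0:       return "inline 0.0";
	case V_SQ_ALU_SRC_1:       return "inline 1.0";
	case V_SQ_ALU_SRC_LITERAL: return "literal constant (following dwords)";
	default:                   return "other special/constant source";
	}
}

int main(void)
{
	unsigned sels[] = { 3, V_SQ_ALU_SRC_1, V_SQ_ALU_SRC_LITERAL };
	for (unsigned i = 0; i < 3; i++)
		printf("sel 0x%02X -> %s\n", sels[i], classify_src_sel(sels[i]));
	return 0;
}
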
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 3efd409ae0..66cab7d7a6 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -34,6 +34,17 @@
#include "r600d.h"
#include "r600_state_inlines.h"
+static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state);
+static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state);
+static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_clip_state *state);
+static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_sampler_state *state, unsigned id);
+static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, const struct pipe_sampler_view *view, unsigned id);
+static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_framebuffer_state *state, int cb);
+static void r600_db(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_framebuffer_state *state);
+
+
static void *r600_create_blend_state(struct pipe_context *ctx,
const struct pipe_blend_state *state)
{
@@ -81,11 +92,12 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
struct r600_context *rctx = r600_context(ctx);
struct r600_context_state *rstate;
- rstate = r600_context_state(rctx, pipe_sampler_type, state);
+ rstate = r600_context_state(rctx, pipe_sampler_view_type, state);
pipe_reference(NULL, &texture->reference);
rstate->state.sampler_view.texture = texture;
rstate->state.sampler_view.reference.count = 1;
rstate->state.sampler_view.context = ctx;
+ r600_resource(ctx, &rstate->rstate[0], &rstate->state.sampler_view, 0);
return &rstate->state.sampler_view;
}
@@ -223,12 +235,24 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx,
struct r600_context_state *rstate;
unsigned i;
- for (i = 0; i < rctx->ps_nsampler; i++) {
- rctx->ps_sampler[i] = r600_context_state_decref(rctx->ps_sampler[i]);
+ for (i = 0; i < count; i++) {
+ rstate = (struct r600_context_state *)states[i];
+ if (rstate) {
+ rstate->nrstate = 0;
+ }
}
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)states[i];
- rctx->ps_sampler[i] = r600_context_state_incref(rstate);
+ if (rstate) {
+ if (rstate->nrstate >= R600_MAX_RSTATE)
+ continue;
+ if (rstate->nrstate) {
+ memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
+ }
+ radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, R600_SHADER_PS);
+ rctx->ps_sampler[i] = &rstate->rstate[rstate->nrstate];
+ rstate->nrstate++;
+ }
}
rctx->ps_nsampler = count;
}
@@ -240,12 +264,24 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx,
struct r600_context_state *rstate;
unsigned i;
- for (i = 0; i < rctx->vs_nsampler; i++) {
- rctx->vs_sampler[i] = r600_context_state_decref(rctx->vs_sampler[i]);
+ for (i = 0; i < count; i++) {
+ rstate = (struct r600_context_state *)states[i];
+ if (rstate) {
+ rstate->nrstate = 0;
+ }
}
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)states[i];
- rctx->vs_sampler[i] = r600_context_state_incref(rstate);
+ if (rstate) {
+ if (rstate->nrstate >= R600_MAX_RSTATE)
+ continue;
+ if (rstate->nrstate) {
+ memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
+ }
+ radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, R600_SHADER_VS);
+ rctx->vs_sampler[i] = &rstate->rstate[rstate->nrstate];
+ rstate->nrstate++;
+ }
}
rctx->vs_nsampler = count;
}
@@ -268,6 +304,13 @@ static void r600_set_blend_color(struct pipe_context *ctx,
static void r600_set_clip_state(struct pipe_context *ctx,
const struct pipe_clip_state *state)
{
+ struct r600_context *rctx = r600_context(ctx);
+ struct r600_context_state *rstate;
+
+ rstate = r600_context_state(rctx, pipe_clip_type, state);
+ r600_bind_state(ctx, rstate);
+ /* r600_bind_state holds a reference, so drop the creation reference */
+ r600_delete_state(ctx, rstate);
}
static void r600_set_constant_buffer(struct pipe_context *ctx,
@@ -276,19 +319,21 @@ static void r600_set_constant_buffer(struct pipe_context *ctx,
{
struct r600_screen *rscreen = r600_screen(ctx->screen);
struct r600_context *rctx = r600_context(ctx);
- unsigned nconstant = 0, i, type, id;
- struct radeon_state *rstate;
+ unsigned nconstant = 0, i, type, shader_class;
+ struct radeon_state *rstate, *rstates;
struct pipe_transfer *transfer;
u32 *ptr;
+ type = R600_STATE_CONSTANT;
+
switch (shader) {
case PIPE_SHADER_VERTEX:
- id = R600_VS_CONSTANT;
- type = R600_VS_CONSTANT_TYPE;
+ shader_class = R600_SHADER_VS;
+ rstates = rctx->vs_constant;
break;
case PIPE_SHADER_FRAGMENT:
- id = R600_PS_CONSTANT;
- type = R600_PS_CONSTANT_TYPE;
+ shader_class = R600_SHADER_PS;
+ rstates = rctx->ps_constant;
break;
default:
R600_ERR("unsupported %d\n", shader);
@@ -300,17 +345,15 @@ static void r600_set_constant_buffer(struct pipe_context *ctx,
if (ptr == NULL)
return;
for (i = 0; i < nconstant; i++) {
- rstate = radeon_state(rscreen->rw, type, id + i);
- if (rstate == NULL)
- return;
+ rstate = &rstates[i];
+ radeon_state_init(rstate, rscreen->rw, type, i, shader_class);
rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0];
rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1];
rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2];
rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3];
if (radeon_state_pm4(rstate))
return;
- if (radeon_draw_set_new(rctx->draw, rstate))
- return;
+ radeon_draw_bind(&rctx->draw, rstate);
}
pipe_buffer_unmap(ctx, buffer, transfer);
}
@@ -324,12 +367,24 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx,
struct r600_context_state *rstate;
unsigned i;
- for (i = 0; i < rctx->ps_nsampler_view; i++) {
- rctx->ps_sampler_view[i] = r600_context_state_decref(rctx->ps_sampler_view[i]);
+ for (i = 0; i < count; i++) {
+ rstate = (struct r600_context_state *)views[i];
+ if (rstate) {
+ rstate->nrstate = 0;
+ }
}
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)views[i];
- rctx->ps_sampler_view[i] = r600_context_state_incref(rstate);
+ if (rstate) {
+ if (rstate->nrstate >= R600_MAX_RSTATE)
+ continue;
+ if (rstate->nrstate) {
+ memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
+ }
+ radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, R600_SHADER_PS);
+ rctx->ps_sampler_view[i] = &rstate->rstate[rstate->nrstate];
+ rstate->nrstate++;
+ }
}
rctx->ps_nsampler_view = count;
}
@@ -342,12 +397,24 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx,
struct r600_context_state *rstate;
unsigned i;
- for (i = 0; i < rctx->vs_nsampler_view; i++) {
- rctx->vs_sampler_view[i] = r600_context_state_decref(rctx->vs_sampler_view[i]);
+ for (i = 0; i < count; i++) {
+ rstate = (struct r600_context_state *)views[i];
+ if (rstate) {
+ rstate->nrstate = 0;
+ }
}
for (i = 0; i < count; i++) {
rstate = (struct r600_context_state *)views[i];
- rctx->vs_sampler_view[i] = r600_context_state_incref(rstate);
+ if (rstate) {
+ if (rstate->nrstate >= R600_MAX_RSTATE)
+ continue;
+ if (rstate->nrstate) {
+ memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state));
+ }
+ radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, R600_SHADER_VS);
+ rctx->vs_sampler_view[i] = &rstate->rstate[rstate->nrstate];
+ rstate->nrstate++;
+ }
}
rctx->vs_nsampler_view = count;
}
@@ -360,6 +427,12 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
rstate = r600_context_state(rctx, pipe_framebuffer_type, state);
r600_bind_state(ctx, rstate);
+ for (int i = 0; i < state->nr_cbufs; i++) {
+ r600_cb(rctx, &rstate->rstate[i+1], state, i);
+ }
+ if (state->zsbuf) {
+ r600_db(rctx, &rstate->rstate[0], state);
+ }
}
static void r600_set_polygon_stipple(struct pipe_context *ctx,
@@ -525,7 +598,7 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state *
R600_ERR("invalid type %d\n", rstate->type);
return NULL;
}
- radeon_state_decref(rstate->rstate);
+ radeon_state_fini(&rstate->rstate[0]);
FREE(rstate);
return NULL;
}
@@ -558,6 +631,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_viewport_type:
rstate->state.viewport = (*states).viewport;
+ r600_viewport(rctx, &rstate->rstate[0], &rstate->state.viewport);
break;
case pipe_depth_type:
rstate->state.depth = (*states).depth;
@@ -573,6 +647,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_clip_type:
rstate->state.clip = (*states).clip;
+ r600_ucp(rctx, &rstate->rstate[0], &rstate->state.clip);
break;
case pipe_stencil_type:
rstate->state.stencil = (*states).stencil;
@@ -585,6 +660,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_blend_type:
rstate->state.blend = (*states).blend;
+ r600_blend(rctx, &rstate->rstate[0], &rstate->state.blend);
break;
case pipe_stencil_ref_type:
rstate->state.stencil_ref = (*states).stencil_ref;
@@ -599,6 +675,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
break;
case pipe_sampler_type:
rstate->state.sampler = (*states).sampler;
+ r600_sampler(rctx, &rstate->rstate[0], &rstate->state.sampler, 0);
break;
default:
R600_ERR("invalid type %d\n", rstate->type);
@@ -608,16 +685,12 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne
return rstate;
}
-static struct radeon_state *r600_blend(struct r600_context *rctx)
+static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state)
{
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
- const struct pipe_blend_state *state = &rctx->blend->state.blend;
int i;
- rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0);
rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]);
rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]);
rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]);
@@ -661,29 +734,38 @@ static struct radeon_state *r600_blend(struct r600_context *rctx)
rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc;
}
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
+ radeon_state_pm4(rstate);
+}
+
+static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_clip_state *state)
+{
+ struct r600_screen *rscreen = rctx->screen;
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0);
+
+ for (int i = 0; i < state->nr; i++) {
+ rstate->states[i * 4 + 0] = fui(state->ucp[i][0]);
+ rstate->states[i * 4 + 1] = fui(state->ucp[i][1]);
+ rstate->states[i * 4 + 2] = fui(state->ucp[i][2]);
+ rstate->states[i * 4 + 3] = fui(state->ucp[i][3]);
}
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
+static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_framebuffer_state *state, int cb)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
- struct radeon_state *rstate;
- const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
unsigned level = state->cbufs[cb]->level;
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype;
const struct util_format_description *desc;
- rstate = radeon_state(rscreen->rw, R600_CB0_TYPE + cb, R600_CB0 + cb);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0);
rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
rbuffer = &rtex->resource;
rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
@@ -710,7 +792,7 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
S_0280A0_SOURCE_FORMAT(1) |
S_0280A0_NUMBER_TYPE(ntype);
- rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000;
+ rstate->states[R600_CB0__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8;
rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info;
rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) |
S_028060_SLICE_TILE_MAX(slice);
@@ -718,32 +800,29 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb)
rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000;
rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000;
rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_db(struct r600_context *rctx)
+static void r600_db(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_framebuffer_state *state)
{
struct r600_screen *rscreen = rctx->screen;
struct r600_resource_texture *rtex;
struct r600_resource *rbuffer;
- struct radeon_state *rstate;
- const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer;
unsigned level;
unsigned pitch, slice, format;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0);
if (state->zsbuf == NULL)
- return NULL;
-
- rstate = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB);
- if (rstate == NULL)
- return NULL;
+ return;
rtex = (struct r600_resource_texture*)state->zsbuf->texture;
+ rtex->tilled = 1;
+ rtex->array_mode = 2;
+ rtex->tile_type = 1;
+ rtex->depth = 1;
rbuffer = &rtex->resource;
+
rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
rstate->nbo = 1;
rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM;
@@ -751,31 +830,30 @@ static struct radeon_state *r600_db(struct r600_context *rctx)
pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1;
slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1;
format = r600_translate_dbformat(state->zsbuf->texture->format);
- rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000;
- rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 |
+ rstate->states[R600_DB__DB_DEPTH_BASE] = state->zsbuf->offset >> 8;
+ rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) |
S_028010_FORMAT(format);
rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000;
rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1;
rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) |
S_028000_SLICE_TILE_MAX(slice);
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
+static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate)
{
const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer;
const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
+ const struct pipe_clip_state *clip = NULL;
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
float offset_units = 0, offset_scale = 0;
char depth = 0;
unsigned offset_db_fmt_cntl = 0;
unsigned tmp;
unsigned prov_vtx = 1;
+
+ if (rctx->clip)
+ clip = &rctx->clip->state.clip;
if (fb->zsbuf) {
offset_units = state->offset_units;
offset_scale = state->offset_scale * 12.0f;
@@ -796,7 +874,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
break;
default:
R600_ERR("unsupported %d\n", fb->zsbuf->texture->format);
- return NULL;
+ return;
}
}
offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
@@ -805,9 +883,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
prov_vtx = 0;
rctx->flat_shade = state->flatshade;
- rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0);
rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001;
if (state->sprite_coord_enable) {
rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |=
@@ -821,7 +897,12 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
S_0286D4_PNT_SPRITE_TOP_1(1);
}
}
- rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000;
+ rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0;
+ if (clip) {
+ rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1);
+ rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp);
+ rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp);
+ }
rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] =
S_028814_PROVOKING_VTX_LAST(prov_vtx) |
S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
@@ -835,7 +916,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);
rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000;
/* point size 12.4 fixed point */
- tmp = (unsigned)(state->point_size * 8.0 / 2.0);
+ tmp = (unsigned)(state->point_size * 8.0);
rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp);
rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000;
rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008;
@@ -852,19 +933,14 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx)
rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units);
rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale);
rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units);
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_scissor(struct r600_context *rctx)
+static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate)
{
const struct pipe_scissor_state *state = &rctx->scissor->state.scissor;
const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer;
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
unsigned minx, maxx, miny, maxy;
u32 tl, br;
@@ -881,9 +957,7 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx)
}
tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1);
br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy);
- rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0);
rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl;
rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br;
rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000;
@@ -903,22 +977,14 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx)
rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br;
rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl;
rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_viewport(struct r600_context *rctx)
+static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state)
{
- const struct pipe_viewport_state *state = &rctx->viewport->state.viewport;
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
- rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0);
rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000;
rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000;
rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]);
@@ -928,29 +994,28 @@ static struct radeon_state *r600_viewport(struct r600_context *rctx)
rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]);
rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]);
rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-static struct radeon_state *r600_dsa(struct r600_context *rctx)
+static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate)
{
const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa;
const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref;
struct r600_screen *rscreen = rctx->screen;
unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control;
unsigned stencil_ref_mask, stencil_ref_mask_bf;
- struct r600_shader *rshader = &rctx->ps_shader->shader;
- struct radeon_state *rstate;
+ struct r600_shader *rshader;
int i;
- rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA);
- if (rstate == NULL)
- return NULL;
+ if (rctx->ps_shader == NULL) {
+ return;
+ }
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0);
db_shader_control = 0x210;
+ rshader = &rctx->ps_shader->shader;
+ if (rshader->uses_kill)
+ db_shader_control |= (1 << 6);
for (i = 0; i < rshader->noutput; i++) {
if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
db_shader_control |= 1;
@@ -1008,11 +1073,7 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx)
rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000;
rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000;
rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
static inline unsigned r600_tex_wrap(unsigned wrap)
@@ -1090,16 +1151,12 @@ static INLINE u32 S_FIXED(float value, u32 frac_bits)
return value * (1 << frac_bits);
}
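S_FIXED above converts a float into an unsigned fixed-point field with the given number of fractional bits; the sampler words below use it with 6 fractional bits for the LOD values. A standalone sketch of the same conversion, including the clamp the callers apply:

#include <stdio.h>

static float clampf(float v, float lo, float hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* Same idea as S_FIXED above: scale by 2^frac_bits and truncate. */
static unsigned s_fixed(float value, unsigned frac_bits)
{
	return (unsigned)(value * (float)(1u << frac_bits));
}

int main(void)
{
	float lod_bias = 1.25f;
	/* the sampler state clamps the bias to [-16, 16] and uses 6 fractional bits */
	unsigned field = s_fixed(clampf(lod_bias, -16.0f, 16.0f), 6);
	printf("LOD bias %.2f -> field 0x%X\n", lod_bias, field); /* 0x50 */
	return 0;
}
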
-static struct radeon_state *r600_sampler(struct r600_context *rctx,
- const struct pipe_sampler_state *state,
- unsigned id)
+static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate,
+ const struct pipe_sampler_state *state, unsigned id)
{
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
- rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id);
- if (rstate == NULL)
- return NULL;
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS);
rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] =
S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) |
S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) |
@@ -1114,11 +1171,7 @@ static struct radeon_state *r600_sampler(struct r600_context *rctx,
S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6));
rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1);
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
static inline unsigned r600_tex_swizzle(unsigned swizzle)
@@ -1160,6 +1213,7 @@ static inline unsigned r600_tex_dim(unsigned dim)
case PIPE_TEXTURE_1D:
return V_038000_SQ_TEX_DIM_1D;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
return V_038000_SQ_TEX_DIM_2D;
case PIPE_TEXTURE_3D:
return V_038000_SQ_TEX_DIM_3D;
@@ -1168,19 +1222,20 @@ static inline unsigned r600_tex_dim(unsigned dim)
}
}
-static struct radeon_state *r600_resource(struct r600_context *rctx,
- const struct pipe_sampler_view *view,
- unsigned id)
+static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate,
+ const struct pipe_sampler_view *view, unsigned id)
{
+ struct r600_context *rctx = r600_context(ctx);
struct r600_screen *rscreen = rctx->screen;
const struct util_format_description *desc;
struct r600_resource_texture *tmp;
struct r600_resource *rbuffer;
- struct radeon_state *rstate;
unsigned format;
- uint32_t word4 = 0, yuv_format = 0;
- unsigned char swizzle[4];
+ uint32_t word4 = 0, yuv_format = 0, pitch = 0;
+ unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+ int r;
+ rstate->cpm4 = 0;
swizzle[0] = view->swizzle_r;
swizzle[1] = view->swizzle_g;
swizzle[2] = view->swizzle_b;
@@ -1188,37 +1243,49 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
format = r600_translate_texformat(view->texture->format,
swizzle,
&word4, &yuv_format);
- if (format == ~0)
- return NULL;
+ if (format == ~0) {
+ return;
+ }
desc = util_format_description(view->texture->format);
if (desc == NULL) {
R600_ERR("unknow format %d\n", view->texture->format);
- return NULL;
- }
- rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id);
- if (rstate == NULL) {
- return NULL;
+ return;
}
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS);
tmp = (struct r600_resource_texture*)view->texture;
rbuffer = &tmp->resource;
- rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
- rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ if (tmp->depth) {
+ r = r600_texture_from_depth(ctx, tmp, view->first_level);
+ if (r) {
+ return;
+ }
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, tmp->uncompressed);
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, tmp->uncompressed);
+ } else {
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ }
rstate->nbo = 2;
rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
rstate->placement[1] = RADEON_GEM_DOMAIN_GTT;
rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
rstate->placement[3] = RADEON_GEM_DOMAIN_GTT;
+ pitch = (tmp->pitch[0] / tmp->bpt);
+ pitch = (pitch + 0x7) & ~0x7;
+
/* FIXME properly handle first level != 0 */
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] =
S_038000_DIM(r600_tex_dim(view->texture->target)) |
- S_038000_PITCH(((tmp->pitch[0] / tmp->bpt) / 8) - 1) |
+ S_038000_TILE_MODE(array_mode) |
+ S_038000_TILE_TYPE(tile_type) |
+ S_038000_PITCH((pitch / 8) - 1) |
S_038000_TEX_WIDTH(view->texture->width0 - 1);
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] =
S_038004_TEX_HEIGHT(view->texture->height0 - 1) |
S_038004_TEX_DEPTH(view->texture->depth0 - 1) |
S_038004_DATA_FORMAT(format);
- rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0;
+ rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8;
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8;
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] =
word4 |
@@ -1232,17 +1299,12 @@ static struct radeon_state *r600_resource(struct r600_context *rctx,
S_038014_LAST_ARRAY(0);
rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] =
S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE);
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
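The pitch handling in r600_resource above divides the byte pitch by the texel size, rounds up to a multiple of 8 texels, and encodes (pitch / 8) - 1 in the resource word. A standalone sketch of that arithmetic with illustrative numbers:

#include <stdio.h>

int main(void)
{
	unsigned pitch_bytes = 1300, bpt = 4;  /* illustrative values        */
	unsigned pitch = pitch_bytes / bpt;    /* 325 texels                 */
	pitch = (pitch + 0x7) & ~0x7;          /* round up to 8 -> 328       */
	unsigned field = (pitch / 8) - 1;      /* encoded PITCH field -> 40  */
	printf("pitch=%u texels, PITCH field=%u\n", pitch, field);
	return 0;
}
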
-static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
+static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate)
{
struct r600_screen *rscreen = rctx->screen;
- struct radeon_state *rstate;
const struct pipe_blend_state *pbs = &rctx->blend->state.blend;
int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
uint32_t color_control, target_mask, shader_mask;
@@ -1257,7 +1319,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
}
if (pbs->logicop_enable) {
- color_control |= (pbs->logicop_func) << 16;
+ color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20);
} else {
color_control |= (0xcc << 16);
}
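The logic-op change above duplicates Gallium's 4-bit logicop value into both nibbles to form a ROP3 code (0xCC, the default, is ROP3 "copy source"). A small sketch of that mapping, with the PIPE_LOGICOP_* numbering assumed from Gallium's p_defines.h:

#include <stdio.h>

/* Assumed Gallium numbering: XOR = 6, NOOP = 10, COPY = 12. */
enum { LOGICOP_XOR = 6, LOGICOP_NOOP = 10, LOGICOP_COPY = 12 };

/* Duplicating the 4-bit logicop value into both nibbles yields the
 * classic ROP3 code, e.g. COPY (12) -> 0xCC, XOR (6) -> 0x66. */
static unsigned rop3(unsigned logicop_func)
{
	return (logicop_func << 4) | logicop_func;
}

int main(void)
{
	printf("COPY -> 0x%02X\n", rop3(LOGICOP_COPY)); /* 0xCC */
	printf("XOR  -> 0x%02X\n", rop3(LOGICOP_XOR));  /* 0x66 */
	printf("NOOP -> 0x%02X\n", rop3(LOGICOP_NOOP)); /* 0xAA */
	return 0;
}
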
@@ -1277,7 +1339,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
target_mask |= (pbs->rt[0].colormask << (4 * i));
}
}
- rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL);
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0);
rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask;
rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask;
rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control;
@@ -1289,115 +1351,51 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx)
rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF;
rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF;
rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF;
- if (radeon_state_pm4(rstate)) {
- radeon_state_decref(rstate);
- return NULL;
- }
- return rstate;
+ radeon_state_pm4(rstate);
}
-int r600_context_hw_states(struct r600_context *rctx)
+int r600_context_hw_states(struct pipe_context *ctx)
{
+ struct r600_context *rctx = r600_context(ctx);
unsigned i;
- int r;
- int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs;
- /* free previous TODO determine what need to be updated, what
- * doesn't
- */
- //radeon_state_decref(rctx->hw_states.config);
- rctx->hw_states.cb_cntl = radeon_state_decref(rctx->hw_states.cb_cntl);
- rctx->hw_states.db = radeon_state_decref(rctx->hw_states.db);
- rctx->hw_states.rasterizer = radeon_state_decref(rctx->hw_states.rasterizer);
- rctx->hw_states.scissor = radeon_state_decref(rctx->hw_states.scissor);
- rctx->hw_states.dsa = radeon_state_decref(rctx->hw_states.dsa);
- rctx->hw_states.blend = radeon_state_decref(rctx->hw_states.blend);
- rctx->hw_states.viewport = radeon_state_decref(rctx->hw_states.viewport);
- for (i = 0; i < 8; i++) {
- rctx->hw_states.cb[i] = radeon_state_decref(rctx->hw_states.cb[i]);
+ /* build new states */
+ r600_rasterizer(rctx, &rctx->hw_states.rasterizer);
+ r600_scissor(rctx, &rctx->hw_states.scissor);
+ r600_dsa(rctx, &rctx->hw_states.dsa);
+ r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl);
+
+ /* bind states */
+ radeon_draw_bind(&rctx->draw, &rctx->hw_states.rasterizer);
+ radeon_draw_bind(&rctx->draw, &rctx->hw_states.scissor);
+ radeon_draw_bind(&rctx->draw, &rctx->hw_states.dsa);
+ radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_cntl);
+
+ radeon_draw_bind(&rctx->draw, &rctx->config);
+
+ if (rctx->viewport) {
+ radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]);
}
- for (i = 0; i < rctx->hw_states.ps_nresource; i++) {
- radeon_state_decref(rctx->hw_states.ps_resource[i]);
- rctx->hw_states.ps_resource[i] = NULL;
+ if (rctx->blend) {
+ radeon_draw_bind(&rctx->draw, &rctx->blend->rstate[0]);
}
- rctx->hw_states.ps_nresource = 0;
- for (i = 0; i < rctx->hw_states.ps_nsampler; i++) {
- radeon_state_decref(rctx->hw_states.ps_sampler[i]);
- rctx->hw_states.ps_sampler[i] = NULL;
+ if (rctx->clip) {
+ radeon_draw_bind(&rctx->draw, &rctx->clip->rstate[0]);
}
- rctx->hw_states.ps_nsampler = 0;
-
- /* build new states */
- rctx->hw_states.rasterizer = r600_rasterizer(rctx);
- rctx->hw_states.scissor = r600_scissor(rctx);
- rctx->hw_states.dsa = r600_dsa(rctx);
- rctx->hw_states.blend = r600_blend(rctx);
- rctx->hw_states.viewport = r600_viewport(rctx);
- for (i = 0; i < nr_cbufs; i++) {
- rctx->hw_states.cb[i] = r600_cb(rctx, i);
+ for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) {
+ radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[i+1]);
+ }
+ if (rctx->framebuffer->state.framebuffer.zsbuf) {
+ radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[0]);
}
- rctx->hw_states.db = r600_db(rctx);
- rctx->hw_states.cb_cntl = r600_cb_cntl(rctx);
-
for (i = 0; i < rctx->ps_nsampler; i++) {
if (rctx->ps_sampler[i]) {
- rctx->hw_states.ps_sampler[i] = r600_sampler(rctx,
- &rctx->ps_sampler[i]->state.sampler,
- R600_PS_SAMPLER + i);
+ radeon_draw_bind(&rctx->draw, rctx->ps_sampler[i]);
}
}
- rctx->hw_states.ps_nsampler = rctx->ps_nsampler;
for (i = 0; i < rctx->ps_nsampler_view; i++) {
if (rctx->ps_sampler_view[i]) {
- rctx->hw_states.ps_resource[i] = r600_resource(rctx,
- &rctx->ps_sampler_view[i]->state.sampler_view,
- R600_PS_RESOURCE + i);
- }
- }
- rctx->hw_states.ps_nresource = rctx->ps_nsampler_view;
-
- /* bind states */
- r = radeon_draw_set(rctx->draw, rctx->hw_states.db);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.rasterizer);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.blend);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport);
- if (r)
- return r;
- for (i = 0; i < nr_cbufs; i++) {
- r = radeon_draw_set(rctx->draw, rctx->hw_states.cb[i]);
- if (r)
- return r;
- }
- r = radeon_draw_set(rctx->draw, rctx->hw_states.config);
- if (r)
- return r;
- r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl);
- if (r)
- return r;
- for (i = 0; i < rctx->hw_states.ps_nresource; i++) {
- if (rctx->hw_states.ps_resource[i]) {
- r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]);
- if (r)
- return r;
- }
- }
- for (i = 0; i < rctx->hw_states.ps_nsampler; i++) {
- if (rctx->hw_states.ps_sampler[i]) {
- r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]);
- if (r)
- return r;
+ radeon_draw_bind(&rctx->draw, rctx->ps_sampler_view[i]);
}
}
return 0;
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index f93c20da35..84866825aa 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -252,6 +252,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_R8G8B8_UNORM:
return V_0280A0_COLOR_8_8_8_8;
case PIPE_FORMAT_R10G10B10A2_UNORM:
@@ -262,7 +263,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- return V_0280A0_COLOR_24_8;
+ return V_0280A0_COLOR_8_24;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return V_0280A0_COLOR_32_FLOAT;
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UNORM:
@@ -275,6 +279,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32_FLOAT:
+ return V_0280A0_COLOR_32_32_32_FLOAT;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return V_0280A0_COLOR_32_32_32_32_FLOAT;
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 30d79ebdd6..b6698e3885 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -24,6 +24,7 @@
* Jerome Glisse
* Corbin Simpson
*/
+#include <errno.h>
#include <pipe/p_screen.h>
#include <util/u_format.h>
#include <util/u_math.h>
@@ -33,10 +34,26 @@
#include "r600_screen.h"
#include "r600_context.h"
#include "r600_resource.h"
+#include "r600_state_inlines.h"
#include "r600d.h"
extern struct u_resource_vtbl r600_texture_vtbl;
+/* Copy from a tiled texture to a detiled one. */
+static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
+{
+ struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
+ struct pipe_resource *texture = transfer->resource;
+ struct pipe_subresource subdst;
+
+ subdst.face = 0;
+ subdst.level = 0;
+ ctx->resource_copy_region(ctx, rtransfer->linear_texture,
+ subdst, 0, 0, 0, texture, transfer->sr,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ transfer->box.width, transfer->box.height);
+}
+
static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex,
unsigned level, unsigned zslice,
unsigned face)
@@ -65,7 +82,9 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource
for (i = 0, offset = 0; i <= ptex->last_level; i++) {
w = u_minify(ptex->width0, i);
h = u_minify(ptex->height0, i);
+ h = util_next_power_of_two(h);
pitch = util_format_get_stride(ptex->format, align(w, 64));
+ pitch = align(pitch, 256);
layer_size = pitch * h;
if (ptex->target == PIPE_TEXTURE_CUBE)
size = layer_size * 6;
@@ -74,6 +93,8 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource
rtex->offset[i] = offset;
rtex->layer_size[i] = layer_size;
rtex->pitch[i] = pitch;
+ rtex->width[i] = w;
+ rtex->height[i] = h;
offset += size;
}
rtex->size = offset;
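A self-contained model of the mip level layout computed above: the width is aligned to 64 texels for the stride, the byte pitch aligned to 256, the height rounded up to a power of two, and offsets accumulated per level (the cube-map *6 case is omitted; helper names are illustrative, not the util_* functions):

#include <stdio.h>

static unsigned minify(unsigned v, unsigned level)
{
	v >>= level;
	return v ? v : 1;
}

static unsigned next_pow2(unsigned v)
{
	unsigned p = 1;
	while (p < v)
		p <<= 1;
	return p;
}

static unsigned align_to(unsigned v, unsigned a)
{
	return (v + a - 1) & ~(a - 1); /* a must be a power of two */
}

int main(void)
{
	unsigned width0 = 100, height0 = 100, bpt = 4, last_level = 2;
	unsigned offset = 0;

	for (unsigned i = 0; i <= last_level; i++) {
		unsigned w = minify(width0, i);
		unsigned h = next_pow2(minify(height0, i));
		unsigned pitch = align_to(align_to(w, 64) * bpt, 256);
		unsigned layer_size = pitch * h;

		printf("level %u: %ux%u pitch=%u offset=%u size=%u\n",
		       i, w, h, pitch, offset, layer_size);
		offset += layer_size;
	}
	printf("total size=%u\n", offset);
	return 0;
}
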
@@ -104,10 +125,22 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
FREE(rtex);
return NULL;
}
-
return &resource->base.b;
}
+static void r600_texture_destroy_state(struct pipe_resource *ptexture)
+{
+ struct r600_resource_texture *rtexture = (struct r600_resource_texture*)ptexture;
+
+ for (int i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) {
+ radeon_state_fini(&rtexture->scissor[i]);
+ radeon_state_fini(&rtexture->db[i]);
+ for (int j = 0; j < 8; j++) {
+ radeon_state_fini(&rtexture->cb[j][i]);
+ }
+ }
+}
+
static void r600_texture_destroy(struct pipe_screen *screen,
struct pipe_resource *ptex)
{
@@ -118,6 +151,10 @@ static void r600_texture_destroy(struct pipe_screen *screen,
if (resource->bo) {
radeon_bo_decref(rscreen->rw, resource->bo);
}
+ if (rtex->uncompressed) {
+ radeon_bo_decref(rscreen->rw, rtex->uncompressed);
+ }
+ r600_texture_destroy_state(ptex);
FREE(rtex);
}
@@ -168,7 +205,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
}
/* Support only 2D textures without mipmaps */
- if (templ->target != PIPE_TEXTURE_2D || templ->depth0 != 1 || templ->last_level != 0)
+ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
+ templ->depth0 != 1 || templ->last_level != 0)
return NULL;
rtex = CALLOC_STRUCT(r600_resource_texture);
@@ -181,9 +219,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
pipe_reference_init(&resource->base.b.reference, 1);
resource->base.b.screen = screen;
resource->bo = bo;
+ rtex->depth = 0;
rtex->pitch_override = whandle->stride;
rtex->bpt = util_format_get_blocksize(templ->format);
rtex->pitch[0] = whandle->stride;
+ rtex->width[0] = templ->width0;
+ rtex->height[0] = templ->height0;
rtex->offset[0] = 0;
rtex->size = align(rtex->pitch[0] * templ->height0, 64);
@@ -205,6 +246,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
const struct pipe_box *box)
{
struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
+ struct pipe_resource resource;
struct r600_transfer *trans;
trans = CALLOC_STRUCT(r600_transfer);
@@ -216,48 +258,117 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
trans->transfer.box = *box;
trans->transfer.stride = rtex->pitch[sr.level];
trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face);
+ if (rtex->tilled && !rtex->depth) {
+ resource.target = PIPE_TEXTURE_2D;
+ resource.format = texture->format;
+ resource.width0 = box->width;
+ resource.height0 = box->height;
+ resource.depth0 = 0;
+ resource.last_level = 0;
+ resource.nr_samples = 0;
+ resource.usage = PIPE_USAGE_DYNAMIC;
+ resource.bind = 0;
+ resource.flags = 0;
+ /* For texture reading, the temporary (detiled) texture is used as
+ * a render target when blitting from a tiled texture. */
+ if (usage & PIPE_TRANSFER_READ) {
+ resource.bind |= PIPE_BIND_RENDER_TARGET;
+ }
+ /* For texture writing, the temporary texture is used as a sampler
+ * when blitting into a tiled texture. */
+ if (usage & PIPE_TRANSFER_WRITE) {
+ resource.bind |= PIPE_BIND_SAMPLER_VIEW;
+ }
+ /* Create the temporary texture. */
+ trans->linear_texture = ctx->screen->resource_create(ctx->screen, &resource);
+ if (trans->linear_texture == NULL) {
+ R600_ERR("failed to create temporary texture to hold untiled copy\n");
+ pipe_resource_reference(&trans->transfer.resource, NULL);
+ FREE(trans);
+ return NULL;
+ }
+ if (usage & PIPE_TRANSFER_READ) {
+ /* We cannot map a tiled texture directly because the data is
+ * in a different order; we therefore detile it with a blit. */
+ r600_copy_from_tiled_texture(ctx, trans);
+ /* The texture is always referenced by the blit, so flush before mapping. */
+ ctx->flush(ctx, 0, NULL);
+ }
+ }
return &trans->transfer;
}
void r600_texture_transfer_destroy(struct pipe_context *ctx,
- struct pipe_transfer *trans)
+ struct pipe_transfer *transfer)
{
- pipe_resource_reference(&trans->resource, NULL);
- FREE(trans);
+ struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
+
+ if (rtransfer->linear_texture) {
+ pipe_resource_reference(&rtransfer->linear_texture, NULL);
+ }
+ pipe_resource_reference(&transfer->resource, NULL);
+ FREE(transfer);
}
void* r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- struct r600_resource *resource;
+ struct radeon_bo *bo;
enum pipe_format format = transfer->resource->format;
struct r600_screen *rscreen = r600_screen(ctx->screen);
+ struct r600_resource_texture *rtex;
+ unsigned long offset = 0;
char *map;
+ int r;
r600_flush(ctx, 0, NULL);
-
- resource = (struct r600_resource *)transfer->resource;
- if (radeon_bo_map(rscreen->rw, resource->bo)) {
+ if (rtransfer->linear_texture) {
+ bo = ((struct r600_resource *)rtransfer->linear_texture)->bo;
+ } else {
+ rtex = (struct r600_resource_texture*)transfer->resource;
+ if (rtex->depth) {
+ r = r600_texture_from_depth(ctx, rtex, transfer->sr.level);
+ if (r) {
+ return NULL;
+ }
+ r600_flush(ctx, 0, NULL);
+ bo = rtex->uncompressed;
+ } else {
+ bo = ((struct r600_resource *)transfer->resource)->bo;
+ }
+ offset = rtransfer->offset +
+ transfer->box.y / util_format_get_blockheight(format) * transfer->stride +
+ transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+ }
+ if (radeon_bo_map(rscreen->rw, bo)) {
return NULL;
}
- radeon_bo_wait(rscreen->rw, resource->bo);
-
- map = resource->bo->data;
+ radeon_bo_wait(rscreen->rw, bo);
- return map + rtransfer->offset +
- transfer->box.y / util_format_get_blockheight(format) * transfer->stride +
- transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+ map = bo->data;
+ return map + offset;
}
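For linear (non-staging) mappings the offset arithmetic above is the usual row-major formula; the block width/height terms only matter for compressed formats. A worked example with assumed numbers, not taken from the patch:

	/* Assumed: PIPE_FORMAT_B8G8R8A8_UNORM (1x1 blocks, 4 bytes/pixel),
	 * stride = 1024 bytes, transfer box starting at (16, 8). */
	offset = rtransfer->offset
	       + 8  / 1 * 1024    /* box.y / blockheight * stride    = 8192 */
	       + 16 / 1 * 4;      /* box.x / blockwidth  * blocksize = 64   */
	/* map returns bo->data + rtransfer->offset + 8256 */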
void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
+ struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_screen *rscreen = r600_screen(ctx->screen);
- struct r600_resource *resource;
-
- resource = (struct r600_resource *)transfer->resource;
- radeon_bo_unmap(rscreen->rw, resource->bo);
+ struct r600_resource_texture *rtex;
+ struct radeon_bo *bo;
+
+ if (rtransfer->linear_texture) {
+ bo = ((struct r600_resource *)rtransfer->linear_texture)->bo;
+ } else {
+ rtex = (struct r600_resource_texture*)transfer->resource;
+ if (rtex->depth) {
+ bo = rtex->uncompressed;
+ } else {
+ bo = ((struct r600_resource *)transfer->resource)->bo;
+ }
+ }
+ radeon_bo_unmap(rscreen->rw, bo);
}
struct u_resource_vtbl r600_texture_vtbl =
@@ -280,51 +391,51 @@ void r600_init_screen_texture_functions(struct pipe_screen *screen)
}
static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
- const unsigned char *swizzle_view)
+ const unsigned char *swizzle_view)
{
- unsigned i;
- unsigned char swizzle[4];
- unsigned result = 0;
- const uint32_t swizzle_shift[4] = {
- 16, 19, 22, 25,
- };
- const uint32_t swizzle_bit[4] = {
- 0, 1, 2, 3,
- };
-
- if (swizzle_view) {
- /* Combine two sets of swizzles. */
- for (i = 0; i < 4; i++) {
- swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
- swizzle_format[swizzle_view[i]] : swizzle_view[i];
- }
- } else {
- memcpy(swizzle, swizzle_format, 4);
- }
-
- /* Get swizzle. */
- for (i = 0; i < 4; i++) {
- switch (swizzle[i]) {
- case UTIL_FORMAT_SWIZZLE_Y:
- result |= swizzle_bit[1] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_Z:
- result |= swizzle_bit[2] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_W:
- result |= swizzle_bit[3] << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_0:
- result |= V_038010_SQ_SEL_0 << swizzle_shift[i];
- break;
- case UTIL_FORMAT_SWIZZLE_1:
- result |= V_038010_SQ_SEL_1 << swizzle_shift[i];
- break;
- default: /* UTIL_FORMAT_SWIZZLE_X */
- result |= swizzle_bit[0] << swizzle_shift[i];
- }
- }
- return result;
+ unsigned i;
+ unsigned char swizzle[4];
+ unsigned result = 0;
+ const uint32_t swizzle_shift[4] = {
+ 16, 19, 22, 25,
+ };
+ const uint32_t swizzle_bit[4] = {
+ 0, 1, 2, 3,
+ };
+
+ if (swizzle_view) {
+ /* Combine two sets of swizzles. */
+ for (i = 0; i < 4; i++) {
+ swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
+ swizzle_format[swizzle_view[i]] : swizzle_view[i];
+ }
+ } else {
+ memcpy(swizzle, swizzle_format, 4);
+ }
+
+ /* Get swizzle. */
+ for (i = 0; i < 4; i++) {
+ switch (swizzle[i]) {
+ case UTIL_FORMAT_SWIZZLE_Y:
+ result |= swizzle_bit[1] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_Z:
+ result |= swizzle_bit[2] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_W:
+ result |= swizzle_bit[3] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_0:
+ result |= V_038010_SQ_SEL_0 << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_1:
+ result |= V_038010_SQ_SEL_1 << swizzle_shift[i];
+ break;
+ default: /* UTIL_FORMAT_SWIZZLE_X */
+ result |= swizzle_bit[0] << swizzle_shift[i];
+ }
+ }
+ return result;
}
/* texture format translate */
@@ -344,19 +455,21 @@ uint32_t r600_translate_texformat(enum pipe_format format,
};
desc = util_format_description(format);
+ word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
+
/* Colorspace (return non-RGB formats directly). */
switch (desc->colorspace) {
/* Depth stencil formats */
case UTIL_FORMAT_COLORSPACE_ZS:
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
- result = V_028010_DEPTH_16;
+ result = V_0280A0_COLOR_16;
goto out_word4;
case PIPE_FORMAT_Z24X8_UNORM:
- result = V_028010_DEPTH_X8_24;
+ result = V_0280A0_COLOR_8_24;
goto out_word4;
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- result = V_028010_DEPTH_8_24;
+ result = V_0280A0_COLOR_8_24;
goto out_word4;
default:
goto out_unknown;
@@ -382,8 +495,6 @@ uint32_t r600_translate_texformat(enum pipe_format format,
break;
}
- word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view);
-
/* S3TC formats. TODO */
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
goto out_unknown;
@@ -519,9 +630,221 @@ out_word4:
*word4_p = word4;
if (yuv_format_p)
*yuv_format_p = yuv_format;
-// fprintf(stderr,"returning %08x %08x %08x\n", result, word4, yuv_format);
return result;
out_unknown:
// R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format));
return ~0;
}
+
+int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+ int r;
+
+ if (!rtexture->depth) {
+ /* This shouldn't happen; maybe print a warning. */
+ return 0;
+ }
+ if (rtexture->uncompressed && !rtexture->dirty) {
+ /* The uncompressed bo already holds up-to-date data */
+ return 0;
+ }
+
+ /* allocate uncompressed texture */
+ if (rtexture->uncompressed == NULL) {
+ rtexture->uncompressed = radeon_bo(rscreen->rw, 0, rtexture->size, 4096, NULL);
+ if (rtexture->uncompressed == NULL) {
+ return -ENOMEM;
+ }
+ }
+
+ /* render a rectangle covering the whole buffer to uncompress the depth data */
+ r = r600_blit_uncompress_depth(ctx, rtexture, level);
+ if (r) {
+ return r;
+ }
+
+ rtexture->dirty = 0;
+ return 0;
+}
+
+static void r600_texture_state_scissor(struct r600_screen *rscreen,
+ struct r600_resource_texture *rtexture,
+ unsigned level)
+{
+ struct radeon_state *rstate = &rtexture->scissor[level];
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0);
+ /* set states (most default values are 0 and the struct is already
+ * initialized to 0, so avoid resetting them)
+ */
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF;
+ rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA;
+ rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = 0x80000000;
+ rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]);
+ rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = 0x80000000;
+
+ radeon_state_pm4(rstate);
+}
+
+static void r600_texture_state_cb(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned cb, unsigned level)
+{
+ struct radeon_state *rstate;
+ struct r600_resource *rbuffer;
+ unsigned pitch, slice;
+ unsigned color_info;
+ unsigned format, swap, ntype;
+ const struct util_format_description *desc;
+
+ rstate = &rtexture->cb[cb][level];
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0);
+ rbuffer = &rtexture->resource;
+
+ /* set states (most default values are 0 and the struct is already
+ * initialized to 0, so avoid resetting them)
+ */
+ pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1;
+ slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1;
+ ntype = 0;
+ desc = util_format_description(rbuffer->base.b.format);
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ ntype = V_0280A0_NUMBER_SRGB;
+ format = r600_translate_colorformat(rtexture->resource.base.b.format);
+ swap = r600_translate_colorswap(rtexture->resource.base.b.format);
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed);
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed);
+ rstate->bo[2] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed);
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[4] = RADEON_GEM_DOMAIN_GTT;
+ rstate->nbo = 3;
+ color_info = 0;
+ } else {
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
+ rstate->placement[4] = RADEON_GEM_DOMAIN_GTT;
+ rstate->nbo = 3;
+ color_info = S_0280A0_SOURCE_FORMAT(1);
+ }
+ color_info |= S_0280A0_FORMAT(format) |
+ S_0280A0_COMP_SWAP(swap) |
+ S_0280A0_BLEND_CLAMP(1) |
+ S_0280A0_NUMBER_TYPE(ntype);
+ rstate->states[R600_CB0__CB_COLOR0_BASE] = rtexture->offset[level] >> 8;
+ rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info;
+ rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) |
+ S_028060_SLICE_TILE_MAX(slice);
+
+ radeon_state_pm4(rstate);
+}
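The CB size fields are encoded in tile units minus one: PITCH_TILE_MAX counts 8-pixel groups and SLICE_TILE_MAX counts 8x8-pixel tiles. A worked example with assumed numbers (not from the patch):

	/* Assumed: 256x256 level, 4 bytes/pixel, pitch[level] = 1024 bytes */
	pitch = (1024 / 4) / 8 - 1;         /* PITCH_TILE_MAX = 31   */
	slice = (1024 / 4) * 256 / 64 - 1;  /* SLICE_TILE_MAX = 1023 */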
+
+static void r600_texture_state_db(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct radeon_state *rstate = &rtexture->db[level];
+ struct r600_resource *rbuffer;
+ unsigned pitch, slice, format;
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0);
+ rbuffer = &rtexture->resource;
+ rtexture->tilled = 1;
+ rtexture->array_mode = 2;
+ rtexture->tile_type = 1;
+ rtexture->depth = 1;
+
+ /* set states (most default values are 0 and the struct is already
+ * initialized to 0, so avoid resetting them)
+ */
+ pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1;
+ slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1;
+ format = r600_translate_dbformat(rbuffer->base.b.format);
+ rstate->states[R600_DB__DB_DEPTH_BASE] = rtexture->offset[level] >> 8;
+ rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtexture->array_mode) |
+ S_028010_FORMAT(format);
+ rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000;
+ rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (rtexture->height[level] / 8) -1;
+ rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) |
+ S_028000_SLICE_TILE_MAX(slice);
+ rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo);
+ rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+ rstate->nbo = 1;
+
+ radeon_state_pm4(rstate);
+}
+
+int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ if (!rtexture->scissor[level].cpm4) {
+ r600_texture_state_scissor(rscreen, rtexture, level);
+ }
+ return 0;
+}
+
+static void r600_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct radeon_state *rstate = &rtexture->viewport[level];
+
+ radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0);
+
+ /* set states (most default values are 0 and the struct is already
+ * initialized to 0, so avoid resetting them)
+ */
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0);
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0);
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0);
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0);
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000;
+ rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000;
+ rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F;
+ rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000;
+
+ radeon_state_pm4(rstate);
+}
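These constants implement the standard NDC-to-window mapping over the full level; 0x3F000000 and 0x3F800000 are the float bit patterns for 0.5 and 1.0, and the negative Y scale flips the axis. In other words:

	x_win = x_ndc *  width/2  + width/2    /* [-1, 1] -> [0, width]  */
	y_win = y_ndc * -height/2 + height/2   /* [-1, 1] -> [height, 0] */
	z_win = z_ndc *  0.5      + 0.5        /* [-1, 1] -> [0, 1]      */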
+
+int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ if (!rtexture->cb[cb][level].cpm4) {
+ r600_texture_state_cb(rscreen, rtexture, cb, level);
+ }
+ return 0;
+}
+
+int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ if (!rtexture->db[level].cpm4) {
+ r600_texture_state_db(rscreen, rtexture, level);
+ }
+ return 0;
+}
+
+int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level)
+{
+ struct r600_screen *rscreen = r600_screen(ctx->screen);
+
+ if (!rtexture->viewport[level].cpm4) {
+ r600_texture_state_viewport(rscreen, rtexture, level);
+ }
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 53388f822e..7b9a983d53 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -199,6 +199,7 @@
#define V_0280A0_COLOR_16_16_16_16_FLOAT 0x00000020
#define V_0280A0_COLOR_32_32_32_32 0x00000022
#define V_0280A0_COLOR_32_32_32_32_FLOAT 0x00000023
+#define V_0280A0_COLOR_32_32_32_FLOAT 0x00000030
#define S_0280A0_ARRAY_MODE(x) (((x) & 0xF) << 8)
#define G_0280A0_ARRAY_MODE(x) (((x) >> 8) & 0xF)
#define C_0280A0_ARRAY_MODE 0xFFFFF0FF
@@ -1316,4 +1317,11 @@
#define G_0286D4_PNT_SPRITE_TOP_1(x) (((x) >> 14) & 0x1)
#define C_0286D4_PNT_SPRITE_TOP_1 0xFFFFBFFF
+#define SQ_TEX_INST_LD 0x03
+#define SQ_TEX_INST_GET_GRADIENTS_H 0x7
+#define SQ_TEX_INST_GET_GRADIENTS_V 0x8
+
+#define SQ_TEX_INST_SAMPLE 0x10
+#define SQ_TEX_INST_SAMPLE_L 0x11
+#define SQ_TEX_INST_SAMPLE_C 0x18
#endif
diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h
index 8f00a4895a..aaac8de528 100644
--- a/src/gallium/drivers/r600/radeon.h
+++ b/src/gallium/drivers/r600/radeon.h
@@ -77,6 +77,14 @@ enum radeon_family {
CHIP_LAST,
};
+enum {
+ R600_SHADER_PS = 1,
+ R600_SHADER_VS,
+ R600_SHADER_GS,
+ R600_SHADER_FS,
+ R600_SHADER_MAX = R600_SHADER_FS,
+};
+
enum radeon_family radeon_get_family(struct radeon *rw);
/*
@@ -98,22 +106,23 @@ struct radeon_bo *radeon_bo_incref(struct radeon *radeon, struct radeon_bo *bo);
struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo);
int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
+struct radeon_stype_info;
/*
* states functions
*/
struct radeon_state {
struct radeon *radeon;
unsigned refcount;
- unsigned type;
+ struct radeon_stype_info *stype;
+ unsigned state_id;
unsigned id;
+ unsigned shader_index;
unsigned nstates;
- u32 *states;
+ u32 states[64];
unsigned npm4;
unsigned cpm4;
u32 pm4_crc;
- u32 *pm4;
- u32 nimmd;
- u32 *immd;
+ u32 pm4[128];
unsigned nbo;
struct radeon_bo *bo[4];
unsigned nreloc;
@@ -123,38 +132,22 @@ struct radeon_state {
unsigned bo_dirty[4];
};
-struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id);
-struct radeon_state *radeon_state_incref(struct radeon_state *state);
-struct radeon_state *radeon_state_decref(struct radeon_state *state);
+int radeon_state_init(struct radeon_state *rstate, struct radeon *radeon, u32 type, u32 id, u32 shader_class);
+void radeon_state_fini(struct radeon_state *state);
int radeon_state_pm4(struct radeon_state *state);
+int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type);
/*
* draw functions
*/
struct radeon_draw {
- unsigned refcount;
struct radeon *radeon;
- unsigned nstate;
struct radeon_state **state;
- unsigned cpm4;
};
-struct radeon_draw *radeon_draw(struct radeon *radeon);
-struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw);
-struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw);
-struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw);
-int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state);
-int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state);
-int radeon_draw_check(struct radeon_draw *draw);
-
-struct radeon_ctx *radeon_ctx(struct radeon *radeon);
-struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx);
-struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx);
-int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw);
-int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw);
-int radeon_ctx_pm4(struct radeon_ctx *ctx);
-int radeon_ctx_submit(struct radeon_ctx *ctx);
-void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
+int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon);
+void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state);
+void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state);
/*
* radeon context functions
@@ -169,95 +162,57 @@ struct radeon_cs_reloc {
#pragma pack()
struct radeon_ctx {
- int refcount;
struct radeon *radeon;
u32 *pm4;
- u32 cpm4;
- u32 draw_cpm4;
- unsigned id;
- unsigned next_id;
+ int cdwords;
+ int ndwords;
unsigned nreloc;
struct radeon_cs_reloc *reloc;
unsigned nbo;
struct radeon_bo **bo;
- unsigned ndraw;
- struct radeon_draw *cdraw;
- struct radeon_draw **draw;
- unsigned nstate;
- struct radeon_state **state;
};
+int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon);
+void radeon_ctx_fini(struct radeon_ctx *ctx);
+void radeon_ctx_clear(struct radeon_ctx *ctx);
+int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw);
+int radeon_ctx_submit(struct radeon_ctx *ctx);
+void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file);
+int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state);
+
/*
* R600/R700
*/
-#define R600_NSTATE 1280
-#define R600_NTYPE 32
+enum r600_stype {
+ R600_STATE_CONFIG,
+ R600_STATE_CB_CNTL,
+ R600_STATE_RASTERIZER,
+ R600_STATE_VIEWPORT,
+ R600_STATE_SCISSOR,
+ R600_STATE_BLEND,
+ R600_STATE_DSA,
+ R600_STATE_SHADER, /* has PS,VS,GS,FS variants */
+ R600_STATE_CONSTANT, /* has PS,VS,GS,FS variants */
+ R600_STATE_RESOURCE, /* has PS,VS,GS,FS variants */
+ R600_STATE_SAMPLER, /* has PS,VS,GS,FS variants */
+ R600_STATE_SAMPLER_BORDER, /* has PS,VS,GS,FS variants */
+ R600_STATE_CB0,
+ R600_STATE_CB1,
+ R600_STATE_CB2,
+ R600_STATE_CB3,
+ R600_STATE_CB4,
+ R600_STATE_CB5,
+ R600_STATE_CB6,
+ R600_STATE_CB7,
+ R600_STATE_DB,
+ R600_STATE_QUERY_BEGIN,
+ R600_STATE_QUERY_END,
+ R600_STATE_UCP,
+ R600_STATE_VGT,
+ R600_STATE_DRAW,
+};
-#define R600_CONFIG 0
-#define R600_CONFIG_TYPE 0
-#define R600_CB_CNTL 1
-#define R600_CB_CNTL_TYPE 1
-#define R600_RASTERIZER 2
-#define R600_RASTERIZER_TYPE 2
-#define R600_VIEWPORT 3
-#define R600_VIEWPORT_TYPE 3
-#define R600_SCISSOR 4
-#define R600_SCISSOR_TYPE 4
-#define R600_BLEND 5
-#define R600_BLEND_TYPE 5
-#define R600_DSA 6
-#define R600_DSA_TYPE 6
-#define R600_VS_SHADER 7
-#define R600_VS_SHADER_TYPE 7
-#define R600_PS_SHADER 8
-#define R600_PS_SHADER_TYPE 8
-#define R600_PS_CONSTANT 9
-#define R600_PS_CONSTANT_TYPE 9
-#define R600_VS_CONSTANT 265
-#define R600_VS_CONSTANT_TYPE 10
-#define R600_PS_RESOURCE 521
-#define R600_PS_RESOURCE_TYPE 11
-#define R600_VS_RESOURCE 681
-#define R600_VS_RESOURCE_TYPE 12
-#define R600_FS_RESOURCE 841
-#define R600_FS_RESOURCE_TYPE 13
-#define R600_GS_RESOURCE 1001
-#define R600_GS_RESOURCE_TYPE 14
-#define R600_PS_SAMPLER 1161
-#define R600_PS_SAMPLER_TYPE 15
-#define R600_VS_SAMPLER 1179
-#define R600_VS_SAMPLER_TYPE 16
-#define R600_GS_SAMPLER 1197
-#define R600_GS_SAMPLER_TYPE 17
-#define R600_PS_SAMPLER_BORDER 1215
-#define R600_PS_SAMPLER_BORDER_TYPE 18
-#define R600_VS_SAMPLER_BORDER 1233
-#define R600_VS_SAMPLER_BORDER_TYPE 19
-#define R600_GS_SAMPLER_BORDER 1251
-#define R600_GS_SAMPLER_BORDER_TYPE 20
-#define R600_CB0 1269
-#define R600_CB0_TYPE 21
-#define R600_CB1 1270
-#define R600_CB1_TYPE 22
-#define R600_CB2 1271
-#define R600_CB2_TYPE 23
-#define R600_CB3 1272
-#define R600_CB3_TYPE 24
-#define R600_CB4 1273
-#define R600_CB4_TYPE 25
-#define R600_CB5 1274
-#define R600_CB5_TYPE 26
-#define R600_CB6 1275
-#define R600_CB6_TYPE 27
-#define R600_CB7 1276
-#define R600_CB7_TYPE 28
-#define R600_DB 1277
-#define R600_DB_TYPE 29
-#define R600_VGT 1278
-#define R600_VGT_TYPE 30
-#define R600_DRAW 1279
-#define R600_DRAW_TYPE 31
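With the per-shader id blocks gone, states that have PS/VS/GS/FS variants are distinguished by the shader_class argument of radeon_state_init (see its prototype earlier in this header) rather than by distinct numeric ids. A plausible call, assuming the pixel-shader variant; this exact call site is not part of the patch:

	struct radeon_state rstate;

	/* Assumed usage: bind the pixel-shader variant of the SHADER state. */
	radeon_state_init(&rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);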
/* R600_CONFIG */
#define R600_CONFIG__SQ_CONFIG 0
#define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1
@@ -639,9 +594,40 @@ struct radeon_ctx {
/* R600_DRAW */
#define R600_DRAW__VGT_NUM_INDICES 0
#define R600_DRAW__VGT_DMA_BASE_HI 1
-#define R600_DRAW__VGT_DMA_BASE 2
+#define R600_DRAW__VGT_DMA_BASE 2
#define R600_DRAW__VGT_DRAW_INITIATOR 3
-#define R600_DRAW_SIZE 4
-#define R600_DRAW_PM4 128
+#define R600_DRAW_SIZE 4
+#define R600_DRAW_PM4 128
+/* R600_CLIP */
+#define R600_CLIP__PA_CL_UCP_X_0 0
+#define R600_CLIP__PA_CL_UCP_Y_0 1
+#define R600_CLIP__PA_CL_UCP_Z_0 2
+#define R600_CLIP__PA_CL_UCP_W_0 3
+#define R600_CLIP__PA_CL_UCP_X_1 4
+#define R600_CLIP__PA_CL_UCP_Y_1 5
+#define R600_CLIP__PA_CL_UCP_Z_1 6
+#define R600_CLIP__PA_CL_UCP_W_1 7
+#define R600_CLIP__PA_CL_UCP_X_2 8
+#define R600_CLIP__PA_CL_UCP_Y_2 9
+#define R600_CLIP__PA_CL_UCP_Z_2 10
+#define R600_CLIP__PA_CL_UCP_W_2 11
+#define R600_CLIP__PA_CL_UCP_X_3 12
+#define R600_CLIP__PA_CL_UCP_Y_3 13
+#define R600_CLIP__PA_CL_UCP_Z_3 14
+#define R600_CLIP__PA_CL_UCP_W_3 15
+#define R600_CLIP__PA_CL_UCP_X_4 16
+#define R600_CLIP__PA_CL_UCP_Y_4 17
+#define R600_CLIP__PA_CL_UCP_Z_4 18
+#define R600_CLIP__PA_CL_UCP_W_4 19
+#define R600_CLIP__PA_CL_UCP_X_5 20
+#define R600_CLIP__PA_CL_UCP_Y_5 21
+#define R600_CLIP__PA_CL_UCP_Z_5 22
+#define R600_CLIP__PA_CL_UCP_W_5 23
+#define R600_CLIP_SIZE 24
+#define R600_CLIP_PM4 128
+/* R600 QUERY BEGIN/END */
+#define R600_QUERY__OFFSET 0
+#define R600_QUERY_SIZE 1
+#define R600_QUERY_PM4 128
#endif