summaryrefslogtreecommitdiff
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2006-10-18 00:24:01 -0700
committerKeith Packard <keithp@neko.keithp.com>2007-01-06 15:18:23 -0800
commit1b9f78195f62959601d440475a6cbba5e8046813 (patch)
treebf86214e2af04a045369df53e8c025366f533cf7 /src/mesa/drivers
parente54ec49155052ab663d8671e7036d985992464a3 (diff)
i965: Avoid branch instructions while in single program flow mode.
There is an errata for Broadwater that threads don't have the instruction/loop mask stacks initialized on thread spawn. In single program flow mode, those stacks are not writable, so we can't initialize them. However, they do get read during ELSE and ENDIF instructions. So, instead, replace branch instructions in single program flow mode with predicated jumps (ADD to the ip register), avoiding use of the more complicated branch instructions that may fail. This is also a minor optimization as no ENDIF equivalent is necessary. Signed-off-by: Keith Packard <keithp@neko.keithp.com>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c155
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h22
5 files changed, 125 insertions, 59 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 0e8591aaa8..3bec153075 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -62,6 +62,8 @@ static void compile_clip_prog( struct brw_context *brw,
*/
brw_init_compile(&c.func);
+ c.func.single_program_flow = 1;
+
c.key = *key;
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 1afa0f816b..d4dbcf38a7 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -104,6 +104,7 @@ struct brw_compile {
struct brw_instruction *current;
GLuint flag_value;
+ GLboolean single_program_flow;
};
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 6425c91450..9992b47d8a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -464,7 +464,6 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p,
return insn;
}
-
/* EU takes the value from the flag register and pushes it onto some
* sort of a stack (presumably merging with any flag value already on
* the stack). Within an if block, the flags at the top of the stack
@@ -482,7 +481,16 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p,
*/
struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_IF);
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow) {
+ assert(execute_size == BRW_EXECUTE_1);
+
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ insn->header.predicate_inverse = 1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_IF);
+ }
/* Override the defaults for this instruction:
*/
@@ -504,7 +512,13 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
struct brw_instruction *brw_ELSE(struct brw_compile *p,
struct brw_instruction *if_insn)
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ELSE);
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ } else {
+ insn = next_insn(p, BRW_OPCODE_ELSE);
+ }
brw_set_dest(insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
@@ -516,11 +530,17 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
/* Patch the if instruction to point at this instruction.
*/
- assert(if_insn->header.opcode == BRW_OPCODE_IF);
+ if (p->single_program_flow) {
+ assert(if_insn->header.opcode == BRW_OPCODE_ADD);
- if_insn->bits3.if_else.jump_count = insn - if_insn;
- if_insn->bits3.if_else.pop_count = 1;
- if_insn->bits3.if_else.pad0 = 0;
+ if_insn->bits3.ud = (insn - if_insn + 1) * 16;
+ } else {
+ assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+ if_insn->bits3.if_else.jump_count = insn - if_insn;
+ if_insn->bits3.if_else.pop_count = 1;
+ if_insn->bits3.if_else.pad0 = 0;
+ }
return insn;
}
@@ -528,63 +548,76 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *patch_insn)
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
+ if (p->single_program_flow) {
+ /* In single program flow mode, there's no need to execute an ENDIF,
+ * since we don't need to do any stack operations, and if we're executing
+ * currently, we want to just continue executing.
+ */
+ struct brw_instruction *next = &p->store[p->nr_insn];
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src1(insn, brw_imm_d(0x0));
+ assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = patch_insn->header.execution_size;
- insn->header.mask_control = BRW_MASK_ENABLE;
+ patch_insn->bits3.ud = (next - patch_insn) * 16;
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
- assert(patch_insn->bits3.if_else.jump_count == 0);
-
- /* Patch the if or else instructions to point at this or the next
- * instruction respectively.
- */
- if (patch_insn->header.opcode == BRW_OPCODE_IF) {
- /* Automagically turn it into an IFF:
+ brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = patch_insn->header.execution_size;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+
+ assert(patch_insn->bits3.if_else.jump_count == 0);
+
+ /* Patch the if or else instructions to point at this or the next
+ * instruction respectively.
*/
- patch_insn->header.opcode = BRW_OPCODE_IFF;
- patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
- patch_insn->bits3.if_else.pop_count = 0;
- patch_insn->bits3.if_else.pad0 = 0;
+ if (patch_insn->header.opcode == BRW_OPCODE_IF) {
+ /* Automagically turn it into an IFF:
+ */
+ patch_insn->header.opcode = BRW_OPCODE_IFF;
+ patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.pop_count = 0;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
+ patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
+ patch_insn->bits3.if_else.pop_count = 1;
+ patch_insn->bits3.if_else.pad0 = 0;
+ } else {
+ assert(0);
+ }
+ /* Also pop item off the stack in the endif instruction:
+ */
+ insn->bits3.if_else.jump_count = 0;
+ insn->bits3.if_else.pop_count = 1;
+ insn->bits3.if_else.pad0 = 0;
}
- else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
- patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
- patch_insn->bits3.if_else.pop_count = 1;
- patch_insn->bits3.if_else.pad0 = 0;
- }
- else {
- assert(0);
- }
-
- /* Also pop item off the stack in the endif instruction:
- */
- insn->bits3.if_else.jump_count = 0;
- insn->bits3.if_else.pop_count = 1;
- insn->bits3.if_else.pad0 = 0;
}
/* DO/WHILE loop:
*/
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
+ if (p->single_program_flow) {
+ return &p->store[p->nr_insn];
+ } else {
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
- /* Override the defaults for this instruction:
- */
- brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+ /* Override the defaults for this instruction:
+ */
+ brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = execute_size;
-/* insn->header.mask_control = BRW_MASK_ENABLE; */
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = execute_size;
+ /* insn->header.mask_control = BRW_MASK_ENABLE; */
- return insn;
+ return insn;
+ }
}
@@ -592,19 +625,31 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
void brw_WHILE(struct brw_compile *p,
struct brw_instruction *do_insn)
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WHILE);
+ struct brw_instruction *insn;
+
+ if (p->single_program_flow)
+ insn = next_insn(p, BRW_OPCODE_ADD);
+ else
+ insn = next_insn(p, BRW_OPCODE_WHILE);
brw_set_dest(insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = do_insn->header.execution_size;
- assert(do_insn->header.opcode == BRW_OPCODE_DO);
- insn->bits3.if_else.jump_count = do_insn - insn;
- insn->bits3.if_else.pop_count = 0;
- insn->bits3.if_else.pad0 = 0;
+ if (p->single_program_flow) {
+ insn->header.execution_size = BRW_EXECUTE_1;
+
+ insn->bits3.d = (do_insn - insn) * 16;
+ } else {
+ insn->header.execution_size = do_insn->header.execution_size;
+
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
+ insn->bits3.if_else.jump_count = do_insn - insn;
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
/* insn->header.mask_control = BRW_MASK_ENABLE; */
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 7d3f9dd5e3..9066e42252 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -66,7 +66,9 @@ static void compile_gs_prog( struct brw_context *brw,
/* Begin the compilation:
*/
brw_init_compile(&c.func);
-
+
+ c.func.single_program_flow = 1;
+
/* For some reason the thread is spawned with only 4 channels
* unmasked.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 25acdcfe94..10fee944e8 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -519,7 +519,22 @@ struct thread3
struct brw_clip_unit_state
{
struct thread0 thread0;
- struct thread1 thread1;
+ struct
+ {
+ GLuint pad0:7;
+ GLuint sw_exception_enable:1;
+ GLuint pad1:3;
+ GLuint mask_stack_exception_enable:1;
+ GLuint pad2:1;
+ GLuint illegal_op_exception_enable:1;
+ GLuint pad3:2;
+ GLuint floating_point_mode:1;
+ GLuint thread_priority:1;
+ GLuint binding_table_entry_count:8;
+ GLuint pad4:5;
+ GLuint single_program_flow:1;
+ } thread1;
+
struct thread2 thread2;
struct thread3 thread3;
@@ -532,8 +547,8 @@ struct brw_clip_unit_state
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
- GLuint max_threads:6; /* may be less */
- GLuint pad3:1;
+ GLuint max_threads:1; /* may be less */
+ GLuint pad3:6;
} thread4;
struct
@@ -1322,6 +1337,7 @@ struct brw_instruction
GLuint end_of_thread:1;
} generic;
+ GLint d;
GLuint ud;
} bits3;
};