summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@gmail.com>2010-05-27 17:14:51 -0700
committerMarek Olšák <maraeo@gmail.com>2010-06-11 22:06:58 +0200
commit622fd4d061678027d5de2c84d1c07370830c4264 (patch)
tree15eb6911bed9f7be4f66b0245962c213c9b47b9a
parent108264e859b4f435e9608472dc2e388aa200183c (diff)
r300/compiler: Implement simple loop emulation
The loop emulation unrolls loops as many times as possible while still keeping the shader program below the maximum instruction limit. At this point, there are no checks for constant conditionals. This is only enabled for fragment shaders.
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c183
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h6
7 files changed, 233 insertions, 3 deletions
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 89f39af976..5394e04f72 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -105,12 +105,12 @@ static unsigned translate_opcode(unsigned opcode)
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
/* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
- /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */
+ case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
case TGSI_OPCODE_IF: return RC_OPCODE_IF;
- /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */
+ case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP;
case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE;
case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF;
- /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */
+ case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP;
/* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */
/* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */
case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL;
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 34d22b4559..ff3801dc67 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -9,6 +9,7 @@ C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
radeon_emulate_branches.c \
+ radeon_emulate_loops.c \
radeon_program.c \
radeon_program_print.c \
radeon_opcodes.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 7f3b88ed75..38312658d6 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -26,6 +26,7 @@
#include "radeon_dataflow.h"
#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
#include "radeon_program_alu.h"
#include "radeon_program_tex.h"
#include "r300_fragprog.h"
@@ -103,6 +104,15 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
/* XXX Ideally this should be done only for r3xx, but since
* we don't have branching support for r5xx, we use the emulation
* on all chipsets. */
+
+ if(c->Base.is_r500){
+ rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+ }
+ else{
+ rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+ }
+ debug_program_log(c, "after emulate loops");
+
rc_emulate_branches(&c->Base);
debug_program_log(c, "after emulate branches");
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 0000000000..b05ba08e13
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ * Loop emulation: rewrites each BGNLOOP/ENDLOOP loop into an IF block and
+ * unrolls it.
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include <string.h>
+
+#include "radeon_compiler.h"
+
+/**
+ * State shared by the two passes of the loop emulation
+ * (rc_transform_loops and rc_unroll_loops).
+ */
+struct emulate_loop_state {
+ /* Compiler whose program is being transformed. */
+ struct radeon_compiler * C;
+ /* Array of loops recorded by transform_loop(); storage is obtained
+  * through memory_pool_array_reserve() on C->Pool. */
+ struct loop_info * Loops;
+ /* Number of entries of Loops that are in use. */
+ unsigned int LoopCount;
+ /* Bookkeeping value handed to memory_pool_array_reserve() together
+  * with LoopCount. */
+ unsigned int LoopReserved;
+};
+
+/**
+ * One recorded loop: the BGNLOOP instruction that opens it and the ENDLOOP
+ * instruction that closes it.
+ */
+struct loop_info {
+ struct rc_instruction * BeginLoop;
+ struct rc_instruction * EndLoop;
+};
+
+/**
+ * Count the instructions that lie strictly between the loop's BeginLoop and
+ * EndLoop markers. The markers themselves are not counted.
+ *
+ * @param loop Loop whose body is measured.
+ * @return Number of instructions in the loop body.
+ */
+static unsigned int loop_count_instructions(struct loop_info * loop)
+{
+	unsigned int body_size = 0;
+	struct rc_instruction * cur;
+
+	for(cur = loop->BeginLoop->Next; cur != loop->EndLoop; cur = cur->Next){
+		body_size++;
+	}
+	return body_size;
+}
+
+/**
+ * Decide how many times a loop body may appear after unrolling.
+ *
+ * The instruction budget is split evenly: every recorded loop is assumed to
+ * cost as many instructions per iteration as this loop's body.
+ *
+ * @param loop Loop to compute the iteration count for.
+ * @param loop_count Total number of recorded loops sharing the budget.
+ * @param max_instructions Instruction budget for the whole program.
+ * @return max_instructions / (loop_count * body size), or 0 when there is
+ * nothing to divide by (empty loop body or no loops).
+ */
+static unsigned int loop_calc_iterations(struct loop_info * loop,
+		unsigned int loop_count, unsigned int max_instructions)
+{
+	unsigned int icount = loop_count_instructions(loop);
+	unsigned int divisor = loop_count * icount;
+
+	/* An empty loop body (or a zero loop count) would otherwise lead to
+	 * a division by zero. */
+	if(divisor == 0){
+		return 0;
+	}
+	return max_instructions / divisor;
+}
+
+/**
+ * Unroll a loop in place: remove its BGNLOOP/ENDLOOP markers and append
+ * copies of the loop body behind the original body.
+ *
+ * @param s Emulation state; s->C is used to allocate the copies.
+ * @param loop Loop to unroll.
+ * @param iterations Number of times the body should appear in total,
+ * counting the original body. For 0 or 1 no copies are made and only the
+ * markers are removed.
+ */
+static void loop_unroll(struct emulate_loop_state * s,
+ struct loop_info *loop, unsigned int iterations)
+{
+ unsigned int i;
+ struct rc_instruction * ptr;
+ /* First and last instruction of the original loop body. */
+ struct rc_instruction * first = loop->BeginLoop->Next;
+ struct rc_instruction * last = loop->EndLoop->Prev;
+ /* Instruction after which the next copy is inserted. */
+ struct rc_instruction * append_to = last;
+ rc_remove_instruction(loop->BeginLoop);
+ rc_remove_instruction(loop->EndLoop);
+ /* Start at 1: the original body already is the first iteration. */
+ for( i = 1; i < iterations; i++){
+ /* Copy first..last. last->Next is re-read on every test.
+  * NOTE(review): this assumes rc_insert_instruction(after, inst)
+  * links inst directly behind `after`, so that once the first
+  * copy exists last->Next is that copy and the walk stops at
+  * the end of the original body - confirm. */
+ for(ptr = first; ptr != last->Next; ptr = ptr->Next){
+ struct rc_instruction *new = rc_alloc_instruction(s->C);
+ /* The copy includes the Prev/Next links of the source
+  * instruction; presumably rc_insert_instruction()
+  * overwrites them - confirm. */
+ memcpy(new, ptr, sizeof(struct rc_instruction));
+ rc_insert_instruction(append_to, new);
+ append_to = new;
+ }
+ }
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP;                         -> BGNLOOP;
+ * SGE temp[0], temp[1], temp[2];   -> SLT temp[0], temp[1], temp[2];
+ * IF temp[0];                      -> IF temp[0];
+ * BRK;                             ->
+ * ENDIF;                           -> <Loop Body>
+ * <Loop Body>                      -> ENDIF;
+ * ENDLOOP;                         -> ENDLOOP
+ *
+ * The loop, and recursively every loop nested inside it, is recorded in
+ * s->Loops. A nested loop is recorded after the loop that contains it.
+ *
+ * @param s Loop emulation state.
+ * @param inst Pointer to a BGNLOOP instruction.
+ * @return The ENDLOOP instruction that closes this loop, so that a caller
+ * advancing with ->Next resumes at the first instruction behind the loop.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+		struct rc_instruction * inst)
+{
+	struct rc_instruction * ptr;
+	/* ENDIF taken out of the BRK block; it is re-inserted in front of
+	 * ENDLOOP. Kept at function scope so its value survives from the
+	 * iteration that sees BRK to the iteration that sees ENDLOOP. */
+	struct rc_instruction * endif = NULL;
+	struct rc_instruction * end_loop = NULL;
+	unsigned int loop_index;
+
+	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+			s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+	/* Remember the index rather than a pointer: a nested call reserves
+	 * space in s->Loops again, which may move the array. */
+	loop_index = s->LoopCount++;
+	memset(&s->Loops[loop_index], 0, sizeof(struct loop_info));
+	s->Loops[loop_index].BeginLoop = inst;
+
+	/* Reverse the SGE instruction.
+	 * NOTE(review): the opcode is overwritten without checking that the
+	 * instruction really is an SGE. */
+	ptr = inst->Next;
+	ptr->U.I.Opcode = RC_OPCODE_SLT;
+
+	while(!end_loop){
+		/* Fetch the successor before ptr is possibly unlinked. */
+		struct rc_instruction * next = ptr->Next;
+
+		/* U.I is only meaningful for normal instructions. */
+		if(ptr->Type != RC_INSTRUCTION_NORMAL){
+			ptr = next;
+			continue;
+		}
+
+		switch(ptr->U.I.Opcode){
+		case RC_OPCODE_BGNLOOP:
+			/* Nested loop: continue behind its ENDLOOP. */
+			ptr = transform_loop(s, ptr);
+			next = ptr->Next;
+			break;
+		case RC_OPCODE_BRK:
+			/* The BRK instruction should always be followed by
+			 * an ENDIF. This ENDIF will eventually be placed
+			 * directly in front of the ENDLOOP instruction. */
+			endif = ptr->Next;
+			next = endif->Next;
+			rc_remove_instruction(ptr);
+			rc_remove_instruction(endif);
+			break;
+		case RC_OPCODE_ENDLOOP:
+			/* Insert the ENDIF before ENDLOOP. Without a
+			 * preceding BRK there is no ENDIF to move. */
+			if(endif){
+				rc_insert_instruction(ptr->Prev, endif);
+			}
+			end_loop = ptr;
+			break;
+		default:
+			break;
+		}
+		ptr = next;
+	}
+
+	s->Loops[loop_index].EndLoop = end_loop;
+	return end_loop;
+}
+
+/**
+ * Scan the whole program and hand every BGNLOOP instruction to
+ * transform_loop(). Scanning resumes at the successor of the instruction
+ * that transform_loop() returns.
+ *
+ * @param s Loop emulation state; s->C supplies the program.
+ */
+static void rc_transform_loops(struct emulate_loop_state * s)
+{
+	struct rc_instruction * const list_head = &s->C->Program.Instructions;
+	struct rc_instruction * cur;
+
+	for(cur = list_head->Next; cur != list_head; cur = cur->Next){
+		if(cur->Type != RC_INSTRUCTION_NORMAL){
+			continue;
+		}
+		if(cur->U.I.Opcode == RC_OPCODE_BGNLOOP){
+			cur = transform_loop(s, cur);
+		}
+	}
+}
+
+/**
+ * Unroll every loop recorded in s->Loops.
+ *
+ * @param s Loop emulation state holding the recorded loops.
+ * @param max_instructions Instruction budget passed on to
+ * loop_calc_iterations().
+ */
+static void rc_unroll_loops(struct emulate_loop_state *s,
+		unsigned int max_instructions)
+{
+	/* Iterate backwards over the list of loops so that nested loops,
+	 * which are recorded after the loop containing them, are unrolled
+	 * first.
+	 */
+	int idx = s->LoopCount - 1;
+
+	while(idx >= 0){
+		struct loop_info * current = &s->Loops[idx];
+		unsigned int iterations = loop_calc_iterations(current,
+				s->LoopCount, max_instructions);
+		loop_unroll(s, current, iterations);
+		idx--;
+	}
+}
+
+/**
+ * Entry point of the loop emulation: first rewrite all loops of the
+ * program into their IF form, then unroll them.
+ *
+ * @param c Compiler whose program is transformed.
+ * @param max_instructions Instruction budget used to decide how often
+ * each loop is unrolled.
+ */
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
+{
+	/* All members other than C start out as zero. */
+	struct emulate_loop_state state = { .C = c };
+
+	/* We may need to move these two operations to r3xx_(vert|frag)prog.c
+	 * and run the optimization passes between them in order to increase
+	 * the number of unrolls we can do for each loop.
+	 */
+	rc_transform_loops(&state);
+
+	rc_unroll_loops(&state, max_instructions);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 0000000000..ddcf1c0fab
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,12 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+/* NOTE(review): MAX_ITERATIONS is not referenced by radeon_emulate_loops.c
+ * as added in this change; presumably meant as an upper bound on the number
+ * of unrolled iterations - confirm. */
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+/**
+ * Emulate the loops of c's program by transforming and unrolling them.
+ *
+ * @param c Compiler whose program is transformed.
+ * @param max_instructions Instruction budget used to decide how many times
+ * each loop is unrolled.
+ */
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index d593b3e81a..1dc16855dc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -368,6 +368,24 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.NumSrcRegs = 0
},
{
+ .Opcode = RC_OPCODE_BGNLOOP,
+ .Name = "BGNLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_BRK,
+ .Name = "BRK",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_ENDLOOP,
+ .Name = "ENDLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0,
+ },
+ {
.Opcode = RC_OPCODE_REPL_ALPHA,
.Name = "REPL_ALPHA",
.HasDstReg = 1
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 87a2e23084..91c82ac089 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -180,6 +180,12 @@ typedef enum {
/** branch instruction: has no effect */
RC_OPCODE_ENDIF,
+
+ RC_OPCODE_BGNLOOP,
+
+ RC_OPCODE_BRK,
+
+ RC_OPCODE_ENDLOOP,
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated