summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c25
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h18
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c35
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c314
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp460
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h67
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp33
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c17
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c50
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_batch.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c52
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h31
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c369
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c132
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h33
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c172
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c1035
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_iz.c32
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass1.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass2.c45
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c41
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c176
-rw-r--r--src/mesa/drivers/dri/i965/gen6_cc.c92
-rw-r--r--src/mesa/drivers/dri/i965/gen6_clip_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c89
-rw-r--r--src/mesa/drivers/dri/i965/gen6_urb.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c25
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c31
43 files changed, 1571 insertions, 1896 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index e3ca863fe5..7c3ac0c14e 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -81,7 +81,6 @@ DRIVER_SOURCES = \
brw_wm_emit.c \
brw_wm_fp.c \
brw_wm_iz.c \
- brw_wm_glsl.c \
brw_wm_pass0.c \
brw_wm_pass1.c \
brw_wm_pass2.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index a8369b07c3..d3a1233aac 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -232,3 +232,28 @@ const struct brw_tracked_state brw_cc_unit = {
.prepare = prepare_cc_unit,
.emit = upload_cc_unit,
};
+
+static void upload_blend_constant_color(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->intel.ctx;
+ struct brw_blend_constant_color bcc;
+
+ memset(&bcc, 0, sizeof(bcc));
+ bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+ bcc.header.length = sizeof(bcc)/4-2;
+ bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
+ bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
+ bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
+ bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+}
+
+const struct brw_tracked_state brw_blend_constant_color = {
+ .dirty = {
+ .mesa = _NEW_COLOR,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .emit = upload_blend_constant_color
+};
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index cb0a8b96c9..28549f2574 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api,
(i == MESA_SHADER_FRAGMENT);
ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
(i == MESA_SHADER_FRAGMENT);
-
- if (intel->gen == 6)
- ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX);
}
ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 335339515a..7069724466 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -171,7 +171,6 @@ struct brw_vertex_program {
struct brw_fragment_program {
struct gl_fragment_program program;
GLuint id; /**< serial no. to identify frag progs, never re-used */
- GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
/** for debugging, which texture units are referenced */
GLbitfield tex_units_used;
@@ -211,6 +210,7 @@ struct brw_wm_prog_data {
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
GLboolean error;
+ int dispatch_width;
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 7b823eb201..877b22fec1 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -242,21 +242,13 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
- if (vp->use_const_buffer) {
- /* Load the subset of push constants that will get used when
- * we also have a pull constant buffer.
- */
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- assert(brw->vs.constant_map[i] <= nr);
- memcpy(buf + offset + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- }
- }
- } else {
- for (i = 0; i < nr; i++) {
- memcpy(buf + offset + i * 4,
+ /* Load the subset of push constants that will get used when
+ * we also have a pull constant buffer.
+ */
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ assert(brw->vs.constant_map[i] <= nr);
+ memcpy(buf + offset + brw->vs.constant_map[i] * 4,
vp->program.Base.Parameters->ParameterValues[i],
4 * sizeof(float));
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 239586a036..b48a30d6be 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -462,6 +462,13 @@
#define BRW_COMPRESSION_2NDHALF 1
#define BRW_COMPRESSION_COMPRESSED 2
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
@@ -899,6 +906,8 @@
# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW4 */
# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
@@ -1022,6 +1031,13 @@
# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
/* DW16: Point sprite texture coordinate enables */
/* DW17: Constant interpolation enables */
/* DW18: attr 0-7 wrap shortest enables */
@@ -1034,6 +1050,8 @@
# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 962c04128b..6b61f7af15 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
err |= dest (file, inst);
} else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF ||
inst->header.opcode == BRW_OPCODE_ELSE ||
- inst->header.opcode == BRW_OPCODE_ENDIF)) {
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
format (file, " %d", inst->bits1.branch_gen6.jump_count);
}
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index a1f403ca4e..7eb16b71f4 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -159,7 +159,7 @@ static void brw_emit_prim(struct brw_context *brw,
}
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
- sizeof(prim_packet));
+ sizeof(prim_packet), false);
}
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -351,7 +351,8 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
* an upper bound of how much we might emit in a single
* brw_try_draw_prims().
*/
- intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4);
+ intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
+ false);
hw_prim = brw_set_prim(brw, &prim[i]);
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 2ff39e8e64..3b5c4c071e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -72,7 +72,37 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
{
- p->current->header.compression_control = compression_control;
+ p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
+
+ if (p->brw->intel.gen >= 6) {
+ /* Since we don't use the 32-wide support in gen6, we translate
+ * the pre-gen6 compression control here.
+ */
+ switch (compression_control) {
+ case BRW_COMPRESSION_NONE:
+ /* This is the "use the first set of bits of dmask/vmask/arf
+ * according to execsize" option.
+ */
+ p->current->header.compression_control = GEN6_COMPRESSION_1Q;
+ break;
+ case BRW_COMPRESSION_2NDHALF:
+ /* For 8-wide, this is "use the second set of 8 bits." */
+ p->current->header.compression_control = GEN6_COMPRESSION_2Q;
+ break;
+ case BRW_COMPRESSION_COMPRESSED:
+ /* For 16-wide instruction compression, use the first set of 16 bits
+ * since we don't do 32-wide dispatch.
+ */
+ p->current->header.compression_control = GEN6_COMPRESSION_1H;
+ break;
+ default:
+ assert(!"not reached");
+ p->current->header.compression_control = GEN6_COMPRESSION_1H;
+ break;
+ }
+ } else {
+ p->current->header.compression_control = compression_control;
+ }
}
void brw_set_mask_control( struct brw_compile *p, GLuint value )
@@ -95,6 +125,7 @@ void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+ p->compressed_stack[p->current - p->stack] = p->compressed;
p->current++;
}
@@ -102,6 +133,7 @@ void brw_pop_insn_state( struct brw_compile *p )
{
assert(p->current != p->stack);
p->current--;
+ p->compressed = p->compressed_stack[p->current - p->stack];
}
@@ -112,6 +144,7 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
p->brw = brw;
p->nr_insn = 0;
p->current = p->stack;
+ p->compressed = false;
memset(p->current, 0, sizeof(p->current[0]));
/* Some defaults?
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index b4538e6e8a..4dbdc52210 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -33,6 +33,7 @@
#ifndef BRW_EU_H
#define BRW_EU_H
+#include <stdbool.h>
#include "brw_structs.h"
#include "brw_defines.h"
#include "program/prog_instruction.h"
@@ -106,10 +107,12 @@ struct brw_compile {
/* Allow clients to push/pop instruction state:
*/
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ bool compressed_stack[BRW_EU_MAX_INSN_STACK];
struct brw_instruction *current;
GLuint flag_value;
GLboolean single_program_flow;
+ bool compressed;
struct brw_context *brw;
struct brw_glsl_label *first_label; /**< linked list of labels */
@@ -954,6 +957,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
+struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
+ struct brw_instruction *do_insn);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
/* Forward jumps:
*/
@@ -1009,6 +1014,7 @@ void brw_math_invert( struct brw_compile *p,
void brw_set_src1( struct brw_instruction *insn,
struct brw_reg reg );
+void brw_set_uip_jip(struct brw_compile *p);
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 9cb941dacf..9c764fe779 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -41,19 +41,20 @@
* Internal helper for constructing instructions
*/
-static void guess_execution_size( struct brw_instruction *insn,
- struct brw_reg reg )
+static void guess_execution_size(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg reg)
{
- if (reg.width == BRW_WIDTH_8 &&
- insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
+ if (reg.width == BRW_WIDTH_8 && p->compressed)
insn->header.execution_size = BRW_EXECUTE_16;
else
insn->header.execution_size = reg.width; /* note - definitions are compatible */
}
-static void brw_set_dest( struct brw_instruction *insn,
- struct brw_reg dest )
+static void brw_set_dest(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -100,7 +101,7 @@ static void brw_set_dest( struct brw_instruction *insn,
/* NEW: Set the execution size based on dest.width and
* insn->compression_control:
*/
- guess_execution_size(insn, dest);
+ guess_execution_size(p, insn, dest);
}
extern int reg_type_size[];
@@ -629,7 +630,7 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p,
struct brw_reg src )
{
struct brw_instruction *insn = next_insn(p, opcode);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
return insn;
}
@@ -641,7 +642,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p,
struct brw_reg src1 )
{
struct brw_instruction *insn = next_insn(p, opcode);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
return insn;
@@ -680,7 +681,7 @@ void brw_##OP(struct brw_compile *p, \
{ \
struct brw_instruction *rnd, *add; \
rnd = next_insn(p, BRW_OPCODE_##OP); \
- brw_set_dest(rnd, dest); \
+ brw_set_dest(p, rnd, dest); \
brw_set_src0(rnd, src); \
rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
\
@@ -779,7 +780,7 @@ struct brw_instruction *brw_MUL(struct brw_compile *p,
void brw_NOP(struct brw_compile *p)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(insn, brw_imm_ud(0x0));
}
@@ -840,11 +841,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
/* Override the defaults for this instruction:
*/
if (intel->gen < 6) {
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
@@ -870,7 +871,7 @@ brw_IF_gen6(struct brw_compile *p, uint32_t conditional,
insn = next_insn(p, BRW_OPCODE_IF);
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->header.execution_size = BRW_EXECUTE_8;
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, src0);
@@ -905,11 +906,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
}
if (intel->gen < 6) {
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
@@ -965,11 +966,11 @@ void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
if (intel->gen < 6) {
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
}
@@ -1029,16 +1030,44 @@ void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
+
insn = next_insn(p, BRW_OPCODE_BREAK);
- brw_set_dest(insn, brw_ip_reg());
+ if (intel->gen >= 6) {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(insn, brw_imm_d(0x0));
+ } else {
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->bits3.if_else.pad0 = 0;
+ insn->bits3.if_else.pop_count = pop_count;
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+
+ return insn;
+}
+
+struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+ int br = 2;
+
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->bits3.break_cont.uip = br * (do_insn - insn);
+
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- insn->bits3.if_else.pad0 = 0;
- insn->bits3.if_else.pop_count = pop_count;
return insn;
}
@@ -1046,7 +1075,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
insn->header.compression_control = BRW_COMPRESSION_NONE;
@@ -1058,17 +1087,33 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
}
/* DO/WHILE loop:
+ *
+ * The DO/WHILE is just an unterminated loop -- break or continue are
+ * used for control within the loop. We have a few ways they can be
+ * done.
+ *
+ * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
+ * jip and no DO instruction.
+ *
+ * For non-uniform control flow pre-gen6, there's a DO instruction to
+ * push the mask, and a WHILE to jump back, and BREAK to get out and
+ * pop the mask.
+ *
+ * For gen6, there's no more mask stack, so no need for DO. WHILE
+ * just points back to the first instruction of the loop.
*/
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
- if (p->single_program_flow) {
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6 || p->single_program_flow) {
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
/* Override the defaults for this instruction:
*/
- brw_set_dest(insn, brw_null_reg());
+ brw_set_dest(p, insn, brw_null_reg());
brw_set_src0(insn, brw_null_reg());
brw_set_src1(insn, brw_null_reg());
@@ -1094,34 +1139,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
if (intel->gen >= 5)
br = 2;
- if (p->single_program_flow)
- insn = next_insn(p, BRW_OPCODE_ADD);
- else
+ if (intel->gen >= 6) {
insn = next_insn(p, BRW_OPCODE_WHILE);
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
+ brw_set_dest(p, insn, brw_imm_w(0));
+ insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = do_insn->header.execution_size;
+ assert(insn->header.execution_size == BRW_EXECUTE_8);
+ } else {
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
- if (p->single_program_flow) {
- insn->header.execution_size = BRW_EXECUTE_1;
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
+ insn->header.execution_size = BRW_EXECUTE_1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
- insn->bits3.d = (do_insn - insn) * 16;
- } else {
- insn->header.execution_size = do_insn->header.execution_size;
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
- assert(do_insn->header.opcode == BRW_OPCODE_DO);
- insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
- insn->bits3.if_else.pop_count = 0;
- insn->bits3.if_else.pad0 = 0;
- }
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0));
-/* insn->header.mask_control = BRW_MASK_ENABLE; */
+ insn->header.execution_size = do_insn->header.execution_size;
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- p->current->header.predicate_control = BRW_PREDICATE_NONE;
return insn;
}
@@ -1159,7 +1212,7 @@ void brw_CMP(struct brw_compile *p,
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
insn->header.destreg__conditionalmod = conditional;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
@@ -1184,7 +1237,7 @@ void brw_WAIT (struct brw_compile *p)
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
struct brw_reg src = brw_notification_1_reg();
- brw_set_dest(insn, src);
+ brw_set_dest(p, insn, src);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
insn->header.execution_size = 0; /* must */
@@ -1219,6 +1272,10 @@ void brw_math( struct brw_compile *p,
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src.negate);
+ assert(!src.abs);
+
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src.type == BRW_REGISTER_TYPE_F);
@@ -1228,8 +1285,9 @@ void brw_math( struct brw_compile *p,
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
+ insn->header.saturate = saturate;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
} else {
@@ -1242,7 +1300,7 @@ void brw_math( struct brw_compile *p,
insn->header.predicate_control = 0;
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1284,12 +1342,18 @@ void brw_math2(struct brw_compile *p,
assert(src1.type == BRW_REGISTER_TYPE_F);
}
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src0.negate);
+ assert(!src0.abs);
+ assert(!src1.negate);
+ assert(!src1.abs);
+
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
}
@@ -1318,8 +1382,13 @@ void brw_math_16( struct brw_compile *p,
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
+ insn->header.saturate = saturate;
+
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src.negate);
+ assert(!src.abs);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
return;
@@ -1334,7 +1403,7 @@ void brw_math_16( struct brw_compile *p,
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1351,7 +1420,7 @@ void brw_math_16( struct brw_compile *p,
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
insn->header.destreg__conditionalmod = msg_reg_nr+1;
- brw_set_dest(insn, offset(dest,1));
+ brw_set_dest(p, insn, offset(dest,1));
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1446,7 +1515,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
send_commit_msg = 1;
}
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_null_reg());
brw_set_dp_write_message(p->brw,
@@ -1516,7 +1585,7 @@ brw_oword_block_read_scratch(struct brw_compile *p,
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = mrf.nr;
- brw_set_dest(insn, dest); /* UW? */
+ brw_set_dest(p, insn, dest); /* UW? */
brw_set_src0(insn, brw_null_reg());
brw_set_dp_read_message(p->brw,
@@ -1569,7 +1638,7 @@ void brw_oword_block_read(struct brw_compile *p,
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
if (intel->gen >= 6) {
brw_set_src0(insn, mrf);
} else {
@@ -1614,7 +1683,7 @@ void brw_dword_scattered_read(struct brw_compile *p,
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_null_reg());
brw_set_dp_read_message(p->brw,
@@ -1639,29 +1708,21 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
GLuint location,
GLuint bind_table_index)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_reg_nr = 1;
- struct brw_reg b;
- /*
- printf("vs const read msg, location %u, msg_reg_nr %d\n",
- location, msg_reg_nr);
- */
+ if (intel->gen >= 6)
+ location /= 16;
/* Setup MRF[1] with location/offset into const buffer */
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
- /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
- * when the docs say only dword[2] should be set. Hmmm. But it works.
- */
- b = brw_message_reg(msg_reg_nr);
- b = retype(b, BRW_REGISTER_TYPE_UD);
- /*b = get_element_ud(b, 2);*/
- brw_MOV(p, b, brw_imm_ud(location));
-
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
+ BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(location));
brw_pop_insn_state(p);
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -1671,8 +1732,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest);
+ if (intel->gen >= 6) {
+ brw_set_src0(insn, brw_message_reg(msg_reg_nr));
+ } else {
+ brw_set_src0(insn, brw_null_reg());
+ }
brw_set_dp_read_message(p->brw,
insn,
@@ -1706,7 +1771,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
* fields ignored.
*/
- brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
+ brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
@@ -1717,7 +1782,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
insn->header.destreg__conditionalmod = 0;
insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_vec8_grf(0, 0));
if (intel->gen == 6)
@@ -1782,7 +1847,7 @@ void brw_fb_WRITE(struct brw_compile *p,
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_dp_write_message(p->brw,
insn,
@@ -1860,7 +1925,7 @@ void brw_SAMPLE(struct brw_compile *p,
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
- guess_execution_size(p->current, dest);
+ guess_execution_size(p, p->current, dest);
if (p->current->header.execution_size == BRW_EXECUTE_16)
dispatch_16 = GL_TRUE;
@@ -1895,12 +1960,15 @@ void brw_SAMPLE(struct brw_compile *p,
* and the first message register index comes from src0.
*/
if (intel->gen >= 6) {
- brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- /* m1 contains header? */
- brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
- brw_pop_insn_state(p);
- src0 = brw_message_reg(msg_reg_nr);
+ if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+ src0.nr != BRW_ARF_NULL) {
+ brw_push_insn_state(p);
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0);
+ brw_pop_insn_state(p);
+ }
+ src0 = brw_message_reg(msg_reg_nr);
}
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -1909,7 +1977,7 @@ void brw_SAMPLE(struct brw_compile *p,
if (intel->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_sampler_message(p->brw, insn,
binding_table_index,
@@ -1970,7 +2038,7 @@ void brw_urb_WRITE(struct brw_compile *p,
assert(msg_length < BRW_MAX_MRF);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
@@ -1989,6 +2057,80 @@ void brw_urb_WRITE(struct brw_compile *p,
swizzle);
}
+static int
+brw_find_next_block_end(struct brw_compile *p, int start)
+{
+ int ip;
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_ENDIF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_WHILE:
+ return ip;
+ }
+ }
+ assert(!"not reached");
+ return start + 1;
+}
+
+/* There is no DO instruction on gen6, so to find the end of the loop
+ * we have to see if the loop is jumping back before our start
+ * instruction.
+ */
+static int
+brw_find_loop_end(struct brw_compile *p, int start)
+{
+ int ip;
+ int br = 2;
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ if (insn->header.opcode == BRW_OPCODE_WHILE) {
+ if (ip + insn->bits1.branch_gen6.jump_count / br < start)
+ return ip;
+ }
+ }
+ assert(!"not reached");
+ return start + 1;
+}
+
+/* After program generation, go back and update the UIP and JIP of
+ * BREAK and CONT instructions to their correct locations.
+ */
+void
+brw_set_uip_jip(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int ip;
+ int br = 2;
+
+ if (intel->gen < 6)
+ return;
+
+ for (ip = 0; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_BREAK:
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ /* JIP is set at CONTINUE emit time, since that's when we
+ * know where the start of the loop is.
+ */
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ assert(insn->bits3.break_cont.uip != 0);
+ assert(insn->bits3.break_cont.jip != 0);
+ break;
+ }
+ }
+}
+
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
@@ -2013,7 +2155,7 @@ void brw_ff_sync(struct brw_compile *p,
}
insn = next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 283d5aad49..4eead32cbb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -101,10 +101,12 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
do_mat_op_to_vec(shader->ir);
- do_mod_to_fract(shader->ir);
- do_div_to_mul_rcp(shader->ir);
- do_sub_to_add_neg(shader->ir);
- do_explog_to_explog2(shader->ir);
+ lower_instructions(shader->ir,
+ MOD_TO_FRACT |
+ DIV_TO_MUL_RCP |
+ SUB_TO_ADD_NEG |
+ EXP_TO_EXP2 |
+ LOG_TO_LOG2);
do_lower_texture_projection(shader->ir);
brw_do_cubemap_normalize(shader->ir);
@@ -130,6 +132,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
GL_TRUE, /* temp */
GL_TRUE /* uniform */
) || progress;
+ progress = lower_quadop_vector(shader->ir, false) || progress;
} while (progress);
validate_ir_tree(shader->ir);
@@ -174,6 +177,46 @@ type_size(const struct glsl_type *type)
}
}
+/**
+ * Returns how many MRFs an FS opcode will write over.
+ *
+ * Note that this is not the 0 or 1 implied writes in an actual gen
+ * instruction -- the FS opcodes often generate MOVs in addition.
+ */
+int
+fs_visitor::implied_mrf_writes(fs_inst *inst)
+{
+ if (inst->mlen == 0)
+ return 0;
+
+ switch (inst->opcode) {
+ case FS_OPCODE_RCP:
+ case FS_OPCODE_RSQ:
+ case FS_OPCODE_SQRT:
+ case FS_OPCODE_EXP2:
+ case FS_OPCODE_LOG2:
+ case FS_OPCODE_SIN:
+ case FS_OPCODE_COS:
+ return 1;
+ case FS_OPCODE_POW:
+ return 2;
+ case FS_OPCODE_TEX:
+ case FS_OPCODE_TXB:
+ case FS_OPCODE_TXL:
+ return 1;
+ case FS_OPCODE_FB_WRITE:
+ return 2;
+ case FS_OPCODE_PULL_CONSTANT_LOAD:
+ case FS_OPCODE_UNSPILL:
+ return 1;
+ case FS_OPCODE_SPILL:
+ return 2;
+ default:
+ assert(!"not reached");
+ return inst->mlen;
+ }
+}
+
int
fs_visitor::virtual_grf_alloc(int size)
{
@@ -299,6 +342,10 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
case GLSL_TYPE_BOOL:
c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
break;
+ default:
+ assert(!"not reached");
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
}
c->prog_data.param[param] = &vec_values[i];
@@ -400,6 +447,7 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
fs_reg wpos = *reg;
fs_reg neg_y = this->pixel_y;
neg_y.negate = true;
+ bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo;
/* gl_FragCoord.x */
if (ir->pixel_center_integer) {
@@ -410,13 +458,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
wpos.reg_offset++;
/* gl_FragCoord.y */
- if (ir->origin_upper_left && ir->pixel_center_integer) {
+ if (!flip && ir->pixel_center_integer) {
emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
} else {
fs_reg pixel_y = this->pixel_y;
float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
- if (!ir->origin_upper_left) {
+ if (flip) {
pixel_y.negate = true;
offset += c->key.drawable_height - 1.0;
}
@@ -426,8 +474,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
wpos.reg_offset++;
/* gl_FragCoord.z */
- emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
- interp_reg(FRAG_ATTRIB_WPOS, 2)));
+ if (intel->gen >= 6) {
+ emit(fs_inst(BRW_OPCODE_MOV, wpos,
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
+ } else {
+ emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
+ interp_reg(FRAG_ATTRIB_WPOS, 2)));
+ }
wpos.reg_offset++;
/* gl_FragCoord.w: Already set up in emit_interpolation */
@@ -552,8 +605,13 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
* might be able to do better by doing execsize = 1 math and then
* expanding that result out, but we would need to be careful with
* masking.
+ *
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
*/
- if (intel->gen >= 6 && src.file == UNIFORM) {
+ if (intel->gen >= 6 && (src.file == UNIFORM ||
+ src.abs ||
+ src.negate)) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
src = expanded;
@@ -696,6 +754,27 @@ fs_visitor::visit(ir_dereference_array *ir)
}
}
+/* Instruction selection: Produce a MOV.sat instead of
+ * MIN(MAX(val, 0), 1) when possible.
+ */
+bool
+fs_visitor::try_emit_saturate(ir_expression *ir)
+{
+ ir_rvalue *sat_val = ir->as_rvalue_to_saturate();
+
+ if (!sat_val)
+ return false;
+
+ sat_val->accept(this);
+ fs_reg src = this->result;
+
+ this->result = fs_reg(this, ir->type);
+ fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src));
+ inst->saturate = true;
+
+ return true;
+}
+
void
fs_visitor::visit(ir_expression *ir)
{
@@ -703,6 +782,11 @@ fs_visitor::visit(ir_expression *ir)
fs_reg op[2], temp;
fs_inst *inst;
+ assert(ir->get_num_operands() <= 2);
+
+ if (try_emit_saturate(ir))
+ return;
+
for (operand = 0; operand < ir->get_num_operands(); operand++) {
ir->operands[operand]->accept(this);
if (this->result.file == BAD_FILE) {
@@ -773,9 +857,11 @@ fs_visitor::visit(ir_expression *ir)
assert(!"not reached: should be handled by ir_explog_to_explog2");
break;
case ir_unop_sin:
+ case ir_unop_sin_reduced:
emit_math(FS_OPCODE_SIN, this->result, op[0]);
break;
case ir_unop_cos:
+ case ir_unop_cos_reduced:
emit_math(FS_OPCODE_COS, this->result, op[0]);
break;
@@ -849,7 +935,6 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_binop_dot:
- case ir_binop_cross:
case ir_unop_any:
assert(!"not reached: should be handled by brw_fs_channel_expressions");
break;
@@ -858,6 +943,10 @@ fs_visitor::visit(ir_expression *ir)
assert(!"not reached: should be handled by lower_noise");
break;
+ case ir_quadop_vector:
+ assert(!"not reached: should be handled by lower_quadop_vector");
+ break;
+
case ir_unop_sqrt:
emit_math(FS_OPCODE_SQRT, this->result, op[0]);
break;
@@ -1348,28 +1437,70 @@ fs_visitor::visit(ir_discard *ir)
void
fs_visitor::visit(ir_constant *ir)
{
- fs_reg reg(this, ir->type);
- this->result = reg;
+ /* Set this->result to reg at the bottom of the function because some code
+ * paths will cause this visitor to be applied to other fields. This will
+ * cause the value stored in this->result to be modified.
+ *
+ * Make reg constant so that it doesn't get accidentally modified along the
+ * way. Yes, I actually had this problem. :(
+ */
+ const fs_reg reg(this, ir->type);
+ fs_reg dst_reg = reg;
- for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
- switch (ir->type->base_type) {
- case GLSL_TYPE_FLOAT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
- break;
- case GLSL_TYPE_UINT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
- break;
- case GLSL_TYPE_INT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
- break;
- case GLSL_TYPE_BOOL:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
- break;
- default:
- assert(!"Non-float/uint/int/bool constant");
+ if (ir->type->is_array()) {
+ const unsigned size = type_size(ir->type->fields.array);
+
+ for (unsigned i = 0; i < ir->type->length; i++) {
+ ir->array_elements[i]->accept(this);
+ fs_reg src_reg = this->result;
+
+ dst_reg.type = src_reg.type;
+ for (unsigned j = 0; j < size; j++) {
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
+ src_reg.reg_offset++;
+ dst_reg.reg_offset++;
+ }
+ }
+ } else if (ir->type->is_record()) {
+ foreach_list(node, &ir->components) {
+ ir_instruction *const field = (ir_instruction *) node;
+ const unsigned size = type_size(field->type);
+
+ field->accept(this);
+ fs_reg src_reg = this->result;
+
+ dst_reg.type = src_reg.type;
+ for (unsigned j = 0; j < size; j++) {
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
+ src_reg.reg_offset++;
+ dst_reg.reg_offset++;
+ }
+ }
+ } else {
+ const unsigned size = type_size(ir->type);
+
+ for (unsigned i = 0; i < size; i++) {
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])));
+ break;
+ case GLSL_TYPE_UINT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])));
+ break;
+ case GLSL_TYPE_INT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])));
+ break;
+ case GLSL_TYPE_BOOL:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])));
+ break;
+ default:
+ assert(!"Non-float/uint/int/bool constant");
+ }
+ dst_reg.reg_offset++;
}
- reg.reg_offset++;
}
+
+ this->result = reg;
}
void
@@ -1381,6 +1512,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
fs_reg op[2];
fs_inst *inst;
+ assert(expr->get_num_operands() <= 2);
for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
assert(expr->operands[i]->type->is_scalar());
@@ -1488,6 +1620,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
fs_inst *inst;
fs_reg temp;
+ assert(expr->get_num_operands() <= 2);
for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
assert(expr->operands[i]->type->is_scalar());
@@ -1497,7 +1630,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
switch (expr->operation) {
case ir_unop_logic_not:
- inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1)));
+ inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
return;
@@ -1874,7 +2007,7 @@ fs_visitor::emit_interpolation_setup_gen6()
emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));
this->current_annotation = "compute 1/pos.w";
- this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
+ this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
this->pixel_w = fs_reg(this, glsl_type::float_type);
emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
@@ -1902,17 +2035,17 @@ fs_visitor::emit_fb_writes()
nr += 2;
}
- if (c->key.aa_dest_stencil_reg) {
+ if (c->aa_dest_stencil_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
+ fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
}
/* Reserve space for color. It'll be filled in per MRT below. */
int color_mrf = nr;
nr += 4;
- if (c->key.source_depth_to_render_target) {
- if (c->key.computes_depth) {
+ if (c->source_depth_to_render_target) {
+ if (c->computes_depth) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth);
fs_reg depth = *(variable_storage(this->frag_depth));
@@ -1921,20 +2054,22 @@ fs_visitor::emit_fb_writes()
} else {
/* Pass through the payload depth. */
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
}
}
- if (c->key.dest_depth_reg) {
+ if (c->dest_depth_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
+ fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
}
fs_reg color = reg_undef;
if (this->frag_color)
color = *(variable_storage(this->frag_color));
- else if (this->frag_data)
+ else if (this->frag_data) {
color = *(variable_storage(this->frag_data));
+ color.type = BRW_REGISTER_TYPE_F;
+ }
for (int target = 0; target < c->key.nr_color_regions; target++) {
this->current_annotation = talloc_asprintf(this->mem_ctx,
@@ -2375,7 +2510,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
void
fs_visitor::assign_curb_setup()
{
- c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
+ c->prog_data.first_curbe_grf = c->nr_payload_regs;
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
/* Map the offsets in the UNIFORM file to fixed HW regs. */
@@ -2515,6 +2650,7 @@ fs_visitor::split_virtual_grfs()
for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
int reg = virtual_grf_alloc(1);
assert(reg == new_virtual_grf[i] + j - 1);
+ (void) reg;
}
this->virtual_grf_sizes[i] = 1;
}
@@ -2768,6 +2904,7 @@ fs_visitor::propagate_constants()
}
break;
case BRW_OPCODE_CMP:
+ case BRW_OPCODE_SEL:
if (i == 1) {
scan_inst->src[i] = inst->src[0];
progress = true;
@@ -2796,26 +2933,17 @@ bool
fs_visitor::dead_code_eliminate()
{
bool progress = false;
- int num_vars = this->virtual_grf_next;
- bool dead[num_vars];
-
- for (int i = 0; i < num_vars; i++) {
- dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i];
-
- if (dead[i]) {
- /* Mark off its interval so it won't interfere with anything. */
- this->virtual_grf_def[i] = -1;
- this->virtual_grf_use[i] = -1;
- }
- }
+ int pc = 0;
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
- if (inst->dst.file == GRF && dead[inst->dst.reg]) {
+ if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
inst->remove();
progress = true;
}
+
+ pc++;
}
return progress;
@@ -2933,11 +3061,60 @@ fs_visitor::compute_to_mrf()
/* Found a move of a GRF to a MRF. Let's see if we can go
* rewrite the thing that made this GRF to write into the MRF.
*/
- bool found = false;
fs_inst *scan_inst;
for (scan_inst = (fs_inst *)inst->prev;
scan_inst->prev != NULL;
scan_inst = (fs_inst *)scan_inst->prev) {
+ if (scan_inst->dst.file == GRF &&
+ scan_inst->dst.reg == inst->src[0].reg) {
+ /* Found the last thing to write our reg we want to turn
+ * into a compute-to-MRF.
+ */
+
+ if (scan_inst->opcode == FS_OPCODE_TEX) {
+ /* texturing writes several continuous regs, so we can't
+ * compute-to-mrf that.
+ */
+ break;
+ }
+
+ /* If it's predicated, it (probably) didn't populate all
+ * the channels.
+ */
+ if (scan_inst->predicated)
+ break;
+
+ /* SEND instructions can't have MRF as a destination. */
+ if (scan_inst->mlen)
+ break;
+
+ if (intel->gen >= 6) {
+ /* gen6 math instructions must have the destination be
+ * GRF, so no compute-to-MRF for them.
+ */
+ if (scan_inst->opcode == FS_OPCODE_RCP ||
+ scan_inst->opcode == FS_OPCODE_RSQ ||
+ scan_inst->opcode == FS_OPCODE_SQRT ||
+ scan_inst->opcode == FS_OPCODE_EXP2 ||
+ scan_inst->opcode == FS_OPCODE_LOG2 ||
+ scan_inst->opcode == FS_OPCODE_SIN ||
+ scan_inst->opcode == FS_OPCODE_COS ||
+ scan_inst->opcode == FS_OPCODE_POW) {
+ break;
+ }
+ }
+
+ if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
+ /* Found the creator of our MRF's source value. */
+ scan_inst->dst.file = MRF;
+ scan_inst->dst.hw_reg = inst->dst.hw_reg;
+ scan_inst->saturate |= inst->saturate;
+ inst->remove();
+ progress = true;
+ }
+ break;
+ }
+
/* We don't handle flow control here. Most computation of
* values that end up in MRFs are shortly before the MRF
* write anyway.
@@ -2971,71 +3148,88 @@ fs_visitor::compute_to_mrf()
}
if (scan_inst->mlen > 0) {
- /* Found a SEND instruction, which will do some amount of
- * implied write that may overwrite our MRF that we were
- * hoping to compute-to-MRF somewhere above it. Nothing
- * we have implied-writes more than 2 MRFs from base_mrf,
- * though.
+ /* Found a SEND instruction, which means that there are
+ * live values in MRFs from base_mrf to base_mrf +
+ * scan_inst->mlen - 1. Don't go pushing our MRF write up
+ * above it.
*/
- int implied_write_len = MIN2(scan_inst->mlen, 2);
if (inst->dst.hw_reg >= scan_inst->base_mrf &&
- inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) {
+ inst->dst.hw_reg < scan_inst->base_mrf + scan_inst->mlen) {
break;
}
}
+ }
+ }
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg == inst->src[0].reg) {
- /* Found the last thing to write our reg we want to turn
- * into a compute-to-MRF.
- */
+ return progress;
+}
- if (scan_inst->opcode == FS_OPCODE_TEX) {
- /* texturing writes several continuous regs, so we can't
- * compute-to-mrf that.
- */
- break;
- }
+/**
+ * Walks through basic blocks, locking for repeated MRF writes and
+ * removing the later ones.
+ */
+bool
+fs_visitor::remove_duplicate_mrf_writes()
+{
+ fs_inst *last_mrf_move[16];
+ bool progress = false;
- /* If it's predicated, it (probably) didn't populate all
- * the channels.
- */
- if (scan_inst->predicated)
- break;
+ memset(last_mrf_move, 0, sizeof(last_mrf_move));
- /* SEND instructions can't have MRF as a destination. */
- if (scan_inst->mlen)
- break;
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
- if (intel->gen >= 6) {
- /* gen6 math instructions must have the destination be
- * GRF, so no compute-to-MRF for them.
- */
- if (scan_inst->opcode == FS_OPCODE_RCP ||
- scan_inst->opcode == FS_OPCODE_RSQ ||
- scan_inst->opcode == FS_OPCODE_SQRT ||
- scan_inst->opcode == FS_OPCODE_EXP2 ||
- scan_inst->opcode == FS_OPCODE_LOG2 ||
- scan_inst->opcode == FS_OPCODE_SIN ||
- scan_inst->opcode == FS_OPCODE_COS ||
- scan_inst->opcode == FS_OPCODE_POW) {
- break;
- }
- }
+ switch (inst->opcode) {
+ case BRW_OPCODE_DO:
+ case BRW_OPCODE_WHILE:
+ case BRW_OPCODE_IF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_ENDIF:
+ memset(last_mrf_move, 0, sizeof(last_mrf_move));
+ continue;
+ default:
+ break;
+ }
- if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
- /* Found the creator of our MRF's source value. */
- found = true;
- break;
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->dst.file == MRF) {
+ fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+ if (prev_inst && inst->equals(prev_inst)) {
+ inst->remove();
+ progress = true;
+ continue;
+ }
+ }
+
+ /* Clear out the last-write records for MRFs that were overwritten. */
+ if (inst->dst.file == MRF) {
+ last_mrf_move[inst->dst.hw_reg] = NULL;
+ }
+
+ if (inst->mlen > 0) {
+ /* Found a SEND instruction, which will include two of fewer
+ * implied MRF writes. We could do better here.
+ */
+ for (int i = 0; i < implied_mrf_writes(inst); i++) {
+ last_mrf_move[inst->base_mrf + i] = NULL;
+ }
+ }
+
+ /* Clear out any MRF move records whose sources got overwritten. */
+ if (inst->dst.file == GRF) {
+ for (unsigned int i = 0; i < Elements(last_mrf_move); i++) {
+ if (last_mrf_move[i] &&
+ last_mrf_move[i]->src[0].reg == inst->dst.reg) {
+ last_mrf_move[i] = NULL;
}
}
}
- if (found) {
- scan_inst->dst.file = MRF;
- scan_inst->dst.hw_reg = inst->dst.hw_reg;
- scan_inst->saturate |= inst->saturate;
- inst->remove();
- progress = true;
+
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->dst.file == MRF &&
+ inst->src[0].file == GRF &&
+ !inst->predicated) {
+ last_mrf_move[inst->dst.hw_reg] = inst;
}
}
@@ -3091,6 +3285,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
break;
default:
assert(!"not reached");
+ brw_reg = brw_null_reg();
break;
}
break;
@@ -3105,6 +3300,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
assert(!"not reached");
brw_reg = brw_null_reg();
break;
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
}
if (reg->abs)
brw_reg = brw_abs(brw_reg);
@@ -3159,6 +3358,7 @@ fs_visitor::generate_code()
brw_set_conditionalmod(p, inst->conditional_mod);
brw_set_predicate_control(p, inst->predicated);
+ brw_set_saturate(p, inst->saturate);
switch (inst->opcode) {
case BRW_OPCODE_MOV:
@@ -3245,7 +3445,11 @@ fs_visitor::generate_code()
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
- brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ /* FINISHME: We need to write the loop instruction support still. */
+ if (intel->gen >= 6)
+ brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]);
+ else
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
@@ -3259,16 +3463,18 @@ fs_visitor::generate_code()
assert(loop_stack_depth > 0);
loop_stack_depth--;
inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_stack[loop_stack_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
}
}
}
@@ -3340,12 +3546,31 @@ fs_visitor::generate_code()
((uint32_t *)&p->store[i])[0]);
}
brw_disasm(stdout, &p->store[i], intel->gen);
- printf("\n");
}
}
last_native_inst = p->nr_insn;
}
+
+ brw_set_uip_jip(p);
+
+ /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0) {
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+ }
}
GLboolean
@@ -3410,6 +3635,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
bool progress;
do {
progress = false;
+
+ progress = v.remove_duplicate_mrf_writes() || progress;
+
v.calculate_live_intervals();
progress = v.propagate_constants() || progress;
progress = v.register_coalesce() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 9b7fcde858..de7b15312a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -96,10 +96,7 @@ public:
void init()
{
- this->reg = 0;
- this->reg_offset = 0;
- this->negate = 0;
- this->abs = 0;
+ memset(this, 0, sizeof(*this));
this->hw_reg = -1;
this->smear = -1;
}
@@ -151,6 +148,21 @@ public:
fs_reg(enum register_file file, int hw_reg, uint32_t type);
fs_reg(class fs_visitor *v, const struct glsl_type *type);
+ bool equals(fs_reg *r)
+ {
+ return (file == r->file &&
+ reg == r->reg &&
+ reg_offset == r->reg_offset &&
+ hw_reg == r->hw_reg &&
+ type == r->type &&
+ negate == r->negate &&
+ abs == r->abs &&
+ memcmp(&fixed_hw_reg, &r->fixed_hw_reg,
+ sizeof(fixed_hw_reg)) == 0 &&
+ smear == r->smear &&
+ imm.u == r->imm.u);
+ }
+
/** Register file: ARF, GRF, MRF, IMM. */
enum register_file file;
/** virtual register number. 0 = fixed hw reg */
@@ -174,6 +186,10 @@ public:
} imm;
};
+static const fs_reg reg_undef;
+static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F);
+static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
+
class fs_inst : public exec_node {
public:
/* Callers of this talloc-based new need not call delete. It's
@@ -190,18 +206,14 @@ public:
void init()
{
+ memset(this, 0, sizeof(*this));
this->opcode = BRW_OPCODE_NOP;
- this->saturate = false;
this->conditional_mod = BRW_CONDITIONAL_NONE;
- this->predicated = false;
- this->sampler = 0;
- this->target = 0;
- this->eot = false;
- this->header_present = false;
- this->shadow_compare = false;
- this->mlen = 0;
- this->base_mrf = 0;
- this->offset = 0;
+
+ this->dst = reg_undef;
+ this->src[0] = reg_undef;
+ this->src[1] = reg_undef;
+ this->src[2] = reg_undef;
}
fs_inst()
@@ -273,6 +285,26 @@ public:
assert(src[2].reg_offset >= 0);
}
+ bool equals(fs_inst *inst)
+ {
+ return (opcode == inst->opcode &&
+ dst.equals(&inst->dst) &&
+ src[0].equals(&inst->src[0]) &&
+ src[1].equals(&inst->src[1]) &&
+ src[2].equals(&inst->src[2]) &&
+ saturate == inst->saturate &&
+ predicated == inst->predicated &&
+ conditional_mod == inst->conditional_mod &&
+ mlen == inst->mlen &&
+ base_mrf == inst->base_mrf &&
+ sampler == inst->sampler &&
+ target == inst->target &&
+ eot == inst->eot &&
+ header_present == inst->header_present &&
+ shadow_compare == inst->shadow_compare &&
+ offset == inst->offset);
+ }
+
int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
fs_reg dst;
fs_reg src[3];
@@ -375,6 +407,7 @@ public:
bool register_coalesce();
bool compute_to_mrf();
bool dead_code_eliminate();
+ bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
void generate_code();
void generate_fb_write(fs_inst *inst);
@@ -400,6 +433,7 @@ public:
fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
+ bool try_emit_saturate(ir_expression *ir);
void emit_bool_to_cond_code(ir_rvalue *condition);
void emit_if_gen6(ir_if *ir);
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
@@ -411,6 +445,7 @@ public:
struct brw_reg interp_reg(int location, int channel);
int setup_uniform_values(int loc, const glsl_type *type);
void setup_builtin_uniform_values(ir_variable *ir);
+ int implied_mrf_writes(fs_inst *inst);
struct brw_context *brw;
const struct gl_fragment_program *fp;
@@ -454,9 +489,5 @@ public:
int grf_used;
};
-static const fs_reg reg_undef;
-static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F);
-static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
-
GLboolean brw_do_channel_expressions(struct exec_list *instructions);
GLboolean brw_do_vector_splitting(struct exec_list *instructions);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 2a6da4058b..20bfa4c3ea 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -205,6 +205,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_round_even:
case ir_unop_sin:
case ir_unop_cos:
+ case ir_unop_sin_reduced:
+ case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdy:
for (i = 0; i < vector_elements; i++) {
@@ -288,34 +290,6 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
break;
}
- case ir_binop_cross: {
- for (i = 0; i < vector_elements; i++) {
- int swiz0 = (i + 1) % 3;
- int swiz1 = (i + 2) % 3;
- ir_expression *temp1, *temp2;
-
- temp1 = new(mem_ctx) ir_expression(ir_binop_mul,
- element_type,
- get_element(op_var[0], swiz0),
- get_element(op_var[1], swiz1));
-
- temp2 = new(mem_ctx) ir_expression(ir_binop_mul,
- element_type,
- get_element(op_var[1], swiz0),
- get_element(op_var[0], swiz1));
-
- temp2 = new(mem_ctx) ir_expression(ir_unop_neg,
- element_type,
- temp2,
- NULL);
-
- assign(ir, i, new(mem_ctx) ir_expression(ir_binop_add,
- element_type,
- temp1, temp2));
- }
- break;
- }
-
case ir_binop_logic_and:
case ir_binop_logic_xor:
case ir_binop_logic_or:
@@ -356,6 +330,9 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_noise:
assert(!"noise should have been broken down to function call");
break;
+ case ir_quadop_vector:
+ assert(!"should have been lowered");
+ break;
}
ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index b0c76f4094..73b41fdbce 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -166,6 +166,9 @@ static void populate_key( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
struct gl_context *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ int prim_gs_always;
+
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG */
@@ -185,10 +188,14 @@ static void populate_key( struct brw_context *brw,
key->pv_first = GL_TRUE;
}
- key->need_gs_prog = (key->hint_gs_always ||
- brw->primitive == GL_QUADS ||
+ if (intel->gen == 6)
+ prim_gs_always = brw->primitive == GL_LINE_LOOP;
+ else
+ prim_gs_always = brw->primitive == GL_QUADS ||
brw->primitive == GL_QUAD_STRIP ||
- brw->primitive == GL_LINE_LOOP);
+ brw->primitive == GL_LINE_LOOP;
+
+ key->need_gs_prog = (key->hint_gs_always || prim_gs_always);
}
/* Calculate interpolants for triangle and line rasterization.
@@ -205,8 +212,10 @@ static void prepare_gs_prog(struct brw_context *brw)
brw->gs.prog_active = key.need_gs_prog;
}
+ drm_intel_bo_unreference(brw->gs.prog_bo);
+ brw->gs.prog_bo = NULL;
+
if (brw->gs.prog_active) {
- drm_intel_bo_unreference(brw->gs.prog_bo);
brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
&key, sizeof(key),
NULL, 0,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 1d350bc041..a91b0528fa 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -38,40 +38,6 @@
#include "brw_state.h"
#include "brw_defines.h"
-
-
-
-
-/***********************************************************************
- * Blend color
- */
-
-static void upload_blend_constant_color(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->intel.ctx;
- struct brw_blend_constant_color bcc;
-
- memset(&bcc, 0, sizeof(bcc));
- bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
- bcc.header.length = sizeof(bcc)/4-2;
- bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
- bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
- bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
- bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
-
- BRW_CACHED_BATCH_STRUCT(brw, &bcc);
-}
-
-
-const struct brw_tracked_state brw_blend_constant_color = {
- .dirty = {
- .mesa = _NEW_COLOR,
- .brw = BRW_NEW_CONTEXT,
- .cache = 0
- },
- .emit = upload_blend_constant_color
-};
-
/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
@@ -339,6 +305,9 @@ static void upload_polygon_stipple(struct brw_context *brw)
struct brw_polygon_stipple bps;
GLuint i;
+ if (!ctx->Polygon.StippleFlag)
+ return;
+
memset(&bps, 0, sizeof(bps));
bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
bps.header.length = sizeof(bps)/4-2;
@@ -381,6 +350,9 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
struct gl_context *ctx = &brw->intel.ctx;
struct brw_polygon_stipple_offset bpso;
+ if (!ctx->Polygon.StippleFlag)
+ return;
+
memset(&bpso, 0, sizeof(bpso));
bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
bpso.header.length = sizeof(bpso)/4-2;
@@ -409,7 +381,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
const struct brw_tracked_state brw_polygon_stipple_offset = {
.dirty = {
- .mesa = _NEW_WINDOW_POS,
+ .mesa = _NEW_WINDOW_POS | _NEW_POLYGONSTIPPLE,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
@@ -421,9 +393,10 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
*/
static void upload_aa_line_parameters(struct brw_context *brw)
{
+ struct gl_context *ctx = &brw->intel.ctx;
struct brw_aa_line_parameters balp;
- if (!brw->has_aa_line_parameters)
+ if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
return;
/* use legacy aa line coverage computation */
@@ -436,7 +409,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
const struct brw_tracked_state brw_aa_line_parameters = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_LINE,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
@@ -454,6 +427,9 @@ static void upload_line_stipple(struct brw_context *brw)
GLfloat tmp;
GLint tmpi;
+ if (!ctx->Line.StippleFlag)
+ return;
+
memset(&bls, 0, sizeof(bls));
bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
bls.header.length = sizeof(bls)/4 - 2;
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 1367d81469..94efa79109 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -142,7 +142,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
newFP->id = brw->program_id++;
- newFP->isGLSL = brw_wm_is_glsl(fprog);
/* Don't reject fragment shaders for their Mesa IR state when we're
* using the new FS backend.
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 3beed16945..4bb93e7336 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -164,7 +164,8 @@ void brw_destroy_caches( struct brw_context *brw );
/***********************************************************************
* brw_state_batch.c
*/
-#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)))
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(brw->intel.batch, (s), \
+ sizeof(*(s)), false)
#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
GLboolean brw_cached_batch_struct( struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index be3989eb7d..a21af13caa 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
struct header *newheader = (struct header *)data;
if (brw->emit_state_always) {
- intel_batchbuffer_data(brw->intel.batch, data, sz);
+ intel_batchbuffer_data(brw->intel.batch, data, sz, false);
return GL_TRUE;
}
@@ -75,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
emit:
memcpy(item->header, newheader, sz);
- intel_batchbuffer_data(brw->intel.batch, data, sz);
+ intel_batchbuffer_data(brw->intel.batch, data, sz, false);
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b79b33c2e3..e262887471 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -107,14 +107,14 @@ static void dump_wm_surface_state(struct brw_context *brw)
char name[20];
if (surf_bo == NULL) {
- fprintf(stderr, " WM SS%d: NULL\n", i);
+ fprintf(stderr, "WM SURF%d: NULL\n", i);
continue;
}
drm_intel_bo_map(surf_bo, GL_FALSE);
surfoff = surf_bo->offset + brw->wm.surf_offset[i];
surf = (struct brw_surface_state *)(surf_bo->virtual + brw->wm.surf_offset[i]);
- sprintf(name, "WM SS%d", i);
+ sprintf(name, "WM SURF%d", i);
state_out(name, surf, surfoff, 0, "%s %s\n",
get_965_surfacetype(surf->ss0.surface_type),
get_965_surface_format(surf->ss0.surface_format));
@@ -132,6 +132,53 @@ static void dump_wm_surface_state(struct brw_context *brw)
}
}
+
+static void dump_wm_sampler_state(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->intel.ctx;
+ int i;
+
+ if (!brw->wm.sampler_bo) {
+ fprintf(stderr, "WM_SAMPLER: NULL\n");
+ return;
+ }
+
+ drm_intel_bo_map(brw->wm.sampler_bo, GL_FALSE);
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ unsigned int offset;
+ struct brw_sampler_state *samp;
+ struct brw_sampler_default_color *sdc;
+ char name[20];
+
+ if (!ctx->Texture.Unit[i]._ReallyEnabled) {
+ fprintf(stderr, "WM SAMP%d: disabled\n", i);
+ continue;
+ }
+
+ offset = brw->wm.sampler_bo->offset +
+ i * sizeof(struct brw_sampler_state);
+ samp = (struct brw_sampler_state *)(brw->wm.sampler_bo->virtual +
+ i * sizeof(struct brw_sampler_state));
+
+ sprintf(name, "WM SAMP%d", i);
+ state_out(name, samp, offset, 0, "filtering\n");
+ state_out(name, samp, offset, 1, "wrapping, lod\n");
+ state_out(name, samp, offset, 2, "default color pointer\n");
+ state_out(name, samp, offset, 3, "chroma key, aniso\n");
+
+ sprintf(name, " WM SDC%d", i);
+
+ drm_intel_bo_map(brw->wm.sdc_bo[i], GL_FALSE);
+ sdc = (struct brw_sampler_default_color *)(brw->wm.sdc_bo[i]->virtual);
+ state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 0, "r\n");
+ state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 1, "g\n");
+ state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 2, "b\n");
+ state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 3, "a\n");
+ drm_intel_bo_unmap(brw->wm.sdc_bo[i]);
+ }
+ drm_intel_bo_unmap(brw->wm.sampler_bo);
+}
+
static void dump_sf_viewport_state(struct brw_context *brw)
{
const char *name = "SF VP";
@@ -324,6 +371,7 @@ void brw_debug_batch(struct intel_context *intel)
state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
dump_wm_surface_state(brw);
+ dump_wm_sampler_state(brw);
if (intel->gen < 6)
state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 338f3876b3..eba4411ca7 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -129,7 +129,7 @@ const struct brw_tracked_state *gen6_atoms[] =
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
&brw_wm_constants, /* Before wm_surfaces and constant_buffer */
- &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */
+ &gen6_wm_constants, /* Before wm_state */
&brw_vs_surfaces, /* must do before unit */
&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 8ce9af9c4f..461f27048c 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1064,6 +1064,15 @@ struct brw_sampler_default_color {
GLfloat color[4];
};
+struct gen5_sampler_default_color {
+ uint8_t ub[4];
+ float f[4];
+ uint16_t hf[4];
+ uint16_t us[4];
+ int16_t s[4];
+ uint8_t b[4];
+};
+
struct brw_sampler_state
{
@@ -1169,7 +1178,12 @@ struct brw_surface_state
GLuint cube_neg_y:1;
GLuint cube_pos_x:1;
GLuint cube_neg_x:1;
- GLuint pad:4;
+ GLuint pad:2;
+ /* Required on gen6 for surfaces accessed through render cache messages.
+ */
+ GLuint render_cache_read_write:1;
+ /* Ironlake and newer: instead of replicating one of the texels */
+ GLuint cube_corner_average:1;
GLuint mipmap_layout_mode:1;
GLuint vert_line_stride_ofs:1;
GLuint vert_line_stride:1;
@@ -1539,6 +1553,21 @@ struct brw_instruction
GLuint pad0:12;
} if_else;
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
struct {
GLuint function:4;
GLuint int_type:1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 4a41c7a517..6ae75d22c1 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -99,8 +99,8 @@ static void do_vs_prog( struct brw_context *brw,
(void) ctx;
aux_size = sizeof(c.prog_data);
- if (c.vp->use_const_buffer)
- aux_size += c.vp->program.Base.Parameters->NumParameters;
+ /* constant_map */
+ aux_size += c.vp->program.Base.Parameters->NumParameters;
drm_intel_bo_unreference(brw->vs.prog_bo);
brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
@@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
+ key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_POINT */
if (ctx->Point.PointSprite) {
@@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT,
+ .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 9338a6b7db..0b88cc1ec7 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -44,6 +44,7 @@ struct brw_vs_prog_key {
GLuint nr_userclip:4;
GLuint copy_edgeflag:1;
GLuint point_coord_replace:8;
+ GLuint two_side_color: 1;
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 7e43324a1f..326bb1e562 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -140,9 +140,13 @@ clear_current_const(struct brw_vs_compile *c)
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
struct intel_context *intel = &c->func.brw->intel;
- GLuint i, reg = 0, mrf;
+ GLuint i, reg = 0, mrf, j;
int attributes_in_vue;
int first_reladdr_output;
+ int max_constant;
+ int constant = 0;
+ int vert_result_reoder[VERT_RESULT_MAX];
+ int bfc = 0;
/* Determine whether to use a real constant buffer or use a block
* of GRF registers for constants. The later is faster but only
@@ -181,62 +185,81 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
- /* Vertex program parameters from curbe:
+ /* Assign some (probably all) of the vertex program constants to
+ * the push constant buffer/CURBE.
+ *
+ * There's an obvious limit to the numer of push constants equal to
+ * the number of register available, and that number is smaller
+ * than the minimum maximum number of vertex program parameters, so
+ * support for pull constants is required if we overflow.
+ * Additionally, on gen6 the number of push constants is even
+ * lower.
+ *
+ * When there's relative addressing, we don't know what range of
+ * Mesa IR registers can be accessed. And generally, when relative
+ * addressing is used we also have too many constants to load them
+ * all as push constants. So, we'll just support relative
+ * addressing out of the pull constant buffers, and try to load as
+ * many statically-accessed constants into the push constant buffer
+ * as we can.
*/
- if (c->vp->use_const_buffer) {
- int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
- int constant = 0;
-
- /* We've got more constants than we can load with the push
- * mechanism. This is often correlated with reladdr loads where
- * we should probably be using a pull mechanism anyway to avoid
- * excessive reading. However, the pull mechanism is slow in
- * general. So, we try to allocate as many non-reladdr-loaded
- * constants through the push buffer as we can before giving up.
- */
- memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
- for (i = 0;
- i < c->vp->program.Base.NumInstructions && constant < max_constant;
- i++) {
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
- int arg;
-
- for (arg = 0; arg < 3 && constant < max_constant; arg++) {
- if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
- inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
- inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
- inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
- inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) ||
- inst->SrcReg[arg].RelAddr)
- continue;
-
- if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
- c->constant_map[inst->SrcReg[arg].Index] = constant++;
- }
+ if (intel->gen >= 6) {
+ /* We can only load 32 regs of push constants. */
+ max_constant = 32 * 2 - c->key.nr_userclip;
+ } else {
+ max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
+ }
+
+ /* constant_map maps from ParameterValues[] index to index in the
+ * push constant buffer, or -1 if it's only in the pull constant
+ * buffer.
+ */
+ memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
+ for (i = 0;
+ i < c->vp->program.Base.NumInstructions && constant < max_constant;
+ i++) {
+ struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
+ int arg;
+
+ for (arg = 0; arg < 3 && constant < max_constant; arg++) {
+ if (inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
+ inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
+ inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
+ inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
+ inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) {
+ continue;
}
- }
- for (i = 0; i < constant; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2,
- (i%2) * 4),
- 0, 4, 1);
+ if (inst->SrcReg[arg].RelAddr) {
+ c->vp->use_const_buffer = GL_TRUE;
+ continue;
+ }
+
+ if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
+ c->constant_map[inst->SrcReg[arg].Index] = constant++;
+ }
}
- reg += (constant + 1) / 2;
- c->prog_data.curb_read_length = reg - 1;
- /* XXX 0 causes a bug elsewhere... */
- c->prog_data.nr_params = MAX2(constant * 4, 4);
}
- else {
- /* use a section of the GRF for constants */
- GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
- }
- reg += (nr_params + 1) / 2;
- c->prog_data.curb_read_length = reg - 1;
- c->prog_data.nr_params = nr_params * 4;
+ /* If we ran out of push constant space, then we'll also upload all
+ * constants through the pull constant buffer so that they can be
+ * accessed no matter what. For relative addressing (the common
+ * case) we need them all in place anyway.
+ */
+ if (constant == max_constant)
+ c->vp->use_const_buffer = GL_TRUE;
+
+ for (i = 0; i < constant; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride(brw_vec4_grf(reg + i / 2,
+ (i % 2) * 4),
+ 0, 4, 1);
}
+ reg += (constant + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+ c->prog_data.nr_params = constant * 4;
+ /* XXX 0 causes a bug elsewhere... */
+ if (intel->gen < 6 && c->prog_data.nr_params == 0)
+ c->prog_data.nr_params = 4;
/* Allocate input regs:
*/
@@ -270,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf = 4;
first_reladdr_output = get_first_reladdr_output(&c->vp->program);
- for (i = 0; i < VERT_RESULT_MAX; i++) {
+
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ vert_result_reoder[i] = i;
+
+ /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */
+ if (intel->gen >= 6 && c->key.two_side_color) {
+ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+ (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+ assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+ assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+ bfc = 2;
+ } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+ (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)))
+ bfc = 1;
+
+ if (bfc) {
+ for (i = 0; i < bfc; i++) {
+ vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i;
+ vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i;
+ }
+
+ for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) {
+ vert_result_reoder[i] = i - bfc;
+ }
+ }
+ }
+
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ i = vert_result_reoder[j];
+
if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
c->nr_outputs++;
assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
@@ -281,7 +333,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
else if (i == VERT_RESULT_PSIZ) {
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
- mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
else {
/* Two restrictions on our compute-to-MRF here. The
@@ -574,9 +625,18 @@ static void emit_max( struct brw_compile *p,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1);
- brw_SEL(p, dst, arg0, arg1);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ } else {
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
}
static void emit_min( struct brw_compile *p,
@@ -584,9 +644,34 @@ static void emit_min( struct brw_compile *p,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
- brw_SEL(p, dst, arg0, arg1);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ } else {
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+}
+
+static void emit_arl(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6) {
+ struct brw_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
+
+ brw_RNDD(p, dst_f, src);
+ brw_MOV(p, dst, dst_f);
+ } else {
+ brw_RNDD(p, dst, src);
+ }
}
static void emit_math1_gen4(struct brw_vs_compile *c,
@@ -680,7 +765,7 @@ emit_math1(struct brw_vs_compile *c,
emit_math1_gen4(c, function, dst, arg0, precision);
}
-static void emit_math2( struct brw_vs_compile *c,
+static void emit_math2_gen4( struct brw_vs_compile *c,
GLuint function,
struct brw_reg dst,
struct brw_reg arg0,
@@ -688,14 +773,11 @@ static void emit_math2( struct brw_vs_compile *c,
GLuint precision)
{
struct brw_compile *p = &c->func;
- struct intel_context *intel = &p->brw->intel;
struct brw_reg tmp = dst;
GLboolean need_tmp = GL_FALSE;
- if (dst.file != BRW_GENERAL_REGISTER_FILE)
- need_tmp = GL_TRUE;
-
- if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf)
+ if (dst.file != BRW_GENERAL_REGISTER_FILE ||
+ dst.dw1.bits.writemask != 0xf)
need_tmp = GL_TRUE;
if (need_tmp)
@@ -718,6 +800,53 @@ static void emit_math2( struct brw_vs_compile *c,
}
}
+static void emit_math2_gen6( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint precision)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp_src0, tmp_src1, tmp_dst;
+
+ tmp_src0 = get_tmp(c);
+ tmp_src1 = get_tmp(c);
+ tmp_dst = get_tmp(c);
+
+ brw_MOV(p, tmp_src0, arg0);
+ brw_MOV(p, tmp_src1, arg1);
+
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_math2(p,
+ tmp_dst,
+ function,
+ tmp_src0,
+ tmp_src1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ brw_MOV(p, dst, tmp_dst);
+
+ release_tmp(c, tmp_src0);
+ release_tmp(c, tmp_src1);
+ release_tmp(c, tmp_dst);
+}
+
+static void emit_math2( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint precision)
+{
+ struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6)
+ emit_math2_gen6(c, function, dst, arg0, arg1, precision);
+ else
+ emit_math2_gen4(c, function, dst, arg0, arg1, precision);
+}
static void emit_exp_noalias( struct brw_vs_compile *c,
struct brw_reg dst,
@@ -990,8 +1119,6 @@ get_constant(struct brw_vs_compile *c,
assert(argIndex < 3);
- assert(c->func.brw->intel.gen < 6); /* FINISHME */
-
if (c->current_const[argIndex].index != src->Index) {
/* Keep track of the last constant loaded in this slot, for reuse. */
c->current_const[argIndex].index = src->Index;
@@ -1022,14 +1149,14 @@ get_reladdr_constant(struct brw_vs_compile *c,
{
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
struct brw_reg const_reg = c->current_const[argIndex].reg;
- struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
- struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ uint32_t offset;
assert(argIndex < 3);
- assert(c->func.brw->intel.gen < 6); /* FINISHME */
-
/* Can't reuse a reladdr constant load. */
c->current_const[argIndex].index = -1;
@@ -1038,15 +1165,21 @@ get_reladdr_constant(struct brw_vs_compile *c,
src->Index, argIndex, c->current_const[argIndex].reg.nr);
#endif
- brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16));
+ if (intel->gen >= 6) {
+ offset = src->Index;
+ } else {
+ struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D);
+ brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16));
+ addr_reg = byte_addr_reg;
+ offset = 16 * src->Index;
+ }
/* fetch the first vec4 */
brw_dp_READ_4_vs_relative(p,
- const_reg, /* writeback dest */
- byte_addr_reg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
- );
+ const_reg,
+ addr_reg,
+ offset,
+ SURF_INDEX_VERT_CONST_BUFFER);
return const_reg;
}
@@ -1241,22 +1374,18 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_UNIFORM:
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
- if (c->vp->use_const_buffer) {
- if (!relAddr && c->constant_map[index] != -1) {
- assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
- } else if (relAddr)
+ if (!relAddr && c->constant_map[index] != -1) {
+ /* Take from the push constant buffer if possible. */
+ assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
+ } else {
+ /* Must be in the pull constant buffer then .*/
+ assert(c->vp->use_const_buffer);
+ if (relAddr)
return get_reladdr_constant(c, inst, argIndex);
else
return get_constant(c, inst, argIndex);
}
- else if (relAddr) {
- return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16);
- }
- else {
- assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][index];
- }
case PROGRAM_ADDRESS:
assert(index == 0);
return c->regs[file][index];
@@ -1585,6 +1714,8 @@ static void emit_vertex_write( struct brw_vs_compile *c)
break;
if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i)))
continue;
+ if (i == VERT_RESULT_PSIZ)
+ continue;
if (i >= VERT_RESULT_TEX0 &&
c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) {
@@ -1848,7 +1979,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_ARL:
- brw_RNDD(p, dst, args[0]);
+ emit_arl(p, dst, args[0]);
break;
case OPCODE_FLR:
brw_RNDD(p, dst, args[0]);
@@ -1895,7 +2026,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, brw_abs(args[0]), BRW_MATH_PRECISION_FULL);
break;
case OPCODE_SEQ:
@@ -1969,35 +2100,42 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_CONT:
brw_set_predicate_control(p, get_predicate(inst));
- brw_CONT(p, if_depth_in_loop[loop_depth]);
+ if (intel->gen >= 6) {
+ brw_CONT_gen6(p, loop_inst[loop_depth - 1]);
+ } else {
+ brw_CONT(p, if_depth_in_loop[loop_depth]);
+ }
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case OPCODE_ENDLOOP:
- {
- clear_current_const(c);
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- loop_depth--;
-
- if (intel->gen == 5)
- br = 2;
-
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BEGINLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+
+ case OPCODE_ENDLOOP: {
+ clear_current_const(c);
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ loop_depth--;
+
+ if (intel->gen == 5)
+ br = 2;
+
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ }
break;
+
case OPCODE_BRA:
brw_set_predicate_control(p, get_predicate(inst));
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
@@ -2088,6 +2226,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
}
brw_resolve_cals(p);
+ brw_set_uip_jip(p);
brw_optimize(p);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index a6d2a2377f..656501b4f7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_emit(c);
}
+static void
+brw_wm_payload_setup(struct brw_context *brw,
+ struct brw_wm_compile *c)
+{
+ struct intel_context *intel = &brw->intel;
+ bool uses_depth = (c->fp->program.Base.InputsRead &
+ (1 << FRAG_ATTRIB_WPOS)) != 0;
+
+ if (intel->gen >= 6) {
+ /* R0-1: masks, pixel X/Y coordinates. */
+ c->nr_payload_regs = 2;
+ /* R2: only for 32-pixel dispatch.*/
+ /* R3-4: perspective pixel location barycentric */
+ c->nr_payload_regs += 2;
+ /* R5-6: perspective pixel location bary for dispatch width != 8 */
+ if (c->dispatch_width == 16) {
+ c->nr_payload_regs += 2;
+ }
+ /* R7-10: perspective centroid barycentric */
+ /* R11-14: perspective sample barycentric */
+ /* R15-18: linear pixel location barycentric */
+ /* R19-22: linear centroid barycentric */
+ /* R23-26: linear sample barycentric */
+
+ /* R27: interpolated depth if uses source depth */
+ if (uses_depth) {
+ c->source_depth_reg = c->nr_payload_regs;
+ c->nr_payload_regs++;
+ if (c->dispatch_width == 16) {
+ /* R28: interpolated depth if not 8-wide. */
+ c->nr_payload_regs++;
+ }
+ }
+ /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
+ */
+ if (uses_depth) {
+ c->source_w_reg = c->nr_payload_regs;
+ c->nr_payload_regs++;
+ if (c->dispatch_width == 16) {
+ /* R30: interpolated W if not 8-wide. */
+ c->nr_payload_regs++;
+ }
+ }
+ /* R31: MSAA position offsets. */
+ /* R32-: bary for 32-pixel. */
+ /* R58-59: interp W for 32-pixel. */
+
+ if (c->fp->program.Base.OutputsWritten &
+ BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ c->source_depth_to_render_target = GL_TRUE;
+ c->computes_depth = GL_TRUE;
+ }
+ } else {
+ brw_wm_lookup_iz(intel, c);
+ }
+}
/**
* All Mesa program -> GPU code generation goes through this function.
@@ -167,23 +223,18 @@ static void do_wm_prog( struct brw_context *brw,
brw_init_compile(brw, &c->func);
- /* temporary sanity check assertion */
- ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+ brw_wm_payload_setup(brw, c);
if (!brw_wm_fs_emit(brw, c)) {
/*
* Shader which use GLSL features such as flow control are handled
* differently from "simple" shaders.
*/
- if (fp->isGLSL) {
- c->dispatch_width = 8;
- brw_wm_glsl_emit(brw, c);
- }
- else {
- c->dispatch_width = 16;
- brw_wm_non_glsl_emit(brw, c);
- }
+ c->dispatch_width = 16;
+ brw_wm_payload_setup(brw, c);
+ brw_wm_non_glsl_emit(brw, c);
}
+ c->prog_data.dispatch_width = c->dispatch_width;
/* Scratch space is used for register spilling */
if (c->last_scratch) {
@@ -220,12 +271,10 @@ static void do_wm_prog( struct brw_context *brw,
static void brw_wm_populate_key( struct brw_context *brw,
struct brw_wm_prog_key *key )
{
- struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
- GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
GLuint lookup = 0;
GLuint line_aa;
GLuint i;
@@ -285,57 +334,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
}
}
- if (intel->gen >= 6) {
- /* R0-1: masks, pixel X/Y coordinates. */
- key->nr_payload_regs = 2;
- /* R2: only for 32-pixel dispatch.*/
- /* R3-4: perspective pixel location barycentric */
- key->nr_payload_regs += 2;
- /* R5-6: perspective pixel location bary for dispatch width != 8 */
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- key->nr_payload_regs += 2;
- }
- /* R7-10: perspective centroid barycentric */
- /* R11-14: perspective sample barycentric */
- /* R15-18: linear pixel location barycentric */
- /* R19-22: linear centroid barycentric */
- /* R23-26: linear sample barycentric */
-
- /* R27: interpolated depth if uses source depth */
- if (uses_depth) {
- key->source_depth_reg = key->nr_payload_regs;
- key->nr_payload_regs++;
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- /* R28: interpolated depth if not 8-wide. */
- key->nr_payload_regs++;
- }
- }
- /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
- */
- if (uses_depth) {
- key->source_w_reg = key->nr_payload_regs;
- key->nr_payload_regs++;
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- /* R30: interpolated W if not 8-wide. */
- key->nr_payload_regs++;
- }
- }
- /* R31: MSAA position offsets. */
- /* R32-: bary for 32-pixel. */
- /* R58-59: interp W for 32-pixel. */
-
- if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
- key->source_depth_to_render_target = GL_TRUE;
- key->computes_depth = GL_TRUE;
- }
-
- } else {
- brw_wm_lookup_iz(intel,
- line_aa,
- lookup,
- uses_depth,
- key);
- }
+ key->iz_lookup = lookup;
+ key->line_aa = line_aa;
+ key->stats_wm = brw->intel.stats_wm;
/* BRW_NEW_WM_INPUT_DIMENSIONS */
key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
@@ -377,6 +378,10 @@ static void brw_wm_populate_key( struct brw_context *brw,
swizzles[2] = SWIZZLE_ZERO;
} else if (t->DepthMode == GL_LUMINANCE) {
swizzles[3] = SWIZZLE_ONE;
+ } else if (t->DepthMode == GL_RED) {
+ swizzles[1] = SWIZZLE_ZERO;
+ swizzles[2] = SWIZZLE_ZERO;
+ swizzles[3] = SWIZZLE_ZERO;
}
}
@@ -423,6 +428,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
*/
if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
key->drawable_height = ctx->DrawBuffer->Height;
+ key->render_to_fbo = ctx->DrawBuffer->Name != 0;
}
key->nr_color_regions = brw->state.nr_color_regions;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 99bd15c187..e7f3cfbb75 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -59,18 +59,12 @@
#define AA_ALWAYS 2
struct brw_wm_prog_key {
- GLuint source_depth_reg:3;
- GLuint source_w_reg:3;
- GLuint aa_dest_stencil_reg:3;
- GLuint dest_depth_reg:3;
- GLuint nr_payload_regs:4;
- GLuint computes_depth:1; /* could be derived from program string */
- GLuint source_depth_to_render_target:1;
+ GLuint stats_wm:1;
GLuint flat_shade:1;
GLuint linear_color:1; /**< linear interpolation vs perspective interp */
- GLuint runtime_check_aads_emit:1;
GLuint nr_color_regions:5;
-
+ GLuint render_to_fbo:1;
+
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
GLuint shadowtex_mask:16;
GLuint yuvtex_mask:16;
@@ -80,6 +74,8 @@ struct brw_wm_prog_key {
GLushort drawable_height;
GLbitfield64 vp_outputs_written;
+ GLuint iz_lookup;
+ GLuint line_aa;
GLuint program_string_id:32;
};
@@ -203,6 +199,15 @@ struct brw_wm_compile {
PASS2_DONE
} state;
+ GLuint source_depth_reg:3;
+ GLuint source_w_reg:3;
+ GLuint aa_dest_stencil_reg:3;
+ GLuint dest_depth_reg:3;
+ GLuint nr_payload_regs:4;
+ GLuint computes_depth:1; /* could be derived from program string */
+ GLuint source_depth_to_render_target:1;
+ GLuint runtime_check_aads_emit:1;
+
/* Initial pass - translate fp instructions to fp instructions,
* simplifying and adding instructions for interpolation and
* framebuffer writes.
@@ -305,14 +310,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
void brw_wm_print_program( struct brw_wm_compile *c,
const char *stage );
-void brw_wm_lookup_iz( struct intel_context *intel,
- GLuint line_aa,
- GLuint lookup,
- GLboolean ps_uses_depth,
- struct brw_wm_prog_key *key );
+void brw_wm_lookup_iz(struct intel_context *intel,
+ struct brw_wm_compile *c);
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
/* brw_wm_emit.c */
@@ -380,7 +380,6 @@ void emit_fb_write(struct brw_wm_compile *c,
void emit_frontfacing(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask);
-void emit_kil_nv(struct brw_wm_compile *c);
void emit_linterp(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 96fecc97ee..be86e0e128 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -219,43 +219,45 @@ void emit_wpos_xy(struct brw_wm_compile *c,
const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W);
+ struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W);
if (mask & WRITEMASK_X) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_x_f, delta_x);
+ delta_x = delta_x_f;
+ }
+
if (c->fp->program.PixelCenterInteger) {
/* X' = X */
- brw_MOV(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[0], delta_x);
} else {
/* X' = X + 0.5 */
- brw_ADD(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5));
}
}
if (mask & WRITEMASK_Y) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_y_f, delta_y);
+ delta_y = delta_y_f;
+ }
+
if (c->fp->program.OriginUpperLeft) {
if (c->fp->program.PixelCenterInteger) {
/* Y' = Y */
- brw_MOV(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[1], delta_y);
} else {
- /* Y' = Y + 0.5 */
- brw_ADD(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5));
}
} else {
float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
/* Y' = (height - 1) - Y + center */
- brw_ADD(p,
- dst[1],
- negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+ brw_ADD(p, dst[1], negate(delta_y),
brw_imm_f(c->key.drawable_height - 1 + center_offset));
}
}
@@ -896,10 +898,14 @@ void emit_math1(struct brw_wm_compile *c,
BRW_MATH_SATURATE_NONE);
struct brw_reg src;
- if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
- arg0[0].file != BRW_GENERAL_REGISTER_FILE)) {
+ if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
+ arg0[0].file != BRW_GENERAL_REGISTER_FILE) ||
+ arg0[0].negate || arg0[0].abs)) {
/* Gen6 math requires that source and dst horizontal stride be 1,
* and that the argument be in the GRF.
+ *
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
*/
src = dst[dst_chan];
brw_MOV(p, src, arg0[0]);
@@ -967,34 +973,23 @@ void emit_math2(struct brw_wm_compile *c,
struct brw_reg temp_dst = dst[dst_chan];
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- /* Both scalar arguments. Do scalar calc. */
- src0.hstride = BRW_HORIZONTAL_STRIDE_1;
- src1.hstride = BRW_HORIZONTAL_STRIDE_1;
- temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
- temp_dst.width = BRW_WIDTH_1;
-
- if (arg0[0].subnr != 0) {
- brw_MOV(p, temp_dst, src0);
- src0 = temp_dst;
-
- /* Ouch. We've used the temp as a dst, and we still
- * need a temp to store arg1 in, because src and dst
- * offsets have to be equal. Leaving this up to
- * glsl2-965 to handle correctly.
- */
- assert(arg1[0].subnr == 0);
- } else if (arg1[0].subnr != 0) {
- brw_MOV(p, temp_dst, src1);
- src1 = temp_dst;
- }
- } else {
- brw_MOV(p, temp_dst, src0);
- src0 = temp_dst;
- }
- } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- brw_MOV(p, temp_dst, src1);
- src1 = temp_dst;
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+ }
+
+ if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ /* This is a heinous hack to get a temporary register for use
+ * in case both arg0 and arg1 are constants. Why you're
+ * doing exponentiation on constant values in the shader, we
+ * don't know.
+ *
+ * max_wm_grf is almost surely less than the maximum GRF, and
+ * gen6 doesn't care about the number of GRFs used in a
+ * shader like pre-gen6 did.
+ */
+ struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
+ brw_MOV(p, temp, src1);
+ src1 = temp;
}
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
@@ -1012,14 +1007,6 @@ void emit_math2(struct brw_wm_compile *c,
sechalf(src0),
sechalf(src1));
}
-
- /* Splat a scalar result into all the channels. */
- if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
- arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
- temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
- brw_MOV(p, dst[dst_chan], temp_dst);
- }
} else {
GLuint saturate = ((mask & SATURATE) ?
BRW_MATH_SATURATE_SATURATE :
@@ -1301,9 +1288,15 @@ static void emit_kil( struct brw_wm_compile *c,
struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg pixelmask;
GLuint i, j;
+ if (intel->gen >= 6)
+ pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ else
+ pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
for (i = 0; i < 4; i++) {
/* Check if we've already done the comparison for this reg
* -- common when someone does KIL TEMP.wwww.
@@ -1319,26 +1312,11 @@ static void emit_kil( struct brw_wm_compile *c,
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
brw_set_predicate_control_flag_value(p, 0xff);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+ brw_AND(p, pixelmask, brw_flag_reg(), pixelmask);
brw_pop_insn_state(p);
}
}
-/* KIL_NV kills the pixels that are currently executing, not based on a test
- * of the arguments.
- */
-void emit_kil_nv( struct brw_wm_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
- brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
- brw_pop_insn_state(p);
-}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
@@ -1387,8 +1365,8 @@ static void emit_aa( struct brw_wm_compile *c,
GLuint reg )
{
struct brw_compile *p = &c->func;
- GLuint comp = c->key.aa_dest_stencil_reg / 2;
- GLuint off = c->key.aa_dest_stencil_reg % 2;
+ GLuint comp = c->aa_dest_stencil_reg / 2;
+ GLuint off = c->aa_dest_stencil_reg % 2;
struct brw_reg aa = offset(arg1[comp], off);
brw_push_insn_state(p);
@@ -1416,11 +1394,10 @@ void emit_fb_write(struct brw_wm_compile *c,
struct intel_context *intel = &brw->intel;
GLuint nr = 2;
GLuint channel;
- int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */
/* Reserve a space for AA - may not be needed:
*/
- if (c->key.aa_dest_stencil_reg)
+ if (c->aa_dest_stencil_reg)
nr += 1;
/* I don't really understand how this achieves the color interleave
@@ -1428,11 +1405,6 @@ void emit_fb_write(struct brw_wm_compile *c,
*/
brw_push_insn_state(p);
- if (intel->gen >= 6)
- base_reg = nr;
- else
- base_reg = 0;
-
for (channel = 0; channel < 4; channel++) {
if (intel->gen >= 6) {
/* gen6 SIMD16 single source DP write looks like:
@@ -1493,9 +1465,9 @@ void emit_fb_write(struct brw_wm_compile *c,
brw_pop_insn_state(p);
- if (c->key.source_depth_to_render_target)
+ if (c->source_depth_to_render_target)
{
- if (c->key.computes_depth)
+ if (c->computes_depth)
brw_MOV(p, brw_message_reg(nr), arg2[2]);
else
brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
@@ -1503,10 +1475,10 @@ void emit_fb_write(struct brw_wm_compile *c,
nr += 2;
}
- if (c->key.dest_depth_reg)
+ if (c->dest_depth_reg)
{
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ GLuint comp = c->dest_depth_reg / 2;
+ GLuint off = c->dest_depth_reg % 2;
if (off != 0) {
brw_push_insn_state(p);
@@ -1524,15 +1496,27 @@ void emit_fb_write(struct brw_wm_compile *c,
}
if (intel->gen >= 6) {
- /* Subtract off the message header, since we send headerless. */
- nr -= 2;
+ /* Load the message header. There's no implied move from src0
+ * to the base mrf on gen6.
+ */
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, brw_message_reg(0), brw_vec8_grf(0, 0));
+ brw_pop_insn_state(p);
+
+ if (target != 0) {
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ 0,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(target));
+ }
}
- if (!c->key.runtime_check_aads_emit) {
- if (c->key.aa_dest_stencil_reg)
+ if (!c->runtime_check_aads_emit) {
+ if (c->aa_dest_stencil_reg)
emit_aa(c, arg1, 2);
- fire_fb_write(c, base_reg, nr, target, eot);
+ fire_fb_write(c, 0, nr, target, eot);
}
else {
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
@@ -1897,10 +1881,6 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_kil(c, args[0]);
break;
- case OPCODE_KIL_NV:
- emit_kil_nv(c);
- break;
-
default:
printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 2cae698880..4759b289a0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -338,11 +338,13 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
{
- /* This is only called for producing 1/w in pre-gen6 interp. for
- * gen6, the interp opcodes don't use this argument.
+ /* This is called for producing 1/w in pre-gen6 interp. for gen6,
+ * the interp opcodes don't use this argument. But to keep the
+ * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
+ * into the slot.
*/
if (c->func.brw->intel.gen >= 6)
- return src_undef();
+ return c->delta_xy;
if (src_is_undef(c->pixel_w)) {
struct prog_dst_register pixel_w = get_temp(c);
@@ -373,11 +375,7 @@ static void emit_interp( struct brw_wm_compile *c,
struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
struct prog_src_register deltas;
- if (c->func.brw->intel.gen < 6) {
- deltas = get_delta_xy(c);
- } else {
- deltas = src_undef();
- }
+ deltas = get_delta_xy(c);
/* Need to use PINTERP on attributes which have been
* multiplied by 1/W in the SF program, and LINTERP on those
@@ -1133,6 +1131,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
precalc_lit(c, inst);
break;
+ case OPCODE_RSQ:
+ out = emit_scalar_insn(c, inst);
+ out->SrcReg[0].Abs = GL_TRUE;
+ break;
+
case OPCODE_TEX:
precalc_tex(c, inst);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
deleted file mode 100644
index 7fe8ab1f33..0000000000
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ /dev/null
@@ -1,1035 +0,0 @@
-#include "main/macros.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-
-static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint component);
-
-/**
- * Determine if the given fragment program uses GLSL features such
- * as flow conditionals, loops, subroutines.
- * Some GLSL shaders may use these features, others might not.
- */
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
-{
- int i;
-
- if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE))
- return GL_TRUE;
-
- for (i = 0; i < fp->Base.NumInstructions; i++) {
- const struct prog_instruction *inst = &fp->Base.Instructions[i];
- switch (inst->Opcode) {
- case OPCODE_ARL:
- case OPCODE_IF:
- case OPCODE_ENDIF:
- case OPCODE_CAL:
- case OPCODE_BRK:
- case OPCODE_RET:
- case OPCODE_BGNLOOP:
- return GL_TRUE;
- default:
- break;
- }
- }
- return GL_FALSE;
-}
-
-
-
-static void
-reclaim_temps(struct brw_wm_compile *c);
-
-
-/** Mark GRF register as used. */
-static void
-prealloc_grf(struct brw_wm_compile *c, int r)
-{
- c->used_grf[r] = GL_TRUE;
-}
-
-
-/** Mark given GRF register as not in use. */
-static void
-release_grf(struct brw_wm_compile *c, int r)
-{
- /*assert(c->used_grf[r]);*/
- c->used_grf[r] = GL_FALSE;
- c->first_free_grf = MIN2(c->first_free_grf, r);
-}
-
-
-/** Return index of a free GRF, mark it as used. */
-static int
-alloc_grf(struct brw_wm_compile *c)
-{
- GLuint r;
- for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
- if (!c->used_grf[r]) {
- c->used_grf[r] = GL_TRUE;
- c->first_free_grf = r + 1; /* a guess */
- return r;
- }
- }
-
- /* no free temps, try to reclaim some */
- reclaim_temps(c);
- c->first_free_grf = 0;
-
- /* try alloc again */
- for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
- if (!c->used_grf[r]) {
- c->used_grf[r] = GL_TRUE;
- c->first_free_grf = r + 1; /* a guess */
- return r;
- }
- }
-
- for (r = 0; r < BRW_WM_MAX_GRF; r++) {
- assert(c->used_grf[r]);
- }
-
- /* really, no free GRF regs found */
- if (!c->out_of_regs) {
- /* print warning once per compilation */
- _mesa_warning(NULL, "i965: ran out of registers for fragment program");
- c->out_of_regs = GL_TRUE;
- }
-
- return -1;
-}
-
-
-/** Return number of GRF registers used */
-static int
-num_grf_used(const struct brw_wm_compile *c)
-{
- int r;
- for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
- if (c->used_grf[r])
- return r + 1;
- return 0;
-}
-
-
-
-/**
- * Record the mapping of a Mesa register to a hardware register.
- */
-static void set_reg(struct brw_wm_compile *c, int file, int index,
- int component, struct brw_reg reg)
-{
- c->wm_regs[file][index][component].reg = reg;
- c->wm_regs[file][index][component].inited = GL_TRUE;
-}
-
-static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
-{
- struct brw_reg reg;
-
- /* if we need to allocate another temp, grow the tmp_regs[] array */
- if (c->tmp_index == c->tmp_max) {
- int r = alloc_grf(c);
- if (r < 0) {
- /*printf("Out of temps in %s\n", __FUNCTION__);*/
- r = 50; /* XXX random register! */
- }
- c->tmp_regs[ c->tmp_max++ ] = r;
- }
-
- /* form the GRF register */
- reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
- /*printf("alloc_temp %d\n", reg.nr);*/
- assert(reg.nr < BRW_WM_MAX_GRF);
- return reg;
-
-}
-
-/**
- * Save current temp register info.
- * There must be a matching call to release_tmps().
- */
-static int mark_tmps(struct brw_wm_compile *c)
-{
- return c->tmp_index;
-}
-
-static void release_tmps(struct brw_wm_compile *c, int mark)
-{
- c->tmp_index = mark;
-}
-
-/**
- * Convert Mesa src register to brw register.
- *
- * Since we're running in SOA mode each Mesa register corresponds to four
- * hardware registers. We allocate the hardware registers as needed here.
- *
- * \param file register file, one of PROGRAM_x
- * \param index register number
- * \param component src component (X=0, Y=1, Z=2, W=3)
- * \param nr not used?!?
- * \param neg negate value?
- * \param abs take absolute value?
- */
-static struct brw_reg
-get_reg(struct brw_wm_compile *c, int file, int index, int component,
- int nr, GLuint neg, GLuint abs)
-{
- struct brw_reg reg;
- switch (file) {
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- case PROGRAM_UNIFORM:
- file = PROGRAM_STATE_VAR;
- break;
- case PROGRAM_UNDEFINED:
- return brw_null_reg();
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
- case PROGRAM_PAYLOAD:
- break;
- default:
- _mesa_problem(NULL, "Unexpected file in get_reg()");
- return brw_null_reg();
- }
-
- assert(index < 256);
- assert(component < 4);
-
- /* see if we've already allocated a HW register for this Mesa register */
- if (c->wm_regs[file][index][component].inited) {
- /* yes, re-use */
- reg = c->wm_regs[file][index][component].reg;
- }
- else {
- /* no, allocate new register */
- int grf = alloc_grf(c);
- /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
- if (grf < 0) {
- /* totally out of temps */
- grf = 51; /* XXX random register! */
- }
-
- reg = brw_vec8_grf(grf, 0);
- /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
-
- set_reg(c, file, index, component, reg);
- }
-
- if (neg & (1 << component)) {
- reg = negate(reg);
- }
- if (abs)
- reg = brw_abs(reg);
- return reg;
-}
-
-
-
-/**
- * This is called if we run out of GRF registers. Examine the live intervals
- * of temp regs in the program and free those which won't be used again.
- */
-static void
-reclaim_temps(struct brw_wm_compile *c)
-{
- GLint intBegin[MAX_PROGRAM_TEMPS];
- GLint intEnd[MAX_PROGRAM_TEMPS];
- int index;
-
- /*printf("Reclaim temps:\n");*/
-
- _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
- intBegin, intEnd);
-
- for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
- if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
- /* program temp[i] can be freed */
- int component;
- /*printf(" temp[%d] is dead\n", index);*/
- for (component = 0; component < 4; component++) {
- if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
- int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
- release_grf(c, r);
- /*
- printf(" Reclaim temp %d, reg %d at inst %d\n",
- index, r, c->cur_inst);
- */
- c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
- }
- }
- }
- }
-}
-
-
-
-
-/**
- * Preallocate registers. This sets up the Mesa to hardware register
- * mapping for certain registers, such as constants (uniforms/state vars)
- * and shader inputs.
- */
-static void prealloc_reg(struct brw_wm_compile *c)
-{
- struct intel_context *intel = &c->func.brw->intel;
- int i, j;
- struct brw_reg reg;
- int urb_read_length = 0;
- GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
- GLuint reg_index = 0;
-
- memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
- c->first_free_grf = 0;
-
- for (i = 0; i < 4; i++) {
- if (i < (c->key.nr_payload_regs + 1) / 2)
- reg = brw_vec8_grf(i * 2, 0);
- else
- reg = brw_vec8_grf(0, 0);
- set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
- }
- set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0,
- brw_vec8_grf(c->key.source_w_reg, 0));
- reg_index += c->key.nr_payload_regs;
-
- /* constants */
- {
- const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
- const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
-
- /* use a real constant buffer, or just use a section of the GRF? */
- /* XXX this heuristic may need adjustment... */
- if ((nr_params + nr_temps) * 4 + reg_index > 80) {
- for (i = 0; i < nr_params; i++) {
- float *pv = c->fp->program.Base.Parameters->ParameterValues[i];
- for (j = 0; j < 4; j++) {
- c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j];
- c->prog_data.nr_pull_params++;
- }
- }
-
- c->prog_data.nr_params = 0;
- }
- /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
-
- if (!c->prog_data.nr_pull_params) {
- const struct gl_program_parameter_list *plist =
- c->fp->program.Base.Parameters;
- int index = 0;
-
- /* number of float constants in CURBE */
- c->prog_data.nr_params = 4 * nr_params;
-
- /* loop over program constants (float[4]) */
- for (i = 0; i < nr_params; i++) {
- /* loop over XYZW channels */
- for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(reg_index + index / 8, index % 8);
- /* Save pointer to parameter/constant value.
- * Constants will be copied in prepare_constant_buffer()
- */
- c->prog_data.param[index] = &plist->ParameterValues[i][j];
- set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
- }
- }
- /* number of constant regs used (each reg is float[8]) */
- c->nr_creg = ALIGN(nr_params, 2) / 2;
- reg_index += c->nr_creg;
- }
- }
-
- /* fragment shader inputs: One 2-reg pair of interpolation
- * coefficients for each vec4 to be set up.
- */
- if (intel->gen >= 6) {
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i)))
- continue;
-
- reg = brw_vec8_grf(reg_index, 0);
- for (j = 0; j < 4; j++) {
- set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
- }
- reg_index += 2;
- }
- urb_read_length = reg_index;
- } else {
- for (i = 0; i < VERT_RESULT_MAX; i++) {
- int fp_input;
-
- if (i >= VERT_RESULT_VAR0)
- fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
- else if (i <= VERT_RESULT_TEX7)
- fp_input = i;
- else
- fp_input = -1;
-
- if (fp_input >= 0 && inputs & (1 << fp_input)) {
- urb_read_length = reg_index;
- reg = brw_vec8_grf(reg_index, 0);
- for (j = 0; j < 4; j++)
- set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
- }
- if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
- reg_index += 2;
- }
- }
- }
-
- c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
- c->prog_data.urb_read_length = urb_read_length;
- c->prog_data.curb_read_length = c->nr_creg;
- c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
- reg_index++;
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
- reg_index += 2;
-
- /* mark GRF regs [0..reg_index-1] as in-use */
- for (i = 0; i < reg_index; i++)
- prealloc_grf(c, i);
-
- /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
- prealloc_grf(c, 126);
- prealloc_grf(c, 127);
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
- struct brw_reg dst[4];
-
- switch (inst->Opcode) {
- case OPCODE_TEX:
- case OPCODE_TXB:
- /* Allocate the channels of texture results contiguously,
- * since they are written out that way by the sampler unit.
- */
- for (j = 0; j < 4; j++) {
- dst[j] = get_dst_reg(c, inst, j);
- if (j != 0)
- assert(dst[j].nr == dst[j - 1].nr + 1);
- }
- break;
- default:
- break;
- }
- }
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
-
- switch (inst->Opcode) {
- case WM_DELTAXY:
- /* Allocate WM_DELTAXY destination on G45/GM45 to an
- * even-numbered GRF if possible so that we can use the PLN
- * instruction.
- */
- if (inst->DstReg.WriteMask == WRITEMASK_XY &&
- !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
- !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
- (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
- int grf;
-
- for (grf = c->first_free_grf & ~1;
- grf < BRW_WM_MAX_GRF;
- grf += 2)
- {
- if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
- c->used_grf[grf] = GL_TRUE;
- c->used_grf[grf + 1] = GL_TRUE;
- c->first_free_grf = grf + 2; /* a guess */
-
- set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
- brw_vec8_grf(grf, 0));
- set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
- brw_vec8_grf(grf + 1, 0));
- break;
- }
- }
- }
- default:
- break;
- }
- }
-
- /* An instruction may reference up to three constants.
- * They'll be found in these registers.
- * XXX alloc these on demand!
- */
- if (c->prog_data.nr_pull_params) {
- for (i = 0; i < 3; i++) {
- c->current_const[i].index = -1;
- c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
- }
- }
-#if 0
- printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
- printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
-#endif
-}
-
-
-/**
- * Check if any of the instruction's src registers are constants, uniforms,
- * or statevars. If so, fetch any constants that we don't already have in
- * the three GRF slots.
- */
-static void fetch_constants(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint i;
-
- /* loop over instruction src regs */
- for (i = 0; i < 3; i++) {
- const struct prog_src_register *src = &inst->SrcReg[i];
- if (src->File == PROGRAM_STATE_VAR ||
- src->File == PROGRAM_CONSTANT ||
- src->File == PROGRAM_UNIFORM) {
- c->current_const[i].index = src->Index;
-
-#if 0
- printf(" fetch const[%d] for arg %d into reg %d\n",
- src->Index, i, c->current_const[i].reg.nr);
-#endif
-
- /* need to fetch the constant now */
- brw_oword_block_read(p,
- c->current_const[i].reg,
- brw_message_reg(1),
- 16 * src->Index,
- SURF_INDEX_FRAG_CONST_BUFFER);
- }
- }
-}
-
-
-/**
- * Convert Mesa dst register to brw register.
- */
-static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint component)
-{
- const int nr = 1;
- return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
- 0, 0);
-}
-
-
-static struct brw_reg
-get_src_reg_const(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint srcRegIndex, GLuint component)
-{
- /* We should have already fetched the constant from the constant
- * buffer in fetch_constants(). Now we just have to return a
- * register description that extracts the needed component and
- * smears it across all eight vector components.
- */
- const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
- struct brw_reg const_reg;
-
- assert(component < 4);
- assert(srcRegIndex < 3);
- assert(c->current_const[srcRegIndex].index != -1);
- const_reg = c->current_const[srcRegIndex].reg;
-
- /* extract desired float from the const_reg, and smear */
- const_reg = stride(const_reg, 0, 1, 0);
- const_reg.subnr = component * 4;
-
- if (src->Negate & (1 << component))
- const_reg = negate(const_reg);
- if (src->Abs)
- const_reg = brw_abs(const_reg);
-
-#if 0
- printf(" form const[%d].%d for arg %d, reg %d\n",
- c->current_const[srcRegIndex].index,
- component,
- srcRegIndex,
- const_reg.nr);
-#endif
-
- return const_reg;
-}
-
-
-/**
- * Convert Mesa src register to brw register.
- */
-static struct brw_reg get_src_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint srcRegIndex, GLuint channel)
-{
- const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
- const GLuint nr = 1;
- const GLuint component = GET_SWZ(src->Swizzle, channel);
-
- /* Only one immediate value can be used per native opcode, and it
- * has be in the src1 slot, so not all Mesa instructions will get
- * to take advantage of immediate constants.
- */
- if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) {
- const struct gl_program_parameter_list *params;
-
- params = c->fp->program.Base.Parameters;
-
- /* Extended swizzle terms */
- if (component == SWIZZLE_ZERO) {
- return brw_imm_f(0.0F);
- } else if (component == SWIZZLE_ONE) {
- if (src->Negate)
- return brw_imm_f(-1.0F);
- else
- return brw_imm_f(1.0F);
- }
-
- if (src->File == PROGRAM_CONSTANT) {
- float f = params->ParameterValues[src->Index][component];
-
- if (src->Abs)
- f = fabs(f);
- if (src->Negate)
- f = -f;
-
- return brw_imm_f(f);
- }
- }
-
- if (c->prog_data.nr_pull_params &&
- (src->File == PROGRAM_STATE_VAR ||
- src->File == PROGRAM_CONSTANT ||
- src->File == PROGRAM_UNIFORM)) {
- return get_src_reg_const(c, inst, srcRegIndex, component);
- }
- else {
- /* other type of source register */
- return get_reg(c, src->File, src->Index, component, nr,
- src->Negate, src->Abs);
- }
-}
-
-static void emit_arl(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, addr_reg;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_ADDRESS, 0);
- src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
- brw_MOV(p, addr_reg, src0);
- brw_set_saturate(p, 0);
-}
-
-static INLINE struct brw_reg high_words( struct brw_reg reg )
-{
- return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
- 0, 8, 2 );
-}
-
-static INLINE struct brw_reg low_words( struct brw_reg reg )
-{
- return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
-}
-
-static INLINE struct brw_reg even_bytes( struct brw_reg reg )
-{
- return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
-}
-
-static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
-{
- return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
- 0, 16, 2 );
-}
-
-/**
- * Resolve subroutine calls after code emit is done.
- */
-static void post_wm_emit( struct brw_wm_compile *c )
-{
- brw_resolve_cals(&c->func);
-}
-
-static void
-get_argument_regs(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- int index,
- struct brw_reg *dst,
- struct brw_reg *regs,
- int mask)
-{
- struct brw_compile *p = &c->func;
- int i, j;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1 << i)) {
- regs[i] = get_src_reg(c, inst, index, i);
-
- /* Unalias destination registers from our sources. */
- if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
- for (j = 0; j < 4; j++) {
- if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
- struct brw_reg tmp = alloc_tmp(c);
- brw_MOV(p, tmp, regs[i]);
- regs[i] = tmp;
- break;
- }
- }
- }
- }
- }
-}
-
-static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
-{
- struct intel_context *intel = &brw->intel;
-#define MAX_IF_DEPTH 32
-#define MAX_LOOP_DEPTH 32
- struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
- int if_depth_in_loop[MAX_LOOP_DEPTH];
- GLuint i, if_depth = 0, loop_depth = 0;
- struct brw_compile *p = &c->func;
- struct brw_indirect stack_index = brw_indirect(0, 0);
-
- c->out_of_regs = GL_FALSE;
-
- if_depth_in_loop[loop_depth] = 0;
-
- prealloc_reg(c);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
-
- if (intel->gen >= 6)
- brw_set_acc_write_control(p, 1);
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
- int dst_flags;
- struct brw_reg args[3][4], dst[4];
- int j;
- int mark = mark_tmps( c );
-
- c->cur_inst = i;
-
-#if 0
- printf("Inst %d: ", i);
- _mesa_print_instruction(inst);
-#endif
-
- /* fetch any constants that this instruction needs */
- if (c->prog_data.nr_pull_params)
- fetch_constants(c, inst);
-
- if (inst->Opcode != OPCODE_ARL) {
- for (j = 0; j < 4; j++) {
- if (inst->DstReg.WriteMask & (1 << j))
- dst[j] = get_dst_reg(c, inst, j);
- else
- dst[j] = brw_null_reg();
- }
- }
- for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
- get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
-
- dst_flags = inst->DstReg.WriteMask;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- dst_flags |= SATURATE;
-
- if (inst->CondUpdate)
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
- else
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
-
- switch (inst->Opcode) {
- case WM_PIXELXY:
- emit_pixel_xy(c, dst, dst_flags);
- break;
- case WM_DELTAXY:
- emit_delta_xy(p, dst, dst_flags, args[0]);
- break;
- case WM_PIXELW:
- emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
- break;
- case WM_LINTERP:
- emit_linterp(p, dst, dst_flags, args[0], args[1]);
- break;
- case WM_PINTERP:
- emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case WM_CINTERP:
- emit_cinterp(p, dst, dst_flags, args[0]);
- break;
- case WM_WPOSXY:
- emit_wpos_xy(c, dst, dst_flags, args[0]);
- break;
- case WM_FB_WRITE:
- emit_fb_write(c, args[0], args[1], args[2],
- INST_AUX_GET_TARGET(inst->Aux),
- inst->Aux & INST_AUX_EOT);
- break;
- case WM_FRONTFACING:
- emit_frontfacing(p, dst, dst_flags);
- break;
- case OPCODE_ADD:
- emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_ARL:
- emit_arl(c, inst);
- break;
- case OPCODE_FRC:
- emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
- break;
- case OPCODE_FLR:
- emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
- break;
- case OPCODE_LRP:
- emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_TRUNC:
- emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
- break;
- case OPCODE_MOV:
- case OPCODE_SWZ:
- emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
- break;
- case OPCODE_DP2:
- emit_dp2(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DP3:
- emit_dp3(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DP4:
- emit_dp4(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_XPD:
- emit_xpd(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DPH:
- emit_dph(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_RCP:
- emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
- break;
- case OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
- break;
- case OPCODE_SIN:
- emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
- break;
- case OPCODE_COS:
- emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
- break;
- case OPCODE_EX2:
- emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
- break;
- case OPCODE_LG2:
- emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
- break;
- case OPCODE_CMP:
- emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_MIN:
- emit_min(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_MAX:
- emit_max(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DDX:
- case OPCODE_DDY:
- emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
- args[0]);
- break;
- case OPCODE_SLT:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_L, args[0], args[1]);
- break;
- case OPCODE_SLE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_LE, args[0], args[1]);
- break;
- case OPCODE_SGT:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_G, args[0], args[1]);
- break;
- case OPCODE_SGE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_GE, args[0], args[1]);
- break;
- case OPCODE_SEQ:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_EQ, args[0], args[1]);
- break;
- case OPCODE_SNE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_NEQ, args[0], args[1]);
- break;
- case OPCODE_SSG:
- emit_sign(p, dst, dst_flags, args[0]);
- break;
- case OPCODE_MUL:
- emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_POW:
- emit_math2(c, BRW_MATH_FUNCTION_POW,
- dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_MAD:
- emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_TEX:
- emit_tex(c, dst, dst_flags, args[0],
- get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
- 0, 1, 0, 0),
- inst->TexSrcTarget,
- inst->TexSrcUnit,
- (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0);
- break;
- case OPCODE_TXB:
- emit_txb(c, dst, dst_flags, args[0],
- get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
- 0, 1, 0, 0),
- inst->TexSrcTarget,
- c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]);
- break;
- case OPCODE_KIL_NV:
- emit_kil_nv(c);
- break;
- case OPCODE_IF:
- assert(if_depth < MAX_IF_DEPTH);
- if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
- if_depth_in_loop[loop_depth]++;
- break;
- case OPCODE_ELSE:
- assert(if_depth > 0);
- if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
- break;
- case OPCODE_ENDIF:
- assert(if_depth > 0);
- brw_ENDIF(p, if_inst[--if_depth]);
- if_depth_in_loop[loop_depth]--;
- break;
- case OPCODE_BGNSUB:
- brw_save_label(p, inst->Comment, p->nr_insn);
- break;
- case OPCODE_ENDSUB:
- /* no-op */
- break;
- case OPCODE_CAL:
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(4));
- brw_save_call(&c->func, inst->Comment, p->nr_insn);
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_pop_insn_state(p);
- break;
-
- case OPCODE_RET:
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(-4));
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_pop_insn_state(p);
-
- break;
- case OPCODE_BGNLOOP:
- /* XXX may need to invalidate the current_constant regs */
- loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
- if_depth_in_loop[loop_depth] = 0;
- break;
- case OPCODE_BRK:
- brw_BREAK(p, if_depth_in_loop[loop_depth]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_CONT:
- brw_CONT(p, if_depth_in_loop[loop_depth]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_ENDLOOP:
- {
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- if (intel->gen == 5)
- br = 2;
-
- assert(loop_depth > 0);
- loop_depth--;
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
- break;
- default:
- printf("unsupported opcode %d (%s) in fragment shader\n",
- inst->Opcode, inst->Opcode < MAX_OPCODE ?
- _mesa_opcode_string(inst->Opcode) : "unknown");
- }
-
- /* Release temporaries containing any unaliased source regs. */
- release_tmps( c, mark );
-
- if (inst->CondUpdate)
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- else
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- }
- post_wm_emit(c);
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("wm-native:\n");
- for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stdout, &p->store[i], intel->gen);
- printf("\n");
- }
-}
-
-/**
- * Do GPU code generation for shaders that use GLSL features such as
- * flow control. Other shaders will be compiled with the
- */
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
-{
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("brw_wm_glsl_emit:\n");
- }
-
- /* initial instruction translation/simplification */
- brw_wm_pass_fp(c);
-
- /* actual code generation */
- brw_wm_emit_glsl(brw, c);
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- brw_wm_print_program(c, "brw_wm_glsl_emit done");
- }
-
- c->prog_data.total_grf = num_grf_used(c);
- c->prog_data.total_scratch = 0;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
index 62e556698b..471ea1c18d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -120,14 +120,14 @@ const struct {
* \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
* \param lookup bitmask of IZ_* flags
*/
-void brw_wm_lookup_iz( struct intel_context *intel,
- GLuint line_aa,
- GLuint lookup,
- GLboolean ps_uses_depth,
- struct brw_wm_prog_key *key )
+void brw_wm_lookup_iz(struct intel_context *intel,
+ struct brw_wm_compile *c)
{
GLuint reg = 2;
GLboolean kill_stats_promoted_workaround = GL_FALSE;
+ int lookup = c->key.iz_lookup;
+ bool uses_depth = (c->fp->program.Base.InputsRead &
+ (1 << FRAG_ATTRIB_WPOS)) != 0;
assert (lookup < IZ_BIT_MAX);
@@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel,
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
*/
- if (intel->stats_wm &&
+ if (c->key.stats_wm &&
(lookup & IZ_PS_KILL_ALPHATEST_BIT) &&
wm_iz_table[lookup].mode == P) {
kill_stats_promoted_workaround = GL_TRUE;
}
if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
- key->computes_depth = 1;
+ c->computes_depth = 1;
- if (wm_iz_table[lookup].sd_present || ps_uses_depth ||
+ if (wm_iz_table[lookup].sd_present || uses_depth ||
kill_stats_promoted_workaround) {
- key->source_depth_reg = reg;
+ c->source_depth_reg = reg;
reg += 2;
}
if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
- key->source_depth_to_render_target = 1;
+ c->source_depth_to_render_target = 1;
- if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
- key->aa_dest_stencil_reg = reg;
- key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
- line_aa == AA_SOMETIMES);
+ if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) {
+ c->aa_dest_stencil_reg = reg;
+ c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+ c->key.line_aa == AA_SOMETIMES);
reg++;
}
if (wm_iz_table[lookup].dd_present) {
- key->dest_depth_reg = reg;
+ c->dest_depth_reg = reg;
reg+=2;
}
- key->nr_payload_regs = reg;
+ c->nr_payload_regs = reg;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 83152526b3..f78bdc3186 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c )
GLuint i;
for (i = 0; i < 4; i++) {
- GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i;
+ GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i;
pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
&c->payload.depth[j] );
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index 3a2874b6dd..7d6a3fa9f1 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
if (inst->opcode == WM_FB_WRITE) {
track_arg(c, inst, 0, WRITEMASK_XYZW);
track_arg(c, inst, 1, WRITEMASK_XYZW);
- if (c->key.source_depth_to_render_target &&
- c->key.computes_depth)
+ if (c->source_depth_to_render_target && c->computes_depth)
track_arg(c, inst, 2, WRITEMASK_Z);
else
track_arg(c, inst, 2, 0);
@@ -281,7 +280,6 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_DST:
case WM_FRONTFACING:
- case OPCODE_KIL_NV:
default:
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
index 44e3953814..8c2b9e7020 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
@@ -69,6 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c,
*/
static void init_registers( struct brw_wm_compile *c )
{
+ struct brw_context *brw = c->func.brw;
+ struct intel_context *intel = &brw->intel;
GLuint nr_interp_regs = 0;
GLuint i = 0;
GLuint j;
@@ -76,32 +78,41 @@ static void init_registers( struct brw_wm_compile *c )
for (j = 0; j < c->grf_limit; j++)
c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
- for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++)
+ for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++)
prealloc_reg(c, &c->payload.depth[j], i++);
for (j = 0; j < c->nr_creg; j++)
prealloc_reg(c, &c->creg[j], i++);
- for (j = 0; j < VERT_RESULT_MAX; j++) {
- if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
- int fp_index;
-
- if (j >= VERT_RESULT_VAR0)
- fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
- else if (j <= VERT_RESULT_TEX7)
- fp_index = j;
- else
- fp_index = -1;
-
- nr_interp_regs++;
- if (fp_index >= 0)
- prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+ if (intel->gen >= 6) {
+ for (unsigned int j = 0; j < FRAG_ATTRIB_MAX; j++) {
+ if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(j)) {
+ nr_interp_regs++;
+ prealloc_reg(c, &c->payload.input_interp[j], i++);
+ }
+ }
+ } else {
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
+ int fp_index;
+
+ if (j >= VERT_RESULT_VAR0)
+ fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+ else if (j <= VERT_RESULT_TEX7)
+ fp_index = j;
+ else
+ fp_index = -1;
+
+ nr_interp_regs++;
+ if (fp_index >= 0)
+ prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+ }
}
+ assert(nr_interp_regs >= 1);
}
- assert(nr_interp_regs >= 1);
- c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2);
+ c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2);
c->prog_data.urb_read_length = nr_interp_regs * 2;
c->prog_data.curb_read_length = c->nr_creg * 2;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index fea96d3538..e7c97a1cb0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap )
static drm_intel_bo *upload_default_color( struct brw_context *brw,
const GLfloat *color )
{
- struct brw_sampler_default_color sdc;
+ struct intel_context *intel = &brw->intel;
- COPY_4V(sdc.color, color);
-
- return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
- &sdc, sizeof(sdc));
+ if (intel->gen >= 5) {
+ struct gen5_sampler_default_color sdc;
+
+ memset(&sdc, 0, sizeof(sdc));
+
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]);
+
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]);
+
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]);
+
+ /* XXX: Fill in half floats */
+ /* XXX: Fill in signed bytes */
+
+ COPY_4V(sdc.f, color);
+
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc));
+ } else {
+ struct brw_sampler_default_color sdc;
+
+ COPY_4V(sdc.color, color);
+
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc));
+ }
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 76de7b7b6f..e9ef635bca 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -87,7 +87,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
struct gl_context *ctx = &brw->intel.ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
- const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
struct intel_context *intel = &brw->intel;
memset(key, 0, sizeof(*key));
@@ -132,7 +131,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
- key->is_glsl = bfp->isGLSL;
/* If using the fragment shader backend, the program is always
* 8-wide.
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 76fc94df1f..1cd736a111 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -68,104 +68,54 @@ static GLuint translate_tex_target( GLenum target )
}
}
+static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] =
+{
+ [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM,
+ [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM,
+ [MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM,
+ [MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM,
+ [MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM,
+ [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM,
+ [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM,
+ [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM,
+ [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM,
+ [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
+ [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
+ [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM,
+ [MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM,
+ [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM,
+ [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL,
+ [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY,
+ [MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1,
+ [MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1,
+ [MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB,
+ [MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM,
+ [MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM,
+ [MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM,
+ [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
+ [MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB,
+ [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
+ [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
+ [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM,
+ [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
+};
static GLuint translate_tex_format( gl_format mesa_format,
GLenum internal_format,
GLenum depth_mode )
{
switch( mesa_format ) {
- case MESA_FORMAT_L8:
- return BRW_SURFACEFORMAT_L8_UNORM;
-
- case MESA_FORMAT_I8:
- return BRW_SURFACEFORMAT_I8_UNORM;
-
- case MESA_FORMAT_A8:
- return BRW_SURFACEFORMAT_A8_UNORM;
-
- case MESA_FORMAT_AL88:
- return BRW_SURFACEFORMAT_L8A8_UNORM;
-
- case MESA_FORMAT_AL1616:
- return BRW_SURFACEFORMAT_L16A16_UNORM;
-
- case MESA_FORMAT_R8:
- return BRW_SURFACEFORMAT_R8_UNORM;
-
- case MESA_FORMAT_R16:
- return BRW_SURFACEFORMAT_R16_UNORM;
-
- case MESA_FORMAT_RG88:
- return BRW_SURFACEFORMAT_R8G8_UNORM;
-
- case MESA_FORMAT_RG1616:
- return BRW_SURFACEFORMAT_R16G16_UNORM;
-
- case MESA_FORMAT_RGB888:
- assert(0); /* not supported for sampling */
- return BRW_SURFACEFORMAT_R8G8B8_UNORM;
-
- case MESA_FORMAT_ARGB8888:
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-
- case MESA_FORMAT_XRGB8888:
- return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
-
- case MESA_FORMAT_RGBA8888_REV:
- _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()");
- return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
-
- case MESA_FORMAT_RGB565:
- return BRW_SURFACEFORMAT_B5G6R5_UNORM;
-
- case MESA_FORMAT_ARGB1555:
- return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
-
- case MESA_FORMAT_ARGB4444:
- return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
-
- case MESA_FORMAT_YCBCR_REV:
- return BRW_SURFACEFORMAT_YCRCB_NORMAL;
-
- case MESA_FORMAT_YCBCR:
- return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
-
- case MESA_FORMAT_RGB_FXT1:
- case MESA_FORMAT_RGBA_FXT1:
- return BRW_SURFACEFORMAT_FXT1;
case MESA_FORMAT_Z16:
if (depth_mode == GL_INTENSITY)
return BRW_SURFACEFORMAT_I16_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A16_UNORM;
+ else if (depth_mode == GL_RED)
+ return BRW_SURFACEFORMAT_R16_UNORM;
else
return BRW_SURFACEFORMAT_L16_UNORM;
- case MESA_FORMAT_RGB_DXT1:
- return BRW_SURFACEFORMAT_DXT1_RGB;
-
- case MESA_FORMAT_RGBA_DXT1:
- return BRW_SURFACEFORMAT_BC1_UNORM;
-
- case MESA_FORMAT_RGBA_DXT3:
- return BRW_SURFACEFORMAT_BC2_UNORM;
-
- case MESA_FORMAT_RGBA_DXT5:
- return BRW_SURFACEFORMAT_BC3_UNORM;
-
- case MESA_FORMAT_SARGB8:
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
-
- case MESA_FORMAT_SLA8:
- return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
-
- case MESA_FORMAT_SL8:
- return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
-
- case MESA_FORMAT_SRGB_DXT1:
- return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
-
case MESA_FORMAT_S8_Z24:
/* XXX: these different surface formats don't seem to
* make any difference for shadow sampler/compares.
@@ -174,18 +124,14 @@ static GLuint translate_tex_format( gl_format mesa_format,
return BRW_SURFACEFORMAT_I24X8_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A24X8_UNORM;
+ else if (depth_mode == GL_RED)
+ return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS;
else
return BRW_SURFACEFORMAT_L24X8_UNORM;
- case MESA_FORMAT_DUDV8:
- return BRW_SURFACEFORMAT_R8G8_SNORM;
-
- case MESA_FORMAT_SIGNED_RGBA8888_REV:
- return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
-
default:
- assert(0);
- return 0;
+ assert(brw_format_for_mesa_format[mesa_format] != 0);
+ return brw_format_for_mesa_format[mesa_format];
}
}
@@ -274,6 +220,7 @@ brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo **out_bo,
uint32_t *out_offset)
{
+ struct intel_context *intel = &brw->intel;
const GLint w = width - 1;
struct brw_surface_state surf;
void *map;
@@ -284,6 +231,9 @@ brw_create_constant_surface(struct brw_context *brw,
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ if (intel->gen >= 6)
+ surf.ss0.render_cache_read_write = 1;
+
assert(bo);
surf.ss1.base_addr = bo->offset; /* reloc */
@@ -440,45 +390,19 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
key.surface_type = BRW_SURFACE_2D;
switch (irb->Base.Format) {
- /* XRGB and ARGB are treated the same here because the chips in this
- * family cannot render to XRGB targets. This means that we have to
- * mask writes to alpha (ala glColorMask) and reconfigure the alpha
- * blending hardware to use GL_ONE (or GL_ZERO) for cases where
- * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used.
- */
- case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
+ /* XRGB is handled as ARGB because the chips in this family
+ * cannot render to XRGB targets. This means that we have to
+ * mask writes to alpha (ala glColorMask) and reconfigure the
+ * alpha blending hardware to use GL_ONE (or GL_ZERO) for
+ * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
+ * used.
+ */
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
- case MESA_FORMAT_SARGB8:
- key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
- break;
- case MESA_FORMAT_RGB565:
- key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
- break;
- case MESA_FORMAT_ARGB1555:
- key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
- break;
- case MESA_FORMAT_ARGB4444:
- key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
- break;
- case MESA_FORMAT_A8:
- key.surface_format = BRW_SURFACEFORMAT_A8_UNORM;
- break;
- case MESA_FORMAT_R8:
- key.surface_format = BRW_SURFACEFORMAT_R8_UNORM;
- break;
- case MESA_FORMAT_R16:
- key.surface_format = BRW_SURFACEFORMAT_R16_UNORM;
- break;
- case MESA_FORMAT_RG88:
- key.surface_format = BRW_SURFACEFORMAT_R8G8_UNORM;
- break;
- case MESA_FORMAT_RG1616:
- key.surface_format = BRW_SURFACEFORMAT_R16G16_UNORM;
- break;
default:
- _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format);
+ key.surface_format = brw_format_for_mesa_format[irb->Base.Format];
+ assert(key.surface_format != 0);
}
key.tiling = region->tiling;
key.width = rb->Width;
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index 800a255521..c2631a7b4d 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -35,6 +35,7 @@
struct gen6_blend_state_key {
GLboolean color_blend, alpha_enabled;
GLboolean dither;
+ GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4];
GLenum logic_op;
@@ -54,6 +55,9 @@ blend_state_populate_key(struct brw_context *brw,
memset(key, 0, sizeof(*key));
/* _NEW_COLOR */
+ memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask));
+
+ /* _NEW_COLOR */
if (ctx->Color._LogicOpEnabled)
key->logic_op = ctx->Color.LogicOp;
else
@@ -87,54 +91,62 @@ static drm_intel_bo *
blend_state_create_from_key(struct brw_context *brw,
struct gen6_blend_state_key *key)
{
- struct gen6_blend_state blend;
+ struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS];
drm_intel_bo *bo;
+ int b;
memset(&blend, 0, sizeof(blend));
- if (key->logic_op != GL_COPY) {
- blend.blend1.logic_op_enable = 1;
- blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
- } else if (key->color_blend) {
- GLenum eqRGB = key->blend_eq_rgb;
- GLenum eqA = key->blend_eq_a;
- GLenum srcRGB = key->blend_src_rgb;
- GLenum dstRGB = key->blend_dst_rgb;
- GLenum srcA = key->blend_src_a;
- GLenum dstA = key->blend_dst_a;
-
- if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
- srcRGB = dstRGB = GL_ONE;
- }
-
- if (eqA == GL_MIN || eqA == GL_MAX) {
- srcA = dstA = GL_ONE;
+ for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) {
+ if (key->logic_op != GL_COPY) {
+ blend[b].blend1.logic_op_enable = 1;
+ blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
+ } else if (key->color_blend & (1 << b)) {
+ GLenum eqRGB = key->blend_eq_rgb;
+ GLenum eqA = key->blend_eq_a;
+ GLenum srcRGB = key->blend_src_rgb;
+ GLenum dstRGB = key->blend_dst_rgb;
+ GLenum srcA = key->blend_src_a;
+ GLenum dstA = key->blend_dst_a;
+
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+ srcRGB = dstRGB = GL_ONE;
+ }
+
+ if (eqA == GL_MIN || eqA == GL_MAX) {
+ srcA = dstA = GL_ONE;
+ }
+
+ blend[b].blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ blend[b].blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
+ blend[b].blend0.blend_func = brw_translate_blend_equation(eqRGB);
+
+ blend[b].blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ blend[b].blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
+ blend[b].blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+
+ blend[b].blend0.blend_enable = 1;
+ blend[b].blend0.ia_blend_enable = (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB);
}
- blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
- blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
- blend.blend0.blend_func = brw_translate_blend_equation(eqRGB);
-
- blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
- blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
- blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+ if (key->alpha_enabled) {
+ blend[b].blend1.alpha_test_enable = 1;
+ blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
- blend.blend0.blend_enable = 1;
- blend.blend0.ia_blend_enable = (srcA != srcRGB ||
- dstA != dstRGB ||
- eqA != eqRGB);
- }
-
- if (key->alpha_enabled) {
- blend.blend1.alpha_test_enable = 1;
- blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+ }
- }
+ if (key->dither) {
+ blend[b].blend1.dither_enable = 1;
+ blend[b].blend1.y_dither_offset = 0;
+ blend[b].blend1.x_dither_offset = 0;
+ }
- if (key->dither) {
- blend.blend1.dither_enable = 1;
- blend.blend1.y_dither_offset = 0;
- blend.blend1.x_dither_offset = 0;
+ blend[b].blend1.write_disable_r = !key->color_mask[b][0];
+ blend[b].blend1.write_disable_g = !key->color_mask[b][1];
+ blend[b].blend1.write_disable_b = !key->color_mask[b][2];
+ blend[b].blend1.write_disable_a = !key->color_mask[b][3];
}
bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE,
@@ -172,7 +184,7 @@ const struct brw_tracked_state gen6_blend_state = {
};
struct gen6_color_calc_state_key {
- GLubyte blend_constant_color[4];
+ float blend_constant_color[4];
GLclampf alpha_ref;
GLubyte stencil_ref[2];
};
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index c65b41e2b6..c7c4eb1f27 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -64,7 +64,9 @@ upload_clip_state(struct brw_context *brw)
userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
depth_clamp |
provoking);
- OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX);
+ OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
+ U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
+ GEN6_CLIP_FORCE_ZERO_RTAINDEX);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 471067e8f0..45c148baed 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -33,9 +33,10 @@
#include "intel_batchbuffer.h"
static uint32_t
-get_attr_override(struct brw_context *brw, int fs_attr)
+get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color)
{
int attr_index = 0, i, vs_attr;
+ int bfc = 0;
if (fs_attr <= FRAG_ATTRIB_TEX7)
vs_attr = fs_attr;
@@ -57,6 +58,30 @@ get_attr_override(struct brw_context *brw, int fs_attr)
attr_index++;
}
+ assert(attr_index < 32);
+
+ if (two_side_color) {
+ if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+ (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+ assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+ assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+ bfc = 2;
+ } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+ (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)))
+ bfc = 1;
+ }
+
+ if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) {
+ if (fs_attr == FRAG_ATTRIB_COL0)
+ attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+ else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) {
+ attr_index++;
+ attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+ } else {
+ attr_index += bfc;
+ }
+ }
+
return attr_index;
}
@@ -67,13 +92,15 @@ upload_sf_state(struct brw_context *brw)
struct gl_context *ctx = &intel->ctx;
/* CACHE_NEW_VS_PROG */
uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+ /* BRW_NEW_FRAGMENT_PROGRAM */
uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead);
- uint32_t dw1, dw2, dw3, dw4, dw16;
+ uint32_t dw1, dw2, dw3, dw4, dw16, dw17;
int i;
/* _NEW_BUFFER */
GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
int attr = 0;
int urb_start;
+ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled)
@@ -91,6 +118,7 @@ upload_sf_state(struct brw_context *brw)
dw3 = 0;
dw4 = 0;
dw16 = 0;
+ dw17 = 0;
/* _NEW_POLYGON */
if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
@@ -99,6 +127,48 @@ upload_sf_state(struct brw_context *brw)
if (ctx->Polygon.OffsetFill)
dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
+ if (ctx->Polygon.OffsetLine)
+ dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
+
+ if (ctx->Polygon.OffsetPoint)
+ dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+
+ switch (ctx->Polygon.FrontMode) {
+ case GL_FILL:
+ dw2 |= GEN6_SF_FRONT_SOLID;
+ break;
+
+ case GL_LINE:
+ dw2 |= GEN6_SF_FRONT_WIREFRAME;
+ break;
+
+ case GL_POINT:
+ dw2 |= GEN6_SF_FRONT_POINT;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (ctx->Polygon.BackMode) {
+ case GL_FILL:
+ dw2 |= GEN6_SF_BACK_SOLID;
+ break;
+
+ case GL_LINE:
+ dw2 |= GEN6_SF_BACK_WIREFRAME;
+ break;
+
+ case GL_POINT:
+ dw2 |= GEN6_SF_BACK_POINT;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
/* _NEW_SCISSOR */
if (ctx->Scissor.Enabled)
dw3 |= GEN6_SF_SCISSOR_ENABLE;
@@ -160,6 +230,12 @@ upload_sf_state(struct brw_context *brw)
}
}
+ /* flat shading */
+ if (ctx->Light.ShadeModel == GL_FLAT) {
+ dw17 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >>
+ ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1));
+ }
+
BEGIN_BATCH(20);
OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
OUT_BATCH(dw1);
@@ -174,7 +250,7 @@ upload_sf_state(struct brw_context *brw)
for (; attr < 64; attr++) {
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
- attr_overrides |= get_attr_override(brw, attr);
+ attr_overrides |= get_attr_override(brw, attr, two_side_color);
attr++;
break;
}
@@ -182,7 +258,7 @@ upload_sf_state(struct brw_context *brw)
for (; attr < 64; attr++) {
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
- attr_overrides |= get_attr_override(brw, attr) << 16;
+ attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16;
attr++;
break;
}
@@ -190,7 +266,7 @@ upload_sf_state(struct brw_context *brw)
OUT_BATCH(attr_overrides);
}
OUT_BATCH(dw16); /* point sprite texcoord bitmask */
- OUT_BATCH(0); /* constant interp bitmask */
+ OUT_BATCH(dw17); /* constant interp bitmask */
OUT_BATCH(0); /* wrapshortest enables 0-7 */
OUT_BATCH(0); /* wrapshortest enables 8-15 */
ADVANCE_BATCH();
@@ -205,7 +281,8 @@ const struct brw_tracked_state gen6_sf_state = {
_NEW_BUFFERS |
_NEW_POINT |
_NEW_TRANSFORM),
- .brw = BRW_NEW_CONTEXT,
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_FRAGMENT_PROGRAM),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_sf_state,
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index a34123478f..de97fd3783 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -72,7 +72,7 @@ const struct brw_tracked_state gen6_urb = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
- .cache = CACHE_NEW_VS_PROG,
+ .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG),
},
.prepare = prepare_urb,
.emit = upload_urb,
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e94d0c0ddb..ed132bdbd9 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -54,7 +54,7 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
- int params_uploaded = 0;
+ int params_uploaded = 0, param_regs;
float *param;
if (brw->vertex_program->IsNVProgram)
@@ -88,20 +88,11 @@ upload_vs_state(struct brw_context *brw)
params_uploaded++;
}
- if (vp->use_const_buffer) {
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- memcpy(param + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- params_uploaded++;
- }
- }
- } else {
- for (i = 0; i < nr_params; i++) {
- memcpy(param, vp->program.Base.Parameters->ParameterValues[i],
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ memcpy(param + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
4 * sizeof(float));
- param += 4;
params_uploaded++;
}
}
@@ -117,13 +108,16 @@ upload_vs_state(struct brw_context *brw)
drm_intel_gem_bo_unmap_gtt(constant_bo);
+ param_regs = (params_uploaded + 1) / 2;
+ assert(param_regs <= 32);
+
BEGIN_BATCH(5);
OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
OUT_RELOC(constant_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- ALIGN(params_uploaded, 2) / 2 - 1);
+ param_regs - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -136,6 +130,7 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+ GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(0); /* scratch space base offset */
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index ea5418bacf..2ae0c093eb 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -66,6 +66,21 @@ prepare_wm_constants(struct brw_context *brw)
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
*brw->wm.prog_data->param[i]);
}
+
+ if (0) {
+ printf("WM constants:\n");
+ for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
+ if ((i & 7) == 0)
+ printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8);
+ printf("%8f ", constants[i]);
+ if ((i & 7) == 7)
+ printf("\n");
+ }
+ if ((i & 7) != 0)
+ printf("\n");
+ printf("\n");
+ }
+
drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo);
}
}
@@ -88,6 +103,7 @@ upload_wm_state(struct brw_context *brw)
brw_fragment_program_const(brw->fragment_program);
uint32_t dw2, dw4, dw5, dw6;
+ /* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
@@ -104,7 +120,8 @@ upload_wm_state(struct brw_context *brw)
(5 - 2));
OUT_RELOC(brw->wm.push_const_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1);
+ ALIGN(brw->wm.prog_data->nr_params,
+ brw->wm.prog_data->dispatch_width) / 8 - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -116,6 +133,9 @@ upload_wm_state(struct brw_context *brw)
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+ /* OpenGL non-ieee floating point mode */
+ dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
+
/* BRW_NEW_NR_WM_SURFACES */
dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
@@ -126,8 +146,8 @@ upload_wm_state(struct brw_context *brw)
dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- if (fp->isGLSL)
+ /* CACHE_NEW_WM_PROG */
+ if (brw->wm.prog_data->dispatch_width == 8)
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
else
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
@@ -176,13 +196,14 @@ upload_wm_state(struct brw_context *brw)
const struct brw_tracked_state gen6_wm_state = {
.dirty = {
.mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS |
- _NEW_PROGRAM_CONSTANTS),
+ _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON),
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_NR_WM_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_BATCH),
- .cache = CACHE_NEW_SAMPLER
+ .cache = (CACHE_NEW_SAMPLER |
+ CACHE_NEW_WM_PROG)
},
.emit = upload_wm_state,
};