Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--  src/mesa/drivers/dri/i965/Makefile | 1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_cc.c | 128
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.c | 16
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.h | 61
-rw-r--r--  src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_curbe.c | 20
-rw-r--r--  src/mesa/drivers/dri/i965/brw_defines.h | 95
-rw-r--r--  src/mesa/drivers/dri/i965/brw_disasm.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw.c | 53
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw_upload.c | 519
-rw-r--r--  src/mesa/drivers/dri/i965/brw_eu.h | 3
-rw-r--r--  src/mesa/drivers/dri/i965/brw_eu_emit.c | 56
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fallback.c | 43
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.cpp | 567
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.h | 70
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 13
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp | 488
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 14
-rw-r--r--  src/mesa/drivers/dri/i965/brw_gs.c | 5
-rw-r--r--  src/mesa/drivers/dri/i965/brw_misc_state.c | 94
-rw-r--r--  src/mesa/drivers/dri/i965/brw_program.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_queryobj.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state.h | 13
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_batch.c | 82
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_cache.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_dump.c | 49
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_upload.c | 105
-rw-r--r--  src/mesa/drivers/dri/i965/brw_structs.h | 37
-rw-r--r--  src/mesa/drivers/dri/i965/brw_urb.c | 8
-rw-r--r--  src/mesa/drivers/dri/i965/brw_util.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/brw_util.h | 9
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs.c | 3
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs.h | 1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_emit.c | 103
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_state.c | 9
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 26
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vtbl.c | 24
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm.c | 11
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm.h | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_emit.c | 101
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 44
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_state.c | 5
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 508
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_cc.c | 26
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_clip_state.c | 9
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_gs_state.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_sampler_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_scissor_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_sf_state.c | 38
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_urb.c | 29
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_viewport_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_vs_state.c | 10
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_wm_state.c | 9
54 files changed, 2150 insertions(+), 1403 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 7c3ac0c14e..b05ba35d65 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -108,6 +108,7 @@ CXX_SOURCES = \
brw_fs.cpp \
brw_fs_channel_expressions.cpp \
brw_fs_reg_allocate.cpp \
+ brw_fs_schedule_instructions.cpp \
brw_fs_vector_splitting.cpp
ASM_SOURCES =
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index d3a1233aac..412d82ab3c 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -35,6 +35,7 @@
#include "brw_defines.h"
#include "brw_util.h"
#include "main/macros.h"
+#include "intel_batchbuffer.h"
void
brw_update_cc_vp(struct brw_context *brw)
@@ -92,61 +93,61 @@ static void upload_cc_unit(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_cc_unit_state cc;
- void *map;
+ struct brw_cc_unit_state *cc;
- memset(&cc, 0, sizeof(cc));
+ cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset);
+ memset(cc, 0, sizeof(*cc));
/* _NEW_STENCIL */
if (ctx->Stencil._Enabled) {
const unsigned back = ctx->Stencil._BackFace;
- cc.cc0.stencil_enable = 1;
- cc.cc0.stencil_func =
+ cc->cc0.stencil_enable = 1;
+ cc->cc0.stencil_func =
intel_translate_compare_func(ctx->Stencil.Function[0]);
- cc.cc0.stencil_fail_op =
+ cc->cc0.stencil_fail_op =
intel_translate_stencil_op(ctx->Stencil.FailFunc[0]);
- cc.cc0.stencil_pass_depth_fail_op =
+ cc->cc0.stencil_pass_depth_fail_op =
intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
- cc.cc0.stencil_pass_depth_pass_op =
+ cc->cc0.stencil_pass_depth_pass_op =
intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
- cc.cc1.stencil_ref = ctx->Stencil.Ref[0];
- cc.cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
- cc.cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];
+ cc->cc1.stencil_ref = ctx->Stencil.Ref[0];
+ cc->cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
+ cc->cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];
if (ctx->Stencil._TestTwoSide) {
- cc.cc0.bf_stencil_enable = 1;
- cc.cc0.bf_stencil_func =
+ cc->cc0.bf_stencil_enable = 1;
+ cc->cc0.bf_stencil_func =
intel_translate_compare_func(ctx->Stencil.Function[back]);
- cc.cc0.bf_stencil_fail_op =
+ cc->cc0.bf_stencil_fail_op =
intel_translate_stencil_op(ctx->Stencil.FailFunc[back]);
- cc.cc0.bf_stencil_pass_depth_fail_op =
+ cc->cc0.bf_stencil_pass_depth_fail_op =
intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
- cc.cc0.bf_stencil_pass_depth_pass_op =
+ cc->cc0.bf_stencil_pass_depth_pass_op =
intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
- cc.cc1.bf_stencil_ref = ctx->Stencil.Ref[back];
- cc.cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
- cc.cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
+ cc->cc1.bf_stencil_ref = ctx->Stencil.Ref[back];
+ cc->cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
+ cc->cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
}
/* Not really sure about this:
*/
if (ctx->Stencil.WriteMask[0] ||
(ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back]))
- cc.cc0.stencil_write_enable = 1;
+ cc->cc0.stencil_write_enable = 1;
}
/* _NEW_COLOR */
if (ctx->Color._LogicOpEnabled && ctx->Color.LogicOp != GL_COPY) {
- cc.cc2.logicop_enable = 1;
- cc.cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp);
+ cc->cc2.logicop_enable = 1;
+ cc->cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp);
} else if (ctx->Color.BlendEnabled) {
- GLenum eqRGB = ctx->Color.BlendEquationRGB;
- GLenum eqA = ctx->Color.BlendEquationA;
- GLenum srcRGB = ctx->Color.BlendSrcRGB;
- GLenum dstRGB = ctx->Color.BlendDstRGB;
- GLenum srcA = ctx->Color.BlendSrcA;
- GLenum dstA = ctx->Color.BlendDstA;
+ GLenum eqRGB = ctx->Color.Blend[0].EquationRGB;
+ GLenum eqA = ctx->Color.Blend[0].EquationA;
+ GLenum srcRGB = ctx->Color.Blend[0].SrcRGB;
+ GLenum dstRGB = ctx->Color.Blend[0].DstRGB;
+ GLenum srcA = ctx->Color.Blend[0].SrcA;
+ GLenum dstA = ctx->Color.Blend[0].DstA;
/* If the renderbuffer is XRGB, we have to frob the blend function to
* force the destination alpha to 1.0. This means replacing GL_DST_ALPHA
@@ -167,58 +168,55 @@ static void upload_cc_unit(struct brw_context *brw)
srcA = dstA = GL_ONE;
}
- cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
- cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
- cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
+ cc->cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ cc->cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+ cc->cc6.blend_function = brw_translate_blend_equation(eqRGB);
- cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
- cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
- cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
+ cc->cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ cc->cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+ cc->cc5.ia_blend_function = brw_translate_blend_equation(eqA);
- cc.cc3.blend_enable = 1;
- cc.cc3.ia_blend_enable = (srcA != srcRGB ||
+ cc->cc3.blend_enable = 1;
+ cc->cc3.ia_blend_enable = (srcA != srcRGB ||
dstA != dstRGB ||
eqA != eqRGB);
}
if (ctx->Color.AlphaEnabled) {
- cc.cc3.alpha_test = 1;
- cc.cc3.alpha_test_func =
+ cc->cc3.alpha_test = 1;
+ cc->cc3.alpha_test_func =
intel_translate_compare_func(ctx->Color.AlphaFunc);
- cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+ cc->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
- UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], ctx->Color.AlphaRef);
+ UNCLAMPED_FLOAT_TO_UBYTE(cc->cc7.alpha_ref.ub[0], ctx->Color.AlphaRef);
}
if (ctx->Color.DitherFlag) {
- cc.cc5.dither_enable = 1;
- cc.cc6.y_dither_offset = 0;
- cc.cc6.x_dither_offset = 0;
+ cc->cc5.dither_enable = 1;
+ cc->cc6.y_dither_offset = 0;
+ cc->cc6.x_dither_offset = 0;
}
/* _NEW_DEPTH */
if (ctx->Depth.Test) {
- cc.cc2.depth_test = 1;
- cc.cc2.depth_test_function =
+ cc->cc2.depth_test = 1;
+ cc->cc2.depth_test_function =
intel_translate_compare_func(ctx->Depth.Func);
- cc.cc2.depth_write_enable = ctx->Depth.Mask;
+ cc->cc2.depth_write_enable = ctx->Depth.Mask;
}
if (intel->stats_wm || unlikely(INTEL_DEBUG & DEBUG_STATS))
- cc.cc5.statistics_enable = 1;
+ cc->cc5.statistics_enable = 1;
/* CACHE_NEW_CC_VP */
- cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
+ cc->cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
- map = brw_state_batch(brw, sizeof(cc), 64,
- &brw->cc.state_bo, &brw->cc.state_offset);
- memcpy(map, &cc, sizeof(cc));
brw->state.dirty.cache |= CACHE_NEW_CC_UNIT;
/* Emit CC viewport relocation */
- drm_intel_bo_emit_reloc(brw->cc.state_bo, (brw->cc.state_offset +
- offsetof(struct brw_cc_unit_state,
- cc4)),
+ drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+ (brw->cc.state_offset +
+ offsetof(struct brw_cc_unit_state, cc4)),
brw->cc.vp_bo, 0,
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
@@ -235,18 +233,16 @@ const struct brw_tracked_state brw_cc_unit = {
static void upload_blend_constant_color(struct brw_context *brw)
{
- struct gl_context *ctx = &brw->intel.ctx;
- struct brw_blend_constant_color bcc;
-
- memset(&bcc, 0, sizeof(bcc));
- bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
- bcc.header.length = sizeof(bcc)/4-2;
- bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
- bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
- bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
- bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
-
- BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2));
+ OUT_BATCH_F(ctx->Color.BlendColor[0]);
+ OUT_BATCH_F(ctx->Color.BlendColor[1]);
+ OUT_BATCH_F(ctx->Color.BlendColor[2]);
+ OUT_BATCH_F(ctx->Color.BlendColor[3]);
+ CACHED_BATCH();
}
const struct brw_tracked_state brw_blend_constant_color = {
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 28549f2574..9483ec69d9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -151,6 +151,22 @@ GLboolean brwCreateContext( int api,
MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
ctx->Const.FragmentProgram.MaxEnvParams);
+ /* Fragment shaders use real, 32-bit twos-complement integers for all
+ * integer types.
+ */
+ ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
+ ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
+ ctx->Const.FragmentProgram.LowInt.Precision = 0;
+ ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.MediumInt
+ = ctx->Const.FragmentProgram.LowInt;
+
+ /* Gen6 converts quads to polygon in beginning of 3D pipeline,
+ but we're not sure how it's actually done for vertex order,
+ that affect provoking vertex decision. Always use last vertex
+ convention for quad primitive which works as expected for now. */
+ if (intel->gen == 6)
+ ctx->Const.QuadsFollowProvokingVertexConvention = GL_FALSE;
+
if (intel->is_g4x || intel->gen >= 5) {
brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7069724466..7b0551a92b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -145,7 +145,7 @@ struct brw_context;
#define BRW_NEW_NR_VS_SURFACES 0x80000
#define BRW_NEW_INDEX_BUFFER 0x100000
#define BRW_NEW_VS_CONSTBUF 0x200000
-#define BRW_NEW_WM_CONSTBUF 0x200000
+#define BRW_NEW_WM_CONSTBUF 0x400000
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@@ -408,21 +408,24 @@ struct brw_cached_batch_item {
*/
#define ATTRIB_BIT_DWORDS ((VERT_ATTRIB_MAX+31)/32)
+struct brw_vertex_buffer {
+ /** Buffer object containing the uploaded vertex data */
+ drm_intel_bo *bo;
+ uint32_t offset;
+ /** Byte stride between elements in the uploaded array */
+ GLuint stride;
+};
struct brw_vertex_element {
const struct gl_client_array *glarray;
+ int buffer;
+
/** The corresponding Mesa vertex attribute */
gl_vert_attrib attrib;
/** Size of a complete element */
GLuint element_size;
- /** Number of uploaded elements for this input. */
- GLuint count;
- /** Byte stride between elements in the uploaded array */
- GLuint stride;
/** Offset of the first element within the buffer object */
unsigned int offset;
- /** Buffer object containing the uploaded vertex data */
- drm_intel_bo *bo;
};
@@ -457,12 +460,10 @@ struct brw_context
GLboolean has_negative_rhw_bug;
GLboolean has_aa_line_parameters;
GLboolean has_pln;
-;
+
struct {
struct brw_state_flags dirty;
- GLuint nr_color_regions;
- struct intel_region *color_regions[MAX_DRAW_BUFFERS];
struct intel_region *depth_region;
/**
@@ -485,23 +486,27 @@ struct brw_context
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+ struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
+ struct {
+ uint32_t handle;
+ uint32_t offset;
+ uint32_t stride;
+ } current_buffers[VERT_ATTRIB_MAX];
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint nr_enabled;
-
-#define BRW_NR_UPLOAD_BUFS 17
-#define BRW_UPLOAD_INIT_SIZE (128*1024)
-
- struct {
- drm_intel_bo *bo;
- GLuint offset;
- } upload;
+ GLuint nr_buffers, nr_current_buffers;
/* Summary of size and varying of active arrays, so we can check
* for changes to this state:
*/
struct brw_vertex_info info;
unsigned int min_index, max_index;
+
+ /* Offset from start of vertex buffer so we can avoid redefining
+ * the same VB packed over and over again.
+ */
+ unsigned int start_vertex_bias;
} vb;
struct {
@@ -512,10 +517,10 @@ struct brw_context
*/
const struct _mesa_index_buffer *ib;
- /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+ /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
drm_intel_bo *bo;
- unsigned int offset;
- unsigned int size;
+ GLuint type;
+
/* Offset to index buffer index to use in CMD_3D_PRIM so that we can
* avoid re-uploading the IB packet over and over if we're actually
* referencing the same index buffer.
@@ -528,11 +533,6 @@ struct brw_context
const struct gl_vertex_program *vertex_program;
const struct gl_fragment_program *fragment_program;
-
- /* For populating the gtt:
- */
- GLuint next_free_page;
-
/* hw-dependent 3DSTATE_VF_STATISTICS opcode */
uint32_t CMD_VF_STATISTICS;
/* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
@@ -612,9 +612,7 @@ struct brw_context
drm_intel_bo *const_bo;
/** Binding table of pointers to surf_bo entries */
- drm_intel_bo *bind_bo;
uint32_t bind_bo_offset;
- drm_intel_bo *surf_bo[BRW_VS_MAX_SURF];
uint32_t surf_offset[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
} vs;
@@ -666,9 +664,7 @@ struct brw_context
drm_intel_bo *sampler_bo;
/** Binding table of pointers to surf_bo entries */
- drm_intel_bo *bind_bo;
uint32_t bind_bo_offset;
- drm_intel_bo *surf_bo[BRW_WM_MAX_SURF];
uint32_t surf_offset[BRW_WM_MAX_SURF];
drm_intel_bo *prog_bo;
@@ -693,7 +689,6 @@ struct brw_context
drm_intel_bo *depth_stencil_state_bo;
drm_intel_bo *color_calc_state_bo;
- drm_intel_bo *state_bo;
uint32_t state_offset;
} cc;
@@ -706,6 +701,9 @@ struct brw_context
/* Used to give every program string a unique id
*/
GLuint program_id;
+
+ int num_prepare_atoms, num_emit_atoms;
+ struct brw_tracked_state prepare_atoms[64], emit_atoms[64];
};
@@ -841,4 +839,3 @@ float convert_param(enum param_conversion conversion, float param)
GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
#endif
-
diff --git a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
index 35bea68121..8574169e47 100644
--- a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
@@ -51,7 +51,7 @@ brw_cubemap_normalize_visitor::visit_leave(ir_texture *ir)
if (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_CUBE)
return visit_continue;
- void *mem_ctx = talloc_parent(ir);
+ void *mem_ctx = ralloc_parent(ir);
ir_variable *var = new(mem_ctx) ir_variable(ir->coordinate->type,
"coordinate", ir_var_auto);
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 877b22fec1..ae11c487a2 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -146,22 +146,24 @@ const struct brw_tracked_state brw_curbe_offsets = {
*/
void brw_upload_cs_urb_state(struct brw_context *brw)
{
- struct brw_cs_urb_state cs_urb;
- memset(&cs_urb, 0, sizeof(cs_urb));
+ struct intel_context *intel = &brw->intel;
+ BEGIN_BATCH(2);
/* It appears that this is the state packet for the CS unit, ie. the
* urb entries detailed here are housed in the CS range from the
* URB_FENCE command.
*/
- cs_urb.header.opcode = CMD_CS_URB_STATE;
- cs_urb.header.length = sizeof(cs_urb)/4 - 2;
+ OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
/* BRW_NEW_URB_FENCE */
- cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
- cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
-
- assert(brw->urb.nr_cs_entries);
- BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+ if (brw->urb.csize == 0) {
+ OUT_BATCH(0);
+ } else {
+ /* BRW_NEW_URB_FENCE */
+ assert(brw->urb.nr_cs_entries);
+ OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
+ }
+ CACHED_BATCH();
}
static GLfloat fixed_plane[6][4] = {
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 5c5b8259e1..5496b4fdd3 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -35,28 +35,6 @@
/* 3D state:
*/
-#define _3DOP_3DSTATE_PIPELINED 0x0
-#define _3DOP_3DSTATE_NONPIPELINED 0x1
-#define _3DOP_3DCONTROL 0x2
-#define _3DOP_3DPRIMITIVE 0x3
-
-#define _3DSTATE_PIPELINED_POINTERS 0x00
-#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
-#define _3DSTATE_VERTEX_BUFFERS 0x08
-#define _3DSTATE_VERTEX_ELEMENTS 0x09
-#define _3DSTATE_INDEX_BUFFER 0x0A
-#define _3DSTATE_VF_STATISTICS 0x0B
-#define _3DSTATE_DRAWING_RECTANGLE 0x00
-#define _3DSTATE_CONSTANT_COLOR 0x01
-#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
-#define _3DSTATE_CHROMA_KEY 0x04
-#define _3DSTATE_DEPTH_BUFFER 0x05
-#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
-#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
-#define _3DSTATE_LINE_STIPPLE 0x08
-#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
-#define _3DCONTROL 0x00
-
#define PIPE_CONTROL_NOWRITE 0x00
#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
#define PIPE_CONTROL_WRITEDEPTH 0x02
@@ -389,6 +367,7 @@
#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
+#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180
#define BRW_SURFACEFORMAT_R1_UINT 0x181
#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
@@ -700,6 +679,8 @@
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
@@ -838,13 +819,13 @@
#define CMD_PIPELINE_SELECT_965 0x6104
#define CMD_PIPELINE_SELECT_GM45 0x6904
-#define CMD_PIPELINED_STATE_POINTERS 0x7800
-#define CMD_BINDING_TABLE_PTRS 0x7801
+#define _3DSTATE_PIPELINED_POINTERS 0x7800
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801
# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
-#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
# define GS_SAMPLER_STATE_CHANGE (1 << 9)
# define VS_SAMPLER_STATE_CHANGE (1 << 8)
@@ -885,27 +866,29 @@
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS_965 0x780b
#define CMD_VF_STATISTICS_GM45 0x680b
-#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */
+#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */
-#define CMD_URB 0x7805 /* GEN6+ */
+#define _3DSTATE_URB 0x7805 /* GEN6+ */
# define GEN6_URB_VS_SIZE_SHIFT 16
# define GEN6_URB_VS_ENTRIES_SHIFT 0
# define GEN6_URB_GS_ENTRIES_SHIFT 8
# define GEN6_URB_GS_SIZE_SHIFT 0
-#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
+#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
-#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
+#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
-#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */
+#define _3DSTATE_VS 0x7810 /* GEN6+ */
/* DW2 */
# define GEN6_VS_SPF_MODE (1 << 31)
# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW4 */
# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
@@ -916,7 +899,7 @@
# define GEN6_VS_CACHE_DISABLE (1 << 1)
# define GEN6_VS_ENABLE (1 << 0)
-#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */
+#define _3DSTATE_GS 0x7811 /* GEN6+ */
/* DW2 */
# define GEN6_GS_SPF_MODE (1 << 31)
# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
@@ -934,7 +917,7 @@
/* DW6 */
# define GEN6_GS_ENABLE (1 << 15)
-#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */
+#define _3DSTATE_CLIP 0x7812 /* GEN6+ */
/* DW1 */
# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
/**
@@ -964,7 +947,7 @@
# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
-#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */
+#define _3DSTATE_SF 0x7813 /* GEN6+ */
/* DW1 */
# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
@@ -1029,18 +1012,27 @@
# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
/* DW16: Point sprite texture coordinate enables */
/* DW17: Constant interpolation enables */
/* DW18: attr 0-7 wrap shortest enables */
/* DW19: attr 8-16 wrap shortest enables */
-#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */
+#define _3DSTATE_WM 0x7814 /* GEN6+ */
/* DW1: kernel pointer */
/* DW2 */
# define GEN6_WM_SPF_MODE (1 << 31)
# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
@@ -1095,34 +1087,34 @@
/* DW7: kernel 1 pointer */
/* DW8: kernel 2 pointer */
-#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */
-#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */
-#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */
+#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */
+#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */
+#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */
# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
-#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */
+#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */
-#define CMD_DRAW_RECT 0x7900
-#define CMD_BLEND_CONSTANT_COLOR 0x7901
-#define CMD_CHROMA_KEY 0x7904
-#define CMD_DEPTH_BUFFER 0x7905
-#define CMD_POLY_STIPPLE_OFFSET 0x7906
-#define CMD_POLY_STIPPLE_PATTERN 0x7907
-#define CMD_LINE_STIPPLE_PATTERN 0x7908
-#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
-#define CMD_AA_LINE_PARAMETERS 0x790a
+#define _3DSTATE_DRAWING_RECTANGLE 0x7900
+#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901
+#define _3DSTATE_CHROMA_KEY 0x7904
+#define _3DSTATE_DEPTH_BUFFER 0x7905
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907
+#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */
-#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */
+#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */
/* DW1 */
# define SVB_INDEX_SHIFT 29
# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
/* DW2: SVB index */
/* DW3: SVB maximum index */
-#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */
+#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */
/* DW1 */
# define MS_PIXEL_LOCATION_CENTER (0 << 4)
# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
@@ -1130,7 +1122,10 @@
# define MS_NUMSAMPLES_4 (2 << 1)
# define MS_NUMSAMPLES_8 (3 << 1)
-#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */
+#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */
+#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */
+
+#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK+ */
# define DEPTH_CLEAR_VALID (1 << 15)
/* DW1: depth clear value */
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 6b61f7af15..111cb9974e 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -973,7 +973,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.dp_render_cache.send_commit_msg,
inst->bits3.dp_render_cache.msg_length,
inst->bits3.dp_render_cache.response_length);
- } else if (gen >= 5) {
+ } else if (gen >= 5 /* FINISHME: || is_g4x */) {
format (file, " (%d, %d, %d)",
inst->bits3.dp_read_gen5.binding_table_index,
inst->bits3.dp_read_gen5.msg_control,
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index a1f403ca4e..f5abe021c4 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -145,9 +145,14 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.start_vert_location = prim->start;
if (prim->indexed)
prim_packet.start_vert_location += brw->ib.start_vertex_offset;
+ else
+ prim_packet.start_vert_location += brw->vb.start_vertex_bias;
prim_packet.instance_count = 1;
prim_packet.start_instance_location = 0;
prim_packet.base_vert_location = prim->basevertex;
+ if (prim->indexed)
+ prim_packet.base_vert_location += brw->vb.start_vertex_bias;
+
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
@@ -155,14 +160,14 @@ static void brw_emit_prim(struct brw_context *brw,
* the besides the draw code.
*/
if (intel->always_flush_cache) {
- intel_batchbuffer_emit_mi_flush(intel->batch);
+ intel_batchbuffer_emit_mi_flush(intel);
}
if (prim_packet.verts_per_instance) {
- intel_batchbuffer_data( brw->intel.batch, &prim_packet,
- sizeof(prim_packet));
+ intel_batchbuffer_data(&brw->intel, &prim_packet,
+ sizeof(prim_packet), false);
}
if (intel->always_flush_cache) {
- intel_batchbuffer_emit_mi_flush(intel->batch);
+ intel_batchbuffer_emit_mi_flush(intel);
}
}
@@ -172,13 +177,16 @@ static void brw_merge_inputs( struct brw_context *brw,
struct brw_vertex_info old = brw->vb.info;
GLuint i;
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- drm_intel_bo_unreference(brw->vb.inputs[i].bo);
+ for (i = 0; i < brw->vb.nr_buffers; i++) {
+ drm_intel_bo_unreference(brw->vb.buffers[i].bo);
+ brw->vb.buffers[i].bo = NULL;
+ }
+ brw->vb.nr_buffers = 0;
- memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
memset(&brw->vb.info, 0, sizeof(brw->vb.info));
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ brw->vb.inputs[i].buffer = -1;
brw->vb.inputs[i].glarray = arrays[i];
brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
@@ -303,7 +311,6 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
struct brw_context *brw = brw_context(ctx);
GLboolean retval = GL_FALSE;
GLboolean warn = GL_FALSE;
- GLboolean first_time = GL_TRUE;
GLuint i;
if (ctx->NewState)
@@ -351,13 +358,10 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
* an upper bound of how much we might emit in a single
* brw_try_draw_prims().
*/
- intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4);
+ intel_batchbuffer_require_space(intel, 1024, false);
hw_prim = brw_set_prim(brw, &prim[i]);
-
- if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
- first_time = GL_FALSE;
-
+ if (brw->state.dirty.brw) {
brw_validate_state(brw);
/* Various fallback checks: */
@@ -370,7 +374,7 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
brw->state.validated_bo_count)) {
static GLboolean warned;
- intel_batchbuffer_flush(intel->batch);
+ intel_batchbuffer_flush(intel);
/* Validate the state after we flushed the batch (which would have
* changed the set of dirty state). If we still fail to
@@ -399,7 +403,7 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
}
if (intel->always_flush_batch)
- intel_batchbuffer_flush(intel->batch);
+ intel_batchbuffer_flush(intel);
out:
brw_state_cache_check_size(brw);
@@ -460,25 +464,32 @@ void brw_draw_init( struct brw_context *brw )
{
struct gl_context *ctx = &brw->intel.ctx;
struct vbo_context *vbo = vbo_context(ctx);
+ int i;
/* Register our drawing function:
*/
vbo->draw_prims = brw_draw_prims;
+
+ for (i = 0; i < VERT_ATTRIB_MAX; i++)
+ brw->vb.inputs[i].buffer = -1;
+ brw->vb.nr_buffers = 0;
+ brw->vb.nr_enabled = 0;
}
void brw_draw_destroy( struct brw_context *brw )
{
int i;
- if (brw->vb.upload.bo != NULL) {
- drm_intel_bo_unreference(brw->vb.upload.bo);
- brw->vb.upload.bo = NULL;
+ for (i = 0; i < brw->vb.nr_buffers; i++) {
+ drm_intel_bo_unreference(brw->vb.buffers[i].bo);
+ brw->vb.buffers[i].bo = NULL;
}
+ brw->vb.nr_buffers = 0;
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- drm_intel_bo_unreference(brw->vb.inputs[i].bo);
- brw->vb.inputs[i].bo = NULL;
+ for (i = 0; i < brw->vb.nr_enabled; i++) {
+ brw->vb.enabled[i]->buffer = -1;
}
+ brw->vb.nr_enabled = 0;
drm_intel_bo_unreference(brw->ib.bo);
brw->ib.bo = NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 405e161bdb..78885b58a8 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -25,6 +25,7 @@
*
**************************************************************************/
+#undef NDEBUG
#include "main/glheader.h"
#include "main/bufferobj.h"
@@ -209,7 +210,7 @@ static GLuint get_surface_type( GLenum type, GLuint size,
case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
case GL_FIXED: return float_types[size]; /* was uploaded as floats */
default: assert(0); return 0;
- }
+ }
}
}
@@ -227,11 +228,11 @@ static GLuint get_size( GLenum type )
case GL_UNSIGNED_SHORT: return sizeof(GLushort);
case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
case GL_FIXED: return sizeof(GLfloat); /* will be uploaded as floats */
- default: return 0;
- }
+ default: assert(0); return 0;
+ }
}
-static GLuint get_index_type(GLenum type)
+static GLuint get_index_type(GLenum type)
{
switch (type) {
case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
@@ -241,60 +242,23 @@ static GLuint get_index_type(GLenum type)
}
}
-static void wrap_buffers( struct brw_context *brw,
- GLuint size )
-{
- if (size < BRW_UPLOAD_INIT_SIZE)
- size = BRW_UPLOAD_INIT_SIZE;
-
- brw->vb.upload.offset = 0;
-
- if (brw->vb.upload.bo != NULL)
- drm_intel_bo_unreference(brw->vb.upload.bo);
- brw->vb.upload.bo = drm_intel_bo_alloc(brw->intel.bufmgr, "temporary VBO",
- size, 1);
-}
-
-static void get_space( struct brw_context *brw,
- GLuint size,
- drm_intel_bo **bo_return,
- GLuint *offset_return )
-{
- size = ALIGN(size, 64);
-
- if (brw->vb.upload.bo == NULL ||
- brw->vb.upload.offset + size > brw->vb.upload.bo->size) {
- wrap_buffers(brw, size);
- }
-
- assert(*bo_return == NULL);
- drm_intel_bo_reference(brw->vb.upload.bo);
- *bo_return = brw->vb.upload.bo;
- *offset_return = brw->vb.upload.offset;
- brw->vb.upload.offset += size;
-}
-
static void
-copy_array_to_vbo_array( struct brw_context *brw,
- struct brw_vertex_element *element,
- GLuint dst_stride)
+copy_array_to_vbo_array(struct brw_context *brw,
+ struct brw_vertex_element *element,
+ int min, int max,
+ struct brw_vertex_buffer *buffer,
+ GLuint dst_stride)
{
- GLuint size = element->count * dst_stride;
-
- get_space(brw, size, &element->bo, &element->offset);
-
- if (element->glarray->StrideB == 0) {
- assert(element->count == 1);
- element->stride = 0;
- } else {
- element->stride = dst_stride;
- }
+ int src_stride = element->glarray->StrideB;
+ const unsigned char *src = element->glarray->Ptr + min * src_stride;
+ int count = max - min + 1;
+ GLuint size = count * dst_stride;
/* upload as floats */
if (element->glarray->Type == GL_FIXED) {
+ char * const map = intel_upload_map(&brw->intel, size, dst_stride);
+ char *dst = map;
drm_intel_bo *src_bo = NULL;
- const char *src;
- char *dst;
GLint i, j;
/* map source bo */
@@ -305,68 +269,58 @@ copy_array_to_vbo_array( struct brw_context *brw,
src_bo = intel_bufferobj_buffer(&brw->intel, intel_buffer, INTEL_READ);
drm_intel_gem_bo_map_gtt(src_bo);
- src = (const char *) element->bo->virtual +
- (unsigned long) element->glarray->Ptr;
- }
- else {
- src = (const char *) element->glarray->Ptr;
+ src = (const unsigned char *) src_bo->virtual + (unsigned long) src;
}
- drm_intel_gem_bo_map_gtt(element->bo);
- dst = (char *) element->bo->virtual + element->offset;
-
- for (i = 0; i < element->count; i++) {
+ while (count--) {
const GLint *s = (GLint *) src;
GLfloat *d = (GLfloat *) dst;
+ int i;
- for (j = 0; j < element->glarray->Size; j++)
- d[j] = s[j] / 65536.0f;
+ for (i = 0; i < element->glarray->Size; i++)
+ d[i] = s[i] / 65536.0f;
- src += element->glarray->StrideB;
- dst += dst_stride;
+ src += src_stride;
+ dst += dst_stride;
}
- drm_intel_gem_bo_unmap_gtt(element->bo);
+ intel_upload_unmap(&brw->intel, map, size, dst_stride,
+ &buffer->bo, &buffer->offset);
+
if (src_bo)
drm_intel_gem_bo_unmap_gtt(src_bo);
return;
}
- if (dst_stride == element->glarray->StrideB) {
- drm_intel_gem_bo_map_gtt(element->bo);
- memcpy((char *)element->bo->virtual + element->offset,
- element->glarray->Ptr, size);
- drm_intel_gem_bo_unmap_gtt(element->bo);
+ if (dst_stride == src_stride) {
+ intel_upload_data(&brw->intel, src, size, dst_stride,
+ &buffer->bo, &buffer->offset);
} else {
- char *dest;
- const unsigned char *src = element->glarray->Ptr;
- int i;
-
- drm_intel_gem_bo_map_gtt(element->bo);
- dest = element->bo->virtual;
- dest += element->offset;
-
- for (i = 0; i < element->count; i++) {
- memcpy(dest, src, dst_stride);
- src += element->glarray->StrideB;
- dest += dst_stride;
- }
+ char * const map = intel_upload_map(&brw->intel, size, dst_stride);
+ char *dst = map;
- drm_intel_gem_bo_unmap_gtt(element->bo);
+ while (count--) {
+ memcpy(dst, src, dst_stride);
+ src += src_stride;
+ dst += dst_stride;
+ }
+ intel_upload_unmap(&brw->intel, map, size, dst_stride,
+ &buffer->bo, &buffer->offset);
}
+ buffer->stride = dst_stride;
}
static void brw_prepare_vertices(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
struct intel_context *intel = intel_context(ctx);
- GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
- GLuint i;
+ GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
const unsigned char *ptr = NULL;
- GLuint interleave = 0;
+ GLuint interleaved = 0, total_size = 0;
unsigned int min_index = brw->vb.min_index;
unsigned int max_index = brw->vb.max_index;
+ int delta, i, j;
struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0;
@@ -379,13 +333,20 @@ static void brw_prepare_vertices(struct brw_context *brw)
/* Accumulate the list of enabled arrays. */
brw->vb.nr_enabled = 0;
while (vs_inputs) {
- GLuint i = _mesa_ffsll(vs_inputs) - 1;
+ GLuint i = ffs(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
vs_inputs &= ~(1 << i);
- brw->vb.enabled[brw->vb.nr_enabled++] = input;
+ if (input->glarray->Size && get_size(input->glarray->Type))
+ brw->vb.enabled[brw->vb.nr_enabled++] = input;
}
+ if (brw->vb.nr_enabled == 0)
+ return;
+
+ if (brw->vb.nr_buffers)
+ goto validate;
+
/* XXX: In the rare cases where this happens we fallback all
* the way to software rasterization, although a tnl fallback
* would be sufficient. I don't know of *any* real world
@@ -397,24 +358,44 @@ static void brw_prepare_vertices(struct brw_context *brw)
return;
}
- for (i = 0; i < brw->vb.nr_enabled; i++) {
+ for (i = j = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
+ const struct gl_client_array *glarray = input->glarray;
+ int type_size = get_size(glarray->Type);
- input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
+ input->element_size = type_size * glarray->Size;
- if (_mesa_is_bufferobj(input->glarray->BufferObj) &&
- input->glarray->Type != GL_FIXED) {
+ if (_mesa_is_bufferobj(glarray->BufferObj) &&
+ glarray->Type != GL_FIXED) {
struct intel_buffer_object *intel_buffer =
- intel_buffer_object(input->glarray->BufferObj);
-
- /* Named buffer object: Just reference its contents directly. */
- drm_intel_bo_unreference(input->bo);
- input->bo = intel_bufferobj_buffer(intel, intel_buffer,
- INTEL_READ);
- drm_intel_bo_reference(input->bo);
- input->offset = (unsigned long)input->glarray->Ptr;
- input->stride = input->glarray->StrideB;
- input->count = input->glarray->_MaxElement;
+ intel_buffer_object(glarray->BufferObj);
+ int k;
+
+ for (k = 0; k < i; k++) {
+ const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
+ if (glarray->BufferObj == other->BufferObj &&
+ glarray->StrideB == other->StrideB &&
+ (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
+ {
+ input->buffer = brw->vb.enabled[k]->buffer;
+ input->offset = glarray->Ptr - other->Ptr;
+ break;
+ }
+ }
+ if (k == i) {
+ struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
+
+ /* Named buffer object: Just reference its contents directly. */
+ buffer->bo = intel_bufferobj_source(intel,
+ intel_buffer, type_size,
+ &buffer->offset);
+ drm_intel_bo_reference(buffer->bo);
+ buffer->offset += (uintptr_t)glarray->Ptr;
+ buffer->stride = glarray->StrideB;
+
+ input->buffer = j++;
+ input->offset = 0;
+ }
/* This is a common place to reach if the user mistakenly supplies
* a pointer in place of a VBO offset. If we just let it go through,
@@ -428,71 +409,170 @@ static void brw_prepare_vertices(struct brw_context *brw)
* probably a service to the poor programmer to do so rather than
* trying to just not render.
*/
- assert(input->offset < input->bo->size);
+ assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
} else {
- input->count = input->glarray->StrideB ? max_index + 1 : 1;
- if (input->bo != NULL) {
- /* Already-uploaded vertex data is present from a previous
- * prepare_vertices, but we had to re-validate state due to
- * check_aperture failing and a new batch being produced.
- */
- continue;
- }
-
/* Queue the buffer object up to be uploaded in the next pass,
* when we've decided if we're doing interleaved or not.
*/
- if (input->attrib == VERT_ATTRIB_POS) {
+ if (nr_uploads == 0) {
/* Position array not properly enabled:
*/
- if (input->glarray->StrideB == 0) {
+ if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) {
intel->Fallback = GL_TRUE; /* boolean, not bitfield */
return;
}
- interleave = input->glarray->StrideB;
- ptr = input->glarray->Ptr;
+ interleaved = glarray->StrideB;
+ ptr = glarray->Ptr;
+ }
+ else if (interleaved != glarray->StrideB ||
+ (uintptr_t)(glarray->Ptr - ptr) > interleaved)
+ {
+ interleaved = 0;
}
- else if (interleave != input->glarray->StrideB ||
- (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
- (const unsigned char *)input->glarray->Ptr - ptr > interleave)
+ else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size -1))
{
- interleave = 0;
+ /* enforce natural alignment (for doubles) */
+ interleaved = 0;
}
upload[nr_uploads++] = input;
+ total_size = ALIGN(total_size, type_size);
+ total_size += input->element_size;
}
}
+ /* If we need to upload all the arrays, then we can trim those arrays to
+ * only the used elements [min_index, max_index] so long as we adjust all
+ * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
+ */
+ brw->vb.start_vertex_bias = 0;
+ delta = min_index;
+ if (nr_uploads == brw->vb.nr_enabled) {
+ brw->vb.start_vertex_bias = -delta;
+ delta = 0;
+ }
+ if (delta && !brw->intel.intelScreen->relaxed_relocations)
+ min_index = delta = 0;
+
/* Handle any arrays to be uploaded. */
- if (nr_uploads > 1 && interleave && interleave <= 256) {
- /* All uploads are interleaved, so upload the arrays together as
- * interleaved. First, upload the contents and set up upload[0].
- */
- copy_array_to_vbo_array(brw, upload[0], interleave);
-
- for (i = 1; i < nr_uploads; i++) {
- /* Then, just point upload[i] at upload[0]'s buffer. */
- upload[i]->stride = interleave;
- upload[i]->offset = upload[0]->offset +
- ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
- upload[i]->bo = upload[0]->bo;
- drm_intel_bo_reference(upload[i]->bo);
+ if (nr_uploads > 1) {
+ if (interleaved && interleaved <= 2*total_size) {
+ struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
+ /* All uploads are interleaved, so upload the arrays together as
+ * interleaved. First, upload the contents and set up upload[0].
+ */
+ copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
+ buffer, interleaved);
+ buffer->offset -= delta * interleaved;
+
+ for (i = 0; i < nr_uploads; i++) {
+ /* Then, just point upload[i] at upload[0]'s buffer. */
+ upload[i]->offset =
+ ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
+ upload[i]->buffer = j;
+ }
+ j++;
+
+ nr_uploads = 0;
}
- }
- else {
- /* Upload non-interleaved arrays */
- for (i = 0; i < nr_uploads; i++) {
- copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
+ else if (total_size < 2048) {
+ /* Upload non-interleaved arrays into a single interleaved array */
+ struct brw_vertex_buffer *buffer;
+ int count = max_index - min_index + 1;
+ int offset;
+ char *map;
+
+ map = intel_upload_map(&brw->intel, total_size * count, total_size);
+ for (i = offset = 0; i < nr_uploads; i++) {
+ const unsigned char *src = upload[i]->glarray->Ptr;
+ int size = upload[i]->element_size;
+ int stride = upload[i]->glarray->StrideB;
+ char *dst;
+ int n;
+
+ offset = ALIGN(offset, get_size(upload[i]->glarray->Type));
+ dst = map + offset;
+ src += min_index * stride;
+
+ if (upload[i]->glarray->Type == GL_FIXED) {
+ for (n = 0; n < count; n++) {
+ const GLint *s = (GLint *) src;
+ GLfloat *d = (GLfloat *) dst;
+ int k;
+
+ for (k = 0; k < upload[i]->glarray->Size; k++) {
+ d[k] = s[k] / 65536.0f;
+ }
+
+ src += stride;
+ dst += total_size;
+ }
+ }
+ else {
+ for (n = 0; n < count; n++) {
+ memcpy(dst, src, size);
+ src += stride;
+ dst += total_size;
+ }
+ }
+
+ upload[i]->offset = offset;
+ upload[i]->buffer = j;
+
+ offset += size;
+ }
+ assert(offset == total_size);
+ buffer = &brw->vb.buffers[j++];
+ intel_upload_unmap(&brw->intel, map, offset * count, offset,
+ &buffer->bo, &buffer->offset);
+ buffer->stride = offset;
+ buffer->offset -= delta * offset;
+
+ nr_uploads = 0;
}
}
+ /* Upload non-interleaved arrays */
+ for (i = 0; i < nr_uploads; i++) {
+ struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
+ copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
+ buffer, upload[i]->element_size);
+ buffer->offset -= delta * buffer->stride;
+ upload[i]->buffer = j++;
+ upload[i]->offset = 0;
+ }
- brw_prepare_query_begin(brw);
+ /* can we simply extend the current vb? */
+ if (j == brw->vb.nr_current_buffers) {
+ int delta = 0;
+ for (i = 0; i < j; i++) {
+ int d;
+
+ if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
+ brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride)
+ break;
+
+ d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
+ if (i == 0)
+ delta = d / brw->vb.current_buffers[i].stride;
+ if (delta * brw->vb.current_buffers[i].stride != d)
+ break;
+ }
- for (i = 0; i < brw->vb.nr_enabled; i++) {
- struct brw_vertex_element *input = brw->vb.enabled[i];
+ if (i == j) {
+ brw->vb.start_vertex_bias += delta;
+ while (--j >= 0)
+ drm_intel_bo_unreference(brw->vb.buffers[j].bo);
+ j = 0;
+ }
+ }
- brw_add_validated_bo(brw, input->bo);
+ brw->vb.nr_buffers = j;
+
+validate:
+ brw_prepare_query_begin(brw);
+ for (i = 0; i < brw->vb.nr_buffers; i++) {
+ brw_add_validated_bo(brw, brw->vb.buffers[i].bo);
}
}
@@ -529,49 +609,44 @@ static void brw_emit_vertices(struct brw_context *brw)
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
- ADVANCE_BATCH();
+ CACHED_BATCH();
return;
}
/* Now emit VB and VEP state packets.
- *
- * This still defines a hardware VB for each input, even if they
- * are interleaved or from the same VBO. TBD if this makes a
- * performance difference.
*/
- BEGIN_BATCH(1 + brw->vb.nr_enabled * 4);
- OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
- ((1 + brw->vb.nr_enabled * 4) - 2));
- for (i = 0; i < brw->vb.nr_enabled; i++) {
- struct brw_vertex_element *input = brw->vb.enabled[i];
- uint32_t dw0;
+ if (brw->vb.nr_buffers) {
+ BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
+ OUT_BATCH((CMD_VERTEX_BUFFER << 16) | (4*brw->vb.nr_buffers - 1));
+ for (i = 0; i < brw->vb.nr_buffers; i++) {
+ struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
+ uint32_t dw0;
+
+ if (intel->gen >= 6) {
+ dw0 = GEN6_VB0_ACCESS_VERTEXDATA | (i << GEN6_VB0_INDEX_SHIFT);
+ } else {
+ dw0 = BRW_VB0_ACCESS_VERTEXDATA | (i << BRW_VB0_INDEX_SHIFT);
+ }
- if (intel->gen >= 6) {
- dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
- (i << GEN6_VB0_INDEX_SHIFT);
- } else {
- dw0 = BRW_VB0_ACCESS_VERTEXDATA |
- (i << BRW_VB0_INDEX_SHIFT);
+ OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
+ OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
+ if (intel->gen >= 5) {
+ OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
+ } else
+ OUT_BATCH(buffer->bo->size / buffer->stride);
+ OUT_BATCH(0); /* Instance data step rate */
+
+ brw->vb.current_buffers[i].handle = buffer->bo->handle;
+ brw->vb.current_buffers[i].offset = buffer->offset;
+ brw->vb.current_buffers[i].stride = buffer->stride;
}
-
- OUT_BATCH(dw0 |
- (input->stride << BRW_VB0_PITCH_SHIFT));
- OUT_RELOC(input->bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- input->offset);
- if (intel->gen >= 5) {
- OUT_RELOC(input->bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- input->bo->size - 1);
- } else
- OUT_BATCH(input->stride ? input->count : 0);
- OUT_BATCH(0); /* Instance data step rate */
+ brw->vb.nr_current_buffers = i;
+ ADVANCE_BATCH();
}
- ADVANCE_BATCH();
BEGIN_BATCH(1 + brw->vb.nr_enabled * 2);
- OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
+ OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | (2*brw->vb.nr_enabled - 1));
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = get_surface_type(input->glarray->Type,
@@ -592,15 +667,15 @@ static void brw_emit_vertices(struct brw_context *brw)
}
if (intel->gen >= 6) {
- OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) |
+ OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
- (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
} else {
- OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+ OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
BRW_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
- (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+ (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
}
if (intel->gen >= 5)
@@ -615,7 +690,7 @@ static void brw_emit_vertices(struct brw_context *brw)
(comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
}
- ADVANCE_BATCH();
+ CACHED_BATCH();
}
const struct brw_tracked_state brw_vertices = {
@@ -644,25 +719,19 @@ static void brw_prepare_indices(struct brw_context *brw)
ib_type_size = get_size(index_buffer->type);
ib_size = ib_type_size * index_buffer->count;
- bufferobj = index_buffer->obj;;
+ bufferobj = index_buffer->obj;
/* Turn into a proper VBO:
*/
if (!_mesa_is_bufferobj(bufferobj)) {
- brw->ib.start_vertex_offset = 0;
/* Get new bufferobj, offset:
*/
- get_space(brw, ib_size, &bo, &offset);
-
- /* Straight upload
- */
- drm_intel_gem_bo_map_gtt(bo);
- memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
- drm_intel_gem_bo_unmap_gtt(bo);
+ intel_upload_data(&brw->intel, index_buffer->ptr, ib_size, ib_type_size,
+ &bo, &offset);
+ brw->ib.start_vertex_offset = offset / ib_type_size;
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
- brw->ib.start_vertex_offset = 0;
/* If the index buffer isn't aligned to its element size, we have to
* rebase it into a temporary.
@@ -674,41 +743,42 @@ static void brw_prepare_indices(struct brw_context *brw)
bufferobj);
map += offset;
- get_space(brw, ib_size, &bo, &offset);
-
- drm_intel_bo_subdata(bo, offset, ib_size, map);
+ intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
+ &bo, &offset);
+ brw->ib.start_vertex_offset = offset / ib_type_size;
ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
} else {
- bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
- INTEL_READ);
- drm_intel_bo_reference(bo);
-
/* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
* the index buffer state when we're just moving the start index
* of our drawing.
*/
brw->ib.start_vertex_offset = offset / ib_type_size;
- offset = 0;
- ib_size = bo->size;
+
+ bo = intel_bufferobj_source(intel,
+ intel_buffer_object(bufferobj),
+ ib_type_size,
+ &offset);
+ drm_intel_bo_reference(bo);
+
+ brw->ib.start_vertex_offset += offset / ib_type_size;
}
}
- if (brw->ib.bo != bo ||
- brw->ib.offset != offset ||
- brw->ib.size != ib_size)
- {
+ if (brw->ib.bo != bo) {
drm_intel_bo_unreference(brw->ib.bo);
brw->ib.bo = bo;
- brw->ib.offset = offset;
- brw->ib.size = ib_size;
+ brw_add_validated_bo(brw, brw->ib.bo);
brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
} else {
drm_intel_bo_unreference(bo);
}
- brw_add_validated_bo(brw, brw->ib.bo);
+ if (index_buffer->type != brw->ib.type) {
+ brw->ib.type = index_buffer->type;
+ brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+ }
}
const struct brw_tracked_state brw_indices = {
@@ -728,29 +798,18 @@ static void brw_emit_index_buffer(struct brw_context *brw)
if (index_buffer == NULL)
return;
- /* Emit the indexbuffer packet:
- */
- {
- struct brw_indexbuffer ib;
-
- memset(&ib, 0, sizeof(ib));
-
- ib.header.bits.opcode = CMD_INDEX_BUFFER;
- ib.header.bits.length = sizeof(ib)/4 - 2;
- ib.header.bits.index_format = get_index_type(index_buffer->type);
- ib.header.bits.cut_index_enable = 0;
-
- BEGIN_BATCH(4);
- OUT_BATCH( ib.header.dword );
- OUT_RELOC(brw->ib.bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- brw->ib.offset);
- OUT_RELOC(brw->ib.bo,
- I915_GEM_DOMAIN_VERTEX, 0,
- brw->ib.offset + brw->ib.size - 1);
- OUT_BATCH( 0 );
- ADVANCE_BATCH();
- }
+ BEGIN_BATCH(3);
+ OUT_BATCH(CMD_INDEX_BUFFER << 16 |
+ /* cut index enable << 10 */
+ get_index_type(index_buffer->type) << 8 |
+ 1);
+ OUT_RELOC(brw->ib.bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ 0);
+ OUT_RELOC(brw->ib.bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ brw->ib.bo->size - 1);
+ ADVANCE_BATCH();
}
const struct brw_tracked_state brw_index_buffer = {
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 4dbdc52210..119ffc7237 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -861,7 +861,8 @@ void brw_fb_WRITE(struct brw_compile *p,
GLuint binding_table_index,
GLuint msg_length,
GLuint response_length,
- GLboolean eot);
+ GLboolean eot,
+ GLboolean header_present);
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index f62fc7ebfb..88131c432e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -536,6 +536,16 @@ brw_set_dp_read_message(struct brw_context *brw,
insn->bits3.dp_read_gen5.end_of_thread = 0;
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
insn->bits2.send_gen5.end_of_thread = 0;
+ } else if (intel->is_g4x) {
+ insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
+ insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
+ insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
+ insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
+ insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/
+ insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/
+ insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+ insn->bits3.dp_read_g4x.pad1 = 0;
+ insn->bits3.dp_read_g4x.end_of_thread = 0;
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
@@ -1708,29 +1718,22 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
GLuint location,
GLuint bind_table_index)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_reg_nr = 1;
- struct brw_reg b;
- /*
- printf("vs const read msg, location %u, msg_reg_nr %d\n",
- location, msg_reg_nr);
- */
+ if (intel->gen >= 6)
+ location /= 16;
/* Setup MRF[1] with location/offset into const buffer */
brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
- /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
- * when the docs say only dword[2] should be set. Hmmm. But it works.
- */
- b = brw_message_reg(msg_reg_nr);
- b = retype(b, BRW_REGISTER_TYPE_UD);
- /*b = get_element_ud(b, 2);*/
- brw_MOV(p, b, brw_imm_ud(location));
-
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
+ BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(location));
brw_pop_insn_state(p);
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -1741,7 +1744,11 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
insn->header.mask_control = BRW_MASK_DISABLE;
brw_set_dest(p, insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ if (intel->gen >= 6) {
+ brw_set_src0(insn, brw_message_reg(msg_reg_nr));
+ } else {
+ brw_set_src0(insn, brw_null_reg());
+ }
brw_set_dp_read_message(p->brw,
insn,
@@ -1768,6 +1775,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
/* Setup MRF[1] with offset into const buffer */
brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
@@ -1775,7 +1783,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
* fields ignored.
*/
- brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
+ brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
@@ -1816,12 +1824,12 @@ void brw_fb_WRITE(struct brw_compile *p,
GLuint binding_table_index,
GLuint msg_length,
GLuint response_length,
- GLboolean eot)
+ GLboolean eot,
+ GLboolean header_present)
{
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_control, msg_type;
- GLboolean header_present = GL_TRUE;
if (intel->gen >= 6 && binding_table_index == 0) {
insn = next_insn(p, BRW_OPCODE_SENDC);
@@ -1833,9 +1841,6 @@ void brw_fb_WRITE(struct brw_compile *p,
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (intel->gen >= 6) {
- if (msg_length == 4)
- header_present = GL_FALSE;
-
/* headerless version, just submit color payload */
src0 = brw_message_reg(msg_reg_nr);
@@ -1940,7 +1945,8 @@ void brw_SAMPLE(struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, m1, brw_vec8_grf(0,0));
+ brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
brw_pop_insn_state(p);
@@ -2001,7 +2007,8 @@ void brw_SAMPLE(struct brw_compile *p,
*/
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, reg, reg);
+ brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
+ retype(reg, BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
}
@@ -2033,7 +2040,8 @@ void brw_urb_WRITE(struct brw_compile *p,
if (intel->gen >= 6) {
brw_push_insn_state(p);
brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
+ brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
+ retype(src0, BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
src0 = brw_message_reg(msg_reg_nr);
}
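
Aside: the g4x hunk near the top of this file fills in the dataport-read message descriptor field by field; the bit ranges come from the /*0:7*/-style annotations in that hunk. A minimal sketch of how those fields would pack into one dword, if written by hand (helper name and manual shifting are illustrative only — the driver relies on the struct bitfields instead):

#include <stdint.h>

/* Hypothetical packer mirroring insn->bits3.dp_read_g4x.  msg_target is
 * BRW_MESSAGE_TARGET_DATAPORT_READ in the driver. */
static uint32_t
g4x_dp_read_desc(uint32_t binding_table_index, uint32_t msg_control,
                 uint32_t msg_type, uint32_t target_cache,
                 uint32_t response_length, uint32_t msg_length,
                 uint32_t msg_target)
{
   return  (binding_table_index & 0xff)        /* bits  0:7  */
         | ((msg_control        & 0x7) << 8)   /* bits  8:10 */
         | ((msg_type           & 0x7) << 11)  /* bits 11:13 */
         | ((target_cache       & 0x3) << 14)  /* bits 14:15 */
         | ((response_length    & 0xf) << 16)  /* bits 16:19 */
         | ((msg_length         & 0xf) << 20)  /* bits 20:23 */
         | ((msg_target         & 0xf) << 24); /* bits 24:27 */
}
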
diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c
index 6796fb208d..d0b0c22abf 100644
--- a/src/mesa/drivers/dri/i965/brw_fallback.c
+++ b/src/mesa/drivers/dri/i965/brw_fallback.c
@@ -36,8 +36,6 @@
#include "swrast/swrast.h"
#include "tnl/tnl.h"
#include "brw_context.h"
-#include "intel_fbo.h"
-#include "intel_regions.h"
#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
@@ -63,49 +61,14 @@ static GLboolean do_check_fallback(struct brw_context *brw)
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
if (texUnit->_ReallyEnabled) {
- struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current);
- struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel];
+ struct gl_texture_object *tex_obj = texUnit->_Current;
+ struct gl_texture_image *texImage = tex_obj->Image[0][tex_obj->BaseLevel];
if (texImage->Border) {
DBG("FALLBACK: texture border\n");
return GL_TRUE;
}
}
}
-
- /* _NEW_STENCIL
- */
- if (ctx->Stencil._Enabled &&
- (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) {
- DBG("FALLBACK: stencil\n");
- return GL_TRUE;
- }
-
- /* _NEW_BUFFERS */
- if (!brw->has_surface_tile_offset) {
- for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
- struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-
- /* The original gen4 hardware couldn't set up WM surfaces pointing
- * at an offset within a tile, which can happen when rendering to
- * anything but the base level of a texture or the +X face/0 depth.
- * This was fixed with the 4 Series hardware.
- *
- * For these original chips, you would have to make the depth and
- * color destination surfaces include information on the texture
- * type, LOD, face, and various limits to use them as a destination.
- * I would have done this, but there's also a nasty requirement that
- * the depth and the color surfaces all be of the same LOD, which
- * may be a worse requirement than this alignment. (Also, we may
- * want to just demote the texture to untiled, instead).
- */
- if (irb->region && irb->region->tiling != I915_TILING_NONE &&
- (irb->region->draw_offset & 4095)) {
- DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n");
- return GL_TRUE;
- }
- }
- }
return GL_FALSE;
}
@@ -117,7 +80,7 @@ static void check_fallback(struct brw_context *brw)
const struct brw_tracked_state brw_check_fallback = {
.dirty = {
- .mesa = _NEW_BUFFERS | _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL,
+ .mesa = _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL,
.brw = 0,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 6bb195b487..2c997b4eb3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -41,13 +41,13 @@ extern "C" {
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
-#include "talloc.h"
}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"
+#define MAX_INSTRUCTION (1 << 30)
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
struct gl_shader *
@@ -55,7 +55,7 @@ brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
struct brw_shader *shader;
- shader = talloc_zero(NULL, struct brw_shader);
+ shader = rzalloc(NULL, struct brw_shader);
if (shader) {
shader->base.Type = type;
shader->base.Name = name;
@@ -69,7 +69,7 @@ struct gl_shader_program *
brw_new_shader_program(struct gl_context *ctx, GLuint name)
{
struct brw_shader_program *prog;
- prog = talloc_zero(NULL, struct brw_shader_program);
+ prog = rzalloc(NULL, struct brw_shader_program);
if (prog) {
prog->base.Name = name;
_mesa_init_shader_program(ctx, &prog->base);
@@ -89,14 +89,17 @@ brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
+ struct brw_context *brw = brw_context(ctx);
+ struct intel_context *intel = &brw->intel;
+
struct brw_shader *shader =
(struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
if (shader != NULL) {
- void *mem_ctx = talloc_new(NULL);
+ void *mem_ctx = ralloc_context(NULL);
bool progress;
if (shader->ir)
- talloc_free(shader->ir);
+ ralloc_free(shader->ir);
shader->ir = new(shader) exec_list;
clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
@@ -107,8 +110,24 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
SUB_TO_ADD_NEG |
EXP_TO_EXP2 |
LOG_TO_LOG2);
+
+ /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
+ * if-statements need to be flattened.
+ */
+ if (intel->gen < 6)
+ lower_if_to_cond_assign(shader->ir, 16);
+
do_lower_texture_projection(shader->ir);
+ do_vec_index_to_cond_assign(shader->ir);
brw_do_cubemap_normalize(shader->ir);
+ lower_noise(shader->ir);
+ lower_quadop_vector(shader->ir, false);
+ lower_variable_index_to_cond_assign(shader->ir,
+ GL_TRUE, /* input */
+ GL_TRUE, /* output */
+ GL_TRUE, /* temp */
+ GL_TRUE /* uniform */
+ );
do {
progress = false;
@@ -123,22 +142,12 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
) || progress;
progress = do_common_optimization(shader->ir, true, 32) || progress;
-
- progress = lower_noise(shader->ir) || progress;
- progress =
- lower_variable_index_to_cond_assign(shader->ir,
- GL_TRUE, /* input */
- GL_TRUE, /* output */
- GL_TRUE, /* temp */
- GL_TRUE /* uniform */
- ) || progress;
- progress = lower_quadop_vector(shader->ir, false) || progress;
} while (progress);
validate_ir_tree(shader->ir);
reparent_ir(shader->ir, shader->ir);
- talloc_free(mem_ctx);
+ ralloc_free(mem_ctx);
}
if (!_mesa_ir_link_shader(ctx, prog))
@@ -202,6 +211,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
return 2;
case FS_OPCODE_TEX:
case FS_OPCODE_TXB:
+ case FS_OPCODE_TXD:
case FS_OPCODE_TXL:
return 1;
case FS_OPCODE_FB_WRITE:
@@ -225,8 +235,8 @@ fs_visitor::virtual_grf_alloc(int size)
virtual_grf_array_size = 16;
else
virtual_grf_array_size *= 2;
- virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
- int, virtual_grf_array_size);
+ virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
+ virtual_grf_array_size);
/* This slot is always unused. */
virtual_grf_sizes[0] = 0;
@@ -304,7 +314,6 @@ int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
unsigned int offset = 0;
- float *vec_values;
if (type->is_matrix()) {
const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
@@ -323,7 +332,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_BOOL:
- vec_values = fp->Base.Parameters->ParameterValues[loc];
for (unsigned int i = 0; i < type->vector_elements; i++) {
unsigned int param = c->prog_data.nr_params++;
@@ -347,8 +355,8 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
break;
}
-
- c->prog_data.param[param] = &vec_values[i];
+ this->param_index[param] = loc;
+ this->param_offset[param] = i;
}
return 1;
@@ -419,7 +427,6 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
*/
int index = _mesa_add_state_reference(this->fp->Base.Parameters,
(gl_state_index *)tokens);
- float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
/* Add each of the unique swizzles of the element as a
* parameter. This'll end up matching the expected layout of
@@ -434,7 +441,9 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
c->prog_data.param_convert[c->prog_data.nr_params] =
PARAM_NO_CONVERT;
- c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = swiz;
+ c->prog_data.nr_params++;
}
}
}
@@ -474,8 +483,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
wpos.reg_offset++;
/* gl_FragCoord.z */
- emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
- interp_reg(FRAG_ATTRIB_WPOS, 2)));
+ if (intel->gen >= 6) {
+ emit(fs_inst(BRW_OPCODE_MOV, wpos,
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
+ } else {
+ emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
+ interp_reg(FRAG_ATTRIB_WPOS, 2)));
+ }
wpos.reg_offset++;
/* gl_FragCoord.w: Already set up in emit_interpolation */
@@ -518,25 +532,40 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
continue;
}
- for (unsigned int c = 0; c < type->vector_elements; c++) {
- struct brw_reg interp = interp_reg(location, c);
- emit(fs_inst(FS_OPCODE_LINTERP,
- attr,
- this->delta_x,
- this->delta_y,
- fs_reg(interp)));
- attr.reg_offset++;
- }
-
- if (intel->gen < 6) {
- attr.reg_offset -= type->vector_elements;
+ if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 ||
+ location == FRAG_ATTRIB_COL1)) {
+ /* Constant interpolation (flat shading) case. The SF has
+ * handed us defined values in only the constant offset
+ * field of the setup reg.
+ */
for (unsigned int c = 0; c < type->vector_elements; c++) {
- emit(fs_inst(BRW_OPCODE_MUL,
- attr,
+ struct brw_reg interp = interp_reg(location, c);
+ interp = suboffset(interp, 3);
+ emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp)));
+ attr.reg_offset++;
+ }
+ } else {
+ /* Perspective interpolation case. */
+ for (unsigned int c = 0; c < type->vector_elements; c++) {
+ struct brw_reg interp = interp_reg(location, c);
+ emit(fs_inst(FS_OPCODE_LINTERP,
attr,
- this->pixel_w));
+ this->delta_x,
+ this->delta_y,
+ fs_reg(interp)));
attr.reg_offset++;
}
+
+ if (intel->gen < 6) {
+ attr.reg_offset -= type->vector_elements;
+ for (unsigned int c = 0; c < type->vector_elements; c++) {
+ emit(fs_inst(BRW_OPCODE_MUL,
+ attr,
+ attr,
+ this->pixel_w));
+ attr.reg_offset++;
+ }
+ }
}
location++;
}
@@ -631,14 +660,18 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
assert(opcode == FS_OPCODE_POW);
if (intel->gen >= 6) {
- /* Can't do hstride == 0 args to gen6 math, so expand it out. */
- if (src0.file == UNIFORM) {
+ /* Can't do hstride == 0 args to gen6 math, so expand it out.
+ *
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
+ */
+ if (src0.file == UNIFORM || src0.abs || src0.negate) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
emit(fs_inst(BRW_OPCODE_MOV, expanded, src0));
src0 = expanded;
}
- if (src1.file == UNIFORM) {
+ if (src1.file == UNIFORM || src1.abs || src1.negate) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
emit(fs_inst(BRW_OPCODE_MOV, expanded, src1));
src1 = expanded;
@@ -770,6 +803,30 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
return true;
}
+static uint32_t
+brw_conditional_for_comparison(unsigned int op)
+{
+ switch (op) {
+ case ir_binop_less:
+ return BRW_CONDITIONAL_L;
+ case ir_binop_greater:
+ return BRW_CONDITIONAL_G;
+ case ir_binop_lequal:
+ return BRW_CONDITIONAL_LE;
+ case ir_binop_gequal:
+ return BRW_CONDITIONAL_GE;
+ case ir_binop_equal:
+ case ir_binop_all_equal: /* same as equal for scalars */
+ return BRW_CONDITIONAL_Z;
+ case ir_binop_nequal:
+ case ir_binop_any_nequal: /* same as nequal for scalars */
+ return BRW_CONDITIONAL_NZ;
+ default:
+ assert(!"not reached: bad operation for comparison");
+ return BRW_CONDITIONAL_NZ;
+ }
+}
+
void
fs_visitor::visit(ir_expression *ir)
{
@@ -819,6 +876,7 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_abs:
op[0].abs = true;
+ op[0].negate = false;
this->result = op[0];
break;
case ir_unop_sign:
@@ -885,35 +943,20 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_binop_less:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_L;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
case ir_binop_greater:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_G;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
case ir_binop_lequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_LE;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
case ir_binop_gequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
case ir_binop_equal:
- case ir_binop_all_equal: /* same as nequal for scalars */
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
+ case ir_binop_all_equal:
case ir_binop_nequal:
- case ir_binop_any_nequal: /* same as nequal for scalars */
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ case ir_binop_any_nequal:
+ temp = this->result;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1]));
+ inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
break;
@@ -958,7 +1001,12 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_f2b:
case ir_unop_i2b:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
+ temp = this->result;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
this->result, fs_reg(1)));
@@ -1151,6 +1199,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
mlen += 3;
+ } else if (ir->op == ir_txd) {
+ assert(!"TXD isn't supported on gen4 yet.");
} else {
/* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
* instructions. We'll need to do SIMD16 here.
@@ -1204,6 +1254,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
inst = emit(fs_inst(FS_OPCODE_TXL, dst));
break;
case ir_txd:
+ inst = emit(fs_inst(FS_OPCODE_TXD, dst));
+ break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
break;
@@ -1292,6 +1344,37 @@ fs_visitor::visit(ir_texture *ir)
ir->coordinate->accept(this);
fs_reg coordinate = this->result;
+ if (ir->offset != NULL) {
+ ir_constant *offset = ir->offset->as_constant();
+ assert(offset != NULL);
+
+ signed char offsets[3];
+ for (unsigned i = 0; i < ir->offset->type->vector_elements; i++)
+ offsets[i] = (signed char) offset->value.i[i];
+
+ /* Combine all three offsets into a single unsigned dword:
+ *
+ * bits 11:8 - U Offset (X component)
+ * bits 7:4 - V Offset (Y component)
+ * bits 3:0 - R Offset (Z component)
+ */
+ unsigned offset_bits = 0;
+ for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) {
+ const unsigned shift = 4 * (2 - i);
+ offset_bits |= (offsets[i] << shift) & (0xF << shift);
+ }
+
+ /* Explicitly set up the message header by copying g0 to msg reg m1. */
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD),
+ fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD)));
+
+ /* Then set the offset bits in DWord 2 of the message header. */
+ emit(fs_inst(BRW_OPCODE_MOV,
+ fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2),
+ BRW_REGISTER_TYPE_UD)),
+ fs_reg(brw_imm_uw(offset_bits))));
+ }
+
/* Should be lowered by do_lower_texture_projection */
assert(!ir->projector);
@@ -1323,10 +1406,13 @@ fs_visitor::visit(ir_texture *ir)
fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
GLuint index = _mesa_add_state_reference(params,
(gl_state_index *)tokens);
- float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
- c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
- c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = 0;
+ c->prog_data.nr_params++;
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = 1;
+ c->prog_data.nr_params++;
fs_reg dst = fs_reg(this, ir->coordinate->type);
fs_reg src = coordinate;
@@ -1349,6 +1435,14 @@ fs_visitor::visit(ir_texture *ir)
inst = emit_texture_gen5(ir, dst, coordinate);
}
+ /* If there's an offset, we already set up m1. To avoid the implied move,
+ * use the null register. Otherwise, we want an implied move from g0.
+ */
+ if (ir->offset != NULL)
+ inst->src[0] = fs_reg(brw_null_reg());
+ else
+ inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
inst->sampler = sampler;
this->result = dst;
@@ -1356,7 +1450,10 @@ fs_visitor::visit(ir_texture *ir)
if (ir->shadow_comparitor)
inst->shadow_compare = true;
- if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
+ if (ir->type == glsl_type::float_type) {
+ /* Ignore DEPTH_TEXTURE_MODE swizzling. */
+ assert(ir->sampler->type->sampler_shadow);
+ } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);
for (int i = 0; i < 4; i++) {
@@ -1541,7 +1638,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
op[0], fs_reg(0.0f)));
} else {
- inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0]));
+ inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0]));
}
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
@@ -1556,31 +1653,18 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
break;
case ir_binop_greater:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_G;
- break;
case ir_binop_gequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- break;
case ir_binop_less:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_L;
- break;
case ir_binop_lequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_LE;
- break;
case ir_binop_equal:
case ir_binop_all_equal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
case ir_binop_nequal:
case ir_binop_any_nequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]));
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
break;
+
default:
assert(!"not reached");
this->fail = true;
@@ -1659,30 +1743,16 @@ fs_visitor::emit_if_gen6(ir_if *ir)
return;
case ir_binop_greater:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_G;
- return;
case ir_binop_gequal:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- return;
case ir_binop_less:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_L;
- return;
case ir_binop_lequal:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_LE;
- return;
case ir_binop_equal:
case ir_binop_all_equal:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- return;
case ir_binop_nequal:
case ir_binop_any_nequal:
inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
return;
default:
assert(!"not reached");
@@ -1764,32 +1834,9 @@ fs_visitor::visit(ir_loop *ir)
this->base_ir = ir->to;
ir->to->accept(this);
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
+ fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp,
counter, this->result));
- switch (ir->cmp) {
- case ir_binop_equal:
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
- case ir_binop_nequal:
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
- case ir_binop_gequal:
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- break;
- case ir_binop_lequal:
- inst->conditional_mod = BRW_CONDITIONAL_LE;
- break;
- case ir_binop_greater:
- inst->conditional_mod = BRW_CONDITIONAL_G;
- break;
- case ir_binop_less:
- inst->conditional_mod = BRW_CONDITIONAL_L;
- break;
- default:
- assert(!"not reached: unknown loop condition");
- this->fail = true;
- break;
- }
+ inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
inst = emit(fs_inst(BRW_OPCODE_BREAK));
inst->predicated = true;
@@ -2067,7 +2114,7 @@ fs_visitor::emit_fb_writes()
}
for (int target = 0; target < c->key.nr_color_regions; target++) {
- this->current_annotation = talloc_asprintf(this->mem_ctx,
+ this->current_annotation = ralloc_asprintf(this->mem_ctx,
"FB write target %d",
target);
if (this->frag_color || this->frag_data) {
@@ -2093,6 +2140,17 @@ fs_visitor::emit_fb_writes()
}
if (c->key.nr_color_regions == 0) {
+ if (c->key.alpha_test && (this->frag_color || this->frag_data)) {
+ /* If the alpha test is enabled but there's no color buffer,
+ * we still need to send alpha out the pipeline to our null
+ * renderbuffer.
+ */
+ color.reg_offset += 3;
+ emit(fs_inst(BRW_OPCODE_MOV,
+ fs_reg(MRF, color_mrf + 3),
+ color));
+ }
+
fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
reg_undef, reg_undef));
inst->base_mrf = 0;
@@ -2158,7 +2216,8 @@ fs_visitor::generate_fb_write(fs_inst *inst)
inst->target,
inst->mlen,
0,
- eot);
+ eot,
+ inst->header_present);
}
void
@@ -2244,7 +2303,7 @@ fs_visitor::generate_math(fs_inst *inst,
}
void
-fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
+fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
int msg_type = -1;
int rlen = 4;
@@ -2266,6 +2325,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
}
break;
+ case FS_OPCODE_TXL:
+ if (inst->shadow_compare) {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5;
+ }
+ break;
+ case FS_OPCODE_TXD:
+ assert(!"TXD isn't supported on gen5+ yet.");
+ break;
}
} else {
switch (inst->opcode) {
@@ -2283,13 +2352,26 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
case FS_OPCODE_TXB:
if (inst->shadow_compare) {
assert(inst->mlen == 6);
- msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
} else {
assert(inst->mlen == 9);
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
break;
+ case FS_OPCODE_TXL:
+ if (inst->shadow_compare) {
+ assert(inst->mlen == 6);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
+ } else {
+ assert(inst->mlen == 9);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ }
+ break;
+ case FS_OPCODE_TXD:
+ assert(!"TXD isn't supported on gen4 yet.");
+ break;
}
}
assert(msg_type != -1);
@@ -2302,7 +2384,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
brw_SAMPLE(p,
retype(dst, BRW_REGISTER_TYPE_UW),
inst->base_mrf,
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ src,
SURF_INDEX_TEXTURE(inst->sampler),
inst->sampler,
WRITEMASK_XYZW,
@@ -2502,6 +2584,22 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
}
}
+/**
+ * To be called after the last _mesa_add_state_reference() call, to
+ * set up prog_data.param[] for assign_curb_setup() and
+ * setup_pull_constants().
+ */
+void
+fs_visitor::setup_paramvalues_refs()
+{
+ /* Set up the pointers to ParamValues now that that array is finalized. */
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ c->prog_data.param[i] =
+ fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+ this->param_offset[i];
+ }
+}
+
void
fs_visitor::assign_curb_setup()
{
@@ -2575,12 +2673,15 @@ fs_visitor::assign_urb_setup()
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
- if (inst->opcode != FS_OPCODE_LINTERP)
- continue;
-
- assert(inst->src[2].file == FIXED_HW_REG);
+ if (inst->opcode == FS_OPCODE_LINTERP) {
+ assert(inst->src[2].file == FIXED_HW_REG);
+ inst->src[2].fixed_hw_reg.nr += urb_start;
+ }
- inst->src[2].fixed_hw_reg.nr += urb_start;
+ if (inst->opcode == FS_OPCODE_CINTERP) {
+ assert(inst->src[0].file == FIXED_HW_REG);
+ inst->src[0].fixed_hw_reg.nr += urb_start;
+ }
}
this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
@@ -2628,10 +2729,7 @@ fs_visitor::split_virtual_grfs()
fs_inst *inst = (fs_inst *)iter.get();
/* Texturing produces 4 contiguous registers, so no splitting. */
- if ((inst->opcode == FS_OPCODE_TEX ||
- inst->opcode == FS_OPCODE_TXB ||
- inst->opcode == FS_OPCODE_TXL) &&
- inst->dst.file == GRF) {
+ if (inst->is_tex()) {
split_grf[inst->dst.reg] = false;
}
}
@@ -2671,6 +2769,7 @@ fs_visitor::split_virtual_grfs()
}
}
}
+ this->live_intervals_valid = false;
}
/**
@@ -2739,14 +2838,17 @@ void
fs_visitor::calculate_live_intervals()
{
int num_vars = this->virtual_grf_next;
- int *def = talloc_array(mem_ctx, int, num_vars);
- int *use = talloc_array(mem_ctx, int, num_vars);
+ int *def = ralloc_array(mem_ctx, int, num_vars);
+ int *use = ralloc_array(mem_ctx, int, num_vars);
int loop_depth = 0;
int loop_start = 0;
int bb_header_ip = 0;
+ if (this->live_intervals_valid)
+ return;
+
for (int i = 0; i < num_vars; i++) {
- def[i] = 1 << 30;
+ def[i] = MAX_INSTRUCTION;
use[i] = -1;
}
@@ -2820,10 +2922,12 @@ fs_visitor::calculate_live_intervals()
}
}
- talloc_free(this->virtual_grf_def);
- talloc_free(this->virtual_grf_use);
+ ralloc_free(this->virtual_grf_def);
+ ralloc_free(this->virtual_grf_use);
this->virtual_grf_def = def;
this->virtual_grf_use = use;
+
+ this->live_intervals_valid = true;
}
/**
@@ -2839,6 +2943,8 @@ fs_visitor::propagate_constants()
{
bool progress = false;
+ calculate_live_intervals();
+
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
@@ -2896,6 +3002,7 @@ fs_visitor::propagate_constants()
/* Fit this constant in by commuting the operands */
scan_inst->src[0] = scan_inst->src[1];
scan_inst->src[1] = inst->src[0];
+ progress = true;
}
break;
case BRW_OPCODE_CMP:
@@ -2910,12 +3017,15 @@ fs_visitor::propagate_constants()
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == inst->dst.reg &&
(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
- scan_inst->opcode == FS_OPCODE_TEX)) {
+ scan_inst->is_tex())) {
break;
}
}
}
+ if (progress)
+ this->live_intervals_valid = false;
+
return progress;
}
/**
@@ -2930,6 +3040,8 @@ fs_visitor::dead_code_eliminate()
bool progress = false;
int pc = 0;
+ calculate_live_intervals();
+
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
@@ -2941,6 +3053,9 @@ fs_visitor::dead_code_eliminate()
pc++;
}
+ if (progress)
+ live_intervals_valid = false;
+
return progress;
}
@@ -2948,10 +3063,35 @@ bool
fs_visitor::register_coalesce()
{
bool progress = false;
+ int if_depth = 0;
+ int loop_depth = 0;
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
+ /* Make sure that we dominate the instructions we're going to
+ * scan for interfering with our coalescing, or we won't have
+ * scanned enough to see if anything interferes with our
+ * coalescing. We don't dominate the following instructions if
+ * we're in a loop or an if block.
+ */
+ switch (inst->opcode) {
+ case BRW_OPCODE_DO:
+ loop_depth++;
+ break;
+ case BRW_OPCODE_WHILE:
+ loop_depth--;
+ break;
+ case BRW_OPCODE_IF:
+ if_depth++;
+ break;
+ case BRW_OPCODE_ENDIF:
+ if_depth--;
+ break;
+ }
+ if (loop_depth || if_depth)
+ continue;
+
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
inst->saturate ||
@@ -2959,6 +3099,8 @@ fs_visitor::register_coalesce()
inst->dst.type != inst->src[0].type)
continue;
+ bool has_source_modifiers = inst->src[0].abs || inst->src[0].negate;
+
/* Found a move of a GRF to a GRF. Let's see if we can coalesce
* them: check for no writes to either one until the exit of the
* program.
@@ -2969,37 +3111,33 @@ fs_visitor::register_coalesce()
for (; scan_iter.has_next(); scan_iter.next()) {
fs_inst *scan_inst = (fs_inst *)scan_iter.get();
- if (scan_inst->opcode == BRW_OPCODE_DO ||
- scan_inst->opcode == BRW_OPCODE_WHILE ||
- scan_inst->opcode == BRW_OPCODE_ENDIF) {
- interfered = true;
- iter = scan_iter;
- break;
- }
-
if (scan_inst->dst.file == GRF) {
if (scan_inst->dst.reg == inst->dst.reg &&
(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
- scan_inst->opcode == FS_OPCODE_TEX)) {
+ scan_inst->is_tex())) {
interfered = true;
break;
}
if (scan_inst->dst.reg == inst->src[0].reg &&
(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
- scan_inst->opcode == FS_OPCODE_TEX)) {
+ scan_inst->is_tex())) {
interfered = true;
break;
}
}
+
+ /* The gen6 MATH instruction can't handle source modifiers, so avoid
+ * coalescing those for now. We should do something more specific.
+ */
+ if (intel->gen == 6 && scan_inst->is_math() && has_source_modifiers) {
+ interfered = true;
+ break;
+ }
}
if (interfered) {
continue;
}
- /* Update live interval so we don't have to recalculate. */
- this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg],
- virtual_grf_use[inst->dst.reg]);
-
/* Rewrite the later usage to point at the source of the move to
* be removed.
*/
@@ -3024,6 +3162,9 @@ fs_visitor::register_coalesce()
progress = true;
}
+ if (progress)
+ live_intervals_valid = false;
+
return progress;
}
@@ -3034,6 +3175,8 @@ fs_visitor::compute_to_mrf()
bool progress = false;
int next_ip = 0;
+ calculate_live_intervals();
+
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
@@ -3066,7 +3209,7 @@ fs_visitor::compute_to_mrf()
* into a compute-to-MRF.
*/
- if (scan_inst->opcode == FS_OPCODE_TEX) {
+ if (scan_inst->is_tex()) {
/* texturing writes several continuous regs, so we can't
* compute-to-mrf that.
*/
@@ -3087,14 +3230,7 @@ fs_visitor::compute_to_mrf()
/* gen6 math instructions must have the destination be
* GRF, so no compute-to-MRF for them.
*/
- if (scan_inst->opcode == FS_OPCODE_RCP ||
- scan_inst->opcode == FS_OPCODE_RSQ ||
- scan_inst->opcode == FS_OPCODE_SQRT ||
- scan_inst->opcode == FS_OPCODE_EXP2 ||
- scan_inst->opcode == FS_OPCODE_LOG2 ||
- scan_inst->opcode == FS_OPCODE_SIN ||
- scan_inst->opcode == FS_OPCODE_COS ||
- scan_inst->opcode == FS_OPCODE_POW) {
+ if (scan_inst->is_math()) {
break;
}
}
@@ -3116,6 +3252,7 @@ fs_visitor::compute_to_mrf()
*/
if (scan_inst->opcode == BRW_OPCODE_DO ||
scan_inst->opcode == BRW_OPCODE_WHILE ||
+ scan_inst->opcode == BRW_OPCODE_ELSE ||
scan_inst->opcode == BRW_OPCODE_ENDIF) {
break;
}
@@ -3202,7 +3339,7 @@ fs_visitor::remove_duplicate_mrf_writes()
}
if (inst->mlen > 0) {
- /* Found a SEND instruction, which will include two of fewer
+ /* Found a SEND instruction, which will include two or fewer
* implied MRF writes. We could do better here.
*/
for (int i = 0; i < implied_mrf_writes(inst); i++) {
@@ -3237,15 +3374,16 @@ fs_visitor::virtual_grf_interferes(int a, int b)
int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
- /* For dead code, just check if the def interferes with the other range. */
- if (this->virtual_grf_use[a] == -1) {
- return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
- this->virtual_grf_def[a] < this->virtual_grf_use[b]);
- }
- if (this->virtual_grf_use[b] == -1) {
- return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
- this->virtual_grf_def[b] < this->virtual_grf_use[a]);
- }
+ /* We can't handle dead register writes here, without iterating
+ * over the whole instruction stream to find every single dead
+ * write to that register to compare to the live interval of the
+ * other register. Just assert that dead_code_eliminate() has been
+ * called.
+ */
+ assert((this->virtual_grf_use[a] != -1 ||
+ this->virtual_grf_def[a] == MAX_INSTRUCTION) &&
+ (this->virtual_grf_use[b] != -1 ||
+ this->virtual_grf_def[b] == MAX_INSTRUCTION));
return start < end;
}
@@ -3280,6 +3418,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
break;
default:
assert(!"not reached");
+ brw_reg = brw_null_reg();
break;
}
break;
@@ -3294,6 +3433,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
assert(!"not reached");
brw_reg = brw_null_reg();
break;
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
}
if (reg->abs)
brw_reg = brw_abs(brw_reg);
@@ -3307,20 +3450,25 @@ void
fs_visitor::generate_code()
{
int last_native_inst = 0;
- struct brw_instruction *if_stack[16], *loop_stack[16];
- int if_stack_depth = 0, loop_stack_depth = 0;
- int if_depth_in_loop[16];
const char *last_annotation_string = NULL;
ir_instruction *last_annotation_ir = NULL;
+ int if_stack_array_size = 16;
+ int loop_stack_array_size = 16;
+ int if_stack_depth = 0, loop_stack_depth = 0;
+ brw_instruction **if_stack =
+ rzalloc_array(this->mem_ctx, brw_instruction *, if_stack_array_size);
+ brw_instruction **loop_stack =
+ rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+ int *if_depth_in_loop =
+ rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
+
+
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
printf("Native code for fragment shader %d:\n",
ctx->Shader.CurrentFragmentProgram->Name);
}
- if_depth_in_loop[loop_stack_depth] = 0;
-
- memset(&if_stack, 0, sizeof(if_stack));
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
struct brw_reg src[3], dst;
@@ -3404,7 +3552,6 @@ fs_visitor::generate_code()
break;
case BRW_OPCODE_IF:
- assert(if_stack_depth < 16);
if (inst->src[0].file != BAD_FILE) {
assert(intel->gen >= 6);
if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]);
@@ -3413,6 +3560,11 @@ fs_visitor::generate_code()
}
if_depth_in_loop[loop_stack_depth]++;
if_stack_depth++;
+ if (if_stack_array_size <= if_stack_depth) {
+ if_stack_array_size *= 2;
+ if_stack = reralloc(this->mem_ctx, if_stack, brw_instruction *,
+ if_stack_array_size);
+ }
break;
case BRW_OPCODE_ELSE:
@@ -3427,6 +3579,13 @@ fs_visitor::generate_code()
case BRW_OPCODE_DO:
loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ if (loop_stack_array_size <= loop_stack_depth) {
+ loop_stack_array_size *= 2;
+ loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+ loop_stack_array_size);
+ if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+ loop_stack_array_size);
+ }
if_depth_in_loop[loop_stack_depth] = 0;
break;
@@ -3480,13 +3639,17 @@ fs_visitor::generate_code()
case FS_OPCODE_COS:
generate_math(inst, dst, src);
break;
+ case FS_OPCODE_CINTERP:
+ brw_MOV(p, dst, src[0]);
+ break;
case FS_OPCODE_LINTERP:
generate_linterp(inst, dst, src);
break;
case FS_OPCODE_TEX:
case FS_OPCODE_TXB:
+ case FS_OPCODE_TXD:
case FS_OPCODE_TXL:
- generate_tex(inst, dst);
+ generate_tex(inst, dst, src[0]);
break;
case FS_OPCODE_DISCARD_NOT:
generate_discard_not(inst, dst);
@@ -3542,6 +3705,10 @@ fs_visitor::generate_code()
last_native_inst = p->nr_insn;
}
+ ralloc_free(if_stack);
+ ralloc_free(loop_stack);
+ ralloc_free(if_depth_in_loop);
+
brw_set_uip_jip(p);
/* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
@@ -3617,10 +3784,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
v.emit_fb_writes();
v.split_virtual_grfs();
- v.setup_pull_constants();
- v.assign_curb_setup();
- v.assign_urb_setup();
+ v.setup_paramvalues_refs();
+ v.setup_pull_constants();
bool progress;
do {
@@ -3628,20 +3794,23 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
progress = v.remove_duplicate_mrf_writes() || progress;
- v.calculate_live_intervals();
progress = v.propagate_constants() || progress;
progress = v.register_coalesce() || progress;
progress = v.compute_to_mrf() || progress;
progress = v.dead_code_eliminate() || progress;
} while (progress);
+ v.schedule_instructions();
+
+ v.assign_curb_setup();
+ v.assign_urb_setup();
+
if (0) {
/* Debug of register spilling: Go spill everything. */
int virtual_grf_count = v.virtual_grf_next;
for (int i = 1; i < virtual_grf_count; i++) {
v.spill_reg(i);
}
- v.calculate_live_intervals();
}
if (0)
@@ -3650,8 +3819,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
while (!v.assign_regs()) {
if (v.fail)
break;
-
- v.calculate_live_intervals();
}
}
}
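
Aside: the ir_texture hunk above packs up to three signed texel offsets into DWord 2 of the sampler message header (U in bits 11:8, V in 7:4, R in 3:0). A standalone sketch of that packing with a worked value; the function name is illustrative and not part of the driver:

#include <stdint.h>

/* Pack signed texel offsets into the 4-bit fields described above. */
static uint32_t
pack_texel_offsets(const signed char offsets[3], unsigned components)
{
   uint32_t offset_bits = 0;
   for (unsigned i = 0; i < components; i++) {
      const unsigned shift = 4 * (2 - i);
      offset_bits |= (offsets[i] << shift) & (0xF << shift);
   }
   return offset_bits;
}

/* Example: offsets {1, -1} from something like textureOffset(..., ivec2(1, -1))
 * pack to 0x1F0: U = 0x1 in bits 11:8, V = 0xF (two's-complement -1) in bits 7:4. */
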
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index de7b15312a..dc030ae5b5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -41,7 +41,6 @@ extern "C" {
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
-#include "talloc.h"
}
#include "../glsl/glsl_types.h"
#include "../glsl/ir.h"
@@ -68,9 +67,11 @@ enum fs_opcodes {
FS_OPCODE_COS,
FS_OPCODE_DDX,
FS_OPCODE_DDY,
+ FS_OPCODE_CINTERP,
FS_OPCODE_LINTERP,
FS_OPCODE_TEX,
FS_OPCODE_TXB,
+ FS_OPCODE_TXD,
FS_OPCODE_TXL,
FS_OPCODE_DISCARD_NOT,
FS_OPCODE_DISCARD_AND,
@@ -82,13 +83,13 @@ enum fs_opcodes {
class fs_reg {
public:
- /* Callers of this talloc-based new need not call delete. It's
- * easier to just talloc_free 'ctx' (or any of its ancestors). */
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
- node = talloc_size(ctx, size);
+ node = ralloc_size(ctx, size);
assert(node != NULL);
return node;
@@ -192,13 +193,13 @@ static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
class fs_inst : public exec_node {
public:
- /* Callers of this talloc-based new need not call delete. It's
- * easier to just talloc_free 'ctx' (or any of its ancestors). */
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
- node = talloc_zero_size(ctx, size);
+ node = rzalloc_size(ctx, size);
assert(node != NULL);
return node;
@@ -305,6 +306,26 @@ public:
offset == inst->offset);
}
+ bool is_tex()
+ {
+ return (opcode == FS_OPCODE_TEX ||
+ opcode == FS_OPCODE_TXB ||
+ opcode == FS_OPCODE_TXD ||
+ opcode == FS_OPCODE_TXL);
+ }
+
+ bool is_math()
+ {
+ return (opcode == FS_OPCODE_RCP ||
+ opcode == FS_OPCODE_RSQ ||
+ opcode == FS_OPCODE_SQRT ||
+ opcode == FS_OPCODE_EXP2 ||
+ opcode == FS_OPCODE_LOG2 ||
+ opcode == FS_OPCODE_SIN ||
+ opcode == FS_OPCODE_COS ||
+ opcode == FS_OPCODE_POW);
+ }
+
int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
fs_reg dst;
fs_reg src[3];
@@ -341,13 +362,30 @@ public:
this->fp = brw->fragment_program;
this->intel = &brw->intel;
this->ctx = &intel->ctx;
- this->mem_ctx = talloc_new(NULL);
+ this->mem_ctx = ralloc_context(NULL);
this->shader = shader;
this->fail = false;
this->variable_ht = hash_table_ctor(0,
hash_table_pointer_hash,
hash_table_pointer_compare);
+ /* There's a question that appears to be left open in the spec:
+ * How do implicit dst conversions interact with the CMP
+ * instruction or conditional mods? On gen6, the instruction:
+ *
+ * CMP null<d> src0<f> src1<f>
+ *
+ * will do src1 - src0 and compare that result as if it was an
+ * integer. On gen4, it will do src1 - src0 as float, convert
+ * the result to int, and compare as int. In between, it
+ * appears that it does src1 - src0 and does the compare in the
+ * execution type so dst type doesn't matter.
+ */
+ if (this->intel->gen > 4)
+ this->reg_null_cmp = reg_null_d;
+ else
+ this->reg_null_cmp = reg_null_f;
+
this->frag_color = NULL;
this->frag_data = NULL;
this->frag_depth = NULL;
@@ -361,13 +399,14 @@ public:
this->virtual_grf_array_size = 0;
this->virtual_grf_def = NULL;
this->virtual_grf_use = NULL;
+ this->live_intervals_valid = false;
this->kill_emitted = false;
}
~fs_visitor()
{
- talloc_free(this->mem_ctx);
+ ralloc_free(this->mem_ctx);
hash_table_dtor(this->variable_ht);
}
@@ -393,6 +432,7 @@ public:
void visit(ir_function_signature *ir);
fs_inst *emit(fs_inst inst);
+ void setup_paramvalues_refs();
void assign_curb_setup();
void calculate_urb_setup();
void assign_urb_setup();
@@ -409,11 +449,13 @@ public:
bool dead_code_eliminate();
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
+ void schedule_instructions();
+
void generate_code();
void generate_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
- void generate_tex(fs_inst *inst, struct brw_reg dst);
+ void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
void generate_discard_not(fs_inst *inst, struct brw_reg temp);
void generate_discard_and(fs_inst *inst, struct brw_reg temp);
@@ -457,11 +499,18 @@ public:
void *mem_ctx;
exec_list instructions;
+ /* Delayed setup of c->prog_data.params[] due to realloc of
+ * ParamValues[] during compile.
(The surrounding code uses c->prog_data.param[], singular, as in setup_paramvalues_refs() above.)
+ */
+ int param_index[MAX_UNIFORMS * 4];
+ int param_offset[MAX_UNIFORMS * 4];
+
int *virtual_grf_sizes;
int virtual_grf_next;
int virtual_grf_array_size;
int *virtual_grf_def;
int *virtual_grf_use;
+ bool live_intervals_valid;
struct hash_table *variable_ht;
ir_variable *frag_color, *frag_data, *frag_depth;
@@ -485,6 +534,7 @@ public:
fs_reg pixel_w;
fs_reg delta_x;
fs_reg delta_y;
+ fs_reg reg_null_cmp;
int grf_used;
};
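
Aside: the ralloc-based operator new above means fs_reg/fs_inst objects are owned by a memory context and are never deleted individually. A minimal sketch of that allocation pattern, assuming the ralloc API shown in this diff (toy class and function names are illustrative, header path may differ in-tree):

#include <stddef.h>
#include "ralloc.h"

class node {
public:
   /* Allocate out of a ralloc context; callers never call delete. */
   static void *operator new(size_t size, void *ctx)
   {
      return rzalloc_size(ctx, size);
   }
   int value;
};

static void
example(void)
{
   void *mem_ctx = ralloc_context(NULL);
   node *n = new(mem_ctx) node();   /* lives as long as mem_ctx */
   n->value = 42;
   ralloc_free(mem_ctx);            /* frees n and everything else owned by it */
}
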
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 20bfa4c3ea..7f3f52854d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -141,7 +141,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
return visit_continue;
if (!this->mem_ctx)
- this->mem_ctx = talloc_parent(ir);
+ this->mem_ctx = ralloc_parent(ir);
for (i = 0; i < expr->get_num_operands(); i++) {
if (expr->operands[i]->type->is_vector()) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index bbb210cd44..f027742317 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -41,7 +41,6 @@ extern "C" {
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
-#include "talloc.h"
}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
@@ -94,6 +93,8 @@ fs_visitor::assign_regs()
int class_count = 0;
int aligned_pair_class = -1;
+ calculate_live_intervals();
+
/* Set up the register classes.
*
* The base registers store a scalar value. For texture samples,
@@ -232,8 +233,8 @@ fs_visitor::assign_regs()
}
- talloc_free(g);
- talloc_free(regs);
+ ralloc_free(g);
+ ralloc_free(regs);
return false;
}
@@ -271,8 +272,8 @@ fs_visitor::assign_regs()
this->grf_used = last_grf + 1;
- talloc_free(g);
- talloc_free(regs);
+ ralloc_free(g);
+ ralloc_free(regs);
return true;
}
@@ -416,4 +417,6 @@ fs_visitor::spill_reg(int spill_reg)
}
}
}
+
+ this->live_intervals_valid = false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
new file mode 100644
index 0000000000..bff8f82f3f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -0,0 +1,488 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+extern "C" {
+
+#include <sys/types.h>
+
+#include "main/macros.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/prog_optimize.h"
+#include "program/register_allocate.h"
+#include "program/sampler.h"
+#include "program/hash_table.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+}
+#include "brw_fs.h"
+#include "../glsl/glsl_types.h"
+#include "../glsl/ir_optimization.h"
+#include "../glsl/ir_print_visitor.h"
+
+/** @file brw_fs_schedule_instructions.cpp
+ *
+ * List scheduling of FS instructions.
+ *
+ * The basic model of the list scheduler is to take a basic block,
+ * compute a DAG of the dependencies (RAW ordering with latency, WAW
+ * ordering, WAR ordering), and make a list of the DAG heads.
+ * Heuristically pick a DAG head, then put all the children that are
+ * now DAG heads into the list of things to schedule.
+ *
+ * The heuristic is the important part. We're trying to be cheap,
+ * since actually computing the optimal scheduling is NP complete.
+ * What we do is track a "current clock". When we schedule a node, we
+ * update the earliest-unblocked clock time of its children, and
+ * increment the clock. Then, when trying to schedule, we just pick
+ * the earliest-unblocked instruction to schedule.
+ *
+ * Note that often there will be many things which could execute
+ * immediately, and there are a range of heuristic options to choose
+ * from in picking among those.
+ */
+
+class schedule_node : public exec_node
+{
+public:
+ schedule_node(fs_inst *inst)
+ {
+ this->inst = inst;
+ this->child_array_size = 0;
+ this->children = NULL;
+ this->child_latency = NULL;
+ this->child_count = 0;
+ this->parent_count = 0;
+ this->unblocked_time = 0;
+
+ int chans = 8;
+ int math_latency = 22;
+
+ switch (inst->opcode) {
+ case FS_OPCODE_RCP:
+ this->latency = 1 * chans * math_latency;
+ break;
+ case FS_OPCODE_RSQ:
+ this->latency = 2 * chans * math_latency;
+ break;
+ case FS_OPCODE_SQRT:
+ case FS_OPCODE_LOG2:
+ /* full precision log. partial is 2. */
+ this->latency = 3 * chans * math_latency;
+ break;
+ case FS_OPCODE_EXP2:
+ /* full precision. partial is 3, same throughput. */
+ this->latency = 4 * chans * math_latency;
+ break;
+ case FS_OPCODE_POW:
+ this->latency = 8 * chans * math_latency;
+ break;
+ case FS_OPCODE_SIN:
+ case FS_OPCODE_COS:
+ /* minimum latency, max is 12 rounds. */
+ this->latency = 5 * chans * math_latency;
+ break;
+ default:
+ this->latency = 2;
+ break;
+ }
+ }
+
+ fs_inst *inst;
+ schedule_node **children;
+ int *child_latency;
+ int child_count;
+ int parent_count;
+ int child_array_size;
+ int unblocked_time;
+ int latency;
+};
+
+class instruction_scheduler {
+public:
+ instruction_scheduler(fs_visitor *v, void *mem_ctx, int virtual_grf_count)
+ {
+ this->v = v;
+ this->mem_ctx = ralloc_context(mem_ctx);
+ this->virtual_grf_count = virtual_grf_count;
+ this->instructions.make_empty();
+ this->instructions_to_schedule = 0;
+ }
+
+ ~instruction_scheduler()
+ {
+ ralloc_free(this->mem_ctx);
+ }
+ void add_barrier_deps(schedule_node *n);
+ void add_dep(schedule_node *before, schedule_node *after, int latency);
+
+ void add_inst(fs_inst *inst);
+ void calculate_deps();
+ void schedule_instructions(fs_inst *next_block_header);
+
+ void *mem_ctx;
+
+ int instructions_to_schedule;
+ int virtual_grf_count;
+ exec_list instructions;
+ fs_visitor *v;
+};
+
+void
+instruction_scheduler::add_inst(fs_inst *inst)
+{
+ schedule_node *n = new(mem_ctx) schedule_node(inst);
+
+ assert(!inst->is_head_sentinel());
+ assert(!inst->is_tail_sentinel());
+
+ this->instructions_to_schedule++;
+
+ inst->remove();
+ instructions.push_tail(n);
+}
+
+/**
+ * Add a dependency between two instruction nodes.
+ *
+ * The @after node will be scheduled after @before. We will try to
+ * schedule it @latency cycles after @before, but no guarantees there.
+ */
+void
+instruction_scheduler::add_dep(schedule_node *before, schedule_node *after,
+ int latency)
+{
+ if (!before || !after)
+ return;
+
+ assert(before != after);
+
+ for (int i = 0; i < before->child_count; i++) {
+ if (before->children[i] == after) {
+ before->child_latency[i] = MAX2(before->child_latency[i], latency);
+ return;
+ }
+ }
+
+ if (before->child_array_size <= before->child_count) {
+ if (before->child_array_size < 16)
+ before->child_array_size = 16;
+ else
+ before->child_array_size *= 2;
+
+ before->children = reralloc(mem_ctx, before->children,
+ schedule_node *,
+ before->child_array_size);
+ before->child_latency = reralloc(mem_ctx, before->child_latency,
+ int, before->child_array_size);
+ }
+
+ before->children[before->child_count] = after;
+ before->child_latency[before->child_count] = latency;
+ before->child_count++;
+ after->parent_count++;
+}
+
+/**
+ * Sometimes we really want this node to execute after everything that
+ * was before it and before everything that followed it. This adds
+ * the deps to do so.
+ */
+void
+instruction_scheduler::add_barrier_deps(schedule_node *n)
+{
+ schedule_node *prev = (schedule_node *)n->prev;
+ schedule_node *next = (schedule_node *)n->next;
+
+ if (prev) {
+ while (!prev->is_head_sentinel()) {
+ add_dep(prev, n, 0);
+ prev = (schedule_node *)prev->prev;
+ }
+ }
+
+ if (next) {
+ while (!next->is_tail_sentinel()) {
+ add_dep(n, next, 0);
+ next = (schedule_node *)next->next;
+ }
+ }
+}
+
+void
+instruction_scheduler::calculate_deps()
+{
+ schedule_node *last_grf_write[virtual_grf_count];
+ schedule_node *last_mrf_write[BRW_MAX_MRF];
+ schedule_node *last_conditional_mod = NULL;
+
+ /* The last instruction always needs to still be the last
+ * instruction. Either it's flow control (IF, ELSE, ENDIF, DO,
+ * WHILE) and scheduling other things after it would disturb the
+ * basic block, or it's FB_WRITE and we should do a better job at
+ * dead code elimination anyway.
+ */
+ schedule_node *last = (schedule_node *)instructions.get_tail();
+ add_barrier_deps(last);
+
+ memset(last_grf_write, 0, sizeof(last_grf_write));
+ memset(last_mrf_write, 0, sizeof(last_mrf_write));
+
+ /* top-to-bottom dependencies: RAW and WAW. */
+ foreach_iter(exec_list_iterator, iter, instructions) {
+ schedule_node *n = (schedule_node *)iter.get();
+ fs_inst *inst = n->inst;
+
+ /* read-after-write deps. */
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF) {
+ if (last_grf_write[inst->src[i].reg]) {
+ add_dep(last_grf_write[inst->src[i].reg], n,
+ last_grf_write[inst->src[i].reg]->latency);
+ }
+ } else if (inst->src[i].file != BAD_FILE &&
+ inst->src[i].file != IMM &&
+ inst->src[i].file != UNIFORM) {
+ assert(inst->src[i].file != MRF);
+ add_barrier_deps(n);
+ }
+ }
+
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ if (last_mrf_write[inst->base_mrf + i]) {
+ add_dep(last_mrf_write[inst->base_mrf + i], n,
+ last_mrf_write[inst->base_mrf + i]->latency);
+ }
+ }
+
+ if (inst->predicated) {
+ assert(last_conditional_mod);
+ add_dep(last_conditional_mod, n, last_conditional_mod->latency);
+ }
+
+ /* write-after-write deps. */
+ if (inst->dst.file == GRF) {
+ if (last_grf_write[inst->dst.reg]) {
+ add_dep(last_grf_write[inst->dst.reg], n,
+ last_grf_write[inst->dst.reg]->latency);
+ }
+ last_grf_write[inst->dst.reg] = n;
+ } else if (inst->dst.file == MRF) {
+ if (last_mrf_write[inst->dst.hw_reg]) {
+ add_dep(last_mrf_write[inst->dst.hw_reg], n,
+ last_mrf_write[inst->dst.hw_reg]->latency);
+ }
+ last_mrf_write[inst->dst.hw_reg] = n;
+ } else if (inst->dst.file != BAD_FILE) {
+ add_barrier_deps(n);
+ }
+
+ if (inst->mlen > 0) {
+ for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+ if (last_mrf_write[inst->base_mrf + i]) {
+ add_dep(last_mrf_write[inst->base_mrf + i], n,
+ last_mrf_write[inst->base_mrf + i]->latency);
+ }
+ last_mrf_write[inst->base_mrf + i] = n;
+ }
+ }
+
+ if (inst->conditional_mod) {
+ add_dep(last_conditional_mod, n, 0);
+ last_conditional_mod = n;
+ }
+ }
+
+ /* bottom-to-top dependencies: WAR */
+ memset(last_grf_write, 0, sizeof(last_grf_write));
+ memset(last_mrf_write, 0, sizeof(last_mrf_write));
+ last_conditional_mod = NULL;
+
+ exec_node *node;
+ exec_node *prev;
+ for (node = instructions.get_tail(), prev = node->prev;
+ !node->is_head_sentinel();
+ node = prev, prev = node->prev) {
+ schedule_node *n = (schedule_node *)node;
+ fs_inst *inst = n->inst;
+
+ /* write-after-read deps. */
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == GRF) {
+ if (last_grf_write[inst->src[i].reg]) {
+ add_dep(n, last_grf_write[inst->src[i].reg], n->latency);
+ }
+ } else if (inst->src[i].file != BAD_FILE &&
+ inst->src[i].file != IMM &&
+ inst->src[i].file != UNIFORM) {
+ assert(inst->src[i].file != MRF);
+ add_barrier_deps(n);
+ }
+ }
+
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
+
+ if (inst->predicated) {
+ if (last_conditional_mod) {
+ add_dep(n, last_conditional_mod, n->latency);
+ }
+ }
+
+ /* Update the things this instruction wrote, so earlier reads
+ * can mark this as WAR dependency.
+ */
+ if (inst->dst.file == GRF) {
+ last_grf_write[inst->dst.reg] = n;
+ } else if (inst->dst.file == MRF) {
+ last_mrf_write[inst->dst.hw_reg] = n;
+ } else if (inst->dst.file != BAD_FILE) {
+ add_barrier_deps(n);
+ }
+
+ if (inst->mlen > 0) {
+ for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+ last_mrf_write[inst->base_mrf + i] = n;
+ }
+ }
+
+ if (inst->conditional_mod)
+ last_conditional_mod = n;
+ }
+}
+
+void
+instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
+{
+ int time = 0;
+
+ /* Remove non-DAG heads from the list. */
+ foreach_iter(exec_list_iterator, iter, instructions) {
+ schedule_node *n = (schedule_node *)iter.get();
+ if (n->parent_count != 0)
+ n->remove();
+ }
+
+ while (!instructions.is_empty()) {
+ schedule_node *chosen = NULL;
+ int chosen_time = 0;
+
+ foreach_iter(exec_list_iterator, iter, instructions) {
+ schedule_node *n = (schedule_node *)iter.get();
+
+ if (!chosen || n->unblocked_time < chosen_time) {
+ chosen = n;
+ chosen_time = n->unblocked_time;
+ }
+ }
+
+ /* Schedule this instruction. */
+ assert(chosen);
+ chosen->remove();
+ next_block_header->insert_before(chosen->inst);
+ instructions_to_schedule--;
+
+ /* Bump the clock. If we expected a delay for scheduling, then
+ * bump the clock to reflect that.
+ */
+ time = MAX2(time + 1, chosen_time);
+
+ /* Now that we've scheduled a new instruction, some of its
+ * children can be promoted to the list of instructions ready to
+ * be scheduled. Update the children's unblocked time for this
+ * DAG edge as we do so.
+ */
+ for (int i = 0; i < chosen->child_count; i++) {
+ schedule_node *child = chosen->children[i];
+
+ child->unblocked_time = MAX2(child->unblocked_time,
+ time + chosen->child_latency[i]);
+
+ child->parent_count--;
+ if (child->parent_count == 0) {
+ instructions.push_tail(child);
+ }
+ }
+
+      /* Shared resource: the mathbox.  There's one per EU (and it's even
+       * more limited pre-gen6), so if we send something off to it then the
+       * next math instruction isn't going to make progress until the first
+       * is done.
+       */
+ if (chosen->inst->is_math()) {
+ foreach_iter(exec_list_iterator, iter, instructions) {
+ schedule_node *n = (schedule_node *)iter.get();
+
+ if (n->inst->is_math())
+ n->unblocked_time = MAX2(n->unblocked_time,
+ time + chosen->latency);
+ }
+ }
+ }
+
+ assert(instructions_to_schedule == 0);
+}
+
+void
+fs_visitor::schedule_instructions()
+{
+ fs_inst *next_block_header = (fs_inst *)instructions.head;
+ instruction_scheduler sched(this, mem_ctx, this->virtual_grf_next);
+
+ while (!next_block_header->is_tail_sentinel()) {
+ /* Add things to be scheduled until we get to a new BB. */
+ while (!next_block_header->is_tail_sentinel()) {
+ fs_inst *inst = next_block_header;
+ next_block_header = (fs_inst *)next_block_header->next;
+
+ sched.add_inst(inst);
+ if (inst->opcode == BRW_OPCODE_IF ||
+ inst->opcode == BRW_OPCODE_ELSE ||
+ inst->opcode == BRW_OPCODE_ENDIF ||
+ inst->opcode == BRW_OPCODE_DO ||
+ inst->opcode == BRW_OPCODE_WHILE ||
+ inst->opcode == BRW_OPCODE_BREAK ||
+ inst->opcode == BRW_OPCODE_CONTINUE) {
+ break;
+ }
+ }
+ sched.calculate_deps();
+ sched.schedule_instructions(next_block_header);
+ }
+
+ this->live_intervals_valid = false;
+}
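
The pass added above is a classic greedy list scheduler: calculate_deps() builds a per-basic-block DAG of register, MRF and flag dependencies, and schedule_instructions() repeatedly issues the ready node with the smallest unblocked time, propagating edge latencies to its children. A minimal, self-contained C sketch of that core loop follows; the node names, latencies and fixed-size arrays are made up for illustration, and the driver's exec_list/fs_inst types are not used.

#include <stdio.h>

#define MAX_NODES 8

struct node {
   const char *name;
   int latency;               /* cycles until the result is available */
   int unblocked_time;        /* earliest cycle this node may issue */
   int parent_count;          /* unscheduled predecessors */
   int children[MAX_NODES];   /* indices of dependent nodes */
   int child_count;
};

static void add_dep(struct node *nodes, int before, int after)
{
   nodes[before].children[nodes[before].child_count++] = after;
   nodes[after].parent_count++;
}

int main(void)
{
   struct node n[3] = {
      { "load",  10, 0, 0, {0}, 0 },
      { "add",    2, 0, 0, {0}, 0 },
      { "store",  2, 0, 0, {0}, 0 },
   };
   int scheduled[3] = {0};
   int time = 0;

   add_dep(n, 0, 1);   /* RAW: add reads load's result */
   add_dep(n, 1, 2);   /* RAW: store reads add's result */

   for (int issued = 0; issued < 3; issued++) {
      int chosen = -1;

      /* Pick the ready (parent_count == 0) node with the smallest
       * unblocked time, mirroring the loop in schedule_instructions().
       */
      for (int i = 0; i < 3; i++) {
         if (scheduled[i] || n[i].parent_count != 0)
            continue;
         if (chosen < 0 || n[i].unblocked_time < n[chosen].unblocked_time)
            chosen = i;
      }

      time = (time + 1 > n[chosen].unblocked_time) ? time + 1
                                                   : n[chosen].unblocked_time;
      printf("t=%2d issue %s\n", time, n[chosen].name);
      scheduled[chosen] = 1;

      /* Promote children whose last parent just issued. */
      for (int c = 0; c < n[chosen].child_count; c++) {
         int child = n[chosen].children[c];
         int ready_at = time + n[chosen].latency;
         if (ready_at > n[child].unblocked_time)
            n[child].unblocked_time = ready_at;
         n[child].parent_count--;
      }
   }
   return 0;
}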
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 2be6b08b5c..530ffa2658 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -69,7 +69,7 @@ public:
ir_variable *components[4];
- /** talloc_parent(this->var) -- the shader's talloc context. */
+ /** ralloc_parent(this->var) -- the shader's ralloc context. */
void *mem_ctx;
};
@@ -77,13 +77,13 @@ class ir_vector_reference_visitor : public ir_hierarchical_visitor {
public:
ir_vector_reference_visitor(void)
{
- this->mem_ctx = talloc_new(NULL);
+ this->mem_ctx = ralloc_context(NULL);
this->variable_list.make_empty();
}
~ir_vector_reference_visitor(void)
{
- talloc_free(mem_ctx);
+ ralloc_free(mem_ctx);
}
virtual ir_visitor_status visit(ir_variable *);
@@ -358,7 +358,7 @@ brw_do_vector_splitting(exec_list *instructions)
if (refs.variable_list.is_empty())
return false;
- void *mem_ctx = talloc_new(NULL);
+ void *mem_ctx = ralloc_context(NULL);
/* Replace the decls of the vectors to be split with their split
* components.
@@ -368,10 +368,10 @@ brw_do_vector_splitting(exec_list *instructions)
const struct glsl_type *type;
type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
- entry->mem_ctx = talloc_parent(entry->var);
+ entry->mem_ctx = ralloc_parent(entry->var);
for (unsigned int i = 0; i < entry->var->type->vector_elements; i++) {
- const char *name = talloc_asprintf(mem_ctx, "%s_%c",
+ const char *name = ralloc_asprintf(mem_ctx, "%s_%c",
entry->var->name,
"xyzw"[i]);
@@ -386,7 +386,7 @@ brw_do_vector_splitting(exec_list *instructions)
ir_vector_splitting_visitor split(&refs.variable_list);
visit_list_elements(&split, instructions);
- talloc_free(mem_ctx);
+ ralloc_free(mem_ctx);
return true;
}
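
This hunk is part of the tree-wide talloc-to-ralloc conversion: the visitor's allocations are parented to a ralloc context and released with a single ralloc_free(). A small sketch of that ownership pattern, assuming the ralloc_context()/ralloc_asprintf()/ralloc_free() interface from Mesa's ralloc.h; the string contents are illustrative.

#include "ralloc.h"   /* Mesa's hierarchical allocator (../glsl/ralloc.h here) */

/* Allocations are parented to a context; freeing the context frees every
 * child allocation in one call.
 */
static void ralloc_pattern_example(void)
{
   void *mem_ctx = ralloc_context(NULL);

   /* Strings parented to mem_ctx, as with the per-component names above. */
   char *name_x = ralloc_asprintf(mem_ctx, "%s_%c", "color", 'x');
   char *name_y = ralloc_asprintf(mem_ctx, "%s_%c", "color", 'y');

   (void)name_x;
   (void)name_y;

   ralloc_free(mem_ctx);   /* releases name_x and name_y as well */
}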
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 73b41fdbce..70c451d071 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -96,6 +96,9 @@ static void compile_gs_prog( struct brw_context *brw,
brw_gs_quad_strip( &c, key );
break;
case GL_LINE_LOOP:
+ /* Gen6: LINELOOP is converted to LINESTRIP at the beginning of the 3D pipeline */
+ if (intel->gen == 6)
+ return;
brw_gs_lines( &c );
break;
case GL_LINES:
@@ -189,7 +192,7 @@ static void populate_key( struct brw_context *brw,
}
if (intel->gen == 6)
- prim_gs_always = brw->primitive == GL_LINE_LOOP;
+ prim_gs_always = 0;
else
prim_gs_always = brw->primitive == GL_QUADS ||
brw->primitive == GL_QUAD_STRIP ||
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index a91b0528fa..c768be23fa 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -74,7 +74,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(6);
- OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
OUT_BATCH(brw->vs.bind_bo_offset);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
@@ -104,7 +104,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 |
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
GEN6_BINDING_TABLE_MODIFY_VS |
GEN6_BINDING_TABLE_MODIFY_GS |
GEN6_BINDING_TABLE_MODIFY_PS |
@@ -142,7 +142,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
}
BEGIN_BATCH(7);
- OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
+ OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
if (brw->gs.prog_active)
OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
@@ -151,7 +151,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.state_offset);
ADVANCE_BATCH();
@@ -214,7 +214,7 @@ static void emit_depthbuffer(struct brw_context *brw)
if (region == NULL) {
BEGIN_BATCH(len);
- OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
(BRW_SURFACE_NULL << 29));
OUT_BATCH(0);
@@ -251,7 +251,7 @@ static void emit_depthbuffer(struct brw_context *brw)
assert(region->tiling != I915_TILING_NONE);
BEGIN_BATCH(len);
- OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
(format << 18) |
(BRW_TILEWALK_YMAJOR << 26) |
@@ -277,7 +277,7 @@ static void emit_depthbuffer(struct brw_context *brw)
/* Initialize it for safety. */
if (intel->gen >= 6) {
BEGIN_BATCH(2);
- OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2));
+ OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
}
@@ -301,16 +301,15 @@ const struct brw_tracked_state brw_depthbuffer = {
static void upload_polygon_stipple(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_polygon_stipple bps;
GLuint i;
if (!ctx->Polygon.StippleFlag)
return;
- memset(&bps, 0, sizeof(bps));
- bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
- bps.header.length = sizeof(bps)/4-2;
+ BEGIN_BATCH(33);
+ OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
/* Polygon stipple is provided in OpenGL order, i.e. bottom
* row first. If we're rendering to a window (i.e. the
@@ -321,14 +320,13 @@ static void upload_polygon_stipple(struct brw_context *brw)
*/
if (ctx->DrawBuffer->Name == 0) {
for (i = 0; i < 32; i++)
- bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
+ OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
}
else {
for (i = 0; i < 32; i++)
- bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
+ OUT_BATCH(ctx->PolygonStipple[i]);
}
-
- BRW_CACHED_BATCH_STRUCT(brw, &bps);
+ CACHED_BATCH();
}
const struct brw_tracked_state brw_polygon_stipple = {
@@ -347,15 +345,14 @@ const struct brw_tracked_state brw_polygon_stipple = {
static void upload_polygon_stipple_offset(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_polygon_stipple_offset bpso;
if (!ctx->Polygon.StippleFlag)
return;
- memset(&bpso, 0, sizeof(bpso));
- bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
- bpso.header.length = sizeof(bpso)/4-2;
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
/* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
* we have to invert the Y axis in order to match the OpenGL
@@ -365,16 +362,11 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
* system works just fine, and there's no window system to
* worry about.
*/
- if (brw->intel.ctx.DrawBuffer->Name == 0) {
- bpso.bits0.x_offset = 0;
- bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31;
- }
- else {
- bpso.bits0.y_offset = 0;
- bpso.bits0.x_offset = 0;
- }
-
- BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+ if (brw->intel.ctx.DrawBuffer->Name == 0)
+ OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
+ else
+ OUT_BATCH(0);
+ CACHED_BATCH();
}
#define _NEW_WINDOW_POS 0x40000000
@@ -393,18 +385,17 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
*/
static void upload_aa_line_parameters(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_aa_line_parameters balp;
if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
return;
+ OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
/* use legacy aa line coverage computation */
- memset(&balp, 0, sizeof(balp));
- balp.header.opcode = CMD_AA_LINE_PARAMETERS;
- balp.header.length = sizeof(balp) / 4 - 2;
-
- BRW_CACHED_BATCH_STRUCT(brw, &balp);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ CACHED_BATCH();
}
const struct brw_tracked_state brw_aa_line_parameters = {
@@ -422,28 +413,21 @@ const struct brw_tracked_state brw_aa_line_parameters = {
static void upload_line_stipple(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_line_stipple bls;
GLfloat tmp;
GLint tmpi;
if (!ctx->Line.StippleFlag)
return;
- memset(&bls, 0, sizeof(bls));
- bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
- bls.header.length = sizeof(bls)/4 - 2;
-
- bls.bits0.pattern = ctx->Line.StipplePattern;
- bls.bits1.repeat_count = ctx->Line.StippleFactor;
-
+ BEGIN_BATCH(3);
+ OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
+ OUT_BATCH(ctx->Line.StipplePattern);
tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
tmpi = tmp * (1<<13);
-
-
- bls.bits1.inverse_repeat_count = tmpi;
-
- BRW_CACHED_BATCH_STRUCT(brw, &bls);
+ OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
+ CACHED_BATCH();
}
const struct brw_tracked_state brw_line_stipple = {
@@ -481,7 +465,7 @@ static void upload_invarient_state( struct brw_context *brw )
/* Disable depth offset clamping.
*/
- gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+ gdo.header.opcode = _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP;
gdo.header.length = sizeof(gdo)/4 - 2;
gdo.depth_offset_clamp = 0.0;
@@ -492,20 +476,20 @@ static void upload_invarient_state( struct brw_context *brw )
int i;
BEGIN_BATCH(3);
- OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2));
+ OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (3 - 2));
OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
MS_NUMSAMPLES_1);
OUT_BATCH(0); /* positions for 4/8-sample */
ADVANCE_BATCH();
BEGIN_BATCH(2);
- OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2));
+ OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
OUT_BATCH(1);
ADVANCE_BATCH();
for (i = 0; i < 4; i++) {
BEGIN_BATCH(4);
- OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2));
+ OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
OUT_BATCH(i << SVB_INDEX_SHIFT);
OUT_BATCH(0);
OUT_BATCH(0xffffffff);
@@ -565,7 +549,7 @@ static void upload_state_base_address( struct brw_context *brw )
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
OUT_BATCH(1); /* General state base address */
- OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Dynamic state base address */
OUT_BATCH(1); /* Indirect object base address */
@@ -579,7 +563,7 @@ static void upload_state_base_address( struct brw_context *brw )
BEGIN_BATCH(8);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
- OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* Instruction base address */
@@ -591,7 +575,7 @@ static void upload_state_base_address( struct brw_context *brw )
BEGIN_BATCH(6);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
OUT_BATCH(1); /* General state base address */
- OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* General state upper bound */
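
These hunks replace the filled-in brw_* header structs with direct DWord emission: BEGIN_BATCH() reserves space, OUT_BATCH() writes each DWord (the packet header first, with the DWord count encoded as length minus two), and ADVANCE_BATCH() closes the packet, or CACHED_BATCH() (added by this series) when identical packets should be deduplicated. The sketch below shows only the general shape of such an emit function with toy stand-ins for the macros; the _3DSTATE_EXAMPLE opcode and its payload are invented for illustration and do not correspond to a real packet.

#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins for the driver's batch macros (the real ones live in
 * intel_batchbuffer.h and write into the batch BO).
 */
static uint32_t batch[64];
static unsigned used;

#define BEGIN_BATCH(n)  do { /* the driver checks for space here */ } while (0)
#define OUT_BATCH(d)    (batch[used++] = (d))
#define ADVANCE_BATCH() do { /* the driver validates the DWord count here */ } while (0)

#define _3DSTATE_EXAMPLE 0x1234   /* invented opcode, illustration only */

static void upload_example_packet(void)
{
   BEGIN_BATCH(3);                               /* reserve 3 DWords */
   OUT_BATCH(_3DSTATE_EXAMPLE << 16 | (3 - 2));  /* header: opcode | (len - 2) */
   OUT_BATCH(0);                                 /* payload DWord 1 */
   OUT_BATCH(0);                                 /* payload DWord 2 */
   ADVANCE_BATCH();
}

int main(void)
{
   upload_example_packet();
   printf("emitted %u DWords, header 0x%08x\n", used, batch[0]);
   return 0;
}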
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 94efa79109..7d653327e3 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -36,7 +36,7 @@
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
-#include "talloc.h"
+#include "../glsl/ralloc.h"
#include "brw_context.h"
#include "brw_wm.h"
@@ -115,7 +115,7 @@ shader_error(struct gl_context *ctx, struct gl_program *prog, const char *msg)
shader = _mesa_lookup_shader_program(ctx, prog->Id);
if (shader) {
- shader->InfoLog = talloc_strdup_append(shader->InfoLog, msg);
+ ralloc_strcat(&shader->InfoLog, msg);
shader->LinkStatus = GL_FALSE;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index f28f28663e..b41d05dd43 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -177,7 +177,7 @@ brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
ADVANCE_BATCH();
}
- intel_batchbuffer_flush(intel->batch);
+ intel_batchbuffer_flush(intel);
} else {
/* Flush the batchbuffer in case it has writes to our query BO.
* Have later queries write to a new query BO so that further rendering
@@ -185,7 +185,7 @@ brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
*/
if (query->bo) {
brw_emit_query_end(brw);
- intel_batchbuffer_flush(intel->batch);
+ intel_batchbuffer_flush(intel);
drm_intel_bo_unreference(brw->query.bo);
brw->query.bo = NULL;
@@ -232,6 +232,12 @@ brw_prepare_query_begin(struct brw_context *brw)
brw->query.bo = NULL;
brw->query.bo = drm_intel_bo_alloc(intel->bufmgr, "query", 4096, 1);
+
+ /* clear target buffer */
+ drm_intel_bo_map(brw->query.bo, GL_TRUE);
+ memset((char *)brw->query.bo->virtual, 0, 4096);
+ drm_intel_bo_unmap(brw->query.bo);
+
brw->query.index = 0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 3beed16945..86b0caa4a4 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -164,25 +164,18 @@ void brw_destroy_caches( struct brw_context *brw );
/***********************************************************************
* brw_state_batch.c
*/
-#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)))
-#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
-
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
- const void *data,
- GLuint sz );
-void brw_destroy_batch_cache( struct brw_context *brw );
-void brw_clear_batch_cache( struct brw_context *brw );
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(&brw->intel, (s), \
+ sizeof(*(s)), false)
+
void *brw_state_batch(struct brw_context *brw,
int size,
int alignment,
- drm_intel_bo **out_bo,
uint32_t *out_offset);
/* brw_wm_surface_state.c */
void brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,
int width,
- drm_intel_bo **out_bo,
uint32_t *out_offset);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index be3989eb7d..213c7a38d8 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -29,75 +29,10 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
-
-
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "main/imports.h"
-
-
-/* A facility similar to the data caching code above, which aims to
- * prevent identical commands being issued repeatedly.
- */
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
- const void *data,
- GLuint sz )
-{
- struct brw_cached_batch_item *item = brw->cached_batch_items;
- struct header *newheader = (struct header *)data;
-
- if (brw->emit_state_always) {
- intel_batchbuffer_data(brw->intel.batch, data, sz);
- return GL_TRUE;
- }
-
- while (item) {
- if (item->header->opcode == newheader->opcode) {
- if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
- return GL_FALSE;
- if (item->sz != sz) {
- free(item->header);
- item->header = malloc(sz);
- item->sz = sz;
- }
- goto emit;
- }
- item = item->next;
- }
-
- assert(!item);
- item = CALLOC_STRUCT(brw_cached_batch_item);
- item->header = malloc(sz);
- item->sz = sz;
- item->next = brw->cached_batch_items;
- brw->cached_batch_items = item;
-
- emit:
- memcpy(item->header, newheader, sz);
- intel_batchbuffer_data(brw->intel.batch, data, sz);
- return GL_TRUE;
-}
-
-void brw_clear_batch_cache( struct brw_context *brw )
-{
- struct brw_cached_batch_item *item = brw->cached_batch_items;
-
- while (item) {
- struct brw_cached_batch_item *next = item->next;
- free((void *)item->header);
- free(item);
- item = next;
- }
-
- brw->cached_batch_items = NULL;
-}
-
-void brw_destroy_batch_cache( struct brw_context *brw )
-{
- brw_clear_batch_cache(brw);
-}
-
/**
* Allocates a block of space in the batchbuffer for indirect state.
*
@@ -116,13 +51,12 @@ void *
brw_state_batch(struct brw_context *brw,
int size,
int alignment,
- drm_intel_bo **out_bo,
uint32_t *out_offset)
{
- struct intel_batchbuffer *batch = brw->intel.batch;
+ struct intel_batchbuffer *batch = &brw->intel.batch;
uint32_t offset;
- assert(size < batch->buf->size);
+ assert(size < batch->bo->size);
offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
/* If allocating from the top would wrap below the batchbuffer, or
@@ -130,19 +64,13 @@ brw_state_batch(struct brw_context *brw,
* space, then flush and try again.
*/
if (batch->state_batch_offset < size ||
- offset < batch->ptr - batch->map + batch->reserved_space) {
- intel_batchbuffer_flush(batch);
+ offset < 4*batch->used + batch->reserved_space) {
+ intel_batchbuffer_flush(&brw->intel);
offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
}
batch->state_batch_offset = offset;
- if (*out_bo != batch->buf) {
- drm_intel_bo_unreference(*out_bo);
- drm_intel_bo_reference(batch->buf);
- *out_bo = batch->buf;
- }
-
*out_offset = offset;
- return batch->map + offset;
+ return batch->map + (offset>>2);
}
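
brw_state_batch() now carves indirect state out of the batchbuffer itself: commands grow upward from offset 0 (4*used bytes), state grows downward from state_batch_offset, and the batch is flushed when the two would collide. A toy model of that arithmetic; the sizes, offsets and reserved space below are made-up example numbers.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ROUND_DOWN_TO(v, a)   ((v) & ~((uint32_t)(a) - 1))

int main(void)
{
   uint32_t batch_size = 16384;            /* bytes */
   uint32_t state_batch_offset = batch_size;
   uint32_t used = 100;                    /* DWords of commands already emitted */
   uint32_t reserved_space = 16;           /* bytes kept free at the end */

   uint32_t size = 6 * 4, alignment = 32;  /* e.g. one surface-state block */
   uint32_t offset = ROUND_DOWN_TO(state_batch_offset - size, alignment);

   /* Would the state allocation collide with the command stream?
    * The driver flushes and retries in that case; here we just assert.
    */
   assert(offset >= 4 * used + reserved_space);

   state_batch_offset = offset;
   printf("state placed at byte offset %u of the batch\n", offset);
   return 0;
}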
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 58ff528d44..01eeb19a68 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -58,8 +58,6 @@
#include "main/imports.h"
#include "brw_state.h"
-#include "intel_batchbuffer.h"
-#include "brw_wm.h"
#define FILE_DEBUG_FLAG DEBUG_STATE
@@ -433,8 +431,6 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
void
brw_state_cache_check_size(struct brw_context *brw)
{
- DBG("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
-
/* un-tuned guess. Each object is generally a page, so 1000 of them is 4 MB of
* state cache.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index e262887471..fdce79da2f 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -26,6 +26,7 @@
*/
#include "main/mtypes.h"
+#include "intel_batchbuffer.h"
#include "brw_context.h"
#include "brw_defines.h"
@@ -54,7 +55,8 @@ state_out(const char *name, void *data, uint32_t hw_offset, int index,
/** Generic, undecoded state buffer debug printout */
static void
-state_struct_out(const char *name, drm_intel_bo *buffer, unsigned int state_size)
+state_struct_out(const char *name, drm_intel_bo *buffer,
+ unsigned int offset, unsigned int size)
{
int i;
@@ -62,8 +64,8 @@ state_struct_out(const char *name, drm_intel_bo *buffer, unsigned int state_size
return;
drm_intel_bo_map(buffer, GL_FALSE);
- for (i = 0; i < state_size / 4; i++) {
- state_out(name, buffer->virtual, buffer->offset, i,
+ for (i = 0; i < size / 4; i++) {
+ state_out(name, buffer->virtual + offset, buffer->offset + offset, i,
"dword %d\n", i);
}
drm_intel_bo_unmap(buffer);
@@ -98,21 +100,25 @@ get_965_surface_format(unsigned int surface_format)
static void dump_wm_surface_state(struct brw_context *brw)
{
+ dri_bo *bo;
+ GLubyte *base;
int i;
+ bo = brw->intel.batch.bo;
+ drm_intel_bo_map(bo, GL_FALSE);
+ base = bo->virtual;
+
for (i = 0; i < brw->wm.nr_surfaces; i++) {
- drm_intel_bo *surf_bo = brw->wm.surf_bo[i];
unsigned int surfoff;
struct brw_surface_state *surf;
char name[20];
- if (surf_bo == NULL) {
+ if (brw->wm.surf_offset[i] == 0) {
fprintf(stderr, "WM SURF%d: NULL\n", i);
continue;
}
- drm_intel_bo_map(surf_bo, GL_FALSE);
- surfoff = surf_bo->offset + brw->wm.surf_offset[i];
- surf = (struct brw_surface_state *)(surf_bo->virtual + brw->wm.surf_offset[i]);
+ surfoff = bo->offset + brw->wm.surf_offset[i];
+ surf = (struct brw_surface_state *)(base + brw->wm.surf_offset[i]);
sprintf(name, "WM SURF%d", i);
state_out(name, surf, surfoff, 0, "%s %s\n",
@@ -127,9 +133,8 @@ static void dump_wm_surface_state(struct brw_context *brw)
surf->ss4.min_lod);
state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
surf->ss5.x_offset, surf->ss5.y_offset);
-
- drm_intel_bo_unmap(surf_bo);
}
+ drm_intel_bo_unmap(bo);
}
@@ -280,13 +285,14 @@ static void dump_cc_state(struct brw_context *brw)
const char *name = "CC";
struct gen6_color_calc_state *cc;
uint32_t cc_off;
+ dri_bo *bo = brw->intel.batch.bo;
- if (brw->cc.state_bo == NULL)
+ if (brw->cc.state_offset == 0)
return;
- drm_intel_bo_map(brw->cc.state_bo, GL_FALSE);
- cc = brw->cc.state_bo->virtual;
- cc_off = brw->cc.state_bo->offset;
+ drm_intel_bo_map(bo, GL_FALSE);
+ cc = bo->virtual;
+ cc_off = bo->offset;
state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d,"
"bf stencil ref %d\n",
@@ -300,7 +306,7 @@ static void dump_cc_state(struct brw_context *brw)
state_out(name, cc, cc_off, 4, "constant blue %f\n", cc->constant_b);
state_out(name, cc, cc_off, 5, "constant alpha %f\n", cc->constant_a);
- drm_intel_bo_unmap(brw->cc.state_bo);
+ drm_intel_bo_unmap(bo);
}
@@ -369,26 +375,29 @@ void brw_debug_batch(struct intel_context *intel)
{
struct brw_context *brw = brw_context(&intel->ctx);
- state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
+ state_struct_out("WM bind",
+ brw->intel.batch.bo,
+ brw->wm.bind_bo_offset,
+ 4 * brw->wm.nr_surfaces);
dump_wm_surface_state(brw);
dump_wm_sampler_state(brw);
if (intel->gen < 6)
- state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
+ state_struct_out("VS", brw->vs.state_bo, 0, sizeof(struct brw_vs_unit_state));
brw_debug_prog("VS prog", brw->vs.prog_bo);
if (intel->gen < 6)
- state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
+ state_struct_out("GS", brw->gs.state_bo, 0, sizeof(struct brw_gs_unit_state));
brw_debug_prog("GS prog", brw->gs.prog_bo);
if (intel->gen < 6) {
- state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
+ state_struct_out("SF", brw->sf.state_bo, 0, sizeof(struct brw_sf_unit_state));
brw_debug_prog("SF prog", brw->sf.prog_bo);
}
dump_sf_viewport_state(brw);
if (intel->gen < 6)
- state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
+ state_struct_out("WM", brw->wm.state_bo, 0, sizeof(struct brw_wm_unit_state));
brw_debug_prog("WM prog", brw->wm.prog_bo);
if (intel->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index eba4411ca7..6f521be659 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -104,7 +104,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
&brw_constant_buffer
};
-const struct brw_tracked_state *gen6_atoms[] =
+static const struct brw_tracked_state *gen6_atoms[] =
{
&brw_check_fallback,
@@ -169,25 +169,49 @@ const struct brw_tracked_state *gen6_atoms[] =
void brw_init_state( struct brw_context *brw )
{
+ const struct brw_tracked_state **atoms;
+ int num_atoms;
+
brw_init_caches(brw);
+
+ if (brw->intel.gen >= 6) {
+ atoms = gen6_atoms;
+ num_atoms = ARRAY_SIZE(gen6_atoms);
+ } else {
+ atoms = gen4_atoms;
+ num_atoms = ARRAY_SIZE(gen4_atoms);
+ }
+
+ while (num_atoms--) {
+ assert((*atoms)->dirty.mesa |
+ (*atoms)->dirty.brw |
+ (*atoms)->dirty.cache);
+
+ if ((*atoms)->prepare)
+ brw->prepare_atoms[brw->num_prepare_atoms++] = **atoms;
+ if ((*atoms)->emit)
+ brw->emit_atoms[brw->num_emit_atoms++] = **atoms;
+ atoms++;
+ }
+ assert(brw->num_emit_atoms <= ARRAY_SIZE(brw->emit_atoms));
+ assert(brw->num_prepare_atoms <= ARRAY_SIZE(brw->prepare_atoms));
}
void brw_destroy_state( struct brw_context *brw )
{
brw_destroy_caches(brw);
- brw_destroy_batch_cache(brw);
}
/***********************************************************************
*/
-static GLboolean check_state( const struct brw_state_flags *a,
- const struct brw_state_flags *b )
+static GLuint check_state( const struct brw_state_flags *a,
+ const struct brw_state_flags *b )
{
- return ((a->mesa & b->mesa) ||
- (a->brw & b->brw) ||
- (a->cache & b->cache));
+ return ((a->mesa & b->mesa) |
+ (a->brw & b->brw) |
+ (a->cache & b->cache)) != 0;
}
static void accumulate_state( struct brw_state_flags *a,
@@ -233,7 +257,6 @@ static struct dirty_bit_map mesa_bits[] = {
DEFINE_BIT(_NEW_MODELVIEW),
DEFINE_BIT(_NEW_PROJECTION),
DEFINE_BIT(_NEW_TEXTURE_MATRIX),
- DEFINE_BIT(_NEW_ACCUM),
DEFINE_BIT(_NEW_COLOR),
DEFINE_BIT(_NEW_DEPTH),
DEFINE_BIT(_NEW_EVAL),
@@ -279,6 +302,10 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_BATCH),
DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
+ DEFINE_BIT(BRW_NEW_NR_WM_SURFACES),
+ DEFINE_BIT(BRW_NEW_NR_VS_SURFACES),
+ DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
+ DEFINE_BIT(BRW_NEW_WM_CONSTBUF),
{0, 0, 0}
};
@@ -340,24 +367,16 @@ void brw_validate_state( struct brw_context *brw )
struct gl_context *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
struct brw_state_flags *state = &brw->state.dirty;
+ const struct brw_tracked_state *atoms = brw->prepare_atoms;
+ int num_atoms = brw->num_prepare_atoms;
GLuint i;
- const struct brw_tracked_state **atoms;
- int num_atoms;
brw_clear_validated_bos(brw);
state->mesa |= brw->intel.NewGLState;
brw->intel.NewGLState = 0;
- brw_add_validated_bo(brw, intel->batch->buf);
-
- if (intel->gen >= 6) {
- atoms = gen6_atoms;
- num_atoms = ARRAY_SIZE(gen6_atoms);
- } else {
- atoms = gen4_atoms;
- num_atoms = ARRAY_SIZE(gen4_atoms);
- }
+ brw_add_validated_bo(brw, intel->batch.bo);
if (brw->emit_state_always) {
state->mesa |= ~0;
@@ -375,27 +394,20 @@ void brw_validate_state( struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
}
- if (state->mesa == 0 &&
- state->cache == 0 &&
- state->brw == 0)
+ if ((state->mesa | state->cache | state->brw) == 0)
return;
- if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
- brw_clear_batch_cache(brw);
-
brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
/* do prepare stage for all atoms */
for (i = 0; i < num_atoms; i++) {
- const struct brw_tracked_state *atom = atoms[i];
-
- if (brw->intel.Fallback)
- break;
+ const struct brw_tracked_state *atom = &atoms[i];
if (check_state(state, &atom->dirty)) {
- if (atom->prepare) {
- atom->prepare(brw);
- }
+ atom->prepare(brw);
+
+ if (brw->intel.Fallback)
+ break;
}
}
@@ -418,20 +430,11 @@ void brw_validate_state( struct brw_context *brw )
void brw_upload_state(struct brw_context *brw)
{
- struct intel_context *intel = &brw->intel;
struct brw_state_flags *state = &brw->state.dirty;
+ const struct brw_tracked_state *atoms = brw->emit_atoms;
+ int num_atoms = brw->num_emit_atoms;
int i;
static int dirty_count = 0;
- const struct brw_tracked_state **atoms;
- int num_atoms;
-
- if (intel->gen >= 6) {
- atoms = gen6_atoms;
- num_atoms = ARRAY_SIZE(gen6_atoms);
- } else {
- atoms = gen4_atoms;
- num_atoms = ARRAY_SIZE(gen4_atoms);
- }
brw_clear_validated_bos(brw);
@@ -445,20 +448,14 @@ void brw_upload_state(struct brw_context *brw)
prev = *state;
for (i = 0; i < num_atoms; i++) {
- const struct brw_tracked_state *atom = atoms[i];
+ const struct brw_tracked_state *atom = &atoms[i];
struct brw_state_flags generated;
- assert(atom->dirty.mesa ||
- atom->dirty.brw ||
- atom->dirty.cache);
-
if (brw->intel.Fallback)
break;
if (check_state(state, &atom->dirty)) {
- if (atom->emit) {
- atom->emit( brw );
- }
+ atom->emit(brw);
}
accumulate_state(&examined, &atom->dirty);
@@ -474,15 +471,13 @@ void brw_upload_state(struct brw_context *brw)
}
else {
for (i = 0; i < num_atoms; i++) {
- const struct brw_tracked_state *atom = atoms[i];
+ const struct brw_tracked_state *atom = &atoms[i];
if (brw->intel.Fallback)
break;
if (check_state(state, &atom->dirty)) {
- if (atom->emit) {
- atom->emit( brw );
- }
+ atom->emit(brw);
}
}
}
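
brw_init_state() now walks the per-generation atom list once and copies the atoms into flat prepare and emit arrays, so the per-draw loops above simply iterate and test dirty bits. The test is the OR-of-intersections in check_state(); a minimal sketch of that predicate follows, using the same three dirty-flag words (the 0x4 bit value is illustrative, not the real _NEW_COLOR constant).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct brw_state_flags {
   uint32_t mesa;    /* _NEW_* bits */
   uint32_t brw;     /* BRW_NEW_* bits */
   uint32_t cache;   /* CACHE_NEW_* bits */
};

/* An atom runs when any of the dirty bits it cares about intersect the
 * currently dirty state.
 */
static bool atom_is_dirty(const struct brw_state_flags *state,
                          const struct brw_state_flags *atom)
{
   return ((state->mesa & atom->mesa) |
           (state->brw & atom->brw) |
           (state->cache & atom->cache)) != 0;
}

int main(void)
{
   struct brw_state_flags dirty = { 0x4 /* example bit */, 0, 0 };
   struct brw_state_flags atom  = { 0x4, 0, 0 };

   printf("atom %s\n", atom_is_dirty(&dirty, &atom) ? "runs" : "is skipped");
   return 0;
}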
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 8f97bd136f..6687a89e80 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1017,7 +1017,14 @@ struct brw_wm_unit_state
GLuint enable_32_pix:1;
GLuint enable_con_32_pix:1;
GLuint enable_con_64_pix:1;
- GLuint pad0:5;
+ GLuint pad0:1;
+
+ /* These next four bits are for Ironlake+ */
+ GLuint fast_span_coverage_enable:1;
+ GLuint depth_buffer_clear:1;
+ GLuint depth_buffer_resolve_enable:1;
+ GLuint hierarchical_depth_buffer_resolve_enable:1;
+
GLuint legacy_global_depth_bias:1;
GLuint line_stipple:1;
GLuint depth_offset:1;
@@ -1064,6 +1071,15 @@ struct brw_sampler_default_color {
GLfloat color[4];
};
+struct gen5_sampler_default_color {
+ uint8_t ub[4];
+ float f[4];
+ uint16_t hf[4];
+ uint16_t us[4];
+ int16_t s[4];
+ uint8_t b[4];
+};
+
struct brw_sampler_state
{
@@ -1169,7 +1185,12 @@ struct brw_surface_state
GLuint cube_neg_y:1;
GLuint cube_pos_x:1;
GLuint cube_neg_x:1;
- GLuint pad:4;
+ GLuint pad:2;
+ /* Required on gen6 for surfaces accessed through render cache messages.
+ */
+ GLuint render_cache_read_write:1;
+      /* Ironlake and newer: average the cube corner texels instead of
+       * replicating one of them.
+       */
+ GLuint cube_corner_average:1;
GLuint mipmap_layout_mode:1;
GLuint vert_line_stride_ofs:1;
GLuint vert_line_stride:1;
@@ -1651,6 +1672,18 @@ struct brw_instruction
struct {
GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_read_g4x;
+
+ struct {
+ GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index dfc1551aca..b0419d8a42 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -248,5 +248,13 @@ void brw_upload_urb_fence(struct brw_context *brw)
uf.bits1.sf_fence = brw->urb.cs_start;
uf.bits1.cs_fence = brw->urb.size;
+   /* Erratum: URB_FENCE must not cross a 64-byte cacheline. */
+ if ((brw->intel.batch.used & 15) > 12) {
+ int pad = 16 - (brw->intel.batch.used & 15);
+ do
+ brw->intel.batch.map[brw->intel.batch.used++] = MI_NOOP;
+ while (--pad);
+ }
+
BRW_BATCH_STRUCT(brw, &uf);
}
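
The workaround above keeps the three-DWord URB_FENCE packet from straddling a 64-byte (16-DWord) cacheline by padding with MI_NOOP to the next cacheline whenever the write pointer is too close to the boundary. A small worked example of the padding arithmetic; the batch array and the starting value of used are made up.

#include <stdio.h>

#define MI_NOOP 0

int main(void)
{
   unsigned batch[32] = {0};
   unsigned used = 14;                    /* DWords already emitted (example) */

   if ((used & 15) > 12) {
      int pad = 16 - (used & 15);         /* here: 2 NOOPs */
      while (pad--)
         batch[used++] = MI_NOOP;
   }
   printf("URB_FENCE now starts at DWord %u, cacheline-aligned\n", used);
   return 0;
}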
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
index e878da3850..d28d9abcb3 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -37,16 +37,6 @@
#include "brw_util.h"
#include "brw_defines.h"
-GLuint brw_count_bits(uint64_t val)
-{
- GLuint i;
- for (i = 0; val ; val >>= 1)
- if (val & 1)
- i++;
- return i;
-}
-
-
GLuint brw_translate_blend_equation( GLenum mode )
{
switch (mode) {
diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h
index 04f3175d3e..940a871550 100644
--- a/src/mesa/drivers/dri/i965/brw_util.h
+++ b/src/mesa/drivers/dri/i965/brw_util.h
@@ -35,7 +35,14 @@
#include "main/mtypes.h"
-extern GLuint brw_count_bits(uint64_t val);
+#ifdef __GNUC__
+#define brw_count_bits(v) __builtin_popcount(v)
+#else
+static inline GLuint brw_count_bits(uint64_t v)
+{
+ return _mesa_popcount(v>>32) + _mesa_popcount(v&0xffffffff);
+}
+#endif
extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );
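
brw_count_bits() is now a population count. Note that GCC's __builtin_popcount() takes an unsigned int, so with the macro form a 64-bit mask is truncated to its low 32 bits before counting; the non-GNU fallback in the hunk above handles the full width by splitting the value. A 64-bit-safe sketch (the portable branch uses Kernighan's bit-clearing loop):

#include <stdint.h>

static inline unsigned count_bits64(uint64_t v)
{
#ifdef __GNUC__
   return __builtin_popcountll(v);     /* takes unsigned long long */
#else
   unsigned n = 0;
   while (v) {
      v &= v - 1;                      /* clear the lowest set bit */
      n++;
   }
   return n;
#endif
}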
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 59f270d675..6ae75d22c1 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
+ key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_POINT */
if (ctx->Point.PointSprite) {
@@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT,
+ .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 9338a6b7db..0b88cc1ec7 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -44,6 +44,7 @@ struct brw_vs_prog_key {
GLuint nr_userclip:4;
GLuint copy_edgeflag:1;
GLuint point_coord_replace:8;
+   GLuint two_side_color:1;
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index e1a3f33393..6ec62554cc 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -140,11 +140,13 @@ clear_current_const(struct brw_vs_compile *c)
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
struct intel_context *intel = &c->func.brw->intel;
- GLuint i, reg = 0, mrf;
+ GLuint i, reg = 0, mrf, j;
int attributes_in_vue;
int first_reladdr_output;
int max_constant;
int constant = 0;
+ int vert_result_reoder[VERT_RESULT_MAX];
+ int bfc = 0;
/* Determine whether to use a real constant buffer or use a block
* of GRF registers for constants. The later is faster but only
@@ -254,7 +256,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
reg += (constant + 1) / 2;
c->prog_data.curb_read_length = reg - 1;
- c->prog_data.nr_params = constant;
+ c->prog_data.nr_params = constant * 4;
/* XXX 0 causes a bug elsewhere... */
if (intel->gen < 6 && c->prog_data.nr_params == 0)
c->prog_data.nr_params = 4;
@@ -291,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf = 4;
first_reladdr_output = get_first_reladdr_output(&c->vp->program);
- for (i = 0; i < VERT_RESULT_MAX; i++) {
+
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ vert_result_reoder[i] = i;
+
+ /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */
+ if (intel->gen >= 6 && c->key.two_side_color) {
+ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+ (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+ assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+ assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+ bfc = 2;
+ } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+ (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)))
+ bfc = 1;
+
+ if (bfc) {
+ for (i = 0; i < bfc; i++) {
+ vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i;
+ vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i;
+ }
+
+ for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) {
+ vert_result_reoder[i] = i - bfc;
+ }
+ }
+ }
+
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ i = vert_result_reoder[j];
+
if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
c->nr_outputs++;
assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
@@ -627,6 +658,22 @@ static void emit_min( struct brw_compile *p,
}
}
+static void emit_arl(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6) {
+ struct brw_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
+
+ brw_RNDD(p, dst_f, src);
+ brw_MOV(p, dst, dst_f);
+ } else {
+ brw_RNDD(p, dst, src);
+ }
+}
+
static void emit_math1_gen4(struct brw_vs_compile *c,
GLuint function,
struct brw_reg dst,
@@ -1072,8 +1119,6 @@ get_constant(struct brw_vs_compile *c,
assert(argIndex < 3);
- assert(c->func.brw->intel.gen < 6); /* FINISHME */
-
if (c->current_const[argIndex].index != src->Index) {
/* Keep track of the last constant loaded in this slot, for reuse. */
c->current_const[argIndex].index = src->Index;
@@ -1091,7 +1136,7 @@ get_constant(struct brw_vs_compile *c,
}
/* replicate lower four floats into upper half (to get XYZWXYZW) */
- const_reg = stride(const_reg, 0, 4, 0);
+ const_reg = stride(const_reg, 0, 4, 1);
const_reg.subnr = 0;
return const_reg;
@@ -1104,14 +1149,14 @@ get_reladdr_constant(struct brw_vs_compile *c,
{
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
struct brw_reg const_reg = c->current_const[argIndex].reg;
- struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
- struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ uint32_t offset;
assert(argIndex < 3);
- assert(c->func.brw->intel.gen < 6); /* FINISHME */
-
/* Can't reuse a reladdr constant load. */
c->current_const[argIndex].index = -1;
@@ -1120,15 +1165,21 @@ get_reladdr_constant(struct brw_vs_compile *c,
src->Index, argIndex, c->current_const[argIndex].reg.nr);
#endif
- brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16));
+ if (intel->gen >= 6) {
+ offset = src->Index;
+ } else {
+ struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D);
+ brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16));
+ addr_reg = byte_addr_reg;
+ offset = 16 * src->Index;
+ }
/* fetch the first vec4 */
brw_dp_READ_4_vs_relative(p,
- const_reg, /* writeback dest */
- byte_addr_reg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
- );
+ const_reg,
+ addr_reg,
+ offset,
+ SURF_INDEX_VERT_CONST_BUFFER);
return const_reg;
}
@@ -1375,11 +1426,10 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
GET_SWZ(src->Swizzle, 1),
GET_SWZ(src->Swizzle, 2),
GET_SWZ(src->Swizzle, 3));
- }
- /* Note this is ok for non-swizzle instructions:
- */
- reg.negate = src->Negate ? 1 : 0;
+ /* Note this is ok for non-swizzle ARB_vp instructions */
+ reg.negate = src->Negate ? 1 : 0;
+ }
return reg;
}
@@ -1511,6 +1561,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
int eot;
GLuint len_vertex_header = 2;
int next_mrf, i;
+ int msg_len;
if (c->key.copy_edgeflag) {
brw_MOV(p,
@@ -1677,13 +1728,20 @@ static void emit_vertex_write( struct brw_vs_compile *c)
eot = (c->first_overflow_output == 0);
+ msg_len = c->nr_outputs + 2 + len_vertex_header;
+ if (intel->gen >= 6) {
+      /* The interleaved URB write message length on gen6 must be a multiple of 2. */
+ if ((msg_len % 2) != 0)
+ msg_len++;
+ }
+
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
0, /* starting mrf reg nr */
c->r0, /* src */
0, /* allocate */
1, /* used */
- MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */
+ MIN2(msg_len - 1, (BRW_MAX_MRF - 1)), /* msg len */
0, /* response len */
eot, /* eot */
eot, /* writes complete */
@@ -1892,6 +1950,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
switch (inst->Opcode) {
case OPCODE_ABS:
+ args[0].negate = false;
brw_MOV(p, dst, brw_abs(args[0]));
break;
case OPCODE_ADD:
@@ -1928,7 +1987,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_ARL:
- brw_RNDD(p, dst, args[0]);
+ emit_arl(p, dst, args[0]);
break;
case OPCODE_FLR:
brw_RNDD(p, dst, args[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index be92313861..c3a7cc247c 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -96,7 +96,14 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* and those dwords will be written to the second URB handle when we
* brw_urb_WRITE() results.
*/
- vs.thread1.single_program_flow = 0;
+   /* Force single program flow on Ironlake.  We cannot reliably get
+ * all applications working without it. See:
+ * https://bugs.freedesktop.org/show_bug.cgi?id=29172
+ *
+ * The most notable and reliably failing application is the Humus
+ * demo "CelShading"
+ */
+ vs.thread1.single_program_flow = (intel->gen == 5);
if (intel->gen == 5)
vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index eabac51160..48cf265e51 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -82,6 +82,15 @@ prepare_vs_constants(struct brw_context *brw)
params->ParameterValues[i],
4 * sizeof(float));
}
+
+ if (0) {
+ for (i = 0; i < params->NumParameters; i++) {
+ float *row = (float *)brw->vs.const_bo->virtual + i * 4;
+ printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
+ i, row[0], row[1], row[2], row[3]);
+ }
+ }
+
drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);
brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
@@ -115,13 +124,11 @@ brw_update_vs_constant_surface( struct gl_context *ctx,
* it.
*/
if (brw->vs.const_bo == NULL) {
- drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
- brw->vs.surf_bo[surf] = NULL;
+ brw->vs.surf_offset[surf] = 0;
return;
}
brw_create_constant_surface(brw, brw->vs.const_bo, params->NumParameters,
- &brw->vs.surf_bo[surf],
&brw->vs.surf_offset[surf]);
}
@@ -157,11 +164,10 @@ static void upload_vs_surfaces(struct brw_context *brw)
/* BRW_NEW_NR_VS_SURFACES */
if (brw->vs.nr_surfaces == 0) {
- if (brw->vs.bind_bo) {
- drm_intel_bo_unreference(brw->vs.bind_bo);
- brw->vs.bind_bo = NULL;
+ if (brw->vs.bind_bo_offset) {
brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
}
+ brw->vs.bind_bo_offset = 0;
return;
}
@@ -171,15 +177,11 @@ static void upload_vs_surfaces(struct brw_context *brw)
* space for the binding table. (once we have vs samplers)
*/
bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF,
- 32, &brw->vs.bind_bo, &brw->vs.bind_bo_offset);
+ 32, &brw->vs.bind_bo_offset);
for (i = 0; i < BRW_VS_MAX_SURF; i++) {
/* BRW_NEW_VS_CONSTBUF */
- if (brw->vs.surf_bo[i]) {
- bind[i] = brw->vs.surf_offset[i];
- } else {
- bind[i] = 0;
- }
+ bind[i] = brw->vs.surf_offset[i];
}
brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 3d7a98c981..152ee14156 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -73,15 +73,11 @@ static void brw_destroy_context( struct intel_context *intel )
free(brw->wm.compile_data);
}
- for (i = 0; i < brw->state.nr_color_regions; i++)
- intel_region_release(&brw->state.color_regions[i]);
- brw->state.nr_color_regions = 0;
intel_region_release(&brw->state.depth_region);
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->vs.prog_bo);
dri_bo_release(&brw->vs.state_bo);
- dri_bo_release(&brw->vs.bind_bo);
dri_bo_release(&brw->vs.const_bo);
dri_bo_release(&brw->gs.prog_bo);
dri_bo_release(&brw->gs.state_bo);
@@ -93,16 +89,12 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->sf.vp_bo);
for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
dri_bo_release(&brw->wm.sdc_bo[i]);
- dri_bo_release(&brw->wm.bind_bo);
- for (i = 0; i < BRW_WM_MAX_SURF; i++)
- dri_bo_release(&brw->wm.surf_bo[i]);
dri_bo_release(&brw->wm.sampler_bo);
dri_bo_release(&brw->wm.prog_bo);
dri_bo_release(&brw->wm.state_bo);
dri_bo_release(&brw->wm.const_bo);
dri_bo_release(&brw->wm.push_const_bo);
dri_bo_release(&brw->cc.prog_bo);
- dri_bo_release(&brw->cc.state_bo);
dri_bo_release(&brw->cc.vp_bo);
dri_bo_release(&brw->cc.blend_state_bo);
dri_bo_release(&brw->cc.depth_stencil_state_bo);
@@ -122,20 +114,14 @@ static void brw_set_draw_region( struct intel_context *intel,
GLuint num_color_regions)
{
struct brw_context *brw = brw_context(&intel->ctx);
- GLuint i;
/* release old color/depth regions */
if (brw->state.depth_region != depth_region)
brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
- for (i = 0; i < brw->state.nr_color_regions; i++)
- intel_region_release(&brw->state.color_regions[i]);
intel_region_release(&brw->state.depth_region);
/* reference new color/depth regions */
- for (i = 0; i < num_color_regions; i++)
- intel_region_reference(&brw->state.color_regions[i], color_regions[i]);
intel_region_reference(&brw->state.depth_region, depth_region);
- brw->state.nr_color_regions = num_color_regions;
}
@@ -173,14 +159,7 @@ static void brw_new_batch( struct intel_context *intel )
brw->state.dirty.brw |= ~0;
brw->state.dirty.cache |= ~0;
- /* Move to the end of the current upload buffer so that we'll force choosing
- * a new buffer next time.
- */
- if (brw->vb.upload.bo != NULL) {
- drm_intel_bo_unreference(brw->vb.upload.bo);
- brw->vb.upload.bo = NULL;
- brw->vb.upload.offset = 0;
- }
+ brw->vb.nr_current_buffers = 0;
}
static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
@@ -203,4 +182,5 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.destroy = brw_destroy_context;
brw->intel.vtbl.set_draw_region = brw_set_draw_region;
brw->intel.vtbl.debug_batch = brw_debug_batch;
+ brw->intel.vtbl.render_target_supported = brw_render_target_supported;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index e0aa3fd7f2..ca51d1599a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -284,6 +284,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* Build the index for table lookup
*/
/* _NEW_COLOR */
+ key->alpha_test = ctx->Color.AlphaEnabled;
if (fp->program.UsesKill ||
ctx->Color.AlphaEnabled)
lookup |= IZ_PS_KILL_ALPHATEST_BIT;
@@ -364,8 +365,6 @@ static void brw_wm_populate_key( struct brw_context *brw,
SWIZZLE_NIL
};
- key->tex_swizzles[i] = SWIZZLE_NOOP;
-
/* GL_DEPTH_TEXTURE_MODE is normally handled through
* brw_wm_surface_state, but it applies to shadow compares as
* well and our shadow compares always return the result in
@@ -378,6 +377,11 @@ static void brw_wm_populate_key( struct brw_context *brw,
swizzles[2] = SWIZZLE_ZERO;
} else if (t->DepthMode == GL_LUMINANCE) {
swizzles[3] = SWIZZLE_ONE;
+ } else if (t->DepthMode == GL_RED) {
+ /* See table 3.23 of the GL 3.0 spec. */
+ swizzles[1] = SWIZZLE_ZERO;
+ swizzles[2] = SWIZZLE_ZERO;
+ swizzles[3] = SWIZZLE_ONE;
}
}
@@ -427,7 +431,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
key->render_to_fbo = ctx->DrawBuffer->Name != 0;
}
- key->nr_color_regions = brw->state.nr_color_regions;
+ /* _NEW_BUFFERS */
+ key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
/* CACHE_NEW_VS_PROG */
key->vp_outputs_written = brw->vs.prog_data->outputs_written;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index e7f3cfbb75..90771e1f50 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -64,6 +64,7 @@ struct brw_wm_prog_key {
GLuint linear_color:1; /**< linear interpolation vs perspective interp */
GLuint nr_color_regions:5;
GLuint render_to_fbo:1;
+ GLuint alpha_test:1;
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
GLuint shadowtex_mask:16;
@@ -474,5 +475,6 @@ struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint typ
struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name);
bool brw_color_buffer_write_enabled(struct brw_context *brw);
+bool brw_render_target_supported(gl_format format);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index a0e86034e1..2336e27c1e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -219,43 +219,45 @@ void emit_wpos_xy(struct brw_wm_compile *c,
const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W);
+ struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W);
if (mask & WRITEMASK_X) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_x_f, delta_x);
+ delta_x = delta_x_f;
+ }
+
if (c->fp->program.PixelCenterInteger) {
/* X' = X */
- brw_MOV(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[0], delta_x);
} else {
/* X' = X + 0.5 */
- brw_ADD(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5));
}
}
if (mask & WRITEMASK_Y) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_y_f, delta_y);
+ delta_y = delta_y_f;
+ }
+
if (c->fp->program.OriginUpperLeft) {
if (c->fp->program.PixelCenterInteger) {
/* Y' = Y */
- brw_MOV(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[1], delta_y);
} else {
- /* Y' = Y + 0.5 */
- brw_ADD(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5));
}
} else {
float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
/* Y' = (height - 1) - Y + center */
- brw_ADD(p,
- dst[1],
- negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+ brw_ADD(p, dst[1], negate(delta_y),
brw_imm_f(c->key.drawable_height - 1 + center_offset));
}
}
@@ -971,34 +973,23 @@ void emit_math2(struct brw_wm_compile *c,
struct brw_reg temp_dst = dst[dst_chan];
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- /* Both scalar arguments. Do scalar calc. */
- src0.hstride = BRW_HORIZONTAL_STRIDE_1;
- src1.hstride = BRW_HORIZONTAL_STRIDE_1;
- temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
- temp_dst.width = BRW_WIDTH_1;
-
- if (arg0[0].subnr != 0) {
- brw_MOV(p, temp_dst, src0);
- src0 = temp_dst;
-
- /* Ouch. We've used the temp as a dst, and we still
- * need a temp to store arg1 in, because src and dst
- * offsets have to be equal. Leaving this up to
- * glsl2-965 to handle correctly.
- */
- assert(arg1[0].subnr == 0);
- } else if (arg1[0].subnr != 0) {
- brw_MOV(p, temp_dst, src1);
- src1 = temp_dst;
- }
- } else {
- brw_MOV(p, temp_dst, src0);
- src0 = temp_dst;
- }
- } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- brw_MOV(p, temp_dst, src1);
- src1 = temp_dst;
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+ }
+
+ if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ /* This is a heinous hack to get a temporary register for use
+ * in case both arg0 and arg1 are constants. Why you're
+ * doing exponentiation on constant values in the shader, we
+ * don't know.
+ *
+ * max_wm_grf is almost surely less than the maximum GRF, and
+ * gen6 doesn't care about the number of GRFs used in a
+ * shader like pre-gen6 did.
+ */
+ struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
+ brw_MOV(p, temp, src1);
+ src1 = temp;
}
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
@@ -1016,14 +1007,6 @@ void emit_math2(struct brw_wm_compile *c,
sechalf(src0),
sechalf(src1));
}
-
- /* Splat a scalar result into all the channels. */
- if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
- arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
- temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
- brw_MOV(p, dst[dst_chan], temp_dst);
- }
} else {
GLuint saturate = ((mask & SATURATE) ?
BRW_MATH_SATURATE_SATURATE :
@@ -1350,9 +1333,11 @@ static void fire_fb_write( struct brw_wm_compile *c,
dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
/* Pass through control information:
+ *
+    * On gen6, the m1 mov has already been done in emit_fb_write() for the SIMD16 case.
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
- if (intel->gen < 6) /* gen6, use headerless for fb write */
+ if (intel->gen < 6)
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
@@ -1373,7 +1358,8 @@ static void fire_fb_write( struct brw_wm_compile *c,
target,
nr,
0,
- eot);
+ eot,
+ GL_TRUE);
}
@@ -1518,7 +1504,8 @@ void emit_fb_write(struct brw_wm_compile *c,
*/
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, brw_message_reg(0), brw_vec8_grf(0, 0));
+ brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
if (target != 0) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index fea96d3538..30672b4251 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap )
static drm_intel_bo *upload_default_color( struct brw_context *brw,
const GLfloat *color )
{
- struct brw_sampler_default_color sdc;
+ struct intel_context *intel = &brw->intel;
- COPY_4V(sdc.color, color);
-
- return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
- &sdc, sizeof(sdc));
+ if (intel->gen >= 5) {
+ struct gen5_sampler_default_color sdc;
+
+ memset(&sdc, 0, sizeof(sdc));
+
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]);
+
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]);
+
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]);
+
+ /* XXX: Fill in half floats */
+ /* XXX: Fill in signed bytes */
+
+ COPY_4V(sdc.f, color);
+
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc));
+ } else {
+ struct brw_sampler_default_color sdc;
+
+ COPY_4V(sdc.color, color);
+
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc));
+ }
}
@@ -245,9 +276,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
struct wm_sampler_entry *entry = &key->sampler[unit];
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
- struct intel_texture_object *intelObj = intel_texture_object(texObj);
struct gl_texture_image *firstImage =
- texObj->Image[0][intelObj->firstLevel];
+ texObj->Image[0][texObj->BaseLevel];
memset(last_entry_end, 0,
(char*)entry - last_entry_end + sizeof(*entry));
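
The gen5+ path in the sampler hunk above writes the same border color into ubyte, ushort, signed short, and float views of the default-color structure. Below is a minimal standalone sketch of roughly what those UNCLAMPED_FLOAT_TO_* conversions do for a single channel; the helper names are illustrative, not Mesa's, and the rounding details may differ slightly from the real macros.

#include <math.h>
#include <stdint.h>

/* Hedged sketch: approximate per-channel conversions used when filling
 * the gen5 default (border) color; not the driver's implementation. */
static float clampf(float v, float lo, float hi)
{
   return v < lo ? lo : (v > hi ? hi : v);
}

static uint8_t float_to_unorm8(float f)
{
   return (uint8_t) lroundf(clampf(f, 0.0f, 1.0f) * 255.0f);
}

static uint16_t float_to_unorm16(float f)
{
   return (uint16_t) lroundf(clampf(f, 0.0f, 1.0f) * 65535.0f);
}

static int16_t float_to_snorm16(float f)
{
   return (int16_t) lroundf(clampf(f, -1.0f, 1.0f) * 32767.0f);
}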
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 82835470a3..5b5afc4626 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -47,7 +47,6 @@ struct brw_wm_unit_key {
unsigned int dispatch_grf_start_reg;
unsigned int curbe_offset;
- unsigned int urb_size;
unsigned int nr_surfaces, sampler_count;
GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
@@ -87,7 +86,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
struct gl_context *ctx = &brw->intel.ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
- const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
struct intel_context *intel = &brw->intel;
memset(key, 0, sizeof(*key));
@@ -99,9 +97,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
key->total_scratch = brw->wm.prog_data->total_scratch;
- /* BRW_NEW_URB_FENCE */
- key->urb_size = brw->urb.vsize;
-
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.wm_start;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 76fc94df1f..e1f8f57a9d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -42,7 +42,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-
+#include "brw_wm.h"
static GLuint translate_tex_target( GLenum target )
{
@@ -68,104 +68,74 @@ static GLuint translate_tex_target( GLenum target )
}
}
+static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] =
+{
+ [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM,
+ [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM,
+ [MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM,
+ [MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM,
+ [MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM,
+ [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM,
+ [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM,
+ [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM,
+ [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM,
+ [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
+ [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
+ [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM,
+ [MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM,
+ [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM,
+ [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL,
+ [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY,
+ [MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1,
+ [MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1,
+ [MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB,
+ [MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM,
+ [MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM,
+ [MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM,
+ [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB,
+ [MESA_FORMAT_SRGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
+ [MESA_FORMAT_SRGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB,
+ [MESA_FORMAT_SRGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB,
+ [MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB,
+ [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
+ [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
+ [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM,
+ [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
+ [MESA_FORMAT_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM,
+};
+
+bool
+brw_render_target_supported(gl_format format)
+{
+ if (format == MESA_FORMAT_S8_Z24 ||
+ format == MESA_FORMAT_X8_Z24 ||
+ format == MESA_FORMAT_Z16) {
+ return true;
+ }
+
+ /* Not exactly true, as some of those formats are not renderable.
+ * But at least we know how to translate them.
+ */
+ return brw_format_for_mesa_format[format] != 0;
+}
static GLuint translate_tex_format( gl_format mesa_format,
GLenum internal_format,
- GLenum depth_mode )
+ GLenum depth_mode,
+ GLenum srgb_decode )
{
switch( mesa_format ) {
- case MESA_FORMAT_L8:
- return BRW_SURFACEFORMAT_L8_UNORM;
-
- case MESA_FORMAT_I8:
- return BRW_SURFACEFORMAT_I8_UNORM;
-
- case MESA_FORMAT_A8:
- return BRW_SURFACEFORMAT_A8_UNORM;
-
- case MESA_FORMAT_AL88:
- return BRW_SURFACEFORMAT_L8A8_UNORM;
-
- case MESA_FORMAT_AL1616:
- return BRW_SURFACEFORMAT_L16A16_UNORM;
-
- case MESA_FORMAT_R8:
- return BRW_SURFACEFORMAT_R8_UNORM;
-
- case MESA_FORMAT_R16:
- return BRW_SURFACEFORMAT_R16_UNORM;
-
- case MESA_FORMAT_RG88:
- return BRW_SURFACEFORMAT_R8G8_UNORM;
-
- case MESA_FORMAT_RG1616:
- return BRW_SURFACEFORMAT_R16G16_UNORM;
-
- case MESA_FORMAT_RGB888:
- assert(0); /* not supported for sampling */
- return BRW_SURFACEFORMAT_R8G8B8_UNORM;
-
- case MESA_FORMAT_ARGB8888:
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-
- case MESA_FORMAT_XRGB8888:
- return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
-
- case MESA_FORMAT_RGBA8888_REV:
- _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()");
- return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
-
- case MESA_FORMAT_RGB565:
- return BRW_SURFACEFORMAT_B5G6R5_UNORM;
-
- case MESA_FORMAT_ARGB1555:
- return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
-
- case MESA_FORMAT_ARGB4444:
- return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
-
- case MESA_FORMAT_YCBCR_REV:
- return BRW_SURFACEFORMAT_YCRCB_NORMAL;
-
- case MESA_FORMAT_YCBCR:
- return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
-
- case MESA_FORMAT_RGB_FXT1:
- case MESA_FORMAT_RGBA_FXT1:
- return BRW_SURFACEFORMAT_FXT1;
case MESA_FORMAT_Z16:
if (depth_mode == GL_INTENSITY)
return BRW_SURFACEFORMAT_I16_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A16_UNORM;
+ else if (depth_mode == GL_RED)
+ return BRW_SURFACEFORMAT_R16_UNORM;
else
return BRW_SURFACEFORMAT_L16_UNORM;
- case MESA_FORMAT_RGB_DXT1:
- return BRW_SURFACEFORMAT_DXT1_RGB;
-
- case MESA_FORMAT_RGBA_DXT1:
- return BRW_SURFACEFORMAT_BC1_UNORM;
-
- case MESA_FORMAT_RGBA_DXT3:
- return BRW_SURFACEFORMAT_BC2_UNORM;
-
- case MESA_FORMAT_RGBA_DXT5:
- return BRW_SURFACEFORMAT_BC3_UNORM;
-
- case MESA_FORMAT_SARGB8:
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
-
- case MESA_FORMAT_SLA8:
- return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
-
- case MESA_FORMAT_SL8:
- return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
-
- case MESA_FORMAT_SRGB_DXT1:
- return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
-
case MESA_FORMAT_S8_Z24:
/* XXX: these different surface formats don't seem to
* make any difference for shadow sampler/compares.
@@ -174,18 +144,21 @@ static GLuint translate_tex_format( gl_format mesa_format,
return BRW_SURFACEFORMAT_I24X8_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A24X8_UNORM;
+ else if (depth_mode == GL_RED)
+ return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS;
else
return BRW_SURFACEFORMAT_L24X8_UNORM;
-
- case MESA_FORMAT_DUDV8:
- return BRW_SURFACEFORMAT_R8G8_SNORM;
-
- case MESA_FORMAT_SIGNED_RGBA8888_REV:
- return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
-
+
+ case MESA_FORMAT_SARGB8:
+ case MESA_FORMAT_SLA8:
+ case MESA_FORMAT_SL8:
+ if (srgb_decode == GL_DECODE_EXT)
+ return brw_format_for_mesa_format[mesa_format];
+ else if (srgb_decode == GL_SKIP_DECODE_EXT)
+ return brw_format_for_mesa_format[_mesa_get_srgb_format_linear(mesa_format)];
default:
- assert(0);
- return 0;
+ assert(brw_format_for_mesa_format[mesa_format] != 0);
+ return brw_format_for_mesa_format[mesa_format];
}
}
@@ -214,49 +187,45 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
- struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+ struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
const GLuint surf_index = SURF_INDEX_TEXTURE(unit);
- struct brw_surface_state surf;
- void *map;
+ struct brw_surface_state *surf;
- memset(&surf, 0, sizeof(surf));
+ surf = brw_state_batch(brw, sizeof(*surf), 32,
+ &brw->wm.surf_offset[surf_index]);
+ memset(surf, 0, sizeof(*surf));
- surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
- surf.ss0.surface_type = translate_tex_target(tObj->Target);
- surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat,
+ surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ surf->ss0.surface_type = translate_tex_target(tObj->Target);
+ surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat,
firstImage->InternalFormat,
- tObj->DepthMode);
+ tObj->DepthMode, tObj->sRGBDecode);
/* This is ok for all textures with channel width 8bit or less:
*/
-/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
- surf.ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */
+/* surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+ surf->ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */
- surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
- surf.ss2.width = firstImage->Width - 1;
- surf.ss2.height = firstImage->Height - 1;
- brw_set_surface_tiling(&surf, intelObj->mt->region->tiling);
- surf.ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1;
- surf.ss3.depth = firstImage->Depth - 1;
+ surf->ss2.mip_count = intelObj->_MaxLevel - tObj->BaseLevel;
+ surf->ss2.width = firstImage->Width - 1;
+ surf->ss2.height = firstImage->Height - 1;
+ brw_set_surface_tiling(surf, intelObj->mt->region->tiling);
+ surf->ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1;
+ surf->ss3.depth = firstImage->Depth - 1;
- surf.ss4.min_lod = 0;
+ surf->ss4.min_lod = 0;
if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- surf.ss0.cube_pos_x = 1;
- surf.ss0.cube_pos_y = 1;
- surf.ss0.cube_pos_z = 1;
- surf.ss0.cube_neg_x = 1;
- surf.ss0.cube_neg_y = 1;
- surf.ss0.cube_neg_z = 1;
+ surf->ss0.cube_pos_x = 1;
+ surf->ss0.cube_pos_y = 1;
+ surf->ss0.cube_pos_z = 1;
+ surf->ss0.cube_neg_x = 1;
+ surf->ss0.cube_neg_y = 1;
+ surf->ss0.cube_neg_z = 1;
}
- map = brw_state_batch(brw, sizeof(surf), 32,
- &brw->wm.surf_bo[surf_index],
- &brw->wm.surf_offset[surf_index]);
- memcpy(map, &surf, sizeof(surf));
-
/* Emit relocation to surface contents */
- drm_intel_bo_emit_reloc(brw->wm.surf_bo[surf_index],
+ drm_intel_bo_emit_reloc(brw->intel.batch.bo,
brw->wm.surf_offset[surf_index] +
offsetof(struct brw_surface_state, ss1),
intelObj->mt->region->buffer, 0,
@@ -271,37 +240,38 @@ void
brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,
int width,
- drm_intel_bo **out_bo,
uint32_t *out_offset)
{
+ struct intel_context *intel = &brw->intel;
const GLint w = width - 1;
- struct brw_surface_state surf;
- void *map;
+ struct brw_surface_state *surf;
- memset(&surf, 0, sizeof(surf));
+ surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset);
+ memset(surf, 0, sizeof(*surf));
- surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
- surf.ss0.surface_type = BRW_SURFACE_BUFFER;
- surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+ surf->ss0.surface_type = BRW_SURFACE_BUFFER;
+ surf->ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
- assert(bo);
- surf.ss1.base_addr = bo->offset; /* reloc */
+ if (intel->gen >= 6)
+ surf->ss0.render_cache_read_write = 1;
- surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */
- surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
- surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
- surf.ss3.pitch = (width * 16) - 1; /* ignored?? */
- brw_set_surface_tiling(&surf, I915_TILING_NONE); /* tiling now allowed */
+ assert(bo);
+ surf->ss1.base_addr = bo->offset; /* reloc */
- map = brw_state_batch(brw, sizeof(surf), 32, out_bo, out_offset);
- memcpy(map, &surf, sizeof(surf));
+ surf->ss2.width = w & 0x7f; /* bits 6:0 of size or width */
+ surf->ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */
+ surf->ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */
+ surf->ss3.pitch = (width * 16) - 1; /* ignored?? */
+ brw_set_surface_tiling(surf, I915_TILING_NONE); /* tiling not allowed */
/* Emit relocation to surface contents. Section 5.1.1 of the gen4
* bspec ("Data Cache") says that the data cache does not exist as
* a separate cache and is just the sampler cache.
*/
- drm_intel_bo_emit_reloc(*out_bo, (*out_offset +
- offsetof(struct brw_surface_state, ss1)),
+ drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+ (*out_offset +
+ offsetof(struct brw_surface_state, ss1)),
bo, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
}
@@ -380,16 +350,14 @@ static void upload_wm_constant_surface(struct brw_context *brw )
* it.
*/
if (brw->wm.const_bo == 0) {
- if (brw->wm.surf_bo[surf] != NULL) {
- drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = NULL;
+ if (brw->wm.surf_offset[surf]) {
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+ brw->wm.surf_offset[surf] = 0;
}
return;
}
brw_create_constant_surface(brw, brw->wm.const_bo, params->NumParameters,
- &brw->wm.surf_bo[surf],
&brw->wm.surf_offset[surf]);
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
@@ -404,6 +372,28 @@ const struct brw_tracked_state brw_wm_constant_surface = {
.emit = upload_wm_constant_surface,
};
+static void
+brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_surface_state *surf;
+
+ surf = brw_state_batch(brw, sizeof(*surf), 32,
+ &brw->wm.surf_offset[unit]);
+ memset(surf, 0, sizeof(*surf));
+
+ surf->ss0.surface_type = BRW_SURFACE_NULL;
+ surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ if (intel->gen < 6) {
+ /* _NEW_COLOR */
+ surf->ss0.color_blend = 0;
+ surf->ss0.writedisable_red = 1;
+ surf->ss0.writedisable_green = 1;
+ surf->ss0.writedisable_blue = 1;
+ surf->ss0.writedisable_alpha = 1;
+ }
+}
/**
* Sets up a surface state structure to point at the given region.
@@ -417,123 +407,57 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
- drm_intel_bo *region_bo = NULL;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
- struct intel_region *region = irb ? irb->region : NULL;
- struct {
- unsigned int surface_type;
- unsigned int surface_format;
- unsigned int width, height, pitch, cpp;
- GLubyte color_mask[4];
- GLboolean color_blend;
- uint32_t tiling;
- uint32_t draw_x;
- uint32_t draw_y;
- } key;
- struct brw_surface_state surf;
- void *map;
-
- memset(&key, 0, sizeof(key));
-
- if (region != NULL) {
- region_bo = region->buffer;
-
- key.surface_type = BRW_SURFACE_2D;
- switch (irb->Base.Format) {
- /* XRGB and ARGB are treated the same here because the chips in this
- * family cannot render to XRGB targets. This means that we have to
- * mask writes to alpha (ala glColorMask) and reconfigure the alpha
- * blending hardware to use GL_ONE (or GL_ZERO) for cases where
- * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used.
- */
- case MESA_FORMAT_ARGB8888:
- case MESA_FORMAT_XRGB8888:
- key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- break;
- case MESA_FORMAT_SARGB8:
- key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
- break;
- case MESA_FORMAT_RGB565:
- key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
- break;
- case MESA_FORMAT_ARGB1555:
- key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
- break;
- case MESA_FORMAT_ARGB4444:
- key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
- break;
- case MESA_FORMAT_A8:
- key.surface_format = BRW_SURFACEFORMAT_A8_UNORM;
- break;
- case MESA_FORMAT_R8:
- key.surface_format = BRW_SURFACEFORMAT_R8_UNORM;
- break;
- case MESA_FORMAT_R16:
- key.surface_format = BRW_SURFACEFORMAT_R16_UNORM;
- break;
- case MESA_FORMAT_RG88:
- key.surface_format = BRW_SURFACEFORMAT_R8G8_UNORM;
- break;
- case MESA_FORMAT_RG1616:
- key.surface_format = BRW_SURFACEFORMAT_R16G16_UNORM;
- break;
- default:
- _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format);
- }
- key.tiling = region->tiling;
- key.width = rb->Width;
- key.height = rb->Height;
- key.pitch = region->pitch;
- key.cpp = region->cpp;
- key.draw_x = region->draw_x;
- key.draw_y = region->draw_y;
- } else {
- key.surface_type = BRW_SURFACE_NULL;
- key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- key.tiling = I915_TILING_X;
- key.width = 1;
- key.height = 1;
- key.cpp = 4;
- key.draw_x = 0;
- key.draw_y = 0;
- }
+ struct intel_region *region = irb->region;
+ struct brw_surface_state *surf;
- if (intel->gen < 6) {
- /* _NEW_COLOR */
- memcpy(key.color_mask, ctx->Color.ColorMask[unit],
- sizeof(key.color_mask));
+ surf = brw_state_batch(brw, sizeof(*surf), 32,
+ &brw->wm.surf_offset[unit]);
+ memset(surf, 0, sizeof(*surf));
- /* As mentioned above, disable writes to the alpha component when the
- * renderbuffer is XRGB.
+ switch (irb->Base.Format) {
+ case MESA_FORMAT_XRGB8888:
+ /* XRGB is handled as ARGB because the chips in this family
+ * cannot render to XRGB targets. This means that we have to
+ * mask writes to alpha (ala glColorMask) and reconfigure the
+ * alpha blending hardware to use GL_ONE (or GL_ZERO) for
+ * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
+ * used.
*/
- if (ctx->DrawBuffer->Visual.alphaBits == 0)
- key.color_mask[3] = GL_FALSE;
-
- key.color_blend = (!ctx->Color._LogicOpEnabled &&
- (ctx->Color.BlendEnabled & (1 << unit)));
+ surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ break;
+ case MESA_FORMAT_SARGB8:
+ /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB
+ surfaces as sRGB for blending and writes */
+ if (ctx->Color.sRGBEnabled)
+ surf->ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format];
+ else
+ surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ break;
+ default:
+ surf->ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format];
+ assert(surf->ss0.surface_format != 0);
}
- memset(&surf, 0, sizeof(surf));
-
- surf.ss0.surface_format = key.surface_format;
- surf.ss0.surface_type = key.surface_type;
- if (key.tiling == I915_TILING_NONE) {
- surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp;
+ surf->ss0.surface_type = BRW_SURFACE_2D;
+ if (region->tiling == I915_TILING_NONE) {
+ surf->ss1.base_addr = (region->draw_x +
+ region->draw_y * region->pitch) * region->cpp;
} else {
uint32_t tile_base, tile_x, tile_y;
- uint32_t pitch = key.pitch * key.cpp;
+ uint32_t pitch = region->pitch * region->cpp;
- if (key.tiling == I915_TILING_X) {
- tile_x = key.draw_x % (512 / key.cpp);
- tile_y = key.draw_y % 8;
- tile_base = ((key.draw_y / 8) * (8 * pitch));
- tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096;
+ if (region->tiling == I915_TILING_X) {
+ tile_x = region->draw_x % (512 / region->cpp);
+ tile_y = region->draw_y % 8;
+ tile_base = ((region->draw_y / 8) * (8 * pitch));
+ tile_base += (region->draw_x - tile_x) / (512 / region->cpp) * 4096;
} else {
/* Y */
- tile_x = key.draw_x % (128 / key.cpp);
- tile_y = key.draw_y % 32;
- tile_base = ((key.draw_y / 32) * (32 * pitch));
- tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096;
+ tile_x = region->draw_x % (128 / region->cpp);
+ tile_y = region->draw_y % 32;
+ tile_base = ((region->draw_y / 32) * (32 * pitch));
+ tile_base += (region->draw_x - tile_x) / (128 / region->cpp) * 4096;
}
assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
assert(tile_x % 4 == 0);
@@ -541,41 +465,40 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
/* Note that the low bits of these fields are missing, so
* there's the possibility of getting in trouble.
*/
- surf.ss1.base_addr = tile_base;
- surf.ss5.x_offset = tile_x / 4;
- surf.ss5.y_offset = tile_y / 2;
+ surf->ss1.base_addr = tile_base;
+ surf->ss5.x_offset = tile_x / 4;
+ surf->ss5.y_offset = tile_y / 2;
}
- if (region_bo != NULL)
- surf.ss1.base_addr += region_bo->offset; /* reloc */
+ surf->ss1.base_addr += region->buffer->offset; /* reloc */
- surf.ss2.width = key.width - 1;
- surf.ss2.height = key.height - 1;
- brw_set_surface_tiling(&surf, key.tiling);
- surf.ss3.pitch = (key.pitch * key.cpp) - 1;
+ surf->ss2.width = rb->Width - 1;
+ surf->ss2.height = rb->Height - 1;
+ brw_set_surface_tiling(surf, region->tiling);
+ surf->ss3.pitch = (region->pitch * region->cpp) - 1;
if (intel->gen < 6) {
/* _NEW_COLOR */
- surf.ss0.color_blend = key.color_blend;
- surf.ss0.writedisable_red = !key.color_mask[0];
- surf.ss0.writedisable_green = !key.color_mask[1];
- surf.ss0.writedisable_blue = !key.color_mask[2];
- surf.ss0.writedisable_alpha = !key.color_mask[3];
+ surf->ss0.color_blend = (!ctx->Color._LogicOpEnabled &&
+ (ctx->Color.BlendEnabled & (1 << unit)));
+ surf->ss0.writedisable_red = !ctx->Color.ColorMask[unit][0];
+ surf->ss0.writedisable_green = !ctx->Color.ColorMask[unit][1];
+ surf->ss0.writedisable_blue = !ctx->Color.ColorMask[unit][2];
+ /* As mentioned above, disable writes to the alpha component when the
+ * renderbuffer is XRGB.
+ */
+ if (ctx->DrawBuffer->Visual.alphaBits == 0)
+ surf->ss0.writedisable_alpha = 1;
+ else
+ surf->ss0.writedisable_alpha = !ctx->Color.ColorMask[unit][3];
}
- map = brw_state_batch(brw, sizeof(surf), 32,
- &brw->wm.surf_bo[unit],
- &brw->wm.surf_offset[unit]);
- memcpy(map, &surf, sizeof(surf));
-
- if (region_bo != NULL) {
- drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
- brw->wm.surf_offset[unit] +
- offsetof(struct brw_surface_state, ss1),
- region_bo,
- surf.ss1.base_addr - region_bo->offset,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER);
- }
+ drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+ brw->wm.surf_offset[unit] +
+ offsetof(struct brw_surface_state, ss1),
+ region->buffer,
+ surf->ss1.base_addr - region->buffer->offset,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
}
static void
@@ -591,6 +514,11 @@ prepare_wm_surfaces(struct brw_context *brw)
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_region *region = irb ? irb->region : NULL;
+ if (region == NULL || region->buffer == NULL) {
+ brw->intel.Fallback = GL_TRUE; /* boolean, not bitfield */
+ return;
+ }
+
brw_add_validated_bo(brw, region->buffer);
nr_surfaces = SURF_INDEX_DRAW(i) + 1;
}
@@ -635,12 +563,16 @@ upload_wm_surfaces(struct brw_context *brw)
/* Update surfaces for drawing buffers */
if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
- brw_update_renderbuffer_surface(brw,
- ctx->DrawBuffer->_ColorDrawBuffers[i],
- i);
+ if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
+ brw_update_renderbuffer_surface(brw,
+ ctx->DrawBuffer->_ColorDrawBuffers[i],
+ i);
+ } else {
+ brw_update_null_renderbuffer_surface(brw, i);
+ }
}
} else {
- brw_update_renderbuffer_surface(brw, NULL, 0);
+ brw_update_null_renderbuffer_surface(brw, 0);
}
/* Update surfaces for textures */
@@ -652,8 +584,7 @@ upload_wm_surfaces(struct brw_context *brw)
if (texUnit->_ReallyEnabled) {
brw_update_texture_surface(ctx, i);
} else {
- drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = NULL;
+ brw->wm.surf_offset[surf] = 0;
}
}
@@ -686,16 +617,11 @@ brw_wm_upload_binding_table(struct brw_context *brw)
* space for the binding table.
*/
bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF,
- 32, &brw->wm.bind_bo, &brw->wm.bind_bo_offset);
+ 32, &brw->wm.bind_bo_offset);
for (i = 0; i < BRW_WM_MAX_SURF; i++) {
/* BRW_NEW_WM_SURFACES */
bind[i] = brw->wm.surf_offset[i];
- if (brw->wm.surf_bo[i]) {
- bind[i] = brw->wm.surf_offset[i];
- } else {
- bind[i] = 0;
- }
}
brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
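
The renderbuffer-surface hunk above derives a tile-aligned base address plus small intra-tile x/y offsets from the region's draw_x/draw_y. A standalone sketch of that arithmetic follows, using the same constants as the diff (X tiles: 512 bytes wide by 8 rows; Y tiles: 128 bytes wide by 32 rows; 4096 bytes per tile); the struct and function names are illustrative only.

#include <stdbool.h>
#include <stdint.h>

struct tile_offsets {
   uint32_t base;   /* byte offset of the containing tile */
   uint32_t x, y;   /* intra-tile offsets (pixels, rows) */
};

/* Hedged sketch of the tile-offset math in the hunk above. */
static struct tile_offsets
compute_tile_offsets(bool x_tiled, uint32_t draw_x, uint32_t draw_y,
                     uint32_t pitch_pixels, uint32_t cpp)
{
   struct tile_offsets t;
   uint32_t pitch = pitch_pixels * cpp;            /* bytes per row */
   uint32_t tile_w = (x_tiled ? 512 : 128) / cpp;  /* tile width in pixels */
   uint32_t tile_h = x_tiled ? 8 : 32;             /* tile height in rows */

   t.x = draw_x % tile_w;
   t.y = draw_y % tile_h;
   t.base = (draw_y / tile_h) * (tile_h * pitch);  /* whole tile rows */
   t.base += (draw_x - t.x) / tile_w * 4096;       /* whole tiles across */
   return t;
}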
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index c2631a7b4d..d1648a102d 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -66,12 +66,12 @@ blend_state_populate_key(struct brw_context *brw,
/* _NEW_COLOR */
key->color_blend = ctx->Color.BlendEnabled;
if (key->color_blend) {
- key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
- key->blend_eq_a = ctx->Color.BlendEquationA;
- key->blend_src_rgb = ctx->Color.BlendSrcRGB;
- key->blend_dst_rgb = ctx->Color.BlendDstRGB;
- key->blend_src_a = ctx->Color.BlendSrcA;
- key->blend_dst_a = ctx->Color.BlendDstA;
+ key->blend_eq_rgb = ctx->Color.Blend[0].EquationRGB;
+ key->blend_eq_a = ctx->Color.Blend[0].EquationA;
+ key->blend_src_rgb = ctx->Color.Blend[0].SrcRGB;
+ key->blend_dst_rgb = ctx->Color.Blend[0].DstRGB;
+ key->blend_src_a = ctx->Color.Blend[0].SrcA;
+ key->blend_dst_a = ctx->Color.Blend[0].DstA;
}
/* _NEW_COLOR */
@@ -254,14 +254,14 @@ prepare_color_calc_state(struct brw_context *brw)
color_calc_state_populate_key(brw, &key);
- drm_intel_bo_unreference(brw->cc.state_bo);
- brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+ drm_intel_bo_unreference(brw->cc.color_calc_state_bo);
+ brw->cc.color_calc_state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE,
&key, sizeof(key),
NULL, 0,
NULL);
- if (brw->cc.state_bo == NULL)
- brw->cc.state_bo = color_calc_state_create_from_key(brw, &key);
+ if (brw->cc.color_calc_state_bo == NULL)
+ brw->cc.color_calc_state_bo = color_calc_state_create_from_key(brw, &key);
}
const struct brw_tracked_state gen6_color_calc_state = {
@@ -278,17 +278,17 @@ static void upload_cc_state_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2));
+ OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
- OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->cc.color_calc_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
ADVANCE_BATCH();
}
static void prepare_cc_state_pointers(struct brw_context *brw)
{
- brw_add_validated_bo(brw, brw->cc.state_bo);
+ brw_add_validated_bo(brw, brw->cc.color_calc_state_bo);
brw_add_validated_bo(brw, brw->cc.blend_state_bo);
brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo);
}
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index c7c4eb1f27..d6c1f1c893 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -43,7 +43,10 @@ upload_clip_state(struct brw_context *brw)
depth_clamp = GEN6_CLIP_Z_TEST;
if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
- provoking = 0;
+ provoking =
+ (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
+ (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
+ (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
} else {
provoking =
(2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
@@ -55,7 +58,7 @@ upload_clip_state(struct brw_context *brw)
userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2));
+ OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
OUT_BATCH(GEN6_CLIP_ENABLE |
GEN6_CLIP_API_OGL |
@@ -65,7 +68,7 @@ upload_clip_state(struct brw_context *brw)
depth_clamp |
provoking);
OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
- U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
+ U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
GEN6_CLIP_FORCE_ZERO_RTAINDEX);
ADVANCE_BATCH();
}
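
Both the clip hunk above and the SF hunk below encode point widths as unsigned fixed-point values with 3 fractional bits, which is why the legal range is 0.125 through 255.875 (and why 225.875 is corrected to 255.875 here). A tiny sketch of that encoding, assuming U_FIXED(v, n) simply scales by 2^n:

#include <stdint.h>

/* Hedged sketch: assumes U_FIXED(v, n) == v * (1 << n). */
static uint32_t u_fixed(float value, unsigned frac_bits)
{
   return (uint32_t)(value * (float)(1u << frac_bits));
}

/* With 3 fractional bits the granularity is 0.125, and the encoded
 * values span u_fixed(0.125f, 3) == 1 to u_fixed(255.875f, 3) == 2047. */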
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
index 6127b9197a..7296c7cd1b 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -37,7 +37,7 @@ upload_gs_state(struct brw_context *brw)
/* Disable all the constant buffers. */
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2));
+ OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -46,7 +46,7 @@ upload_gs_state(struct brw_context *brw)
if (brw->gs.prog_bo) {
BEGIN_BATCH(7);
- OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+ OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH(GEN6_GS_SPF_MODE |
(0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
@@ -62,7 +62,7 @@ upload_gs_state(struct brw_context *brw)
ADVANCE_BATCH();
} else {
BEGIN_BATCH(7);
- OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+ OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0); /* prog_bo */
OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
index fc5d391c3c..f65c651bdf 100644
--- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
@@ -36,7 +36,7 @@ upload_sampler_state_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 |
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 |
VS_SAMPLER_STATE_CHANGE |
GS_SAMPLER_STATE_CHANGE |
PS_SAMPLER_STATE_CHANGE |
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
index b57126c793..12b65826ae 100644
--- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -92,7 +92,7 @@ static void upload_scissor_state_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(2);
- OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 4cd2d69583..50a5ad38c6 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -33,9 +33,10 @@
#include "intel_batchbuffer.h"
static uint32_t
-get_attr_override(struct brw_context *brw, int fs_attr)
+get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color)
{
int attr_index = 0, i, vs_attr;
+ int bfc = 0;
if (fs_attr <= FRAG_ATTRIB_TEX7)
vs_attr = fs_attr;
@@ -53,10 +54,36 @@ get_attr_override(struct brw_context *brw, int fs_attr)
* be FRAG_ATTRIB_*.
*/
for (i = 1; i < vs_attr; i++) {
+ if (i == VERT_RESULT_PSIZ)
+ continue;
if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i))
attr_index++;
}
+ assert(attr_index < 32);
+
+ if (two_side_color) {
+ if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+ (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+ assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+ assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+ bfc = 2;
+ } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+ (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)))
+ bfc = 1;
+ }
+
+ if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) {
+ if (fs_attr == FRAG_ATTRIB_COL0)
+ attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+ else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) {
+ attr_index++;
+ attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+ } else {
+ attr_index += bfc;
+ }
+ }
+
return attr_index;
}
@@ -75,6 +102,7 @@ upload_sf_state(struct brw_context *brw)
GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
int attr = 0;
int urb_start;
+ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled)
@@ -181,7 +209,7 @@ upload_sf_state(struct brw_context *brw)
ctx->Point._Attenuated))
dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
- dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) <<
+ dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 255.875), 3) <<
GEN6_SF_POINT_WIDTH_SHIFT;
if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
@@ -211,7 +239,7 @@ upload_sf_state(struct brw_context *brw)
}
BEGIN_BATCH(20);
- OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
+ OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
OUT_BATCH(dw3);
@@ -224,7 +252,7 @@ upload_sf_state(struct brw_context *brw)
for (; attr < 64; attr++) {
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
- attr_overrides |= get_attr_override(brw, attr);
+ attr_overrides |= get_attr_override(brw, attr, two_side_color);
attr++;
break;
}
@@ -232,7 +260,7 @@ upload_sf_state(struct brw_context *brw)
for (; attr < 64; attr++) {
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
- attr_overrides |= get_attr_override(brw, attr) << 16;
+ attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16;
attr++;
break;
}
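
The attribute-override loops in the SF hunk above pack one override per 16-bit half of each DWORD: the first matching attribute fills the low half, the second is shifted left by 16. A minimal sketch of that packing over an array of per-attribute override values; the names are illustrative.

#include <stdint.h>

/* Hedged sketch: pack two 16-bit attribute overrides per DWORD. */
static void
pack_attr_overrides(const uint16_t *overrides, int count, uint32_t *dwords)
{
   for (int i = 0; i < count; i += 2) {
      uint32_t dw = overrides[i];
      if (i + 1 < count)
         dw |= (uint32_t)overrides[i + 1] << 16;
      dwords[i / 2] = dw;
   }
}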
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index de97fd3783..c3819f9b36 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -34,19 +34,26 @@
static void
prepare_urb( struct brw_context *brw )
{
- brw->urb.nr_vs_entries = 24;
- if (brw->gs.prog_bo)
- brw->urb.nr_gs_entries = 4;
- else
- brw->urb.nr_gs_entries = 0;
+ int urb_size, max_urb_entry;
+ struct intel_context *intel = &brw->intel;
+
+ if (IS_GT1(intel->intelScreen->deviceID)) {
+ urb_size = 32 * 1024;
+ max_urb_entry = 128;
+ } else {
+ urb_size = 64 * 1024;
+ max_urb_entry = 256;
+ }
+
+ brw->urb.nr_vs_entries = max_urb_entry;
+ brw->urb.nr_gs_entries = max_urb_entry;
+
/* CACHE_NEW_VS_PROG */
brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
- /* Check that the number of URB rows (8 floats each) allocated is less
- * than the URB space.
- */
- assert((brw->urb.nr_vs_entries +
- brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024);
+ if (2 * brw->urb.vs_size > urb_size)
+ brw->urb.nr_vs_entries = brw->urb.nr_gs_entries =
+ urb_size / (2 * brw->urb.vs_size);
}
static void
@@ -60,7 +67,7 @@ upload_urb(struct brw_context *brw)
assert(!brw->gs.prog_bo || brw->urb.vs_size < 5);
BEGIN_BATCH(3);
- OUT_BATCH(CMD_URB << 16 | (3 - 2));
+ OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
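
The URB hunk above picks VS/GS entry counts from the device's total URB size (32KB and 128 entries on GT1, 64KB and 256 entries on GT2), then lowers both counts to urb_size / (2 * vs_size) when twice the entry size exceeds the URB size (the diff's test). A standalone sketch mirroring that structure; the function name is illustrative and the entry-size units follow whatever brw->urb.vs_size uses in the driver.

#include <stdbool.h>

struct urb_config {
   unsigned nr_vs_entries;
   unsigned nr_gs_entries;
};

/* Hedged sketch of the gen6 URB entry-count selection above. */
static struct urb_config
pick_urb_entries(bool is_gt1, unsigned vs_entry_size)
{
   unsigned urb_size    = is_gt1 ? 32 * 1024 : 64 * 1024;
   unsigned max_entries = is_gt1 ? 128 : 256;
   struct urb_config cfg = { max_entries, max_entries };

   /* Same test as the diff's "2 * vs_size > urb_size". */
   if (2 * vs_entry_size > urb_size)
      cfg.nr_vs_entries = cfg.nr_gs_entries = urb_size / (2 * vs_entry_size);
   return cfg;
}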
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
index d691bbebc8..cd7d209e3e 100644
--- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -117,7 +117,7 @@ static void upload_viewport_state_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
+ OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
GEN6_CC_VIEWPORT_MODIFY |
GEN6_SF_VIEWPORT_MODIFY |
GEN6_CLIP_VIEWPORT_MODIFY);
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 4ef9e2e607..ce0b8ea7ea 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -47,7 +47,7 @@ upload_vs_state(struct brw_context *brw)
if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
+ OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -112,7 +112,7 @@ upload_vs_state(struct brw_context *brw)
assert(param_regs <= 32);
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
+ OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
OUT_RELOC(constant_bo,
@@ -127,15 +127,17 @@ upload_vs_state(struct brw_context *brw)
}
BEGIN_BATCH(6);
- OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
+ OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+ GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(0); /* scratch space base offset */
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
- OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
+
+ OUT_BATCH(((60 - 1) << GEN6_VS_MAX_THREADS_SHIFT) | /* max 60 threads for gen6 */
GEN6_VS_STATISTICS_ENABLE |
GEN6_VS_ENABLE);
ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index d80df4e254..78901ecac5 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -107,7 +107,7 @@ upload_wm_state(struct brw_context *brw)
if (brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));
+ OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -115,7 +115,7 @@ upload_wm_state(struct brw_context *brw)
ADVANCE_BATCH();
} else {
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 |
+ OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
OUT_RELOC(brw->wm.push_const_bo,
@@ -133,6 +133,9 @@ upload_wm_state(struct brw_context *brw)
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+ /* OpenGL non-IEEE floating point mode */
+ dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
+
/* BRW_NEW_NR_WM_SURFACES */
dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
@@ -178,7 +181,7 @@ upload_wm_state(struct brw_context *brw)
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
BEGIN_BATCH(9);
- OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2));
+ OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH(dw2);
OUT_BATCH(0); /* scratch space base offset */