summary refs log tree commit diff
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--src/mesa/drivers/dri/common/dri_util.c3
-rw-r--r--src/mesa/drivers/dri/common/spantmp2.h122
-rw-r--r--src/mesa/drivers/dri/i915/i830_reg.h4
-rw-r--r--src/mesa/drivers/dri/i915/i830_texstate.c31
-rw-r--r--src/mesa/drivers/dri/i915/i830_vtbl.c57
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.c1
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c16
-rw-r--r--src/mesa/drivers/dri/i915/i915_texstate.c2
-rw-r--r--src/mesa/drivers/dri/i915/i915_vtbl.c58
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h96
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c35
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h9
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c342
-rw-r--r--src/mesa/drivers/dri/i965/brw_fallback.c43
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp472
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h21
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_queryobj.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_batch.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h52
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c349
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c131
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h31
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c180
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c1035
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_iz.c32
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass1.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass2.c45
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c44
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c382
-rw-r--r--src/mesa/drivers/dri/i965/gen6_cc.c94
-rw-r--r--src/mesa/drivers/dri/i965/gen6_clip_state.c11
-rw-r--r--src/mesa/drivers/dri/i965/gen6_gs_state.c6
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sampler_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_scissor_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c91
-rw-r--r--src/mesa/drivers/dri/i965/gen6_urb.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen6_viewport_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c31
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c37
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c60
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.h21
-rw-r--r--src/mesa/drivers/dri/intel/intel_blit.c122
-rw-r--r--src/mesa/drivers/dri/intel/intel_blit.h4
-rw-r--r--src/mesa/drivers/dri/intel/intel_buffer_objects.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_clear.c50
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c77
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h5
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions_es2.c15
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c319
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.c4
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.h2
-rw-r--r--src/mesa/drivers/dri/intel/intel_reg.h2
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c41
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.c82
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.h4
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex.h3
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_copy.c73
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_format.c239
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_image.c153
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_obj.h8
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_validate.c121
-rw-r--r--src/mesa/drivers/dri/mach64/mach64_context.h6
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_driver.c2
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_driver.h2
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_fbo.c1
-rw-r--r--src/mesa/drivers/dri/r200/r200_context.c1
-rw-r--r--src/mesa/drivers/dri/r200/r200_maos_arrays.c1
-rw-r--r--src/mesa/drivers/dri/r200/r200_tex.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c36
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c25
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c103
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h23
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c94
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h16
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c7
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c32
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h55
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c165
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c8
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c1
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.c1
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_blit.c88
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_state.c24
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c12
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c1
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c149
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h42
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_context.c1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_fbo.c1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c48
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex.c1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.h1
-rw-r--r--src/mesa/drivers/dri/sis/server/sis_dri.h9
-rw-r--r--src/mesa/drivers/dri/tdfx/tdfx_context.h4
-rw-r--r--src/mesa/drivers/dri/unichrome/server/via_dri.h2
131 files changed, 3200 insertions, 3250 deletions
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index a5b71bd40a..bf8cf6eec0 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -790,6 +790,9 @@ driCreateNewScreen(int scrn,
static const __DRIextension *emptyExtensionList[] = { NULL };
__DRIscreen *psp;
+ if (driDriverAPI.InitScreen == NULL)
+ return NULL;
+
psp = calloc(1, sizeof *psp);
if (!psp)
return NULL;
diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h
index abd79562f9..f436d1398c 100644
--- a/src/mesa/drivers/dri/common/spantmp2.h
+++ b/src/mesa/drivers/dri/common/spantmp2.h
@@ -48,6 +48,15 @@
#define HW_WRITE_CLIPLOOP() HW_CLIPLOOP()
#endif
+#ifdef SPANTMP_MESA_FMT
+#define SPANTMP_PIXEL_FMT GL_NONE
+#define SPANTMP_PIXEL_TYPE GL_NONE
+#endif
+
+#ifndef SPANTMP_MESA_FMT
+#define SPANTMP_MESA_FMT MESA_FORMAT_COUNT
+#endif
+
#if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
/**
@@ -445,6 +454,118 @@
rgba[3] = p; \
} while (0)
+#elif (SPANTMP_MESA_FMT == MESA_FORMAT_R8)
+
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) ( buf + (_x) + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLubyte *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLubyte *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+# define INIT_MONO_PIXEL(p, color) \
+ p = color[0]
+
+# define WRITE_RGBA(_x, _y, r, g, b, a) \
+ PUT_VALUE(_x, _y, r)
+
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y ) \
+ do { \
+ GLubyte p = GET_VALUE(_x, _y); \
+ rgba[0] = p; \
+ rgba[1] = 0; \
+ rgba[2] = 0; \
+ rgba[3] = 0; \
+ } while (0)
+
+#elif (SPANTMP_MESA_FMT == MESA_FORMAT_RG88)
+
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) ( buf + (_x) * 2 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+/* Build the 16-bit mono pixel and store it in p: R in the low byte and G
+ * in the high byte, which is the layout READ_RGBA for this format decodes.
+ */
+# define INIT_MONO_PIXEL(p, color) \
+ p = (((color)[1] << 8) | (color)[0])
+
+/* Store both channels this format holds: G in the high byte, R in the low
+ * byte (the layout READ_RGBA for this format decodes). b and a are ignored.
+ */
+# define WRITE_RGBA(_x, _y, r, g, b, a) \
+ PUT_VALUE(_x, _y, (((g) << 8) | (r)))
+
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y ) \
+ do { \
+ GLushort p = GET_VALUE(_x, _y); \
+ rgba[0] = p & 0xff; \
+ rgba[1] = (p >> 8) & 0xff; \
+ rgba[2] = 0; \
+ rgba[3] = 0; \
+ } while (0)
+
+#elif (SPANTMP_MESA_FMT == MESA_FORMAT_R16)
+
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) ( buf + (_x) * 2 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+# define INIT_MONO_PIXEL(p, color) \
+ p = color[0]
+
+# define WRITE_RGBA(_x, _y, r, g, b, a) \
+ PUT_VALUE(_x, _y, r)
+
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y ) \
+ do { \
+ GLushort p = GET_VALUE(_x, _y); \
+ rgba[0] = p; \
+ rgba[1] = 0; \
+ rgba[2] = 0; \
+ rgba[3] = 0; \
+ } while (0)
+
+#elif (SPANTMP_MESA_FMT == MESA_FORMAT_RG1616)
+
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+/* Build the 32-bit mono pixel and store it in p: R in the low 16 bits and
+ * G in the high 16 bits, which is the layout READ_RGBA for this format
+ * decodes. The cast keeps the shift out of signed-int range.
+ */
+# define INIT_MONO_PIXEL(p, color) \
+ p = (((GLuint)(color)[1] << 16) | (GLuint)(color)[0])
+
+/* Store both channels this format holds: G in the high 16 bits, R in the
+ * low 16 bits (the layout READ_RGBA for this format decodes). b and a are
+ * ignored. The cast keeps the shift out of signed-int range.
+ */
+# define WRITE_RGBA(_x, _y, r, g, b, a) \
+ PUT_VALUE(_x, _y, (((GLuint)(g) << 16) | (GLuint)(r)))
+
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y ) \
+ do { \
+ GLuint p = GET_VALUE(_x, _y); \
+ rgba[0] = p & 0xffff; \
+ rgba[1] = (p >> 16) & 0xffff; \
+ rgba[2] = 0; \
+ rgba[3] = 0; \
+ } while (0)
+
#else
#error SPANTMP_PIXEL_FMT must be set to a valid value!
#endif
@@ -914,3 +1035,4 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb)
#undef GET_PTR
#undef SPANTMP_PIXEL_FMT
#undef SPANTMP_PIXEL_TYPE
+#undef SPANTMP_MESA_FMT
diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h
index ae1317029a..99ee1bb4e9 100644
--- a/src/mesa/drivers/dri/i915/i830_reg.h
+++ b/src/mesa/drivers/dri/i915/i830_reg.h
@@ -585,6 +585,8 @@
#define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12)
#define TM0S2_OUTPUT_CHAN_SHIFT 10
#define TM0S2_OUTPUT_CHAN_MASK (3<<10)
+#define TM0S2_BASE_MIP_LEVEL_SHIFT 1
+#define TM0S2_LOD_PRECLAMP (1 << 0)
#define TM0S3_MIP_FILTER_MASK (0x3<<30)
#define TM0S3_MIP_FILTER_SHIFT 30
@@ -605,6 +607,8 @@
#define TM0S3_MAX_MIP_MASK (0xff<<9)
#define TM0S3_MIN_MIP_SHIFT 3
#define TM0S3_MIN_MIP_MASK (0x3f<<3)
+#define TM0S3_MIN_MIP_SHIFT_830 5
+#define TM0S3_MIN_MIP_MASK_830 (0x3f<<5)
#define TM0S3_KILL_PIXEL (1<<2)
#define TM0S3_KEYED_FILTER (1<<1)
#define TM0S3_CHROMA_KEY (1<<0)
diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c
index b3bb8837cc..c35b4b5ed0 100644
--- a/src/mesa/drivers/dri/i915/i830_texstate.c
+++ b/src/mesa/drivers/dri/i915/i830_texstate.c
@@ -28,13 +28,14 @@
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/colormac.h"
+#include "main/macros.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
#include "i830_context.h"
#include "i830_reg.h"
-
+#include "intel_chipset.h"
static GLuint
@@ -139,9 +140,9 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
/* Get first image here, since intelObj->firstLevel will get set in
* the intel_finalize_mipmap_tree() call above.
*/
- firstImage = tObj->Image[0][intelObj->firstLevel];
+ firstImage = tObj->Image[0][tObj->BaseLevel];
- intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0,
+ intel_miptree_get_image_offset(intelObj->mt, tObj->BaseLevel, 0, 0,
&dst_x, &dst_y);
drm_intel_bo_reference(intelObj->mt->region->buffer);
@@ -189,6 +190,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
GLuint minFilt, mipFilt, magFilt;
+ float maxlod;
+ uint32_t minlod_fixed, maxlod_fixed;
switch (tObj->MinFilter) {
case GL_NEAREST:
@@ -252,10 +255,24 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION;
#endif
- state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel -
- intelObj->firstLevel) *
- 4) << TM0S3_MIN_MIP_SHIFT;
-
+ /* We get one field with fraction bits for the maximum
+ * addressable (smallest resolution) LOD. Use it to cover both
+ * MAX_LEVEL and MAX_LOD.
+ */
+ minlod_fixed = U_FIXED(CLAMP(tObj->MinLod, 0.0, 11), 4);
+ maxlod = MIN2(tObj->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
+ if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM ||
+ intel->intelScreen->deviceID == PCI_CHIP_I865_G) {
+ maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2);
+ maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 3) >> 2);
+ state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT;
+ state[I830_TEXREG_TM0S2] |= TM0S2_LOD_PRECLAMP;
+ } else {
+ maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11), 0);
+ maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 15) >> 4);
+ state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT_830;
+ }
+ state[I830_TEXREG_TM0S3] |= minlod_fixed << TM0S3_MAX_MIP_SHIFT;
state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) |
(mipFilt << TM0S3_MIP_FILTER_SHIFT) |
(magFilt << TM0S3_MAG_FILTER_SHIFT));
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index f7fdb78d05..ebdefeac87 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -364,7 +364,7 @@ i830_emit_invarient_state(struct intel_context *intel)
#define emit( intel, state, size ) \
- intel_batchbuffer_data(intel->batch, state, size )
+ intel_batchbuffer_data(intel->batch, state, size, false)
static GLuint
get_dirty(struct i830_hw_state *state)
@@ -429,7 +429,8 @@ i830_emit_state(struct intel_context *intel)
* batchbuffer fills up.
*/
intel_batchbuffer_require_space(intel->batch,
- get_state_size(state) + INTEL_PRIM_EMIT_SIZE);
+ get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
+ false);
count = 0;
again:
aper_count = 0;
@@ -534,14 +535,9 @@ i830_emit_state(struct intel_context *intel)
BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1);
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]);
- if (state->tex_buffer[i]) {
- OUT_RELOC(state->tex_buffer[i],
- I915_GEM_DOMAIN_SAMPLER, 0,
- state->tex_offset[i]);
- }
- else {
- OUT_BATCH(state->tex_offset[i]);
- }
+ OUT_RELOC(state->tex_buffer[i],
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ state->tex_offset[i]);
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S1]);
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S2]);
@@ -584,6 +580,27 @@ i830_destroy_context(struct intel_context *intel)
_tnl_free_vertices(&intel->ctx);
}
+static uint32_t i830_render_target_format_for_mesa_format[MESA_FORMAT_COUNT] = /* indexed by gl_format; holds the DV_PF_* bits that i830_set_draw_region ORs into its state value */
+{ /* formats not listed here are left zero-initialized */
+ [MESA_FORMAT_ARGB8888] = DV_PF_8888,
+ [MESA_FORMAT_XRGB8888] = DV_PF_8888, /* same DV_PF value as ARGB8888 */
+ [MESA_FORMAT_RGB565] = DV_PF_565,
+ [MESA_FORMAT_ARGB1555] = DV_PF_1555,
+ [MESA_FORMAT_ARGB4444] = DV_PF_4444,
+};
+
+static bool
+i830_render_target_supported(gl_format format) /* reports whether `format` is usable as a render target; installed as vtbl.render_target_supported */
+{
+ if (format == MESA_FORMAT_S8_Z24 || /* these three formats are accepted */
+ format == MESA_FORMAT_X8_Z24 || /* directly, without consulting the */
+ format == MESA_FORMAT_Z16) { /* color format table */
+ return true;
+ }
+
+ return i830_render_target_format_for_mesa_format[format] != 0; /* otherwise supported only if the table holds a nonzero entry */
+}
+
static void
i830_set_draw_region(struct intel_context *intel,
struct intel_region *color_regions[],
@@ -623,24 +640,7 @@ i830_set_draw_region(struct intel_context *intel,
DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */
if (irb != NULL) {
- switch (irb->Base.Format) {
- case MESA_FORMAT_ARGB8888:
- case MESA_FORMAT_XRGB8888:
- value |= DV_PF_8888;
- break;
- case MESA_FORMAT_RGB565:
- value |= DV_PF_565;
- break;
- case MESA_FORMAT_ARGB1555:
- value |= DV_PF_1555;
- break;
- case MESA_FORMAT_ARGB4444:
- value |= DV_PF_4444;
- break;
- default:
- _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
- irb->Base.Format);
- }
+ value |= i830_render_target_format_for_mesa_format[irb->Base.Format];
}
if (depth_region && depth_region->cpp == 4) {
@@ -728,4 +728,5 @@ i830InitVtbl(struct i830_context *i830)
i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
i830->intel.vtbl.finish_batch = intel_finish_vb;
i830->intel.vtbl.invalidate_state = i830_invalidate_state;
+ i830->intel.vtbl.render_target_supported = i830_render_target_supported;
}
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index f943f81dd0..f32f3cf602 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -176,6 +176,7 @@ i915CreateContext(int api,
ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = GL_TRUE;
ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoIfs = GL_TRUE;
ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoNoise = GL_TRUE;
+ ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoPow = GL_TRUE;
ctx->Const.MaxDrawBuffers = 1;
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index c00ee415b6..1c6e984517 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -569,10 +569,14 @@ upload_program(struct i915_fragment_program *p)
if (inst->DstReg.CondMask == COND_TR) {
tmp = i915_get_utemp(p);
+ /* The KIL instruction discards the fragment if any component of
+ * the source is < 0. Emit an immediate operand of {-1}.xyzw.
+ */
i915_emit_texld(p, get_live_regs(p, inst),
tmp, A0_DEST_CHANNEL_ALL,
0, /* use a dummy dest reg */
- swizzle(tmp, ONE, ONE, ONE, ONE), /* always */
+ negate(swizzle(tmp, ONE, ONE, ONE, ONE),
+ 1, 1, 1, 1),
T0_TEXKILL);
} else {
p->error = 1;
@@ -1158,11 +1162,6 @@ translate_program(struct i915_fragment_program *p)
fixup_depth_write(p);
i915_fini_program(p);
- if (INTEL_DEBUG & DEBUG_WM) {
- printf("i915:\n");
- i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
- }
-
p->translated = 1;
}
@@ -1423,6 +1422,11 @@ i915ValidateFragmentProgram(struct i915_context *i915)
if (!p->on_hardware)
i915_upload_program(i915, p);
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ printf("i915:\n");
+ i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
+ }
}
void
diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
index c724a21496..af140c85f5 100644
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -156,7 +156,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
/* Get first image here, since intelObj->firstLevel will get set in
* the intel_finalize_mipmap_tree() call above.
*/
- firstImage = tObj->Image[0][intelObj->firstLevel];
+ firstImage = tObj->Image[0][tObj->BaseLevel];
drm_intel_bo_reference(intelObj->mt->region->buffer);
i915->state.tex_buffer[unit] = intelObj->mt->region->buffer;
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index 59dfe08563..a94b957127 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -217,7 +217,7 @@ i915_emit_invarient_state(struct intel_context *intel)
#define emit(intel, state, size ) \
- intel_batchbuffer_data(intel->batch, state, size)
+ intel_batchbuffer_data(intel->batch, state, size, false)
static GLuint
get_dirty(struct i915_hw_state *state)
@@ -300,7 +300,8 @@ i915_emit_state(struct intel_context *intel)
* batchbuffer fills up.
*/
intel_batchbuffer_require_space(intel->batch,
- get_state_size(state) + INTEL_PRIM_EMIT_SIZE);
+ get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
+ false);
count = 0;
again:
aper_count = 0;
@@ -435,15 +436,9 @@ i915_emit_state(struct intel_context *intel)
OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i)) {
-
- if (state->tex_buffer[i]) {
- OUT_RELOC(state->tex_buffer[i],
- I915_GEM_DOMAIN_SAMPLER, 0,
- state->tex_offset[i]);
- }
- else {
- OUT_BATCH(state->tex_offset[i]);
- }
+ OUT_RELOC(state->tex_buffer[i],
+ I915_GEM_DOMAIN_SAMPLER, 0,
+ state->tex_offset[i]);
OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
@@ -523,6 +518,27 @@ i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
}
}
+static uint32_t i915_render_target_format_for_mesa_format[MESA_FORMAT_COUNT] = /* indexed by gl_format; holds the bits that i915_set_draw_region ORs into its state value */
+{ /* formats not listed here are left zero-initialized */
+ [MESA_FORMAT_ARGB8888] = DV_PF_8888,
+ [MESA_FORMAT_XRGB8888] = DV_PF_8888, /* same DV_PF value as ARGB8888 */
+ [MESA_FORMAT_RGB565] = DV_PF_565 | DITHER_FULL_ALWAYS, /* the 16-bit formats also set DITHER_FULL_ALWAYS */
+ [MESA_FORMAT_ARGB1555] = DV_PF_1555 | DITHER_FULL_ALWAYS,
+ [MESA_FORMAT_ARGB4444] = DV_PF_4444 | DITHER_FULL_ALWAYS,
+};
+
+static bool
+i915_render_target_supported(gl_format format) /* reports whether `format` is usable as a render target; installed as vtbl.render_target_supported */
+{
+ if (format == MESA_FORMAT_S8_Z24 || /* these three formats are accepted */
+ format == MESA_FORMAT_X8_Z24 || /* directly, without consulting the */
+ format == MESA_FORMAT_Z16) { /* color format table */
+ return true;
+ }
+
+ return i915_render_target_format_for_mesa_format[format] != 0; /* otherwise supported only if the table holds a nonzero entry */
+}
+
static void
i915_set_draw_region(struct intel_context *intel,
struct intel_region *color_regions[],
@@ -562,24 +578,7 @@ i915_set_draw_region(struct intel_context *intel,
DSTORG_VERT_BIAS(0x8) | /* .5 */
LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL);
if (irb != NULL) {
- switch (irb->Base.Format) {
- case MESA_FORMAT_ARGB8888:
- case MESA_FORMAT_XRGB8888:
- value |= DV_PF_8888;
- break;
- case MESA_FORMAT_RGB565:
- value |= DV_PF_565 | DITHER_FULL_ALWAYS;
- break;
- case MESA_FORMAT_ARGB1555:
- value |= DV_PF_1555 | DITHER_FULL_ALWAYS;
- break;
- case MESA_FORMAT_ARGB4444:
- value |= DV_PF_4444 | DITHER_FULL_ALWAYS;
- break;
- default:
- _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
- irb->Base.Format);
- }
+ value |= i915_render_target_format_for_mesa_format[irb->Base.Format];
}
/* This isn't quite safe, thus being hidden behind an option. When changing
@@ -686,4 +685,5 @@ i915InitVtbl(struct i915_context *i915)
i915->intel.vtbl.update_texture_state = i915UpdateTextureState;
i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
i915->intel.vtbl.finish_batch = intel_finish_vb;
+ i915->intel.vtbl.render_target_supported = i915_render_target_supported;
}
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index e3ca863fe5..7c3ac0c14e 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -81,7 +81,6 @@ DRIVER_SOURCES = \
brw_wm_emit.c \
brw_wm_fp.c \
brw_wm_iz.c \
- brw_wm_glsl.c \
brw_wm_pass0.c \
brw_wm_pass1.c \
brw_wm_pass2.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index d3a1233aac..d286c9dbdc 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -239,7 +239,7 @@ static void upload_blend_constant_color(struct brw_context *brw)
struct brw_blend_constant_color bcc;
memset(&bcc, 0, sizeof(bcc));
- bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+ bcc.header.opcode = _3DSTATE_BLEND_CONSTANT_COLOR;
bcc.header.length = sizeof(bcc)/4-2;
bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index cb0a8b96c9..8fc322fd82 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api,
(i == MESA_SHADER_FRAGMENT);
ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
(i == MESA_SHADER_FRAGMENT);
-
- if (intel->gen == 6)
- ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX);
}
ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
@@ -154,6 +151,13 @@ GLboolean brwCreateContext( int api,
MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
ctx->Const.FragmentProgram.MaxEnvParams);
+ /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
+ but we're not sure how it orders the vertices, which affects the
+ provoking-vertex decision. Always use the last-vertex convention
+ for quad primitives, which works as expected for now. */
+ if (intel->gen == 6)
+ ctx->Const.QuadsFollowProvokingVertexConvention = GL_FALSE;
+
if (intel->is_g4x || intel->gen >= 5) {
brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 335339515a..7069724466 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -171,7 +171,6 @@ struct brw_vertex_program {
struct brw_fragment_program {
struct gl_fragment_program program;
GLuint id; /**< serial no. to identify frag progs, never re-used */
- GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
/** for debugging, which texture units are referenced */
GLbitfield tex_units_used;
@@ -211,6 +210,7 @@ struct brw_wm_prog_data {
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
GLboolean error;
+ int dispatch_width;
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 7b823eb201..877b22fec1 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -242,21 +242,13 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
- if (vp->use_const_buffer) {
- /* Load the subset of push constants that will get used when
- * we also have a pull constant buffer.
- */
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- assert(brw->vs.constant_map[i] <= nr);
- memcpy(buf + offset + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- }
- }
- } else {
- for (i = 0; i < nr; i++) {
- memcpy(buf + offset + i * 4,
+ /* Load the subset of push constants that will get used when
+ * we also have a pull constant buffer.
+ */
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ assert(brw->vs.constant_map[i] <= nr);
+ memcpy(buf + offset + brw->vs.constant_map[i] * 4,
vp->program.Base.Parameters->ParameterValues[i],
4 * sizeof(float));
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 239586a036..2f7dcc2dda 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -35,28 +35,6 @@
/* 3D state:
*/
-#define _3DOP_3DSTATE_PIPELINED 0x0
-#define _3DOP_3DSTATE_NONPIPELINED 0x1
-#define _3DOP_3DCONTROL 0x2
-#define _3DOP_3DPRIMITIVE 0x3
-
-#define _3DSTATE_PIPELINED_POINTERS 0x00
-#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
-#define _3DSTATE_VERTEX_BUFFERS 0x08
-#define _3DSTATE_VERTEX_ELEMENTS 0x09
-#define _3DSTATE_INDEX_BUFFER 0x0A
-#define _3DSTATE_VF_STATISTICS 0x0B
-#define _3DSTATE_DRAWING_RECTANGLE 0x00
-#define _3DSTATE_CONSTANT_COLOR 0x01
-#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
-#define _3DSTATE_CHROMA_KEY 0x04
-#define _3DSTATE_DEPTH_BUFFER 0x05
-#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
-#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
-#define _3DSTATE_LINE_STIPPLE 0x08
-#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
-#define _3DCONTROL 0x00
-
#define PIPE_CONTROL_NOWRITE 0x00
#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
#define PIPE_CONTROL_WRITEDEPTH 0x02
@@ -389,6 +367,7 @@
#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
+#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180
#define BRW_SURFACEFORMAT_R1_UINT 0x181
#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
@@ -462,6 +441,13 @@
#define BRW_COMPRESSION_2NDHALF 1
#define BRW_COMPRESSION_COMPRESSED 2
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
@@ -837,7 +823,7 @@
# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
-#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
# define GS_SAMPLER_STATE_CHANGE (1 << 9)
# define VS_SAMPLER_STATE_CHANGE (1 << 8)
@@ -878,27 +864,29 @@
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS_965 0x780b
#define CMD_VF_STATISTICS_GM45 0x680b
-#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */
+#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */
-#define CMD_URB 0x7805 /* GEN6+ */
+#define _3DSTATE_URB 0x7805 /* GEN6+ */
# define GEN6_URB_VS_SIZE_SHIFT 16
# define GEN6_URB_VS_ENTRIES_SHIFT 0
# define GEN6_URB_GS_ENTRIES_SHIFT 8
# define GEN6_URB_GS_SIZE_SHIFT 0
-#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
+#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
-#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
+#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
-#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */
+#define _3DSTATE_VS 0x7810 /* GEN6+ */
/* DW2 */
# define GEN6_VS_SPF_MODE (1 << 31)
# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW4 */
# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
@@ -909,7 +897,7 @@
# define GEN6_VS_CACHE_DISABLE (1 << 1)
# define GEN6_VS_ENABLE (1 << 0)
-#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */
+#define _3DSTATE_GS 0x7811 /* GEN6+ */
/* DW2 */
# define GEN6_GS_SPF_MODE (1 << 31)
# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
@@ -927,7 +915,7 @@
/* DW6 */
# define GEN6_GS_ENABLE (1 << 15)
-#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */
+#define _3DSTATE_CLIP 0x7812 /* GEN6+ */
/* DW1 */
# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
/**
@@ -957,7 +945,7 @@
# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
-#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */
+#define _3DSTATE_SF 0x7813 /* GEN6+ */
/* DW1 */
# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
@@ -1022,18 +1010,27 @@
# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
/* DW16: Point sprite texture coordinate enables */
/* DW17: Constant interpolation enables */
/* DW18: attr 0-7 wrap shortest enables */
/* DW19: attr 8-16 wrap shortest enables */
-#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */
+#define _3DSTATE_WM 0x7814 /* GEN6+ */
/* DW1: kernel pointer */
/* DW2 */
# define GEN6_WM_SPF_MODE (1 << 31)
# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
+# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
@@ -1088,34 +1085,34 @@
/* DW7: kernel 1 pointer */
/* DW8: kernel 2 pointer */
-#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */
-#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */
-#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */
+#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */
+#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */
+#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */
# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
-#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */
+#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */
-#define CMD_DRAW_RECT 0x7900
-#define CMD_BLEND_CONSTANT_COLOR 0x7901
-#define CMD_CHROMA_KEY 0x7904
-#define CMD_DEPTH_BUFFER 0x7905
-#define CMD_POLY_STIPPLE_OFFSET 0x7906
-#define CMD_POLY_STIPPLE_PATTERN 0x7907
-#define CMD_LINE_STIPPLE_PATTERN 0x7908
-#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
-#define CMD_AA_LINE_PARAMETERS 0x790a
+#define _3DSTATE_DRAWING_RECTANGLE 0x7900
+#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901
+#define _3DSTATE_CHROMA_KEY 0x7904
+#define _3DSTATE_DEPTH_BUFFER 0x7905
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907
+#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */
-#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */
+#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */
/* DW1 */
# define SVB_INDEX_SHIFT 29
# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
/* DW2: SVB index */
/* DW3: SVB maximum index */
-#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */
+#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */
/* DW1 */
# define MS_PIXEL_LOCATION_CENTER (0 << 4)
# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
@@ -1123,7 +1120,10 @@
# define MS_NUMSAMPLES_4 (2 << 1)
# define MS_NUMSAMPLES_8 (3 << 1)
-#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */
+#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */
+#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */
+
+#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK+ */
# define DEPTH_CLEAR_VALID (1 << 15)
/* DW1: depth clear value */
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 962c04128b..111cb9974e 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
err |= dest (file, inst);
} else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF ||
inst->header.opcode == BRW_OPCODE_ELSE ||
- inst->header.opcode == BRW_OPCODE_ENDIF)) {
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
format (file, " %d", inst->bits1.branch_gen6.jump_count);
}
@@ -972,7 +973,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.dp_render_cache.send_commit_msg,
inst->bits3.dp_render_cache.msg_length,
inst->bits3.dp_render_cache.response_length);
- } else if (gen >= 5) {
+ } else if (gen >= 5 /* FINISHME: || is_g4x */) {
format (file, " (%d, %d, %d)",
inst->bits3.dp_read_gen5.binding_table_index,
inst->bits3.dp_read_gen5.msg_control,
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index a1f403ca4e..7eb16b71f4 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -159,7 +159,7 @@ static void brw_emit_prim(struct brw_context *brw,
}
if (prim_packet.verts_per_instance) {
intel_batchbuffer_data( brw->intel.batch, &prim_packet,
- sizeof(prim_packet));
+ sizeof(prim_packet), false);
}
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -351,7 +351,8 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
* an upper bound of how much we might emit in a single
* brw_try_draw_prims().
*/
- intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4);
+ intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
+ false);
hw_prim = brw_set_prim(brw, &prim[i]);
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 2ff39e8e64..3b5c4c071e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -72,7 +72,37 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
+/**
+ * Set the compression control in the current instruction state (p->current).
+ *
+ * Records in p->compressed whether BRW_COMPRESSION_COMPRESSED was requested,
+ * then writes the header field.  On gen6+ the pre-gen6 BRW_COMPRESSION_*
+ * value is first translated to a GEN6_COMPRESSION_* encoding; before gen6
+ * the value is stored unchanged.
+ *
+ * Note that compression_control is declared GLboolean but carries a
+ * BRW_COMPRESSION_* value, not a true/false flag.
+ */
void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
{
- p->current->header.compression_control = compression_control;
+ p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
+
+ if (p->brw->intel.gen >= 6) {
+ /* Since we don't use the 32-wide support in gen6, we translate
+ * the pre-gen6 compression control here.
+ */
+ switch (compression_control) {
+ case BRW_COMPRESSION_NONE:
+ /* This is the "use the first set of bits of dmask/vmask/arf
+ * according to execsize" option.
+ */
+ p->current->header.compression_control = GEN6_COMPRESSION_1Q;
+ break;
+ case BRW_COMPRESSION_2NDHALF:
+ /* For 8-wide, this is "use the second set of 8 bits." */
+ p->current->header.compression_control = GEN6_COMPRESSION_2Q;
+ break;
+ case BRW_COMPRESSION_COMPRESSED:
+ /* For 16-wide instruction compression, use the first set of 16 bits
+ * since we don't do 32-wide dispatch.
+ */
+ p->current->header.compression_control = GEN6_COMPRESSION_1H;
+ break;
+ default:
+ assert(!"not reached");
+ p->current->header.compression_control = GEN6_COMPRESSION_1H;
+ break;
+ }
+ } else {
+ p->current->header.compression_control = compression_control;
+ }
}
void brw_set_mask_control( struct brw_compile *p, GLuint value )
@@ -95,6 +125,7 @@ void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+ p->compressed_stack[p->current - p->stack] = p->compressed;
p->current++;
}
@@ -102,6 +133,7 @@ void brw_pop_insn_state( struct brw_compile *p )
{
assert(p->current != p->stack);
p->current--;
+ p->compressed = p->compressed_stack[p->current - p->stack];
}
@@ -112,6 +144,7 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
p->brw = brw;
p->nr_insn = 0;
p->current = p->stack;
+ p->compressed = false;
memset(p->current, 0, sizeof(p->current[0]));
/* Some defaults?
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index b4538e6e8a..119ffc7237 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -33,6 +33,7 @@
#ifndef BRW_EU_H
#define BRW_EU_H
+#include <stdbool.h>
#include "brw_structs.h"
#include "brw_defines.h"
#include "program/prog_instruction.h"
@@ -106,10 +107,12 @@ struct brw_compile {
/* Allow clients to push/pop instruction state:
*/
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ bool compressed_stack[BRW_EU_MAX_INSN_STACK];
struct brw_instruction *current;
GLuint flag_value;
GLboolean single_program_flow;
+ bool compressed;
struct brw_context *brw;
struct brw_glsl_label *first_label; /**< linked list of labels */
@@ -858,7 +861,8 @@ void brw_fb_WRITE(struct brw_compile *p,
GLuint binding_table_index,
GLuint msg_length,
GLuint response_length,
- GLboolean eot);
+ GLboolean eot,
+ GLboolean header_present);
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
@@ -954,6 +958,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
+struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
+ struct brw_instruction *do_insn);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
/* Forward jumps:
*/
@@ -1009,6 +1015,7 @@ void brw_math_invert( struct brw_compile *p,
void brw_set_src1( struct brw_instruction *insn,
struct brw_reg reg );
+void brw_set_uip_jip(struct brw_compile *p);
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 9cb941dacf..88131c432e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -41,19 +41,20 @@
* Internal helper for constructing instructions
*/
-static void guess_execution_size( struct brw_instruction *insn,
- struct brw_reg reg )
+/* Derive insn's execution size from the width of reg.
+ *
+ * A width-8 register emitted while p->compressed is set is widened to
+ * BRW_EXECUTE_16; otherwise reg.width is stored directly.  p->compressed
+ * is consulted instead of insn->header.compression_control because on
+ * gen6 brw_set_compression_control() stores a translated
+ * GEN6_COMPRESSION_* value in the header.
+ */
+static void guess_execution_size(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg reg)
{
- if (reg.width == BRW_WIDTH_8 &&
- insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
+ if (reg.width == BRW_WIDTH_8 && p->compressed)
insn->header.execution_size = BRW_EXECUTE_16;
else
insn->header.execution_size = reg.width; /* note - definitions are compatible */
}
-static void brw_set_dest( struct brw_instruction *insn,
- struct brw_reg dest )
+static void brw_set_dest(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -100,7 +101,7 @@ static void brw_set_dest( struct brw_instruction *insn,
/* NEW: Set the execution size based on dest.width and
* insn->compression_control:
*/
- guess_execution_size(insn, dest);
+ guess_execution_size(p, insn, dest);
}
extern int reg_type_size[];
@@ -535,6 +536,16 @@ brw_set_dp_read_message(struct brw_context *brw,
insn->bits3.dp_read_gen5.end_of_thread = 0;
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
insn->bits2.send_gen5.end_of_thread = 0;
+ } else if (intel->is_g4x) {
+ insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
+ insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
+ insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
+ insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
+ insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/
+ insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/
+ insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+ insn->bits3.dp_read_g4x.pad1 = 0;
+ insn->bits3.dp_read_g4x.end_of_thread = 0;
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
@@ -629,7 +640,7 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p,
struct brw_reg src )
{
struct brw_instruction *insn = next_insn(p, opcode);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
return insn;
}
@@ -641,7 +652,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p,
struct brw_reg src1 )
{
struct brw_instruction *insn = next_insn(p, opcode);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
return insn;
@@ -680,7 +691,7 @@ void brw_##OP(struct brw_compile *p, \
{ \
struct brw_instruction *rnd, *add; \
rnd = next_insn(p, BRW_OPCODE_##OP); \
- brw_set_dest(rnd, dest); \
+ brw_set_dest(p, rnd, dest); \
brw_set_src0(rnd, src); \
rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
\
@@ -779,7 +790,7 @@ struct brw_instruction *brw_MUL(struct brw_compile *p,
+/* Emit a BRW_OPCODE_NOP instruction.  Its dest and src0 are
+ * brw_vec4_grf(0,0) retyped to UD and its src1 is the immediate 0.
+ */
void brw_NOP(struct brw_compile *p)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(insn, brw_imm_d(0x0));
}
@@ -840,11 +851,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
/* Override the defaults for this instruction:
*/
if (intel->gen < 6) {
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
@@ -870,7 +881,7 @@ brw_IF_gen6(struct brw_compile *p, uint32_t conditional,
insn = next_insn(p, BRW_OPCODE_IF);
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->header.execution_size = BRW_EXECUTE_8;
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, src0);
@@ -905,11 +916,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
}
if (intel->gen < 6) {
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
@@ -965,11 +976,11 @@ void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
if (intel->gen < 6) {
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
}
@@ -1029,16 +1040,44 @@ void brw_ENDIF(struct brw_compile *p,
+/* Emit a BREAK instruction and return it.
+ *
+ * On gen6+ dest and src0 are the null register (type D) and src1 is an
+ * immediate 0; pop_count is not used, and the break_cont jip/uip fields
+ * are filled in afterwards by brw_set_uip_jip().  Before gen6 the
+ * instruction operates on the ip register and pop_count is stored in
+ * bits3.if_else.  In both cases the instruction is uncompressed with an
+ * execution size of 8.
+ */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
+
insn = next_insn(p, BRW_OPCODE_BREAK);
- brw_set_dest(insn, brw_ip_reg());
+ if (intel->gen >= 6) {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(insn, brw_imm_d(0x0));
+ } else {
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->bits3.if_else.pad0 = 0;
+ insn->bits3.if_else.pop_count = pop_count;
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+
+ return insn;
+}
+
+/* Emit a gen6 CONTINUE instruction and return it.
+ *
+ * do_insn is the value returned by brw_DO() for the enclosing loop.  UIP
+ * is set here to br * (do_insn - insn); JIP is filled in afterwards by
+ * brw_set_uip_jip().
+ */
+struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+ int br = 2;
+
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ /* NOTE(review): the two calls below set dest/src0 again, this time to
+ * the ip register, right after the null-register pair above (gen6
+ * brw_BREAK() uses only the null register).  Confirm which pair is
+ * intended.
+ */
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->bits3.break_cont.uip = br * (do_insn - insn);
+
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- insn->bits3.if_else.pad0 = 0;
- insn->bits3.if_else.pop_count = pop_count;
return insn;
}
@@ -1046,7 +1085,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
insn->header.compression_control = BRW_COMPRESSION_NONE;
@@ -1058,17 +1097,33 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
}
/* DO/WHILE loop:
+ *
+ * The DO/WHILE is just an unterminated loop -- break or continue are
+ * used for control within the loop. We have a few ways they can be
+ * done.
+ *
+ * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
+ * jip and no DO instruction.
+ *
+ * For non-uniform control flow pre-gen6, there's a DO instruction to
+ * push the mask, and a WHILE to jump back, and BREAK to get out and
+ * pop the mask.
+ *
+ * For gen6, there's no more mask stack, so no need for DO. WHILE
+ * just points back to the first instruction of the loop.
*/
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
- if (p->single_program_flow) {
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6 || p->single_program_flow) {
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
/* Override the defaults for this instruction:
*/
- brw_set_dest(insn, brw_null_reg());
+ brw_set_dest(p, insn, brw_null_reg());
brw_set_src0(insn, brw_null_reg());
brw_set_src1(insn, brw_null_reg());
@@ -1094,34 +1149,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
if (intel->gen >= 5)
br = 2;
- if (p->single_program_flow)
- insn = next_insn(p, BRW_OPCODE_ADD);
- else
+ if (intel->gen >= 6) {
insn = next_insn(p, BRW_OPCODE_WHILE);
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
+ brw_set_dest(p, insn, brw_imm_w(0));
+ insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = do_insn->header.execution_size;
+ assert(insn->header.execution_size == BRW_EXECUTE_8);
+ } else {
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
- if (p->single_program_flow) {
- insn->header.execution_size = BRW_EXECUTE_1;
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
+ insn->header.execution_size = BRW_EXECUTE_1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
- insn->bits3.d = (do_insn - insn) * 16;
- } else {
- insn->header.execution_size = do_insn->header.execution_size;
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
- assert(do_insn->header.opcode == BRW_OPCODE_DO);
- insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
- insn->bits3.if_else.pop_count = 0;
- insn->bits3.if_else.pad0 = 0;
- }
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0));
-/* insn->header.mask_control = BRW_MASK_ENABLE; */
+ insn->header.execution_size = do_insn->header.execution_size;
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- p->current->header.predicate_control = BRW_PREDICATE_NONE;
return insn;
}
@@ -1159,7 +1222,7 @@ void brw_CMP(struct brw_compile *p,
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
insn->header.destreg__conditionalmod = conditional;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
@@ -1184,7 +1247,7 @@ void brw_WAIT (struct brw_compile *p)
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
struct brw_reg src = brw_notification_1_reg();
- brw_set_dest(insn, src);
+ brw_set_dest(p, insn, src);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
insn->header.execution_size = 0; /* must */
@@ -1219,6 +1282,10 @@ void brw_math( struct brw_compile *p,
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src.negate);
+ assert(!src.abs);
+
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src.type == BRW_REGISTER_TYPE_F);
@@ -1228,8 +1295,9 @@ void brw_math( struct brw_compile *p,
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
+ insn->header.saturate = saturate;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
} else {
@@ -1242,7 +1310,7 @@ void brw_math( struct brw_compile *p,
insn->header.predicate_control = 0;
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1284,12 +1352,18 @@ void brw_math2(struct brw_compile *p,
assert(src1.type == BRW_REGISTER_TYPE_F);
}
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src0.negate);
+ assert(!src0.abs);
+ assert(!src1.negate);
+ assert(!src1.abs);
+
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
}
@@ -1318,8 +1392,13 @@ void brw_math_16( struct brw_compile *p,
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
+ insn->header.saturate = saturate;
- brw_set_dest(insn, dest);
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src.negate);
+ assert(!src.abs);
+
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
return;
@@ -1334,7 +1413,7 @@ void brw_math_16( struct brw_compile *p,
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1351,7 +1430,7 @@ void brw_math_16( struct brw_compile *p,
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
insn->header.destreg__conditionalmod = msg_reg_nr+1;
- brw_set_dest(insn, offset(dest,1));
+ brw_set_dest(p, insn, offset(dest,1));
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1446,7 +1525,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
send_commit_msg = 1;
}
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_null_reg());
brw_set_dp_write_message(p->brw,
@@ -1516,7 +1595,7 @@ brw_oword_block_read_scratch(struct brw_compile *p,
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = mrf.nr;
- brw_set_dest(insn, dest); /* UW? */
+ brw_set_dest(p, insn, dest); /* UW? */
brw_set_src0(insn, brw_null_reg());
brw_set_dp_read_message(p->brw,
@@ -1569,7 +1648,7 @@ void brw_oword_block_read(struct brw_compile *p,
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
if (intel->gen >= 6) {
brw_set_src0(insn, mrf);
} else {
@@ -1614,7 +1693,7 @@ void brw_dword_scattered_read(struct brw_compile *p,
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_null_reg());
brw_set_dp_read_message(p->brw,
@@ -1639,29 +1718,22 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
GLuint location,
GLuint bind_table_index)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_reg_nr = 1;
- struct brw_reg b;
- /*
- printf("vs const read msg, location %u, msg_reg_nr %d\n",
- location, msg_reg_nr);
- */
+ if (intel->gen >= 6)
+ location /= 16;
/* Setup MRF[1] with location/offset into const buffer */
brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
- /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
- * when the docs say only dword[2] should be set. Hmmm. But it works.
- */
- b = brw_message_reg(msg_reg_nr);
- b = retype(b, BRW_REGISTER_TYPE_UD);
- /*b = get_element_ud(b, 2);*/
- brw_MOV(p, b, brw_imm_ud(location));
-
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
+ BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(location));
brw_pop_insn_state(p);
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -1671,8 +1743,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest);
+ if (intel->gen >= 6) {
+ brw_set_src0(insn, brw_message_reg(msg_reg_nr));
+ } else {
+ brw_set_src0(insn, brw_null_reg());
+ }
brw_set_dp_read_message(p->brw,
insn,
@@ -1699,6 +1775,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
/* Setup MRF[1] with offset into const buffer */
brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
@@ -1706,7 +1783,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
* fields ignored.
*/
- brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
+ brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
@@ -1717,7 +1794,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
insn->header.destreg__conditionalmod = 0;
insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_vec8_grf(0, 0));
if (intel->gen == 6)
@@ -1747,12 +1824,12 @@ void brw_fb_WRITE(struct brw_compile *p,
GLuint binding_table_index,
GLuint msg_length,
GLuint response_length,
- GLboolean eot)
+ GLboolean eot,
+ GLboolean header_present)
{
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_control, msg_type;
- GLboolean header_present = GL_TRUE;
if (intel->gen >= 6 && binding_table_index == 0) {
insn = next_insn(p, BRW_OPCODE_SENDC);
@@ -1764,9 +1841,6 @@ void brw_fb_WRITE(struct brw_compile *p,
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (intel->gen >= 6) {
- if (msg_length == 4)
- header_present = GL_FALSE;
-
/* headerless version, just submit color payload */
src0 = brw_message_reg(msg_reg_nr);
@@ -1782,7 +1856,7 @@ void brw_fb_WRITE(struct brw_compile *p,
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_dp_write_message(p->brw,
insn,
@@ -1860,7 +1934,7 @@ void brw_SAMPLE(struct brw_compile *p,
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
- guess_execution_size(p->current, dest);
+ guess_execution_size(p, p->current, dest);
if (p->current->header.execution_size == BRW_EXECUTE_16)
dispatch_16 = GL_TRUE;
@@ -1871,7 +1945,8 @@ void brw_SAMPLE(struct brw_compile *p,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, m1, brw_vec8_grf(0,0));
+ brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
brw_pop_insn_state(p);
@@ -1895,12 +1970,15 @@ void brw_SAMPLE(struct brw_compile *p,
* and the first message register index comes from src0.
*/
if (intel->gen >= 6) {
- brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- /* m1 contains header? */
- brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
- brw_pop_insn_state(p);
- src0 = brw_message_reg(msg_reg_nr);
+ if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+ src0.nr != BRW_ARF_NULL) {
+ brw_push_insn_state(p);
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0);
+ brw_pop_insn_state(p);
+ }
+ src0 = brw_message_reg(msg_reg_nr);
}
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -1909,7 +1987,7 @@ void brw_SAMPLE(struct brw_compile *p,
if (intel->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_sampler_message(p->brw, insn,
binding_table_index,
@@ -1929,7 +2007,8 @@ void brw_SAMPLE(struct brw_compile *p,
*/
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, reg, reg);
+ brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
+ retype(reg, BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
}
@@ -1961,7 +2040,8 @@ void brw_urb_WRITE(struct brw_compile *p,
if (intel->gen >= 6) {
brw_push_insn_state(p);
brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
+ brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
+ retype(src0, BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
src0 = brw_message_reg(msg_reg_nr);
}
@@ -1970,7 +2050,7 @@ void brw_urb_WRITE(struct brw_compile *p,
assert(msg_length < BRW_MAX_MRF);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
@@ -1989,6 +2069,80 @@ void brw_urb_WRITE(struct brw_compile *p,
swizzle);
}
+/* Return the index in p->store of the first ENDIF, ELSE or WHILE
+ * instruction located after index "start".
+ *
+ * The scan does not track nesting: the first instruction with one of
+ * those opcodes is returned.  If none is found before p->nr_insn the
+ * function asserts, and returns start + 1 when asserts are compiled out.
+ */
+static int
+brw_find_next_block_end(struct brw_compile *p, int start)
+{
+ int ip;
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_ENDIF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_WHILE:
+ return ip;
+ }
+ }
+ assert(!"not reached");
+ return start + 1;
+}
+
+/* There is no DO instruction on gen6, so to find the end of the loop
+ * enclosing the instruction at index "start" we look for the first later
+ * WHILE whose backward jump lands at or before "start".
+ *
+ * On gen6 brw_DO() emits nothing and returns the address of the first
+ * instruction of the loop body, and brw_WHILE() stores
+ * br * (do_insn - insn) in jump_count, so ip + jump_count / br is the
+ * index of the first body instruction.  That index equals "start" when
+ * the instruction we are searching from is itself the first instruction
+ * of the loop, hence the inclusive comparison.
+ *
+ * Returns the index of that WHILE in p->store.  If none is found the
+ * function asserts, and returns start + 1 when asserts are compiled out.
+ */
+static int
+brw_find_loop_end(struct brw_compile *p, int start)
+{
+ int ip;
+ int br = 2;
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ if (insn->header.opcode == BRW_OPCODE_WHILE) {
+ if (ip + insn->bits1.branch_gen6.jump_count / br <= start)
+ return ip;
+ }
+ }
+ assert(!"not reached");
+ return start + 1;
+}
+
+/* After program generation, go back and update the UIP and JIP of
+ * BREAK and CONT instructions to their correct locations.
+ *
+ * Does nothing before gen6.  Offsets are stored relative to the
+ * instruction itself, as an instruction-index distance multiplied by br.
+ */
+void
+brw_set_uip_jip(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int ip;
+ int br = 2;
+
+ if (intel->gen < 6)
+ return;
+
+ for (ip = 0; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_BREAK:
+ /* JIP targets the next ENDIF/ELSE/WHILE; UIP targets the
+ * instruction just past the enclosing loop's WHILE.
+ */
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ /* UIP is set at CONTINUE emit time (see brw_CONT_gen6()), since
+ * that's when we know where the start of the loop is.  Only JIP
+ * is filled in here.
+ */
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ assert(insn->bits3.break_cont.uip != 0);
+ assert(insn->bits3.break_cont.jip != 0);
+ break;
+ }
+ }
+}
+
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
@@ -2013,7 +2167,7 @@ void brw_ff_sync(struct brw_compile *p,
}
insn = next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c
index 6796fb208d..d0b0c22abf 100644
--- a/src/mesa/drivers/dri/i965/brw_fallback.c
+++ b/src/mesa/drivers/dri/i965/brw_fallback.c
@@ -36,8 +36,6 @@
#include "swrast/swrast.h"
#include "tnl/tnl.h"
#include "brw_context.h"
-#include "intel_fbo.h"
-#include "intel_regions.h"
#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
@@ -63,49 +61,14 @@ static GLboolean do_check_fallback(struct brw_context *brw)
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
if (texUnit->_ReallyEnabled) {
- struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current);
- struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel];
+ struct gl_texture_object *tex_obj = texUnit->_Current;
+ struct gl_texture_image *texImage = tex_obj->Image[0][tex_obj->BaseLevel];
if (texImage->Border) {
DBG("FALLBACK: texture border\n");
return GL_TRUE;
}
}
}
-
- /* _NEW_STENCIL
- */
- if (ctx->Stencil._Enabled &&
- (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) {
- DBG("FALLBACK: stencil\n");
- return GL_TRUE;
- }
-
- /* _NEW_BUFFERS */
- if (!brw->has_surface_tile_offset) {
- for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
- struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-
- /* The original gen4 hardware couldn't set up WM surfaces pointing
- * at an offset within a tile, which can happen when rendering to
- * anything but the base level of a texture or the +X face/0 depth.
- * This was fixed with the 4 Series hardware.
- *
- * For these original chips, you would have to make the depth and
- * color destination surfaces include information on the texture
- * type, LOD, face, and various limits to use them as a destination.
- * I would have done this, but there's also a nasty requirement that
- * the depth and the color surfaces all be of the same LOD, which
- * may be a worse requirement than this alignment. (Also, we may
- * want to just demote the texture to untiled, instead).
- */
- if (irb->region && irb->region->tiling != I915_TILING_NONE &&
- (irb->region->draw_offset & 4095)) {
- DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n");
- return GL_TRUE;
- }
- }
- }
return GL_FALSE;
}
@@ -117,7 +80,7 @@ static void check_fallback(struct brw_context *brw)
const struct brw_tracked_state brw_check_fallback = {
.dirty = {
- .mesa = _NEW_BUFFERS | _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL,
+ .mesa = _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL,
.brw = 0,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index edb02fabb2..a35687d599 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -48,6 +48,7 @@ extern "C" {
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"
+#define MAX_INSTRUCTION (1 << 30)
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
struct gl_shader *
@@ -89,6 +90,9 @@ brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
+ struct brw_context *brw = brw_context(ctx);
+ struct intel_context *intel = &brw->intel;
+
struct brw_shader *shader =
(struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
if (shader != NULL) {
@@ -107,7 +111,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
SUB_TO_ADD_NEG |
EXP_TO_EXP2 |
LOG_TO_LOG2);
+
+ /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
+ * if-statements need to be flattened.
+ */
+ if (intel->gen < 6)
+ lower_if_to_cond_assign(shader->ir, 16);
+
do_lower_texture_projection(shader->ir);
+ do_vec_index_to_cond_assign(shader->ir);
brw_do_cubemap_normalize(shader->ir);
do {
@@ -474,8 +486,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
wpos.reg_offset++;
/* gl_FragCoord.z */
- emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
- interp_reg(FRAG_ATTRIB_WPOS, 2)));
+ if (intel->gen >= 6) {
+ emit(fs_inst(BRW_OPCODE_MOV, wpos,
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
+ } else {
+ emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
+ interp_reg(FRAG_ATTRIB_WPOS, 2)));
+ }
wpos.reg_offset++;
/* gl_FragCoord.w: Already set up in emit_interpolation */
@@ -518,25 +535,40 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
continue;
}
- for (unsigned int c = 0; c < type->vector_elements; c++) {
- struct brw_reg interp = interp_reg(location, c);
- emit(fs_inst(FS_OPCODE_LINTERP,
- attr,
- this->delta_x,
- this->delta_y,
- fs_reg(interp)));
- attr.reg_offset++;
- }
-
- if (intel->gen < 6) {
- attr.reg_offset -= type->vector_elements;
+ if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 ||
+ location == FRAG_ATTRIB_COL1)) {
+ /* Constant interpolation (flat shading) case. The SF has
+ * handed us defined values in only the constant offset
+ * field of the setup reg.
+ */
for (unsigned int c = 0; c < type->vector_elements; c++) {
- emit(fs_inst(BRW_OPCODE_MUL,
- attr,
+ struct brw_reg interp = interp_reg(location, c);
+ interp = suboffset(interp, 3);
+ emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp)));
+ attr.reg_offset++;
+ }
+ } else {
+ /* Perspective interpolation case. */
+ for (unsigned int c = 0; c < type->vector_elements; c++) {
+ struct brw_reg interp = interp_reg(location, c);
+ emit(fs_inst(FS_OPCODE_LINTERP,
attr,
- this->pixel_w));
+ this->delta_x,
+ this->delta_y,
+ fs_reg(interp)));
attr.reg_offset++;
}
+
+ if (intel->gen < 6) {
+ attr.reg_offset -= type->vector_elements;
+ for (unsigned int c = 0; c < type->vector_elements; c++) {
+ emit(fs_inst(BRW_OPCODE_MUL,
+ attr,
+ attr,
+ this->pixel_w));
+ attr.reg_offset++;
+ }
+ }
}
location++;
}
@@ -600,8 +632,13 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
* might be able to do better by doing execsize = 1 math and then
* expanding that result out, but we would need to be careful with
* masking.
+ *
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
*/
- if (intel->gen >= 6 && src.file == UNIFORM) {
+ if (intel->gen >= 6 && (src.file == UNIFORM ||
+ src.abs ||
+ src.negate)) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
src = expanded;
@@ -765,6 +802,30 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
return true;
}
+/* Translates a GLSL IR comparison operation (ir_binop_*) into the
+ * matching BRW_CONDITIONAL_* value.
+ *
+ * Any other operation asserts; with asserts compiled out the result is
+ * BRW_CONDITIONAL_NZ.
+ */
+static uint32_t
+brw_conditional_for_comparison(unsigned int op)
+{
+   uint32_t cond;
+
+   switch (op) {
+   case ir_binop_less:
+      cond = BRW_CONDITIONAL_L;
+      break;
+   case ir_binop_greater:
+      cond = BRW_CONDITIONAL_G;
+      break;
+   case ir_binop_lequal:
+      cond = BRW_CONDITIONAL_LE;
+      break;
+   case ir_binop_gequal:
+      cond = BRW_CONDITIONAL_GE;
+      break;
+   case ir_binop_equal:
+   case ir_binop_all_equal: /* same as equal for scalars */
+      cond = BRW_CONDITIONAL_Z;
+      break;
+   case ir_binop_nequal:
+   case ir_binop_any_nequal: /* same as nequal for scalars */
+      cond = BRW_CONDITIONAL_NZ;
+      break;
+   default:
+      assert(!"not reached: bad operation for comparison");
+      cond = BRW_CONDITIONAL_NZ;
+      break;
+   }
+
+   return cond;
+}
+
void
fs_visitor::visit(ir_expression *ir)
{
@@ -814,6 +875,7 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_abs:
op[0].abs = true;
+ op[0].negate = false;
this->result = op[0];
break;
case ir_unop_sign:
@@ -880,35 +942,20 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_binop_less:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_L;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
case ir_binop_greater:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_G;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
case ir_binop_lequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_LE;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
case ir_binop_gequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
case ir_binop_equal:
- case ir_binop_all_equal: /* same as nequal for scalars */
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
- break;
+ case ir_binop_all_equal:
case ir_binop_nequal:
- case ir_binop_any_nequal: /* same as nequal for scalars */
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ case ir_binop_any_nequal:
+ temp = this->result;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1]));
+ inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
break;
@@ -933,6 +980,10 @@ fs_visitor::visit(ir_expression *ir)
assert(!"not reached: should be handled by lower_noise");
break;
+ case ir_quadop_vector:
+ assert(!"not reached: should be handled by lower_quadop_vector");
+ break;
+
case ir_unop_sqrt:
emit_math(FS_OPCODE_SQRT, this->result, op[0]);
break;
@@ -949,7 +1000,12 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_f2b:
case ir_unop_i2b:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
+ temp = this->result;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
this->result, fs_reg(1)));
@@ -1423,28 +1479,70 @@ fs_visitor::visit(ir_discard *ir)
void
fs_visitor::visit(ir_constant *ir)
{
- fs_reg reg(this, ir->type);
- this->result = reg;
+ /* Set this->result to reg at the bottom of the function because some code
+ * paths will cause this visitor to be applied to other fields. This will
+ * cause the value stored in this->result to be modified.
+ *
+ * Make reg constant so that it doesn't get accidentally modified along the
+ * way. Yes, I actually had this problem. :(
+ */
+ const fs_reg reg(this, ir->type);
+ fs_reg dst_reg = reg;
- for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
- switch (ir->type->base_type) {
- case GLSL_TYPE_FLOAT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
- break;
- case GLSL_TYPE_UINT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
- break;
- case GLSL_TYPE_INT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
- break;
- case GLSL_TYPE_BOOL:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
- break;
- default:
- assert(!"Non-float/uint/int/bool constant");
+ if (ir->type->is_array()) {
+ const unsigned size = type_size(ir->type->fields.array);
+
+ for (unsigned i = 0; i < ir->type->length; i++) {
+ ir->array_elements[i]->accept(this);
+ fs_reg src_reg = this->result;
+
+ dst_reg.type = src_reg.type;
+ for (unsigned j = 0; j < size; j++) {
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
+ src_reg.reg_offset++;
+ dst_reg.reg_offset++;
+ }
+ }
+ } else if (ir->type->is_record()) {
+ foreach_list(node, &ir->components) {
+ ir_instruction *const field = (ir_instruction *) node;
+ const unsigned size = type_size(field->type);
+
+ field->accept(this);
+ fs_reg src_reg = this->result;
+
+ dst_reg.type = src_reg.type;
+ for (unsigned j = 0; j < size; j++) {
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
+ src_reg.reg_offset++;
+ dst_reg.reg_offset++;
+ }
+ }
+ } else {
+ const unsigned size = type_size(ir->type);
+
+ for (unsigned i = 0; i < size; i++) {
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])));
+ break;
+ case GLSL_TYPE_UINT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])));
+ break;
+ case GLSL_TYPE_INT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])));
+ break;
+ case GLSL_TYPE_BOOL:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])));
+ break;
+ default:
+ assert(!"Non-float/uint/int/bool constant");
+ }
+ dst_reg.reg_offset++;
}
- reg.reg_offset++;
}
+
+ this->result = reg;
}
void
@@ -1490,7 +1588,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
op[0], fs_reg(0.0f)));
} else {
- inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0]));
+ inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0]));
}
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
@@ -1505,31 +1603,18 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
break;
case ir_binop_greater:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_G;
- break;
case ir_binop_gequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- break;
case ir_binop_less:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_L;
- break;
case ir_binop_lequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_LE;
- break;
case ir_binop_equal:
case ir_binop_all_equal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
case ir_binop_nequal:
case ir_binop_any_nequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]));
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
break;
+
default:
assert(!"not reached");
this->fail = true;
@@ -1574,7 +1659,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
switch (expr->operation) {
case ir_unop_logic_not:
- inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1)));
+ inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
return;
@@ -1608,30 +1693,16 @@ fs_visitor::emit_if_gen6(ir_if *ir)
return;
case ir_binop_greater:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_G;
- return;
case ir_binop_gequal:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- return;
case ir_binop_less:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_L;
- return;
case ir_binop_lequal:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_LE;
- return;
case ir_binop_equal:
case ir_binop_all_equal:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- return;
case ir_binop_nequal:
case ir_binop_any_nequal:
inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
return;
default:
assert(!"not reached");
@@ -1713,32 +1784,9 @@ fs_visitor::visit(ir_loop *ir)
this->base_ir = ir->to;
ir->to->accept(this);
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
+ fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp,
counter, this->result));
- switch (ir->cmp) {
- case ir_binop_equal:
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
- case ir_binop_nequal:
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
- case ir_binop_gequal:
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- break;
- case ir_binop_lequal:
- inst->conditional_mod = BRW_CONDITIONAL_LE;
- break;
- case ir_binop_greater:
- inst->conditional_mod = BRW_CONDITIONAL_G;
- break;
- case ir_binop_less:
- inst->conditional_mod = BRW_CONDITIONAL_L;
- break;
- default:
- assert(!"not reached: unknown loop condition");
- this->fail = true;
- break;
- }
+ inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
inst = emit(fs_inst(BRW_OPCODE_BREAK));
inst->predicated = true;
@@ -1951,7 +1999,7 @@ fs_visitor::emit_interpolation_setup_gen6()
emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));
this->current_annotation = "compute 1/pos.w";
- this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
+ this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
this->pixel_w = fs_reg(this, glsl_type::float_type);
emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
@@ -1979,17 +2027,17 @@ fs_visitor::emit_fb_writes()
nr += 2;
}
- if (c->key.aa_dest_stencil_reg) {
+ if (c->aa_dest_stencil_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
+ fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
}
/* Reserve space for color. It'll be filled in per MRT below. */
int color_mrf = nr;
nr += 4;
- if (c->key.source_depth_to_render_target) {
- if (c->key.computes_depth) {
+ if (c->source_depth_to_render_target) {
+ if (c->computes_depth) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth);
fs_reg depth = *(variable_storage(this->frag_depth));
@@ -1998,20 +2046,22 @@ fs_visitor::emit_fb_writes()
} else {
/* Pass through the payload depth. */
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
}
}
- if (c->key.dest_depth_reg) {
+ if (c->dest_depth_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
+ fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
}
fs_reg color = reg_undef;
if (this->frag_color)
color = *(variable_storage(this->frag_color));
- else if (this->frag_data)
+ else if (this->frag_data) {
color = *(variable_storage(this->frag_data));
+ color.type = BRW_REGISTER_TYPE_F;
+ }
for (int target = 0; target < c->key.nr_color_regions; target++) {
this->current_annotation = talloc_asprintf(this->mem_ctx,
@@ -2105,7 +2155,8 @@ fs_visitor::generate_fb_write(fs_inst *inst)
inst->target,
inst->mlen,
0,
- eot);
+ eot,
+ inst->header_present);
}
void
@@ -2452,7 +2503,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
void
fs_visitor::assign_curb_setup()
{
- c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
+ c->prog_data.first_curbe_grf = c->nr_payload_regs;
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
/* Map the offsets in the UNIFORM file to fixed HW regs. */
@@ -2522,12 +2573,15 @@ fs_visitor::assign_urb_setup()
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
- if (inst->opcode != FS_OPCODE_LINTERP)
- continue;
-
- assert(inst->src[2].file == FIXED_HW_REG);
+ if (inst->opcode == FS_OPCODE_LINTERP) {
+ assert(inst->src[2].file == FIXED_HW_REG);
+ inst->src[2].fixed_hw_reg.nr += urb_start;
+ }
- inst->src[2].fixed_hw_reg.nr += urb_start;
+ if (inst->opcode == FS_OPCODE_CINTERP) {
+ assert(inst->src[0].file == FIXED_HW_REG);
+ inst->src[0].fixed_hw_reg.nr += urb_start;
+ }
}
this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
@@ -2618,6 +2672,7 @@ fs_visitor::split_virtual_grfs()
}
}
}
+ this->live_intervals_valid = false;
}
/**
@@ -2692,8 +2747,11 @@ fs_visitor::calculate_live_intervals()
int loop_start = 0;
int bb_header_ip = 0;
+ if (this->live_intervals_valid)
+ return;
+
for (int i = 0; i < num_vars; i++) {
- def[i] = 1 << 30;
+ def[i] = MAX_INSTRUCTION;
use[i] = -1;
}
@@ -2771,6 +2829,8 @@ fs_visitor::calculate_live_intervals()
talloc_free(this->virtual_grf_use);
this->virtual_grf_def = def;
this->virtual_grf_use = use;
+
+ this->live_intervals_valid = true;
}
/**
@@ -2786,6 +2846,8 @@ fs_visitor::propagate_constants()
{
bool progress = false;
+ calculate_live_intervals();
+
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
@@ -2843,6 +2905,7 @@ fs_visitor::propagate_constants()
/* Fit this constant in by commuting the operands */
scan_inst->src[0] = scan_inst->src[1];
scan_inst->src[1] = inst->src[0];
+ progress = true;
}
break;
case BRW_OPCODE_CMP:
@@ -2863,6 +2926,9 @@ fs_visitor::propagate_constants()
}
}
+ if (progress)
+ this->live_intervals_valid = false;
+
return progress;
}
/**
@@ -2877,6 +2943,8 @@ fs_visitor::dead_code_eliminate()
bool progress = false;
int pc = 0;
+ calculate_live_intervals();
+
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
@@ -2888,6 +2956,9 @@ fs_visitor::dead_code_eliminate()
pc++;
}
+ if (progress)
+ live_intervals_valid = false;
+
return progress;
}
@@ -2895,10 +2966,35 @@ bool
fs_visitor::register_coalesce()
{
bool progress = false;
+ int if_depth = 0;
+ int loop_depth = 0;
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
+ /* Make sure that we dominate the instructions we're going to
+ * scan for interfering with our coalescing, or we won't have
+ * scanned enough to see if anything interferes with our
+ * coalescing. We don't dominate the following instructions if
+ * we're in a loop or an if block.
+ */
+ switch (inst->opcode) {
+ case BRW_OPCODE_DO:
+ loop_depth++;
+ break;
+ case BRW_OPCODE_WHILE:
+ loop_depth--;
+ break;
+ case BRW_OPCODE_IF:
+ if_depth++;
+ break;
+ case BRW_OPCODE_ENDIF:
+ if_depth--;
+ break;
+ }
+ if (loop_depth || if_depth)
+ continue;
+
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
inst->saturate ||
@@ -2916,14 +3012,6 @@ fs_visitor::register_coalesce()
for (; scan_iter.has_next(); scan_iter.next()) {
fs_inst *scan_inst = (fs_inst *)scan_iter.get();
- if (scan_inst->opcode == BRW_OPCODE_DO ||
- scan_inst->opcode == BRW_OPCODE_WHILE ||
- scan_inst->opcode == BRW_OPCODE_ENDIF) {
- interfered = true;
- iter = scan_iter;
- break;
- }
-
if (scan_inst->dst.file == GRF) {
if (scan_inst->dst.reg == inst->dst.reg &&
(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
@@ -2943,10 +3031,6 @@ fs_visitor::register_coalesce()
continue;
}
- /* Update live interval so we don't have to recalculate. */
- this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg],
- virtual_grf_use[inst->dst.reg]);
-
/* Rewrite the later usage to point at the source of the move to
* be removed.
*/
@@ -2971,6 +3055,9 @@ fs_visitor::register_coalesce()
progress = true;
}
+ if (progress)
+ live_intervals_valid = false;
+
return progress;
}
@@ -2981,6 +3068,8 @@ fs_visitor::compute_to_mrf()
bool progress = false;
int next_ip = 0;
+ calculate_live_intervals();
+
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
@@ -3184,15 +3273,16 @@ fs_visitor::virtual_grf_interferes(int a, int b)
int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
- /* For dead code, just check if the def interferes with the other range. */
- if (this->virtual_grf_use[a] == -1) {
- return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
- this->virtual_grf_def[a] < this->virtual_grf_use[b]);
- }
- if (this->virtual_grf_use[b] == -1) {
- return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
- this->virtual_grf_def[b] < this->virtual_grf_use[a]);
- }
+ /* We can't handle dead register writes here, without iterating
+ * over the whole instruction stream to find every single dead
+ * write to that register to compare to the live interval of the
+ * other register. Just assert that dead_code_eliminate() has been
+ * called.
+ */
+ assert((this->virtual_grf_use[a] != -1 ||
+ this->virtual_grf_def[a] == MAX_INSTRUCTION) &&
+ (this->virtual_grf_use[b] != -1 ||
+ this->virtual_grf_def[b] == MAX_INSTRUCTION));
return start < end;
}
@@ -3227,6 +3317,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
break;
default:
assert(!"not reached");
+ brw_reg = brw_null_reg();
break;
}
break;
@@ -3241,6 +3332,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
assert(!"not reached");
brw_reg = brw_null_reg();
break;
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
}
if (reg->abs)
brw_reg = brw_abs(brw_reg);
@@ -3373,10 +3468,6 @@ fs_visitor::generate_code()
break;
case BRW_OPCODE_DO:
- /* FINISHME: We need to write the loop instruction support still. */
- if (intel->gen >= 6)
- this->fail = true;
-
loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
if_depth_in_loop[loop_stack_depth] = 0;
break;
@@ -3386,7 +3477,11 @@ fs_visitor::generate_code()
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
- brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ /* FINISHME: We need to write the loop instruction support still. */
+ if (intel->gen >= 6)
+ brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]);
+ else
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
@@ -3400,16 +3495,18 @@ fs_visitor::generate_code()
assert(loop_stack_depth > 0);
loop_stack_depth--;
inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_stack[loop_stack_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
}
}
}
@@ -3425,6 +3522,9 @@ fs_visitor::generate_code()
case FS_OPCODE_COS:
generate_math(inst, dst, src);
break;
+ case FS_OPCODE_CINTERP:
+ brw_MOV(p, dst, src[0]);
+ break;
case FS_OPCODE_LINTERP:
generate_linterp(inst, dst, src);
break;
@@ -3486,6 +3586,26 @@ fs_visitor::generate_code()
last_native_inst = p->nr_insn;
}
+
+ brw_set_uip_jip(p);
+
+ /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0) {
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+ }
}
GLboolean
@@ -3553,7 +3673,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
progress = v.remove_duplicate_mrf_writes() || progress;
- v.calculate_live_intervals();
progress = v.propagate_constants() || progress;
progress = v.register_coalesce() || progress;
progress = v.compute_to_mrf() || progress;
@@ -3566,7 +3685,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
for (int i = 1; i < virtual_grf_count; i++) {
v.spill_reg(i);
}
- v.calculate_live_intervals();
}
if (0)
@@ -3575,8 +3693,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
while (!v.assign_regs()) {
if (v.fail)
break;
-
- v.calculate_live_intervals();
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index de7b15312a..82d96f6ac0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -68,6 +68,7 @@ enum fs_opcodes {
FS_OPCODE_COS,
FS_OPCODE_DDX,
FS_OPCODE_DDY,
+ FS_OPCODE_CINTERP,
FS_OPCODE_LINTERP,
FS_OPCODE_TEX,
FS_OPCODE_TXB,
@@ -348,6 +349,23 @@ public:
hash_table_pointer_hash,
hash_table_pointer_compare);
+ /* There's a question that appears to be left open in the spec:
+ * How do implicit dst conversions interact with the CMP
+ * instruction or conditional mods? On gen6, the instruction:
+ *
+ * CMP null<d> src0<f> src1<f>
+ *
+ * will do src1 - src0 and compare that result as if it was an
+ * integer. On gen4, it will do src1 - src0 as float, convert
+ * the result to int, and compare as int. In between, it
+ * appears that it does src1 - src0 and does the compare in the
+ * execution type so dst type doesn't matter.
+ */
+ if (this->intel->gen > 4)
+ this->reg_null_cmp = reg_null_d;
+ else
+ this->reg_null_cmp = reg_null_f;
+
this->frag_color = NULL;
this->frag_data = NULL;
this->frag_depth = NULL;
@@ -361,6 +379,7 @@ public:
this->virtual_grf_array_size = 0;
this->virtual_grf_def = NULL;
this->virtual_grf_use = NULL;
+ this->live_intervals_valid = false;
this->kill_emitted = false;
}
@@ -462,6 +481,7 @@ public:
int virtual_grf_array_size;
int *virtual_grf_def;
int *virtual_grf_use;
+ bool live_intervals_valid;
struct hash_table *variable_ht;
ir_variable *frag_color, *frag_data, *frag_depth;
@@ -485,6 +505,7 @@ public:
fs_reg pixel_w;
fs_reg delta_x;
fs_reg delta_y;
+ fs_reg reg_null_cmp;
int grf_used;
};
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 3b7b03a05b..20bfa4c3ea 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -205,6 +205,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_round_even:
case ir_unop_sin:
case ir_unop_cos:
+ case ir_unop_sin_reduced:
+ case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdy:
for (i = 0; i < vector_elements; i++) {
@@ -328,6 +330,9 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_noise:
assert(!"noise should have been broken down to function call");
break;
+ case ir_quadop_vector:
+ assert(!"should have been lowered");
+ break;
}
ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index bbb210cd44..078a349abd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -94,6 +94,8 @@ fs_visitor::assign_regs()
int class_count = 0;
int aligned_pair_class = -1;
+ calculate_live_intervals();
+
/* Set up the register classes.
*
* The base registers store a scalar value. For texture samples,
@@ -416,4 +418,6 @@ fs_visitor::spill_reg(int spill_reg)
}
}
}
+
+ this->live_intervals_valid = false;
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index b0c76f4094..70c451d071 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -96,6 +96,9 @@ static void compile_gs_prog( struct brw_context *brw,
brw_gs_quad_strip( &c, key );
break;
case GL_LINE_LOOP:
+ /* Gen6: LINELOOP is converted to LINESTRIP at the beginning of the 3D pipeline */
+ if (intel->gen == 6)
+ return;
brw_gs_lines( &c );
break;
case GL_LINES:
@@ -166,6 +169,9 @@ static void populate_key( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
struct gl_context *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ int prim_gs_always;
+
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG */
@@ -185,10 +191,14 @@ static void populate_key( struct brw_context *brw,
key->pv_first = GL_TRUE;
}
- key->need_gs_prog = (key->hint_gs_always ||
- brw->primitive == GL_QUADS ||
+ if (intel->gen == 6)
+ prim_gs_always = 0;
+ else
+ prim_gs_always = brw->primitive == GL_QUADS ||
brw->primitive == GL_QUAD_STRIP ||
- brw->primitive == GL_LINE_LOOP);
+ brw->primitive == GL_LINE_LOOP;
+
+ key->need_gs_prog = (key->hint_gs_always || prim_gs_always);
}
/* Calculate interpolants for triangle and line rasterization.
@@ -205,8 +215,10 @@ static void prepare_gs_prog(struct brw_context *brw)
brw->gs.prog_active = key.need_gs_prog;
}
+ drm_intel_bo_unreference(brw->gs.prog_bo);
+ brw->gs.prog_bo = NULL;
+
if (brw->gs.prog_active) {
- drm_intel_bo_unreference(brw->gs.prog_bo);
brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
&key, sizeof(key),
NULL, 0,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index a91b0528fa..79afe19deb 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -214,7 +214,7 @@ static void emit_depthbuffer(struct brw_context *brw)
if (region == NULL) {
BEGIN_BATCH(len);
- OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
(BRW_SURFACE_NULL << 29));
OUT_BATCH(0);
@@ -251,7 +251,7 @@ static void emit_depthbuffer(struct brw_context *brw)
assert(region->tiling != I915_TILING_NONE);
BEGIN_BATCH(len);
- OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+ OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH(((region->pitch * region->cpp) - 1) |
(format << 18) |
(BRW_TILEWALK_YMAJOR << 26) |
@@ -277,7 +277,7 @@ static void emit_depthbuffer(struct brw_context *brw)
/* Initialize it for safety. */
if (intel->gen >= 6) {
BEGIN_BATCH(2);
- OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2));
+ OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
}
@@ -309,7 +309,7 @@ static void upload_polygon_stipple(struct brw_context *brw)
return;
memset(&bps, 0, sizeof(bps));
- bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
+ bps.header.opcode = _3DSTATE_POLY_STIPPLE_PATTERN;
bps.header.length = sizeof(bps)/4-2;
/* Polygon stipple is provided in OpenGL order, i.e. bottom
@@ -354,7 +354,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
return;
memset(&bpso, 0, sizeof(bpso));
- bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+ bpso.header.opcode = _3DSTATE_POLY_STIPPLE_OFFSET;
bpso.header.length = sizeof(bpso)/4-2;
/* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
@@ -401,7 +401,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
/* use legacy aa line coverage computation */
memset(&balp, 0, sizeof(balp));
- balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+ balp.header.opcode = _3DSTATE_AA_LINE_PARAMETERS;
balp.header.length = sizeof(balp) / 4 - 2;
BRW_CACHED_BATCH_STRUCT(brw, &balp);
@@ -431,7 +431,7 @@ static void upload_line_stipple(struct brw_context *brw)
return;
memset(&bls, 0, sizeof(bls));
- bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+ bls.header.opcode = _3DSTATE_LINE_STIPPLE_PATTERN;
bls.header.length = sizeof(bls)/4 - 2;
bls.bits0.pattern = ctx->Line.StipplePattern;
@@ -481,7 +481,7 @@ static void upload_invarient_state( struct brw_context *brw )
/* Disable depth offset clamping.
*/
- gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+ gdo.header.opcode = _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP;
gdo.header.length = sizeof(gdo)/4 - 2;
gdo.depth_offset_clamp = 0.0;
@@ -492,20 +492,20 @@ static void upload_invarient_state( struct brw_context *brw )
int i;
BEGIN_BATCH(3);
- OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2));
+ OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (3 - 2));
OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
MS_NUMSAMPLES_1);
OUT_BATCH(0); /* positions for 4/8-sample */
ADVANCE_BATCH();
BEGIN_BATCH(2);
- OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2));
+ OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
OUT_BATCH(1);
ADVANCE_BATCH();
for (i = 0; i < 4; i++) {
BEGIN_BATCH(4);
- OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2));
+ OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
OUT_BATCH(i << SVB_INDEX_SHIFT);
OUT_BATCH(0);
OUT_BATCH(0xffffffff);
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 1367d81469..94efa79109 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -142,7 +142,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
newFP->id = brw->program_id++;
- newFP->isGLSL = brw_wm_is_glsl(fprog);
/* Don't reject fragment shaders for their Mesa IR state when we're
* using the new FS backend.
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index f28f28663e..656aad630a 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -232,6 +232,12 @@ brw_prepare_query_begin(struct brw_context *brw)
brw->query.bo = NULL;
brw->query.bo = drm_intel_bo_alloc(intel->bufmgr, "query", 4096, 1);
+
+ /* clear target buffer */
+ drm_intel_bo_map(brw->query.bo, GL_TRUE);
+ memset((char *)brw->query.bo->virtual, 0, 4096);
+ drm_intel_bo_unmap(brw->query.bo);
+
brw->query.index = 0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 3beed16945..4bb93e7336 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -164,7 +164,8 @@ void brw_destroy_caches( struct brw_context *brw );
/***********************************************************************
* brw_state_batch.c
*/
-#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)))
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(brw->intel.batch, (s), \
+ sizeof(*(s)), false)
#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
GLboolean brw_cached_batch_struct( struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index be3989eb7d..a21af13caa 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
struct header *newheader = (struct header *)data;
if (brw->emit_state_always) {
- intel_batchbuffer_data(brw->intel.batch, data, sz);
+ intel_batchbuffer_data(brw->intel.batch, data, sz, false);
return GL_TRUE;
}
@@ -75,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
emit:
memcpy(item->header, newheader, sz);
- intel_batchbuffer_data(brw->intel.batch, data, sz);
+ intel_batchbuffer_data(brw->intel.batch, data, sz, false);
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 58ff528d44..7045888ad4 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -58,8 +58,6 @@
#include "main/imports.h"
#include "brw_state.h"
-#include "intel_batchbuffer.h"
-#include "brw_wm.h"
#define FILE_DEBUG_FLAG DEBUG_STATE
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 338f3876b3..eba4411ca7 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -129,7 +129,7 @@ const struct brw_tracked_state *gen6_atoms[] =
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
&brw_wm_constants, /* Before wm_surfaces and constant_buffer */
- &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */
+ &gen6_wm_constants, /* Before wm_state */
&brw_vs_surfaces, /* must do before unit */
&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 8ce9af9c4f..6687a89e80 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1017,7 +1017,14 @@ struct brw_wm_unit_state
GLuint enable_32_pix:1;
GLuint enable_con_32_pix:1;
GLuint enable_con_64_pix:1;
- GLuint pad0:5;
+ GLuint pad0:1;
+
+ /* These next four bits are for Ironlake+ */
+ GLuint fast_span_coverage_enable:1;
+ GLuint depth_buffer_clear:1;
+ GLuint depth_buffer_resolve_enable:1;
+ GLuint hierarchical_depth_buffer_resolve_enable:1;
+
GLuint legacy_global_depth_bias:1;
GLuint line_stipple:1;
GLuint depth_offset:1;
@@ -1064,6 +1071,15 @@ struct brw_sampler_default_color {
GLfloat color[4];
};
+struct gen5_sampler_default_color {
+ uint8_t ub[4];
+ float f[4];
+ uint16_t hf[4];
+ uint16_t us[4];
+ int16_t s[4];
+ uint8_t b[4];
+};
+
struct brw_sampler_state
{
@@ -1169,7 +1185,12 @@ struct brw_surface_state
GLuint cube_neg_y:1;
GLuint cube_pos_x:1;
GLuint cube_neg_x:1;
- GLuint pad:4;
+ GLuint pad:2;
+ /* Required on gen6 for surfaces accessed through render cache messages.
+ */
+ GLuint render_cache_read_write:1;
+ /* Ironlake and newer: average the cube corner texels instead of replicating one of them */
+ GLuint cube_corner_average:1;
GLuint mipmap_layout_mode:1;
GLuint vert_line_stride_ofs:1;
GLuint vert_line_stride:1;
@@ -1539,6 +1560,21 @@ struct brw_instruction
GLuint pad0:12;
} if_else;
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
struct {
GLuint function:4;
GLuint int_type:1;
@@ -1636,6 +1672,18 @@ struct brw_instruction
struct {
GLuint binding_table_index:8;
+ GLuint msg_control:3;
+ GLuint msg_type:3;
+ GLuint target_cache:2;
+ GLuint response_length:4;
+ GLuint msg_length:4;
+ GLuint msg_target:4;
+ GLuint pad1:3;
+ GLuint end_of_thread:1;
+ } dp_read_g4x;
+
+ struct {
+ GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 4a41c7a517..6ae75d22c1 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -99,8 +99,8 @@ static void do_vs_prog( struct brw_context *brw,
(void) ctx;
aux_size = sizeof(c.prog_data);
- if (c.vp->use_const_buffer)
- aux_size += c.vp->program.Base.Parameters->NumParameters;
+ /* constant_map */
+ aux_size += c.vp->program.Base.Parameters->NumParameters;
drm_intel_bo_unreference(brw->vs.prog_bo);
brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
@@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
+ key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_POINT */
if (ctx->Point.PointSprite) {
@@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT,
+ .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 9338a6b7db..0b88cc1ec7 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -44,6 +44,7 @@ struct brw_vs_prog_key {
GLuint nr_userclip:4;
GLuint copy_edgeflag:1;
GLuint point_coord_replace:8;
+ GLuint two_side_color: 1;
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a13c3cae0b..0411ce0b36 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -140,9 +140,13 @@ clear_current_const(struct brw_vs_compile *c)
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
struct intel_context *intel = &c->func.brw->intel;
- GLuint i, reg = 0, mrf;
+ GLuint i, reg = 0, mrf, j;
int attributes_in_vue;
int first_reladdr_output;
+ int max_constant;
+ int constant = 0;
+ int vert_result_reoder[VERT_RESULT_MAX];
+ int bfc = 0;
/* Determine whether to use a real constant buffer or use a block
* of GRF registers for constants. The later is faster but only
@@ -181,62 +185,81 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
- /* Vertex program parameters from curbe:
+ /* Assign some (probably all) of the vertex program constants to
+ * the push constant buffer/CURBE.
+ *
+ * There's an obvious limit to the number of push constants equal to
+ * the number of registers available, and that number is smaller
+ * than the minimum maximum number of vertex program parameters, so
+ * support for pull constants is required if we overflow.
+ * Additionally, on gen6 the number of push constants is even
+ * lower.
+ *
+ * When there's relative addressing, we don't know what range of
+ * Mesa IR registers can be accessed. And generally, when relative
+ * addressing is used we also have too many constants to load them
+ * all as push constants. So, we'll just support relative
+ * addressing out of the pull constant buffers, and try to load as
+ * many statically-accessed constants into the push constant buffer
+ * as we can.
*/
- if (c->vp->use_const_buffer) {
- int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
- int constant = 0;
-
- /* We've got more constants than we can load with the push
- * mechanism. This is often correlated with reladdr loads where
- * we should probably be using a pull mechanism anyway to avoid
- * excessive reading. However, the pull mechanism is slow in
- * general. So, we try to allocate as many non-reladdr-loaded
- * constants through the push buffer as we can before giving up.
- */
- memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
- for (i = 0;
- i < c->vp->program.Base.NumInstructions && constant < max_constant;
- i++) {
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
- int arg;
-
- for (arg = 0; arg < 3 && constant < max_constant; arg++) {
- if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
- inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
- inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
- inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
- inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) ||
- inst->SrcReg[arg].RelAddr)
- continue;
-
- if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
- c->constant_map[inst->SrcReg[arg].Index] = constant++;
- }
+ if (intel->gen >= 6) {
+ /* We can only load 32 regs of push constants. */
+ max_constant = 32 * 2 - c->key.nr_userclip;
+ } else {
+ max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
+ }
+
+ /* constant_map maps from ParameterValues[] index to index in the
+ * push constant buffer, or -1 if it's only in the pull constant
+ * buffer.
+ */
+ memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
+ for (i = 0;
+ i < c->vp->program.Base.NumInstructions && constant < max_constant;
+ i++) {
+ struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
+ int arg;
+
+ for (arg = 0; arg < 3 && constant < max_constant; arg++) {
+ if (inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
+ inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
+ inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
+ inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
+ inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) {
+ continue;
+ }
+
+ if (inst->SrcReg[arg].RelAddr) {
+ c->vp->use_const_buffer = GL_TRUE;
+ continue;
}
- }
- for (i = 0; i < constant; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2,
- (i%2) * 4),
- 0, 4, 1);
+ if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
+ c->constant_map[inst->SrcReg[arg].Index] = constant++;
+ }
}
- reg += (constant + 1) / 2;
- c->prog_data.curb_read_length = reg - 1;
- /* XXX 0 causes a bug elsewhere... */
- c->prog_data.nr_params = MAX2(constant * 4, 4);
}
- else {
- /* use a section of the GRF for constants */
- GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
- }
- reg += (nr_params + 1) / 2;
- c->prog_data.curb_read_length = reg - 1;
- c->prog_data.nr_params = nr_params * 4;
+ /* If we ran out of push constant space, then we'll also upload all
+ * constants through the pull constant buffer so that they can be
+ * accessed no matter what. For relative addressing (the common
+ * case) we need them all in place anyway.
+ */
+ if (constant == max_constant)
+ c->vp->use_const_buffer = GL_TRUE;
+
+ for (i = 0; i < constant; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride(brw_vec4_grf(reg + i / 2,
+ (i % 2) * 4),
+ 0, 4, 1);
}
+ reg += (constant + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+ c->prog_data.nr_params = constant * 4;
+ /* XXX 0 causes a bug elsewhere... */
+ if (intel->gen < 6 && c->prog_data.nr_params == 0)
+ c->prog_data.nr_params = 4;
/* Allocate input regs:
*/
@@ -270,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf = 4;
first_reladdr_output = get_first_reladdr_output(&c->vp->program);
- for (i = 0; i < VERT_RESULT_MAX; i++) {
+
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ vert_result_reoder[i] = i;
+
+ /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */
+ if (intel->gen >= 6 && c->key.two_side_color) {
+ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+ (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+ assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+ assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+ bfc = 2;
+ } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+ (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)))
+ bfc = 1;
+
+ if (bfc) {
+ for (i = 0; i < bfc; i++) {
+ vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i;
+ vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i;
+ }
+
+ for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) {
+ vert_result_reoder[i] = i - bfc;
+ }
+ }
+ }
+
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ i = vert_result_reoder[j];
+
if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
c->nr_outputs++;
assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
@@ -281,7 +333,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
else if (i == VERT_RESULT_PSIZ) {
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
- mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
else {
/* Two restrictions on our compute-to-MRF here. The
@@ -607,6 +658,22 @@ static void emit_min( struct brw_compile *p,
}
}
+static void emit_arl(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6) {
+ struct brw_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
+
+ brw_RNDD(p, dst_f, src);
+ brw_MOV(p, dst, dst_f);
+ } else {
+ brw_RNDD(p, dst, src);
+ }
+}
+
static void emit_math1_gen4(struct brw_vs_compile *c,
GLuint function,
struct brw_reg dst,
@@ -698,7 +765,7 @@ emit_math1(struct brw_vs_compile *c,
emit_math1_gen4(c, function, dst, arg0, precision);
}
-static void emit_math2( struct brw_vs_compile *c,
+static void emit_math2_gen4( struct brw_vs_compile *c,
GLuint function,
struct brw_reg dst,
struct brw_reg arg0,
@@ -706,14 +773,11 @@ static void emit_math2( struct brw_vs_compile *c,
GLuint precision)
{
struct brw_compile *p = &c->func;
- struct intel_context *intel = &p->brw->intel;
struct brw_reg tmp = dst;
GLboolean need_tmp = GL_FALSE;
- if (dst.file != BRW_GENERAL_REGISTER_FILE)
- need_tmp = GL_TRUE;
-
- if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf)
+ if (dst.file != BRW_GENERAL_REGISTER_FILE ||
+ dst.dw1.bits.writemask != 0xf)
need_tmp = GL_TRUE;
if (need_tmp)
@@ -736,6 +800,53 @@ static void emit_math2( struct brw_vs_compile *c,
}
}
+static void emit_math2_gen6( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint precision)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp_src0, tmp_src1, tmp_dst;
+
+ tmp_src0 = get_tmp(c);
+ tmp_src1 = get_tmp(c);
+ tmp_dst = get_tmp(c);
+
+ brw_MOV(p, tmp_src0, arg0);
+ brw_MOV(p, tmp_src1, arg1);
+
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_math2(p,
+ tmp_dst,
+ function,
+ tmp_src0,
+ tmp_src1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ brw_MOV(p, dst, tmp_dst);
+
+ release_tmp(c, tmp_src0);
+ release_tmp(c, tmp_src1);
+ release_tmp(c, tmp_dst);
+}
+
+static void emit_math2( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint precision)
+{
+ struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6)
+ emit_math2_gen6(c, function, dst, arg0, arg1, precision);
+ else
+ emit_math2_gen4(c, function, dst, arg0, arg1, precision);
+}
static void emit_exp_noalias( struct brw_vs_compile *c,
struct brw_reg dst,
@@ -1008,8 +1119,6 @@ get_constant(struct brw_vs_compile *c,
assert(argIndex < 3);
- assert(c->func.brw->intel.gen < 6); /* FINISHME */
-
if (c->current_const[argIndex].index != src->Index) {
/* Keep track of the last constant loaded in this slot, for reuse. */
c->current_const[argIndex].index = src->Index;
@@ -1027,7 +1136,7 @@ get_constant(struct brw_vs_compile *c,
}
/* replicate lower four floats into upper half (to get XYZWXYZW) */
- const_reg = stride(const_reg, 0, 4, 0);
+ const_reg = stride(const_reg, 0, 4, 1);
const_reg.subnr = 0;
return const_reg;
@@ -1040,14 +1149,14 @@ get_reladdr_constant(struct brw_vs_compile *c,
{
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
struct brw_reg const_reg = c->current_const[argIndex].reg;
- struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
- struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ uint32_t offset;
assert(argIndex < 3);
- assert(c->func.brw->intel.gen < 6); /* FINISHME */
-
/* Can't reuse a reladdr constant load. */
c->current_const[argIndex].index = -1;
@@ -1056,15 +1165,21 @@ get_reladdr_constant(struct brw_vs_compile *c,
src->Index, argIndex, c->current_const[argIndex].reg.nr);
#endif
- brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16));
+ if (intel->gen >= 6) {
+ offset = src->Index;
+ } else {
+ struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D);
+ brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16));
+ addr_reg = byte_addr_reg;
+ offset = 16 * src->Index;
+ }
/* fetch the first vec4 */
brw_dp_READ_4_vs_relative(p,
- const_reg, /* writeback dest */
- byte_addr_reg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
- );
+ const_reg,
+ addr_reg,
+ offset,
+ SURF_INDEX_VERT_CONST_BUFFER);
return const_reg;
}
@@ -1259,22 +1374,18 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_UNIFORM:
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
- if (c->vp->use_const_buffer) {
- if (!relAddr && c->constant_map[index] != -1) {
- assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
- } else if (relAddr)
+ if (!relAddr && c->constant_map[index] != -1) {
+ /* Take from the push constant buffer if possible. */
+ assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
+ } else {
+ /* Must be in the pull constant buffer then. */
+ assert(c->vp->use_const_buffer);
+ if (relAddr)
return get_reladdr_constant(c, inst, argIndex);
else
return get_constant(c, inst, argIndex);
}
- else if (relAddr) {
- return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16);
- }
- else {
- assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][index];
- }
case PROGRAM_ADDRESS:
assert(index == 0);
return c->regs[file][index];
@@ -1315,11 +1426,10 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
GET_SWZ(src->Swizzle, 1),
GET_SWZ(src->Swizzle, 2),
GET_SWZ(src->Swizzle, 3));
- }
- /* Note this is ok for non-swizzle instructions:
- */
- reg.negate = src->Negate ? 1 : 0;
+ /* Note this is ok for non-swizzle ARB_vp instructions */
+ reg.negate = src->Negate ? 1 : 0;
+ }
return reg;
}
@@ -1603,6 +1713,8 @@ static void emit_vertex_write( struct brw_vs_compile *c)
break;
if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i)))
continue;
+ if (i == VERT_RESULT_PSIZ)
+ continue;
if (i >= VERT_RESULT_TEX0 &&
c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) {
@@ -1830,6 +1942,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
switch (inst->Opcode) {
case OPCODE_ABS:
+ args[0].negate = false;
brw_MOV(p, dst, brw_abs(args[0]));
break;
case OPCODE_ADD:
@@ -1866,7 +1979,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_ARL:
- brw_RNDD(p, dst, args[0]);
+ emit_arl(p, dst, args[0]);
break;
case OPCODE_FLR:
brw_RNDD(p, dst, args[0]);
@@ -1913,7 +2026,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, brw_abs(args[0]), BRW_MATH_PRECISION_FULL);
break;
case OPCODE_SEQ:
@@ -1987,35 +2100,42 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_CONT:
brw_set_predicate_control(p, get_predicate(inst));
- brw_CONT(p, if_depth_in_loop[loop_depth]);
+ if (intel->gen >= 6) {
+ brw_CONT_gen6(p, loop_inst[loop_depth - 1]);
+ } else {
+ brw_CONT(p, if_depth_in_loop[loop_depth]);
+ }
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case OPCODE_ENDLOOP:
- {
- clear_current_const(c);
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- loop_depth--;
-
- if (intel->gen == 5)
- br = 2;
-
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BEGINLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+
+ case OPCODE_ENDLOOP: {
+ clear_current_const(c);
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ loop_depth--;
+
+ if (intel->gen == 5)
+ br = 2;
+
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ }
break;
+
case OPCODE_BRA:
brw_set_predicate_control(p, get_predicate(inst));
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
@@ -2106,6 +2226,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
}
brw_resolve_cals(p);
+ brw_set_uip_jip(p);
brw_optimize(p);
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index eabac51160..b0b05445eb 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -82,6 +82,15 @@ prepare_vs_constants(struct brw_context *brw)
params->ParameterValues[i],
4 * sizeof(float));
}
+
+ if (0) {
+ for (i = 0; i < params->NumParameters; i++) {
+ float *row = (float *)brw->vs.const_bo->virtual + i * 4;
+ printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
+ i, row[0], row[1], row[2], row[3]);
+ }
+ }
+
drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);
brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 3d7a98c981..100a21b59d 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -203,4 +203,5 @@ void brwInitVtbl( struct brw_context *brw )
brw->intel.vtbl.destroy = brw_destroy_context;
brw->intel.vtbl.set_draw_region = brw_set_draw_region;
brw->intel.vtbl.debug_batch = brw_debug_batch;
+ brw->intel.vtbl.render_target_supported = brw_render_target_supported;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index ccdc18e0b8..656501b4f7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_emit(c);
}
+static void
+brw_wm_payload_setup(struct brw_context *brw,
+ struct brw_wm_compile *c)
+{
+ struct intel_context *intel = &brw->intel;
+ bool uses_depth = (c->fp->program.Base.InputsRead &
+ (1 << FRAG_ATTRIB_WPOS)) != 0;
+
+ if (intel->gen >= 6) {
+ /* R0-1: masks, pixel X/Y coordinates. */
+ c->nr_payload_regs = 2;
+ /* R2: only for 32-pixel dispatch.*/
+ /* R3-4: perspective pixel location barycentric */
+ c->nr_payload_regs += 2;
+ /* R5-6: perspective pixel location bary for dispatch width != 8 */
+ if (c->dispatch_width == 16) {
+ c->nr_payload_regs += 2;
+ }
+ /* R7-10: perspective centroid barycentric */
+ /* R11-14: perspective sample barycentric */
+ /* R15-18: linear pixel location barycentric */
+ /* R19-22: linear centroid barycentric */
+ /* R23-26: linear sample barycentric */
+
+ /* R27: interpolated depth if uses source depth */
+ if (uses_depth) {
+ c->source_depth_reg = c->nr_payload_regs;
+ c->nr_payload_regs++;
+ if (c->dispatch_width == 16) {
+ /* R28: interpolated depth if not 8-wide. */
+ c->nr_payload_regs++;
+ }
+ }
+ /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
+ */
+ if (uses_depth) {
+ c->source_w_reg = c->nr_payload_regs;
+ c->nr_payload_regs++;
+ if (c->dispatch_width == 16) {
+ /* R30: interpolated W if not 8-wide. */
+ c->nr_payload_regs++;
+ }
+ }
+ /* R31: MSAA position offsets. */
+ /* R32-: bary for 32-pixel. */
+ /* R58-59: interp W for 32-pixel. */
+
+ if (c->fp->program.Base.OutputsWritten &
+ BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ c->source_depth_to_render_target = GL_TRUE;
+ c->computes_depth = GL_TRUE;
+ }
+ } else {
+ brw_wm_lookup_iz(intel, c);
+ }
+}
/**
* All Mesa program -> GPU code generation goes through this function.
@@ -167,23 +223,18 @@ static void do_wm_prog( struct brw_context *brw,
brw_init_compile(brw, &c->func);
- /* temporary sanity check assertion */
- ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+ brw_wm_payload_setup(brw, c);
if (!brw_wm_fs_emit(brw, c)) {
/*
* Shader which use GLSL features such as flow control are handled
* differently from "simple" shaders.
*/
- if (fp->isGLSL) {
- c->dispatch_width = 8;
- brw_wm_glsl_emit(brw, c);
- }
- else {
- c->dispatch_width = 16;
- brw_wm_non_glsl_emit(brw, c);
- }
+ c->dispatch_width = 16;
+ brw_wm_payload_setup(brw, c);
+ brw_wm_non_glsl_emit(brw, c);
}
+ c->prog_data.dispatch_width = c->dispatch_width;
/* Scratch space is used for register spilling */
if (c->last_scratch) {
@@ -220,12 +271,10 @@ static void do_wm_prog( struct brw_context *brw,
static void brw_wm_populate_key( struct brw_context *brw,
struct brw_wm_prog_key *key )
{
- struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
- GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
GLuint lookup = 0;
GLuint line_aa;
GLuint i;
@@ -285,57 +334,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
}
}
- if (intel->gen >= 6) {
- /* R0-1: masks, pixel X/Y coordinates. */
- key->nr_payload_regs = 2;
- /* R2: only for 32-pixel dispatch.*/
- /* R3-4: perspective pixel location barycentric */
- key->nr_payload_regs += 2;
- /* R5-6: perspective pixel location bary for dispatch width != 8 */
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- key->nr_payload_regs += 2;
- }
- /* R7-10: perspective centroid barycentric */
- /* R11-14: perspective sample barycentric */
- /* R15-18: linear pixel location barycentric */
- /* R19-22: linear centroid barycentric */
- /* R23-26: linear sample barycentric */
-
- /* R27: interpolated depth if uses source depth */
- if (uses_depth) {
- key->source_depth_reg = key->nr_payload_regs;
- key->nr_payload_regs++;
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- /* R28: interpolated depth if not 8-wide. */
- key->nr_payload_regs++;
- }
- }
- /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
- */
- if (uses_depth) {
- key->source_w_reg = key->nr_payload_regs;
- key->nr_payload_regs++;
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- /* R30: interpolated W if not 8-wide. */
- key->nr_payload_regs++;
- }
- }
- /* R31: MSAA position offsets. */
- /* R32-: bary for 32-pixel. */
- /* R58-59: interp W for 32-pixel. */
-
- if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
- key->source_depth_to_render_target = GL_TRUE;
- key->computes_depth = GL_TRUE;
- }
-
- } else {
- brw_wm_lookup_iz(intel,
- line_aa,
- lookup,
- uses_depth,
- key);
- }
+ key->iz_lookup = lookup;
+ key->line_aa = line_aa;
+ key->stats_wm = brw->intel.stats_wm;
/* BRW_NEW_WM_INPUT_DIMENSIONS */
key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
@@ -377,6 +378,10 @@ static void brw_wm_populate_key( struct brw_context *brw,
swizzles[2] = SWIZZLE_ZERO;
} else if (t->DepthMode == GL_LUMINANCE) {
swizzles[3] = SWIZZLE_ONE;
+ } else if (t->DepthMode == GL_RED) {
+ swizzles[1] = SWIZZLE_ZERO;
+ swizzles[2] = SWIZZLE_ZERO;
+ swizzles[3] = SWIZZLE_ZERO;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 2ca685784f..d9cae75ab5 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -59,16 +59,9 @@
#define AA_ALWAYS 2
struct brw_wm_prog_key {
- GLuint source_depth_reg:3;
- GLuint source_w_reg:3;
- GLuint aa_dest_stencil_reg:3;
- GLuint dest_depth_reg:3;
- GLuint nr_payload_regs:4;
- GLuint computes_depth:1; /* could be derived from program string */
- GLuint source_depth_to_render_target:1;
+ GLuint stats_wm:1;
GLuint flat_shade:1;
GLuint linear_color:1; /**< linear interpolation vs perspective interp */
- GLuint runtime_check_aads_emit:1;
GLuint nr_color_regions:5;
GLuint render_to_fbo:1;
@@ -81,6 +74,8 @@ struct brw_wm_prog_key {
GLushort drawable_height;
GLbitfield64 vp_outputs_written;
+ GLuint iz_lookup;
+ GLuint line_aa;
GLuint program_string_id:32;
};
@@ -204,6 +199,15 @@ struct brw_wm_compile {
PASS2_DONE
} state;
+ GLuint source_depth_reg:3;
+ GLuint source_w_reg:3;
+ GLuint aa_dest_stencil_reg:3;
+ GLuint dest_depth_reg:3;
+ GLuint nr_payload_regs:4;
+ GLuint computes_depth:1; /* could be derived from program string */
+ GLuint source_depth_to_render_target:1;
+ GLuint runtime_check_aads_emit:1;
+
/* Initial pass - translate fp instructions to fp instructions,
* simplifying and adding instructions for interpolation and
* framebuffer writes.
@@ -306,14 +310,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
void brw_wm_print_program( struct brw_wm_compile *c,
const char *stage );
-void brw_wm_lookup_iz( struct intel_context *intel,
- GLuint line_aa,
- GLuint lookup,
- GLboolean ps_uses_depth,
- struct brw_wm_prog_key *key );
+void brw_wm_lookup_iz(struct intel_context *intel,
+ struct brw_wm_compile *c);
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
/* brw_wm_emit.c */
@@ -381,7 +380,6 @@ void emit_fb_write(struct brw_wm_compile *c,
void emit_frontfacing(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask);
-void emit_kil_nv(struct brw_wm_compile *c);
void emit_linterp(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -476,5 +474,6 @@ struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint typ
struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name);
bool brw_color_buffer_write_enabled(struct brw_context *brw);
+bool brw_render_target_supported(gl_format format);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 96fecc97ee..2336e27c1e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -219,43 +219,45 @@ void emit_wpos_xy(struct brw_wm_compile *c,
const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W);
+ struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W);
if (mask & WRITEMASK_X) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_x_f, delta_x);
+ delta_x = delta_x_f;
+ }
+
if (c->fp->program.PixelCenterInteger) {
/* X' = X */
- brw_MOV(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[0], delta_x);
} else {
/* X' = X + 0.5 */
- brw_ADD(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5));
}
}
if (mask & WRITEMASK_Y) {
+ if (intel->gen >= 6) {
+ struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F);
+ brw_MOV(p, delta_y_f, delta_y);
+ delta_y = delta_y_f;
+ }
+
if (c->fp->program.OriginUpperLeft) {
if (c->fp->program.PixelCenterInteger) {
/* Y' = Y */
- brw_MOV(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W));
+ brw_MOV(p, dst[1], delta_y);
} else {
- /* Y' = Y + 0.5 */
- brw_ADD(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_W),
- brw_imm_f(0.5));
+ brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5));
}
} else {
float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
/* Y' = (height - 1) - Y + center */
- brw_ADD(p,
- dst[1],
- negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+ brw_ADD(p, dst[1], negate(delta_y),
brw_imm_f(c->key.drawable_height - 1 + center_offset));
}
}
@@ -896,10 +898,14 @@ void emit_math1(struct brw_wm_compile *c,
BRW_MATH_SATURATE_NONE);
struct brw_reg src;
- if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
- arg0[0].file != BRW_GENERAL_REGISTER_FILE)) {
+ if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
+ arg0[0].file != BRW_GENERAL_REGISTER_FILE) ||
+ arg0[0].negate || arg0[0].abs)) {
/* Gen6 math requires that source and dst horizontal stride be 1,
* and that the argument be in the GRF.
+ *
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
*/
src = dst[dst_chan];
brw_MOV(p, src, arg0[0]);
@@ -967,34 +973,23 @@ void emit_math2(struct brw_wm_compile *c,
struct brw_reg temp_dst = dst[dst_chan];
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- /* Both scalar arguments. Do scalar calc. */
- src0.hstride = BRW_HORIZONTAL_STRIDE_1;
- src1.hstride = BRW_HORIZONTAL_STRIDE_1;
- temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
- temp_dst.width = BRW_WIDTH_1;
-
- if (arg0[0].subnr != 0) {
- brw_MOV(p, temp_dst, src0);
- src0 = temp_dst;
-
- /* Ouch. We've used the temp as a dst, and we still
- * need a temp to store arg1 in, because src and dst
- * offsets have to be equal. Leaving this up to
- * glsl2-965 to handle correctly.
- */
- assert(arg1[0].subnr == 0);
- } else if (arg1[0].subnr != 0) {
- brw_MOV(p, temp_dst, src1);
- src1 = temp_dst;
- }
- } else {
- brw_MOV(p, temp_dst, src0);
- src0 = temp_dst;
- }
- } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- brw_MOV(p, temp_dst, src1);
- src1 = temp_dst;
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+ }
+
+ if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ /* This is a heinous hack to get a temporary register for use
+ * in case both arg0 and arg1 are constants. Why you're
+ * doing exponentiation on constant values in the shader, we
+ * don't know.
+ *
+ * max_wm_grf is almost surely less than the maximum GRF, and
+ * gen6 doesn't care about the number of GRFs used in a
+ * shader like pre-gen6 did.
+ */
+ struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
+ brw_MOV(p, temp, src1);
+ src1 = temp;
}
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
@@ -1012,14 +1007,6 @@ void emit_math2(struct brw_wm_compile *c,
sechalf(src0),
sechalf(src1));
}
-
- /* Splat a scalar result into all the channels. */
- if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
- arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
- temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
- temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
- brw_MOV(p, dst[dst_chan], temp_dst);
- }
} else {
GLuint saturate = ((mask & SATURATE) ?
BRW_MATH_SATURATE_SATURATE :
@@ -1301,9 +1288,15 @@ static void emit_kil( struct brw_wm_compile *c,
struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg pixelmask;
GLuint i, j;
+ if (intel->gen >= 6)
+ pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ else
+ pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
for (i = 0; i < 4; i++) {
/* Check if we've already done the comparison for this reg
* -- common when someone does KIL TEMP.wwww.
@@ -1319,26 +1312,11 @@ static void emit_kil( struct brw_wm_compile *c,
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
brw_set_predicate_control_flag_value(p, 0xff);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+ brw_AND(p, pixelmask, brw_flag_reg(), pixelmask);
brw_pop_insn_state(p);
}
}
-/* KIL_NV kills the pixels that are currently executing, not based on a test
- * of the arguments.
- */
-void emit_kil_nv( struct brw_wm_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
- brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
- brw_pop_insn_state(p);
-}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
@@ -1355,9 +1333,11 @@ static void fire_fb_write( struct brw_wm_compile *c,
dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
/* Pass through control information:
+ *
+ * On gen6, emit_fb_write() has already done the m1 mov for the current SIMD16 case.
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
- if (intel->gen < 6) /* gen6, use headerless for fb write */
+ if (intel->gen < 6)
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
@@ -1378,7 +1358,8 @@ static void fire_fb_write( struct brw_wm_compile *c,
target,
nr,
0,
- eot);
+ eot,
+ GL_TRUE);
}
@@ -1387,8 +1368,8 @@ static void emit_aa( struct brw_wm_compile *c,
GLuint reg )
{
struct brw_compile *p = &c->func;
- GLuint comp = c->key.aa_dest_stencil_reg / 2;
- GLuint off = c->key.aa_dest_stencil_reg % 2;
+ GLuint comp = c->aa_dest_stencil_reg / 2;
+ GLuint off = c->aa_dest_stencil_reg % 2;
struct brw_reg aa = offset(arg1[comp], off);
brw_push_insn_state(p);
@@ -1416,11 +1397,10 @@ void emit_fb_write(struct brw_wm_compile *c,
struct intel_context *intel = &brw->intel;
GLuint nr = 2;
GLuint channel;
- int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */
/* Reserve a space for AA - may not be needed:
*/
- if (c->key.aa_dest_stencil_reg)
+ if (c->aa_dest_stencil_reg)
nr += 1;
/* I don't really understand how this achieves the color interleave
@@ -1428,11 +1408,6 @@ void emit_fb_write(struct brw_wm_compile *c,
*/
brw_push_insn_state(p);
- if (intel->gen >= 6)
- base_reg = nr;
- else
- base_reg = 0;
-
for (channel = 0; channel < 4; channel++) {
if (intel->gen >= 6) {
/* gen6 SIMD16 single source DP write looks like:
@@ -1493,9 +1468,9 @@ void emit_fb_write(struct brw_wm_compile *c,
brw_pop_insn_state(p);
- if (c->key.source_depth_to_render_target)
+ if (c->source_depth_to_render_target)
{
- if (c->key.computes_depth)
+ if (c->computes_depth)
brw_MOV(p, brw_message_reg(nr), arg2[2]);
else
brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
@@ -1503,10 +1478,10 @@ void emit_fb_write(struct brw_wm_compile *c,
nr += 2;
}
- if (c->key.dest_depth_reg)
+ if (c->dest_depth_reg)
{
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ GLuint comp = c->dest_depth_reg / 2;
+ GLuint off = c->dest_depth_reg % 2;
if (off != 0) {
brw_push_insn_state(p);
@@ -1524,15 +1499,28 @@ void emit_fb_write(struct brw_wm_compile *c,
}
if (intel->gen >= 6) {
- /* Subtract off the message header, since we send headerless. */
- nr -= 2;
+ /* Load the message header. There's no implied move from src0
+ * to the base mrf on gen6.
+ */
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+
+ if (target != 0) {
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ 0,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(target));
+ }
}
- if (!c->key.runtime_check_aads_emit) {
- if (c->key.aa_dest_stencil_reg)
+ if (!c->runtime_check_aads_emit) {
+ if (c->aa_dest_stencil_reg)
emit_aa(c, arg1, 2);
- fire_fb_write(c, base_reg, nr, target, eot);
+ fire_fb_write(c, 0, nr, target, eot);
}
else {
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
@@ -1897,10 +1885,6 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_kil(c, args[0]);
break;
- case OPCODE_KIL_NV:
- emit_kil_nv(c);
- break;
-
default:
printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 2cae698880..4759b289a0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -338,11 +338,13 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
{
- /* This is only called for producing 1/w in pre-gen6 interp. for
- * gen6, the interp opcodes don't use this argument.
+ /* This is called for producing 1/w in pre-gen6 interp. For gen6,
+ * the interp opcodes don't use this argument. But to keep the
+ * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
+ * into the slot.
*/
if (c->func.brw->intel.gen >= 6)
- return src_undef();
+ return c->delta_xy;
if (src_is_undef(c->pixel_w)) {
struct prog_dst_register pixel_w = get_temp(c);
@@ -373,11 +375,7 @@ static void emit_interp( struct brw_wm_compile *c,
struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
struct prog_src_register deltas;
- if (c->func.brw->intel.gen < 6) {
- deltas = get_delta_xy(c);
- } else {
- deltas = src_undef();
- }
+ deltas = get_delta_xy(c);
/* Need to use PINTERP on attributes which have been
* multiplied by 1/W in the SF program, and LINTERP on those
@@ -1133,6 +1131,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
precalc_lit(c, inst);
break;
+ case OPCODE_RSQ:
+ out = emit_scalar_insn(c, inst);
+ out->SrcReg[0].Abs = GL_TRUE;
+ break;
+
case OPCODE_TEX:
precalc_tex(c, inst);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
deleted file mode 100644
index 7fe8ab1f33..0000000000
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ /dev/null
@@ -1,1035 +0,0 @@
-#include "main/macros.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-
-static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint component);
-
-/**
- * Determine if the given fragment program uses GLSL features such
- * as flow conditionals, loops, subroutines.
- * Some GLSL shaders may use these features, others might not.
- */
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
-{
- int i;
-
- if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE))
- return GL_TRUE;
-
- for (i = 0; i < fp->Base.NumInstructions; i++) {
- const struct prog_instruction *inst = &fp->Base.Instructions[i];
- switch (inst->Opcode) {
- case OPCODE_ARL:
- case OPCODE_IF:
- case OPCODE_ENDIF:
- case OPCODE_CAL:
- case OPCODE_BRK:
- case OPCODE_RET:
- case OPCODE_BGNLOOP:
- return GL_TRUE;
- default:
- break;
- }
- }
- return GL_FALSE;
-}
-
-
-
-static void
-reclaim_temps(struct brw_wm_compile *c);
-
-
-/** Mark GRF register as used. */
-static void
-prealloc_grf(struct brw_wm_compile *c, int r)
-{
- c->used_grf[r] = GL_TRUE;
-}
-
-
-/** Mark given GRF register as not in use. */
-static void
-release_grf(struct brw_wm_compile *c, int r)
-{
- /*assert(c->used_grf[r]);*/
- c->used_grf[r] = GL_FALSE;
- c->first_free_grf = MIN2(c->first_free_grf, r);
-}
-
-
-/** Return index of a free GRF, mark it as used. */
-static int
-alloc_grf(struct brw_wm_compile *c)
-{
- GLuint r;
- for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
- if (!c->used_grf[r]) {
- c->used_grf[r] = GL_TRUE;
- c->first_free_grf = r + 1; /* a guess */
- return r;
- }
- }
-
- /* no free temps, try to reclaim some */
- reclaim_temps(c);
- c->first_free_grf = 0;
-
- /* try alloc again */
- for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
- if (!c->used_grf[r]) {
- c->used_grf[r] = GL_TRUE;
- c->first_free_grf = r + 1; /* a guess */
- return r;
- }
- }
-
- for (r = 0; r < BRW_WM_MAX_GRF; r++) {
- assert(c->used_grf[r]);
- }
-
- /* really, no free GRF regs found */
- if (!c->out_of_regs) {
- /* print warning once per compilation */
- _mesa_warning(NULL, "i965: ran out of registers for fragment program");
- c->out_of_regs = GL_TRUE;
- }
-
- return -1;
-}
-
-
-/** Return number of GRF registers used */
-static int
-num_grf_used(const struct brw_wm_compile *c)
-{
- int r;
- for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
- if (c->used_grf[r])
- return r + 1;
- return 0;
-}
-
-
-
-/**
- * Record the mapping of a Mesa register to a hardware register.
- */
-static void set_reg(struct brw_wm_compile *c, int file, int index,
- int component, struct brw_reg reg)
-{
- c->wm_regs[file][index][component].reg = reg;
- c->wm_regs[file][index][component].inited = GL_TRUE;
-}
-
-static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
-{
- struct brw_reg reg;
-
- /* if we need to allocate another temp, grow the tmp_regs[] array */
- if (c->tmp_index == c->tmp_max) {
- int r = alloc_grf(c);
- if (r < 0) {
- /*printf("Out of temps in %s\n", __FUNCTION__);*/
- r = 50; /* XXX random register! */
- }
- c->tmp_regs[ c->tmp_max++ ] = r;
- }
-
- /* form the GRF register */
- reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
- /*printf("alloc_temp %d\n", reg.nr);*/
- assert(reg.nr < BRW_WM_MAX_GRF);
- return reg;
-
-}
-
-/**
- * Save current temp register info.
- * There must be a matching call to release_tmps().
- */
-static int mark_tmps(struct brw_wm_compile *c)
-{
- return c->tmp_index;
-}
-
-static void release_tmps(struct brw_wm_compile *c, int mark)
-{
- c->tmp_index = mark;
-}
-
-/**
- * Convert Mesa src register to brw register.
- *
- * Since we're running in SOA mode each Mesa register corresponds to four
- * hardware registers. We allocate the hardware registers as needed here.
- *
- * \param file register file, one of PROGRAM_x
- * \param index register number
- * \param component src component (X=0, Y=1, Z=2, W=3)
- * \param nr not used?!?
- * \param neg negate value?
- * \param abs take absolute value?
- */
-static struct brw_reg
-get_reg(struct brw_wm_compile *c, int file, int index, int component,
- int nr, GLuint neg, GLuint abs)
-{
- struct brw_reg reg;
- switch (file) {
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- case PROGRAM_UNIFORM:
- file = PROGRAM_STATE_VAR;
- break;
- case PROGRAM_UNDEFINED:
- return brw_null_reg();
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
- case PROGRAM_PAYLOAD:
- break;
- default:
- _mesa_problem(NULL, "Unexpected file in get_reg()");
- return brw_null_reg();
- }
-
- assert(index < 256);
- assert(component < 4);
-
- /* see if we've already allocated a HW register for this Mesa register */
- if (c->wm_regs[file][index][component].inited) {
- /* yes, re-use */
- reg = c->wm_regs[file][index][component].reg;
- }
- else {
- /* no, allocate new register */
- int grf = alloc_grf(c);
- /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
- if (grf < 0) {
- /* totally out of temps */
- grf = 51; /* XXX random register! */
- }
-
- reg = brw_vec8_grf(grf, 0);
- /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
-
- set_reg(c, file, index, component, reg);
- }
-
- if (neg & (1 << component)) {
- reg = negate(reg);
- }
- if (abs)
- reg = brw_abs(reg);
- return reg;
-}
-
-
-
-/**
- * This is called if we run out of GRF registers. Examine the live intervals
- * of temp regs in the program and free those which won't be used again.
- */
-static void
-reclaim_temps(struct brw_wm_compile *c)
-{
- GLint intBegin[MAX_PROGRAM_TEMPS];
- GLint intEnd[MAX_PROGRAM_TEMPS];
- int index;
-
- /*printf("Reclaim temps:\n");*/
-
- _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
- intBegin, intEnd);
-
- for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
- if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
- /* program temp[i] can be freed */
- int component;
- /*printf(" temp[%d] is dead\n", index);*/
- for (component = 0; component < 4; component++) {
- if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
- int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
- release_grf(c, r);
- /*
- printf(" Reclaim temp %d, reg %d at inst %d\n",
- index, r, c->cur_inst);
- */
- c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
- }
- }
- }
- }
-}
-
-
-
-
-/**
- * Preallocate registers. This sets up the Mesa to hardware register
- * mapping for certain registers, such as constants (uniforms/state vars)
- * and shader inputs.
- */
-static void prealloc_reg(struct brw_wm_compile *c)
-{
- struct intel_context *intel = &c->func.brw->intel;
- int i, j;
- struct brw_reg reg;
- int urb_read_length = 0;
- GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
- GLuint reg_index = 0;
-
- memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
- c->first_free_grf = 0;
-
- for (i = 0; i < 4; i++) {
- if (i < (c->key.nr_payload_regs + 1) / 2)
- reg = brw_vec8_grf(i * 2, 0);
- else
- reg = brw_vec8_grf(0, 0);
- set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
- }
- set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0,
- brw_vec8_grf(c->key.source_w_reg, 0));
- reg_index += c->key.nr_payload_regs;
-
- /* constants */
- {
- const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
- const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
-
- /* use a real constant buffer, or just use a section of the GRF? */
- /* XXX this heuristic may need adjustment... */
- if ((nr_params + nr_temps) * 4 + reg_index > 80) {
- for (i = 0; i < nr_params; i++) {
- float *pv = c->fp->program.Base.Parameters->ParameterValues[i];
- for (j = 0; j < 4; j++) {
- c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j];
- c->prog_data.nr_pull_params++;
- }
- }
-
- c->prog_data.nr_params = 0;
- }
- /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
-
- if (!c->prog_data.nr_pull_params) {
- const struct gl_program_parameter_list *plist =
- c->fp->program.Base.Parameters;
- int index = 0;
-
- /* number of float constants in CURBE */
- c->prog_data.nr_params = 4 * nr_params;
-
- /* loop over program constants (float[4]) */
- for (i = 0; i < nr_params; i++) {
- /* loop over XYZW channels */
- for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(reg_index + index / 8, index % 8);
- /* Save pointer to parameter/constant value.
- * Constants will be copied in prepare_constant_buffer()
- */
- c->prog_data.param[index] = &plist->ParameterValues[i][j];
- set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
- }
- }
- /* number of constant regs used (each reg is float[8]) */
- c->nr_creg = ALIGN(nr_params, 2) / 2;
- reg_index += c->nr_creg;
- }
- }
-
- /* fragment shader inputs: One 2-reg pair of interpolation
- * coefficients for each vec4 to be set up.
- */
- if (intel->gen >= 6) {
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i)))
- continue;
-
- reg = brw_vec8_grf(reg_index, 0);
- for (j = 0; j < 4; j++) {
- set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
- }
- reg_index += 2;
- }
- urb_read_length = reg_index;
- } else {
- for (i = 0; i < VERT_RESULT_MAX; i++) {
- int fp_input;
-
- if (i >= VERT_RESULT_VAR0)
- fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
- else if (i <= VERT_RESULT_TEX7)
- fp_input = i;
- else
- fp_input = -1;
-
- if (fp_input >= 0 && inputs & (1 << fp_input)) {
- urb_read_length = reg_index;
- reg = brw_vec8_grf(reg_index, 0);
- for (j = 0; j < 4; j++)
- set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
- }
- if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
- reg_index += 2;
- }
- }
- }
-
- c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
- c->prog_data.urb_read_length = urb_read_length;
- c->prog_data.curb_read_length = c->nr_creg;
- c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
- reg_index++;
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
- reg_index += 2;
-
- /* mark GRF regs [0..reg_index-1] as in-use */
- for (i = 0; i < reg_index; i++)
- prealloc_grf(c, i);
-
- /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
- prealloc_grf(c, 126);
- prealloc_grf(c, 127);
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
- struct brw_reg dst[4];
-
- switch (inst->Opcode) {
- case OPCODE_TEX:
- case OPCODE_TXB:
- /* Allocate the channels of texture results contiguously,
- * since they are written out that way by the sampler unit.
- */
- for (j = 0; j < 4; j++) {
- dst[j] = get_dst_reg(c, inst, j);
- if (j != 0)
- assert(dst[j].nr == dst[j - 1].nr + 1);
- }
- break;
- default:
- break;
- }
- }
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
-
- switch (inst->Opcode) {
- case WM_DELTAXY:
- /* Allocate WM_DELTAXY destination on G45/GM45 to an
- * even-numbered GRF if possible so that we can use the PLN
- * instruction.
- */
- if (inst->DstReg.WriteMask == WRITEMASK_XY &&
- !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
- !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
- (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
- int grf;
-
- for (grf = c->first_free_grf & ~1;
- grf < BRW_WM_MAX_GRF;
- grf += 2)
- {
- if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
- c->used_grf[grf] = GL_TRUE;
- c->used_grf[grf + 1] = GL_TRUE;
- c->first_free_grf = grf + 2; /* a guess */
-
- set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
- brw_vec8_grf(grf, 0));
- set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
- brw_vec8_grf(grf + 1, 0));
- break;
- }
- }
- }
- default:
- break;
- }
- }
-
- /* An instruction may reference up to three constants.
- * They'll be found in these registers.
- * XXX alloc these on demand!
- */
- if (c->prog_data.nr_pull_params) {
- for (i = 0; i < 3; i++) {
- c->current_const[i].index = -1;
- c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
- }
- }
-#if 0
- printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
- printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
-#endif
-}
-
-
-/**
- * Check if any of the instruction's src registers are constants, uniforms,
- * or statevars. If so, fetch any constants that we don't already have in
- * the three GRF slots.
- */
-static void fetch_constants(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint i;
-
- /* loop over instruction src regs */
- for (i = 0; i < 3; i++) {
- const struct prog_src_register *src = &inst->SrcReg[i];
- if (src->File == PROGRAM_STATE_VAR ||
- src->File == PROGRAM_CONSTANT ||
- src->File == PROGRAM_UNIFORM) {
- c->current_const[i].index = src->Index;
-
-#if 0
- printf(" fetch const[%d] for arg %d into reg %d\n",
- src->Index, i, c->current_const[i].reg.nr);
-#endif
-
- /* need to fetch the constant now */
- brw_oword_block_read(p,
- c->current_const[i].reg,
- brw_message_reg(1),
- 16 * src->Index,
- SURF_INDEX_FRAG_CONST_BUFFER);
- }
- }
-}
-
-
-/**
- * Convert Mesa dst register to brw register.
- */
-static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint component)
-{
- const int nr = 1;
- return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
- 0, 0);
-}
-
-
-static struct brw_reg
-get_src_reg_const(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint srcRegIndex, GLuint component)
-{
- /* We should have already fetched the constant from the constant
- * buffer in fetch_constants(). Now we just have to return a
- * register description that extracts the needed component and
- * smears it across all eight vector components.
- */
- const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
- struct brw_reg const_reg;
-
- assert(component < 4);
- assert(srcRegIndex < 3);
- assert(c->current_const[srcRegIndex].index != -1);
- const_reg = c->current_const[srcRegIndex].reg;
-
- /* extract desired float from the const_reg, and smear */
- const_reg = stride(const_reg, 0, 1, 0);
- const_reg.subnr = component * 4;
-
- if (src->Negate & (1 << component))
- const_reg = negate(const_reg);
- if (src->Abs)
- const_reg = brw_abs(const_reg);
-
-#if 0
- printf(" form const[%d].%d for arg %d, reg %d\n",
- c->current_const[srcRegIndex].index,
- component,
- srcRegIndex,
- const_reg.nr);
-#endif
-
- return const_reg;
-}
-
-
-/**
- * Convert Mesa src register to brw register.
- */
-static struct brw_reg get_src_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint srcRegIndex, GLuint channel)
-{
- const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
- const GLuint nr = 1;
- const GLuint component = GET_SWZ(src->Swizzle, channel);
-
- /* Only one immediate value can be used per native opcode, and it
- * has be in the src1 slot, so not all Mesa instructions will get
- * to take advantage of immediate constants.
- */
- if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) {
- const struct gl_program_parameter_list *params;
-
- params = c->fp->program.Base.Parameters;
-
- /* Extended swizzle terms */
- if (component == SWIZZLE_ZERO) {
- return brw_imm_f(0.0F);
- } else if (component == SWIZZLE_ONE) {
- if (src->Negate)
- return brw_imm_f(-1.0F);
- else
- return brw_imm_f(1.0F);
- }
-
- if (src->File == PROGRAM_CONSTANT) {
- float f = params->ParameterValues[src->Index][component];
-
- if (src->Abs)
- f = fabs(f);
- if (src->Negate)
- f = -f;
-
- return brw_imm_f(f);
- }
- }
-
- if (c->prog_data.nr_pull_params &&
- (src->File == PROGRAM_STATE_VAR ||
- src->File == PROGRAM_CONSTANT ||
- src->File == PROGRAM_UNIFORM)) {
- return get_src_reg_const(c, inst, srcRegIndex, component);
- }
- else {
- /* other type of source register */
- return get_reg(c, src->File, src->Index, component, nr,
- src->Negate, src->Abs);
- }
-}
-
-static void emit_arl(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, addr_reg;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_ADDRESS, 0);
- src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
- brw_MOV(p, addr_reg, src0);
- brw_set_saturate(p, 0);
-}
-
-static INLINE struct brw_reg high_words( struct brw_reg reg )
-{
- return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
- 0, 8, 2 );
-}
-
-static INLINE struct brw_reg low_words( struct brw_reg reg )
-{
- return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
-}
-
-static INLINE struct brw_reg even_bytes( struct brw_reg reg )
-{
- return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
-}
-
-static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
-{
- return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
- 0, 16, 2 );
-}
-
-/**
- * Resolve subroutine calls after code emit is done.
- */
-static void post_wm_emit( struct brw_wm_compile *c )
-{
- brw_resolve_cals(&c->func);
-}
-
-static void
-get_argument_regs(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- int index,
- struct brw_reg *dst,
- struct brw_reg *regs,
- int mask)
-{
- struct brw_compile *p = &c->func;
- int i, j;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1 << i)) {
- regs[i] = get_src_reg(c, inst, index, i);
-
- /* Unalias destination registers from our sources. */
- if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
- for (j = 0; j < 4; j++) {
- if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
- struct brw_reg tmp = alloc_tmp(c);
- brw_MOV(p, tmp, regs[i]);
- regs[i] = tmp;
- break;
- }
- }
- }
- }
- }
-}
-
-static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
-{
- struct intel_context *intel = &brw->intel;
-#define MAX_IF_DEPTH 32
-#define MAX_LOOP_DEPTH 32
- struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
- int if_depth_in_loop[MAX_LOOP_DEPTH];
- GLuint i, if_depth = 0, loop_depth = 0;
- struct brw_compile *p = &c->func;
- struct brw_indirect stack_index = brw_indirect(0, 0);
-
- c->out_of_regs = GL_FALSE;
-
- if_depth_in_loop[loop_depth] = 0;
-
- prealloc_reg(c);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
-
- if (intel->gen >= 6)
- brw_set_acc_write_control(p, 1);
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
- int dst_flags;
- struct brw_reg args[3][4], dst[4];
- int j;
- int mark = mark_tmps( c );
-
- c->cur_inst = i;
-
-#if 0
- printf("Inst %d: ", i);
- _mesa_print_instruction(inst);
-#endif
-
- /* fetch any constants that this instruction needs */
- if (c->prog_data.nr_pull_params)
- fetch_constants(c, inst);
-
- if (inst->Opcode != OPCODE_ARL) {
- for (j = 0; j < 4; j++) {
- if (inst->DstReg.WriteMask & (1 << j))
- dst[j] = get_dst_reg(c, inst, j);
- else
- dst[j] = brw_null_reg();
- }
- }
- for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
- get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
-
- dst_flags = inst->DstReg.WriteMask;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- dst_flags |= SATURATE;
-
- if (inst->CondUpdate)
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
- else
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
-
- switch (inst->Opcode) {
- case WM_PIXELXY:
- emit_pixel_xy(c, dst, dst_flags);
- break;
- case WM_DELTAXY:
- emit_delta_xy(p, dst, dst_flags, args[0]);
- break;
- case WM_PIXELW:
- emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
- break;
- case WM_LINTERP:
- emit_linterp(p, dst, dst_flags, args[0], args[1]);
- break;
- case WM_PINTERP:
- emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case WM_CINTERP:
- emit_cinterp(p, dst, dst_flags, args[0]);
- break;
- case WM_WPOSXY:
- emit_wpos_xy(c, dst, dst_flags, args[0]);
- break;
- case WM_FB_WRITE:
- emit_fb_write(c, args[0], args[1], args[2],
- INST_AUX_GET_TARGET(inst->Aux),
- inst->Aux & INST_AUX_EOT);
- break;
- case WM_FRONTFACING:
- emit_frontfacing(p, dst, dst_flags);
- break;
- case OPCODE_ADD:
- emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_ARL:
- emit_arl(c, inst);
- break;
- case OPCODE_FRC:
- emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
- break;
- case OPCODE_FLR:
- emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
- break;
- case OPCODE_LRP:
- emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_TRUNC:
- emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
- break;
- case OPCODE_MOV:
- case OPCODE_SWZ:
- emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
- break;
- case OPCODE_DP2:
- emit_dp2(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DP3:
- emit_dp3(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DP4:
- emit_dp4(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_XPD:
- emit_xpd(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DPH:
- emit_dph(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_RCP:
- emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
- break;
- case OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
- break;
- case OPCODE_SIN:
- emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
- break;
- case OPCODE_COS:
- emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
- break;
- case OPCODE_EX2:
- emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
- break;
- case OPCODE_LG2:
- emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
- break;
- case OPCODE_CMP:
- emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_MIN:
- emit_min(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_MAX:
- emit_max(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DDX:
- case OPCODE_DDY:
- emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
- args[0]);
- break;
- case OPCODE_SLT:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_L, args[0], args[1]);
- break;
- case OPCODE_SLE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_LE, args[0], args[1]);
- break;
- case OPCODE_SGT:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_G, args[0], args[1]);
- break;
- case OPCODE_SGE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_GE, args[0], args[1]);
- break;
- case OPCODE_SEQ:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_EQ, args[0], args[1]);
- break;
- case OPCODE_SNE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_NEQ, args[0], args[1]);
- break;
- case OPCODE_SSG:
- emit_sign(p, dst, dst_flags, args[0]);
- break;
- case OPCODE_MUL:
- emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_POW:
- emit_math2(c, BRW_MATH_FUNCTION_POW,
- dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_MAD:
- emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_TEX:
- emit_tex(c, dst, dst_flags, args[0],
- get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
- 0, 1, 0, 0),
- inst->TexSrcTarget,
- inst->TexSrcUnit,
- (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0);
- break;
- case OPCODE_TXB:
- emit_txb(c, dst, dst_flags, args[0],
- get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
- 0, 1, 0, 0),
- inst->TexSrcTarget,
- c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]);
- break;
- case OPCODE_KIL_NV:
- emit_kil_nv(c);
- break;
- case OPCODE_IF:
- assert(if_depth < MAX_IF_DEPTH);
- if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
- if_depth_in_loop[loop_depth]++;
- break;
- case OPCODE_ELSE:
- assert(if_depth > 0);
- if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
- break;
- case OPCODE_ENDIF:
- assert(if_depth > 0);
- brw_ENDIF(p, if_inst[--if_depth]);
- if_depth_in_loop[loop_depth]--;
- break;
- case OPCODE_BGNSUB:
- brw_save_label(p, inst->Comment, p->nr_insn);
- break;
- case OPCODE_ENDSUB:
- /* no-op */
- break;
- case OPCODE_CAL:
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(4));
- brw_save_call(&c->func, inst->Comment, p->nr_insn);
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_pop_insn_state(p);
- break;
-
- case OPCODE_RET:
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(-4));
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_pop_insn_state(p);
-
- break;
- case OPCODE_BGNLOOP:
- /* XXX may need to invalidate the current_constant regs */
- loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
- if_depth_in_loop[loop_depth] = 0;
- break;
- case OPCODE_BRK:
- brw_BREAK(p, if_depth_in_loop[loop_depth]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_CONT:
- brw_CONT(p, if_depth_in_loop[loop_depth]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_ENDLOOP:
- {
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- if (intel->gen == 5)
- br = 2;
-
- assert(loop_depth > 0);
- loop_depth--;
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
- break;
- default:
- printf("unsupported opcode %d (%s) in fragment shader\n",
- inst->Opcode, inst->Opcode < MAX_OPCODE ?
- _mesa_opcode_string(inst->Opcode) : "unknown");
- }
-
- /* Release temporaries containing any unaliased source regs. */
- release_tmps( c, mark );
-
- if (inst->CondUpdate)
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- else
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- }
- post_wm_emit(c);
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("wm-native:\n");
- for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stdout, &p->store[i], intel->gen);
- printf("\n");
- }
-}
-
-/**
- * Do GPU code generation for shaders that use GLSL features such as
- * flow control. Other shaders will be compiled with the
- */
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
-{
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("brw_wm_glsl_emit:\n");
- }
-
- /* initial instruction translation/simplification */
- brw_wm_pass_fp(c);
-
- /* actual code generation */
- brw_wm_emit_glsl(brw, c);
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- brw_wm_print_program(c, "brw_wm_glsl_emit done");
- }
-
- c->prog_data.total_grf = num_grf_used(c);
- c->prog_data.total_scratch = 0;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
index 62e556698b..471ea1c18d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -120,14 +120,14 @@ const struct {
* \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
* \param lookup bitmask of IZ_* flags
*/
-void brw_wm_lookup_iz( struct intel_context *intel,
- GLuint line_aa,
- GLuint lookup,
- GLboolean ps_uses_depth,
- struct brw_wm_prog_key *key )
+void brw_wm_lookup_iz(struct intel_context *intel,
+ struct brw_wm_compile *c)
{
GLuint reg = 2;
GLboolean kill_stats_promoted_workaround = GL_FALSE;
+ int lookup = c->key.iz_lookup;
+ bool uses_depth = (c->fp->program.Base.InputsRead &
+ (1 << FRAG_ATTRIB_WPOS)) != 0;
assert (lookup < IZ_BIT_MAX);
@@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel,
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
*/
- if (intel->stats_wm &&
+ if (c->key.stats_wm &&
(lookup & IZ_PS_KILL_ALPHATEST_BIT) &&
wm_iz_table[lookup].mode == P) {
kill_stats_promoted_workaround = GL_TRUE;
}
if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
- key->computes_depth = 1;
+ c->computes_depth = 1;
- if (wm_iz_table[lookup].sd_present || ps_uses_depth ||
+ if (wm_iz_table[lookup].sd_present || uses_depth ||
kill_stats_promoted_workaround) {
- key->source_depth_reg = reg;
+ c->source_depth_reg = reg;
reg += 2;
}
if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
- key->source_depth_to_render_target = 1;
+ c->source_depth_to_render_target = 1;
- if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
- key->aa_dest_stencil_reg = reg;
- key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
- line_aa == AA_SOMETIMES);
+ if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) {
+ c->aa_dest_stencil_reg = reg;
+ c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+ c->key.line_aa == AA_SOMETIMES);
reg++;
}
if (wm_iz_table[lookup].dd_present) {
- key->dest_depth_reg = reg;
+ c->dest_depth_reg = reg;
reg+=2;
}
- key->nr_payload_regs = reg;
+ c->nr_payload_regs = reg;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 83152526b3..f78bdc3186 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c )
GLuint i;
for (i = 0; i < 4; i++) {
- GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i;
+ GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i;
pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
&c->payload.depth[j] );
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index 3a2874b6dd..7d6a3fa9f1 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
if (inst->opcode == WM_FB_WRITE) {
track_arg(c, inst, 0, WRITEMASK_XYZW);
track_arg(c, inst, 1, WRITEMASK_XYZW);
- if (c->key.source_depth_to_render_target &&
- c->key.computes_depth)
+ if (c->source_depth_to_render_target && c->computes_depth)
track_arg(c, inst, 2, WRITEMASK_Z);
else
track_arg(c, inst, 2, 0);
@@ -281,7 +280,6 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_DST:
case WM_FRONTFACING:
- case OPCODE_KIL_NV:
default:
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
index 44e3953814..8c2b9e7020 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
@@ -69,6 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c,
*/
static void init_registers( struct brw_wm_compile *c )
{
+ struct brw_context *brw = c->func.brw;
+ struct intel_context *intel = &brw->intel;
GLuint nr_interp_regs = 0;
GLuint i = 0;
GLuint j;
@@ -76,32 +78,41 @@ static void init_registers( struct brw_wm_compile *c )
for (j = 0; j < c->grf_limit; j++)
c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
- for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++)
+ for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++)
prealloc_reg(c, &c->payload.depth[j], i++);
for (j = 0; j < c->nr_creg; j++)
prealloc_reg(c, &c->creg[j], i++);
- for (j = 0; j < VERT_RESULT_MAX; j++) {
- if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
- int fp_index;
-
- if (j >= VERT_RESULT_VAR0)
- fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
- else if (j <= VERT_RESULT_TEX7)
- fp_index = j;
- else
- fp_index = -1;
-
- nr_interp_regs++;
- if (fp_index >= 0)
- prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+ if (intel->gen >= 6) {
+ for (unsigned int j = 0; j < FRAG_ATTRIB_MAX; j++) {
+ if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(j)) {
+ nr_interp_regs++;
+ prealloc_reg(c, &c->payload.input_interp[j], i++);
+ }
+ }
+ } else {
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
+ int fp_index;
+
+ if (j >= VERT_RESULT_VAR0)
+ fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+ else if (j <= VERT_RESULT_TEX7)
+ fp_index = j;
+ else
+ fp_index = -1;
+
+ nr_interp_regs++;
+ if (fp_index >= 0)
+ prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+ }
}
+ assert(nr_interp_regs >= 1);
}
- assert(nr_interp_regs >= 1);
- c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2);
+ c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2);
c->prog_data.urb_read_length = nr_interp_regs * 2;
c->prog_data.curb_read_length = c->nr_creg * 2;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index fea96d3538..30672b4251 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap )
static drm_intel_bo *upload_default_color( struct brw_context *brw,
const GLfloat *color )
{
- struct brw_sampler_default_color sdc;
+ struct intel_context *intel = &brw->intel;
- COPY_4V(sdc.color, color);
-
- return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
- &sdc, sizeof(sdc));
+ if (intel->gen >= 5) {
+ struct gen5_sampler_default_color sdc;
+
+ memset(&sdc, 0, sizeof(sdc));
+
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]);
+
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]);
+
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]);
+
+ /* XXX: Fill in half floats */
+ /* XXX: Fill in signed bytes */
+
+ COPY_4V(sdc.f, color);
+
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc));
+ } else {
+ struct brw_sampler_default_color sdc;
+
+ COPY_4V(sdc.color, color);
+
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc));
+ }
}
@@ -245,9 +276,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
struct wm_sampler_entry *entry = &key->sampler[unit];
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
- struct intel_texture_object *intelObj = intel_texture_object(texObj);
struct gl_texture_image *firstImage =
- texObj->Image[0][intelObj->firstLevel];
+ texObj->Image[0][texObj->BaseLevel];
memset(last_entry_end, 0,
(char*)entry - last_entry_end + sizeof(*entry));
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 76de7b7b6f..e9ef635bca 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -87,7 +87,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
struct gl_context *ctx = &brw->intel.ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
- const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
struct intel_context *intel = &brw->intel;
memset(key, 0, sizeof(*key));
@@ -132,7 +131,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
- key->is_glsl = bfp->isGLSL;
/* If using the fragment shader backend, the program is always
* 8-wide.
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 76fc94df1f..233fe3b731 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -42,7 +42,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-
+#include "brw_wm.h"
static GLuint translate_tex_target( GLenum target )
{
@@ -68,104 +68,72 @@ static GLuint translate_tex_target( GLenum target )
}
}
+static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] =
+{
+ [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM,
+ [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM,
+ [MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM,
+ [MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM,
+ [MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM,
+ [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM,
+ [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM,
+ [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM,
+ [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM,
+ [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
+ [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
+ [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM,
+ [MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM,
+ [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM,
+ [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL,
+ [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY,
+ [MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1,
+ [MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1,
+ [MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB,
+ [MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM,
+ [MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM,
+ [MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM,
+ [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB,
+ [MESA_FORMAT_SRGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
+ [MESA_FORMAT_SRGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB,
+ [MESA_FORMAT_SRGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB,
+ [MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB,
+ [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
+ [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
+ [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM,
+ [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
+};
+
+bool
+brw_render_target_supported(gl_format format)
+{
+ if (format == MESA_FORMAT_S8_Z24 ||
+ format == MESA_FORMAT_X8_Z24 ||
+ format == MESA_FORMAT_Z16) {
+ return true;
+ }
+
+ /* Not exactly true, as some of those formats are not renderable.
+ * But at least we know how to translate them.
+ */
+ return brw_format_for_mesa_format[format] != 0;
+}
static GLuint translate_tex_format( gl_format mesa_format,
GLenum internal_format,
GLenum depth_mode )
{
switch( mesa_format ) {
- case MESA_FORMAT_L8:
- return BRW_SURFACEFORMAT_L8_UNORM;
-
- case MESA_FORMAT_I8:
- return BRW_SURFACEFORMAT_I8_UNORM;
-
- case MESA_FORMAT_A8:
- return BRW_SURFACEFORMAT_A8_UNORM;
-
- case MESA_FORMAT_AL88:
- return BRW_SURFACEFORMAT_L8A8_UNORM;
-
- case MESA_FORMAT_AL1616:
- return BRW_SURFACEFORMAT_L16A16_UNORM;
-
- case MESA_FORMAT_R8:
- return BRW_SURFACEFORMAT_R8_UNORM;
-
- case MESA_FORMAT_R16:
- return BRW_SURFACEFORMAT_R16_UNORM;
-
- case MESA_FORMAT_RG88:
- return BRW_SURFACEFORMAT_R8G8_UNORM;
-
- case MESA_FORMAT_RG1616:
- return BRW_SURFACEFORMAT_R16G16_UNORM;
-
- case MESA_FORMAT_RGB888:
- assert(0); /* not supported for sampling */
- return BRW_SURFACEFORMAT_R8G8B8_UNORM;
-
- case MESA_FORMAT_ARGB8888:
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-
- case MESA_FORMAT_XRGB8888:
- return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
-
- case MESA_FORMAT_RGBA8888_REV:
- _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()");
- return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
-
- case MESA_FORMAT_RGB565:
- return BRW_SURFACEFORMAT_B5G6R5_UNORM;
-
- case MESA_FORMAT_ARGB1555:
- return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
-
- case MESA_FORMAT_ARGB4444:
- return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
-
- case MESA_FORMAT_YCBCR_REV:
- return BRW_SURFACEFORMAT_YCRCB_NORMAL;
-
- case MESA_FORMAT_YCBCR:
- return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
-
- case MESA_FORMAT_RGB_FXT1:
- case MESA_FORMAT_RGBA_FXT1:
- return BRW_SURFACEFORMAT_FXT1;
case MESA_FORMAT_Z16:
if (depth_mode == GL_INTENSITY)
return BRW_SURFACEFORMAT_I16_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A16_UNORM;
+ else if (depth_mode == GL_RED)
+ return BRW_SURFACEFORMAT_R16_UNORM;
else
return BRW_SURFACEFORMAT_L16_UNORM;
- case MESA_FORMAT_RGB_DXT1:
- return BRW_SURFACEFORMAT_DXT1_RGB;
-
- case MESA_FORMAT_RGBA_DXT1:
- return BRW_SURFACEFORMAT_BC1_UNORM;
-
- case MESA_FORMAT_RGBA_DXT3:
- return BRW_SURFACEFORMAT_BC2_UNORM;
-
- case MESA_FORMAT_RGBA_DXT5:
- return BRW_SURFACEFORMAT_BC3_UNORM;
-
- case MESA_FORMAT_SARGB8:
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
-
- case MESA_FORMAT_SLA8:
- return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
-
- case MESA_FORMAT_SL8:
- return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
-
- case MESA_FORMAT_SRGB_DXT1:
- return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
-
case MESA_FORMAT_S8_Z24:
/* XXX: these different surface formats don't seem to
* make any difference for shadow sampler/compares.
@@ -174,18 +142,14 @@ static GLuint translate_tex_format( gl_format mesa_format,
return BRW_SURFACEFORMAT_I24X8_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A24X8_UNORM;
+ else if (depth_mode == GL_RED)
+ return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS;
else
return BRW_SURFACEFORMAT_L24X8_UNORM;
- case MESA_FORMAT_DUDV8:
- return BRW_SURFACEFORMAT_R8G8_SNORM;
-
- case MESA_FORMAT_SIGNED_RGBA8888_REV:
- return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
-
default:
- assert(0);
- return 0;
+ assert(brw_format_for_mesa_format[mesa_format] != 0);
+ return brw_format_for_mesa_format[mesa_format];
}
}
@@ -214,7 +178,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
- struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+ struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
const GLuint surf_index = SURF_INDEX_TEXTURE(unit);
struct brw_surface_state surf;
void *map;
@@ -232,7 +196,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
surf.ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */
- surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
+ surf.ss2.mip_count = intelObj->_MaxLevel - tObj->BaseLevel;
surf.ss2.width = firstImage->Width - 1;
surf.ss2.height = firstImage->Height - 1;
brw_set_surface_tiling(&surf, intelObj->mt->region->tiling);
@@ -274,6 +238,7 @@ brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo **out_bo,
uint32_t *out_offset)
{
+ struct intel_context *intel = &brw->intel;
const GLint w = width - 1;
struct brw_surface_state surf;
void *map;
@@ -284,6 +249,9 @@ brw_create_constant_surface(struct brw_context *brw,
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ if (intel->gen >= 6)
+ surf.ss0.render_cache_read_write = 1;
+
assert(bo);
surf.ss1.base_addr = bo->offset; /* reloc */
@@ -404,6 +372,38 @@ const struct brw_tracked_state brw_wm_constant_surface = {
.emit = upload_wm_constant_surface,
};
+static void
+brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
+{
+ struct intel_context *intel = &brw->intel;
+ struct brw_surface_state surf;
+ void *map;
+
+ memset(&surf, 0, sizeof(surf));
+
+ surf.ss0.surface_type = BRW_SURFACE_NULL;
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ surf.ss1.base_addr = 0;
+
+ surf.ss2.width = 0;
+ surf.ss2.height = 0;
+ brw_set_surface_tiling(&surf, I915_TILING_NONE);
+ surf.ss3.pitch = 0;
+
+ if (intel->gen < 6) {
+ /* _NEW_COLOR */
+ surf.ss0.color_blend = 0;
+ surf.ss0.writedisable_red = 1;
+ surf.ss0.writedisable_green = 1;
+ surf.ss0.writedisable_blue = 1;
+ surf.ss0.writedisable_alpha = 1;
+ }
+
+ map = brw_state_batch(brw, sizeof(surf), 32,
+ &brw->wm.surf_bo[unit],
+ &brw->wm.surf_offset[unit]);
+ memcpy(map, &surf, sizeof(surf));
+}
/**
* Sets up a surface state structure to point at the given region.
@@ -417,123 +417,53 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
- drm_intel_bo *region_bo = NULL;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
- struct intel_region *region = irb ? irb->region : NULL;
- struct {
- unsigned int surface_type;
- unsigned int surface_format;
- unsigned int width, height, pitch, cpp;
- GLubyte color_mask[4];
- GLboolean color_blend;
- uint32_t tiling;
- uint32_t draw_x;
- uint32_t draw_y;
- } key;
+ struct intel_region *region = irb->region;
struct brw_surface_state surf;
void *map;
- memset(&key, 0, sizeof(key));
-
- if (region != NULL) {
- region_bo = region->buffer;
-
- key.surface_type = BRW_SURFACE_2D;
- switch (irb->Base.Format) {
- /* XRGB and ARGB are treated the same here because the chips in this
- * family cannot render to XRGB targets. This means that we have to
- * mask writes to alpha (ala glColorMask) and reconfigure the alpha
- * blending hardware to use GL_ONE (or GL_ZERO) for cases where
- * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used.
- */
- case MESA_FORMAT_ARGB8888:
- case MESA_FORMAT_XRGB8888:
- key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- break;
- case MESA_FORMAT_SARGB8:
- key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
- break;
- case MESA_FORMAT_RGB565:
- key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
- break;
- case MESA_FORMAT_ARGB1555:
- key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
- break;
- case MESA_FORMAT_ARGB4444:
- key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
- break;
- case MESA_FORMAT_A8:
- key.surface_format = BRW_SURFACEFORMAT_A8_UNORM;
- break;
- case MESA_FORMAT_R8:
- key.surface_format = BRW_SURFACEFORMAT_R8_UNORM;
- break;
- case MESA_FORMAT_R16:
- key.surface_format = BRW_SURFACEFORMAT_R16_UNORM;
- break;
- case MESA_FORMAT_RG88:
- key.surface_format = BRW_SURFACEFORMAT_R8G8_UNORM;
- break;
- case MESA_FORMAT_RG1616:
- key.surface_format = BRW_SURFACEFORMAT_R16G16_UNORM;
- break;
- default:
- _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format);
- }
- key.tiling = region->tiling;
- key.width = rb->Width;
- key.height = rb->Height;
- key.pitch = region->pitch;
- key.cpp = region->cpp;
- key.draw_x = region->draw_x;
- key.draw_y = region->draw_y;
- } else {
- key.surface_type = BRW_SURFACE_NULL;
- key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- key.tiling = I915_TILING_X;
- key.width = 1;
- key.height = 1;
- key.cpp = 4;
- key.draw_x = 0;
- key.draw_y = 0;
- }
-
- if (intel->gen < 6) {
- /* _NEW_COLOR */
- memcpy(key.color_mask, ctx->Color.ColorMask[unit],
- sizeof(key.color_mask));
+ memset(&surf, 0, sizeof(surf));
- /* As mentioned above, disable writes to the alpha component when the
- * renderbuffer is XRGB.
+ switch (irb->Base.Format) {
+ case MESA_FORMAT_XRGB8888:
+ /* XRGB is handled as ARGB because the chips in this family
+ * cannot render to XRGB targets. This means that we have to
+ * mask writes to alpha (ala glColorMask) and reconfigure the
+ * alpha blending hardware to use GL_ONE (or GL_ZERO) for
+ * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
+ * used.
*/
- if (ctx->DrawBuffer->Visual.alphaBits == 0)
- key.color_mask[3] = GL_FALSE;
-
- key.color_blend = (!ctx->Color._LogicOpEnabled &&
- (ctx->Color.BlendEnabled & (1 << unit)));
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ break;
+ case MESA_FORMAT_SARGB8:
+ /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB
+ surfaces to the blend/update as sRGB */
+ surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ break;
+ default:
+ surf.ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format];
+ assert(surf.ss0.surface_format != 0);
}
- memset(&surf, 0, sizeof(surf));
-
- surf.ss0.surface_format = key.surface_format;
- surf.ss0.surface_type = key.surface_type;
- if (key.tiling == I915_TILING_NONE) {
- surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp;
+ surf.ss0.surface_type = BRW_SURFACE_2D;
+ if (region->tiling == I915_TILING_NONE) {
+ surf.ss1.base_addr = (region->draw_x +
+ region->draw_y * region->pitch) * region->cpp;
} else {
uint32_t tile_base, tile_x, tile_y;
- uint32_t pitch = key.pitch * key.cpp;
+ uint32_t pitch = region->pitch * region->cpp;
- if (key.tiling == I915_TILING_X) {
- tile_x = key.draw_x % (512 / key.cpp);
- tile_y = key.draw_y % 8;
- tile_base = ((key.draw_y / 8) * (8 * pitch));
- tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096;
+ if (region->tiling == I915_TILING_X) {
+ tile_x = region->draw_x % (512 / region->cpp);
+ tile_y = region->draw_y % 8;
+ tile_base = ((region->draw_y / 8) * (8 * pitch));
+ tile_base += (region->draw_x - tile_x) / (512 / region->cpp) * 4096;
} else {
/* Y */
- tile_x = key.draw_x % (128 / key.cpp);
- tile_y = key.draw_y % 32;
- tile_base = ((key.draw_y / 32) * (32 * pitch));
- tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096;
+ tile_x = region->draw_x % (128 / region->cpp);
+ tile_y = region->draw_y % 32;
+ tile_base = ((region->draw_y / 32) * (32 * pitch));
+ tile_base += (region->draw_x - tile_x) / (128 / region->cpp) * 4096;
}
assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
assert(tile_x % 4 == 0);
@@ -545,21 +475,27 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf.ss5.x_offset = tile_x / 4;
surf.ss5.y_offset = tile_y / 2;
}
- if (region_bo != NULL)
- surf.ss1.base_addr += region_bo->offset; /* reloc */
+ surf.ss1.base_addr += region->buffer->offset; /* reloc */
- surf.ss2.width = key.width - 1;
- surf.ss2.height = key.height - 1;
- brw_set_surface_tiling(&surf, key.tiling);
- surf.ss3.pitch = (key.pitch * key.cpp) - 1;
+ surf.ss2.width = rb->Width - 1;
+ surf.ss2.height = rb->Height - 1;
+ brw_set_surface_tiling(&surf, region->tiling);
+ surf.ss3.pitch = (region->pitch * region->cpp) - 1;
if (intel->gen < 6) {
/* _NEW_COLOR */
- surf.ss0.color_blend = key.color_blend;
- surf.ss0.writedisable_red = !key.color_mask[0];
- surf.ss0.writedisable_green = !key.color_mask[1];
- surf.ss0.writedisable_blue = !key.color_mask[2];
- surf.ss0.writedisable_alpha = !key.color_mask[3];
+ surf.ss0.color_blend = (!ctx->Color._LogicOpEnabled &&
+ (ctx->Color.BlendEnabled & (1 << unit)));
+ surf.ss0.writedisable_red = !ctx->Color.ColorMask[unit][0];
+ surf.ss0.writedisable_green = !ctx->Color.ColorMask[unit][1];
+ surf.ss0.writedisable_blue = !ctx->Color.ColorMask[unit][2];
+ /* As mentioned above, disable writes to the alpha component when the
+ * renderbuffer is XRGB.
+ */
+ if (ctx->DrawBuffer->Visual.alphaBits == 0)
+ surf.ss0.writedisable_alpha = 1;
+ else
+ surf.ss0.writedisable_alpha = !ctx->Color.ColorMask[unit][3];
}
map = brw_state_batch(brw, sizeof(surf), 32,
@@ -567,15 +503,13 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
&brw->wm.surf_offset[unit]);
memcpy(map, &surf, sizeof(surf));
- if (region_bo != NULL) {
- drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
- brw->wm.surf_offset[unit] +
- offsetof(struct brw_surface_state, ss1),
- region_bo,
- surf.ss1.base_addr - region_bo->offset,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER);
- }
+ drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
+ brw->wm.surf_offset[unit] +
+ offsetof(struct brw_surface_state, ss1),
+ region->buffer,
+ surf.ss1.base_addr - region->buffer->offset,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER);
}
static void
@@ -635,12 +569,16 @@ upload_wm_surfaces(struct brw_context *brw)
/* Update surfaces for drawing buffers */
if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
- brw_update_renderbuffer_surface(brw,
- ctx->DrawBuffer->_ColorDrawBuffers[i],
- i);
+ if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
+ brw_update_renderbuffer_surface(brw,
+ ctx->DrawBuffer->_ColorDrawBuffers[i],
+ i);
+ } else {
+ brw_update_null_renderbuffer_surface(brw, i);
+ }
}
} else {
- brw_update_renderbuffer_surface(brw, NULL, 0);
+ brw_update_null_renderbuffer_surface(brw, 0);
}
/* Update surfaces for textures */
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index 800a255521..dbcdc5b869 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -35,6 +35,7 @@
struct gen6_blend_state_key {
GLboolean color_blend, alpha_enabled;
GLboolean dither;
+ GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4];
GLenum logic_op;
@@ -54,6 +55,9 @@ blend_state_populate_key(struct brw_context *brw,
memset(key, 0, sizeof(*key));
/* _NEW_COLOR */
+ memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask));
+
+ /* _NEW_COLOR */
if (ctx->Color._LogicOpEnabled)
key->logic_op = ctx->Color.LogicOp;
else
@@ -87,54 +91,62 @@ static drm_intel_bo *
blend_state_create_from_key(struct brw_context *brw,
struct gen6_blend_state_key *key)
{
- struct gen6_blend_state blend;
+ struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS];
drm_intel_bo *bo;
+ int b;
memset(&blend, 0, sizeof(blend));
- if (key->logic_op != GL_COPY) {
- blend.blend1.logic_op_enable = 1;
- blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
- } else if (key->color_blend) {
- GLenum eqRGB = key->blend_eq_rgb;
- GLenum eqA = key->blend_eq_a;
- GLenum srcRGB = key->blend_src_rgb;
- GLenum dstRGB = key->blend_dst_rgb;
- GLenum srcA = key->blend_src_a;
- GLenum dstA = key->blend_dst_a;
-
- if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
- srcRGB = dstRGB = GL_ONE;
- }
-
- if (eqA == GL_MIN || eqA == GL_MAX) {
- srcA = dstA = GL_ONE;
+ for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) {
+ if (key->logic_op != GL_COPY) {
+ blend[b].blend1.logic_op_enable = 1;
+ blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
+ } else if (key->color_blend & (1 << b)) {
+ GLenum eqRGB = key->blend_eq_rgb;
+ GLenum eqA = key->blend_eq_a;
+ GLenum srcRGB = key->blend_src_rgb;
+ GLenum dstRGB = key->blend_dst_rgb;
+ GLenum srcA = key->blend_src_a;
+ GLenum dstA = key->blend_dst_a;
+
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+ srcRGB = dstRGB = GL_ONE;
+ }
+
+ if (eqA == GL_MIN || eqA == GL_MAX) {
+ srcA = dstA = GL_ONE;
+ }
+
+ blend[b].blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ blend[b].blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
+ blend[b].blend0.blend_func = brw_translate_blend_equation(eqRGB);
+
+ blend[b].blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ blend[b].blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
+ blend[b].blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+
+ blend[b].blend0.blend_enable = 1;
+ blend[b].blend0.ia_blend_enable = (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB);
}
- blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
- blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
- blend.blend0.blend_func = brw_translate_blend_equation(eqRGB);
-
- blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
- blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
- blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+ if (key->alpha_enabled) {
+ blend[b].blend1.alpha_test_enable = 1;
+ blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
- blend.blend0.blend_enable = 1;
- blend.blend0.ia_blend_enable = (srcA != srcRGB ||
- dstA != dstRGB ||
- eqA != eqRGB);
- }
-
- if (key->alpha_enabled) {
- blend.blend1.alpha_test_enable = 1;
- blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+ }
- }
+ if (key->dither) {
+ blend[b].blend1.dither_enable = 1;
+ blend[b].blend1.y_dither_offset = 0;
+ blend[b].blend1.x_dither_offset = 0;
+ }
- if (key->dither) {
- blend.blend1.dither_enable = 1;
- blend.blend1.y_dither_offset = 0;
- blend.blend1.x_dither_offset = 0;
+ blend[b].blend1.write_disable_r = !key->color_mask[b][0];
+ blend[b].blend1.write_disable_g = !key->color_mask[b][1];
+ blend[b].blend1.write_disable_b = !key->color_mask[b][2];
+ blend[b].blend1.write_disable_a = !key->color_mask[b][3];
}
bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE,
@@ -172,7 +184,7 @@ const struct brw_tracked_state gen6_blend_state = {
};
struct gen6_color_calc_state_key {
- GLubyte blend_constant_color[4];
+ float blend_constant_color[4];
GLclampf alpha_ref;
GLubyte stencil_ref[2];
};
@@ -266,7 +278,7 @@ static void upload_cc_state_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2));
+ OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index c65b41e2b6..38c98f30ef 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -43,7 +43,10 @@ upload_clip_state(struct brw_context *brw)
depth_clamp = GEN6_CLIP_Z_TEST;
if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
- provoking = 0;
+ provoking =
+ (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
+ (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
+ (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
} else {
provoking =
(2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
@@ -55,7 +58,7 @@ upload_clip_state(struct brw_context *brw)
userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2));
+ OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
OUT_BATCH(GEN6_CLIP_ENABLE |
GEN6_CLIP_API_OGL |
@@ -64,7 +67,9 @@ upload_clip_state(struct brw_context *brw)
userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
depth_clamp |
provoking);
- OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX);
+ OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
+ U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
+ GEN6_CLIP_FORCE_ZERO_RTAINDEX);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
index 6127b9197a..7296c7cd1b 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -37,7 +37,7 @@ upload_gs_state(struct brw_context *brw)
/* Disable all the constant buffers. */
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2));
+ OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -46,7 +46,7 @@ upload_gs_state(struct brw_context *brw)
if (brw->gs.prog_bo) {
BEGIN_BATCH(7);
- OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+ OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH(GEN6_GS_SPF_MODE |
(0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
@@ -62,7 +62,7 @@ upload_gs_state(struct brw_context *brw)
ADVANCE_BATCH();
} else {
BEGIN_BATCH(7);
- OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+ OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0); /* prog_bo */
OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
index fc5d391c3c..f65c651bdf 100644
--- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
@@ -36,7 +36,7 @@ upload_sampler_state_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 |
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 |
VS_SAMPLER_STATE_CHANGE |
GS_SAMPLER_STATE_CHANGE |
PS_SAMPLER_STATE_CHANGE |
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
index b57126c793..12b65826ae 100644
--- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -92,7 +92,7 @@ static void upload_scissor_state_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(2);
- OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 471067e8f0..f27782935d 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -33,9 +33,10 @@
#include "intel_batchbuffer.h"
static uint32_t
-get_attr_override(struct brw_context *brw, int fs_attr)
+get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color)
{
int attr_index = 0, i, vs_attr;
+ int bfc = 0;
if (fs_attr <= FRAG_ATTRIB_TEX7)
vs_attr = fs_attr;
@@ -57,6 +58,30 @@ get_attr_override(struct brw_context *brw, int fs_attr)
attr_index++;
}
+ assert(attr_index < 32);
+
+ if (two_side_color) {
+ if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+ (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+ assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+ assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+ bfc = 2;
+ } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+ (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)))
+ bfc = 1;
+ }
+
+ if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) {
+ if (fs_attr == FRAG_ATTRIB_COL0)
+ attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+ else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) {
+ attr_index++;
+ attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+ } else {
+ attr_index += bfc;
+ }
+ }
+
return attr_index;
}
@@ -67,13 +92,15 @@ upload_sf_state(struct brw_context *brw)
struct gl_context *ctx = &intel->ctx;
/* CACHE_NEW_VS_PROG */
uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+ /* BRW_NEW_FRAGMENT_PROGRAM */
uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead);
- uint32_t dw1, dw2, dw3, dw4, dw16;
+ uint32_t dw1, dw2, dw3, dw4, dw16, dw17;
int i;
/* _NEW_BUFFER */
GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
int attr = 0;
int urb_start;
+ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled)
@@ -91,6 +118,7 @@ upload_sf_state(struct brw_context *brw)
dw3 = 0;
dw4 = 0;
dw16 = 0;
+ dw17 = 0;
/* _NEW_POLYGON */
if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
@@ -99,6 +127,48 @@ upload_sf_state(struct brw_context *brw)
if (ctx->Polygon.OffsetFill)
dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
+ if (ctx->Polygon.OffsetLine)
+ dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
+
+ if (ctx->Polygon.OffsetPoint)
+ dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+
+ switch (ctx->Polygon.FrontMode) {
+ case GL_FILL:
+ dw2 |= GEN6_SF_FRONT_SOLID;
+ break;
+
+ case GL_LINE:
+ dw2 |= GEN6_SF_FRONT_WIREFRAME;
+ break;
+
+ case GL_POINT:
+ dw2 |= GEN6_SF_FRONT_POINT;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (ctx->Polygon.BackMode) {
+ case GL_FILL:
+ dw2 |= GEN6_SF_BACK_SOLID;
+ break;
+
+ case GL_LINE:
+ dw2 |= GEN6_SF_BACK_WIREFRAME;
+ break;
+
+ case GL_POINT:
+ dw2 |= GEN6_SF_BACK_POINT;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
/* _NEW_SCISSOR */
if (ctx->Scissor.Enabled)
dw3 |= GEN6_SF_SCISSOR_ENABLE;
@@ -160,8 +230,14 @@ upload_sf_state(struct brw_context *brw)
}
}
+ /* flat shading */
+ if (ctx->Light.ShadeModel == GL_FLAT) {
+ dw17 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >>
+ ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1));
+ }
+
BEGIN_BATCH(20);
- OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
+ OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
OUT_BATCH(dw3);
@@ -174,7 +250,7 @@ upload_sf_state(struct brw_context *brw)
for (; attr < 64; attr++) {
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
- attr_overrides |= get_attr_override(brw, attr);
+ attr_overrides |= get_attr_override(brw, attr, two_side_color);
attr++;
break;
}
@@ -182,7 +258,7 @@ upload_sf_state(struct brw_context *brw)
for (; attr < 64; attr++) {
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
- attr_overrides |= get_attr_override(brw, attr) << 16;
+ attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16;
attr++;
break;
}
@@ -190,7 +266,7 @@ upload_sf_state(struct brw_context *brw)
OUT_BATCH(attr_overrides);
}
OUT_BATCH(dw16); /* point sprite texcoord bitmask */
- OUT_BATCH(0); /* constant interp bitmask */
+ OUT_BATCH(dw17); /* constant interp bitmask */
OUT_BATCH(0); /* wrapshortest enables 0-7 */
OUT_BATCH(0); /* wrapshortest enables 8-15 */
ADVANCE_BATCH();
@@ -205,7 +281,8 @@ const struct brw_tracked_state gen6_sf_state = {
_NEW_BUFFERS |
_NEW_POINT |
_NEW_TRANSFORM),
- .brw = BRW_NEW_CONTEXT,
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_FRAGMENT_PROGRAM),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_sf_state,
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index a34123478f..fc46c4cb79 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -60,7 +60,7 @@ upload_urb(struct brw_context *brw)
assert(!brw->gs.prog_bo || brw->urb.vs_size < 5);
BEGIN_BATCH(3);
- OUT_BATCH(CMD_URB << 16 | (3 - 2));
+ OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
@@ -72,7 +72,7 @@ const struct brw_tracked_state gen6_urb = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
- .cache = CACHE_NEW_VS_PROG,
+ .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG),
},
.prepare = prepare_urb,
.emit = upload_urb,
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
index d691bbebc8..cd7d209e3e 100644
--- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -117,7 +117,7 @@ static void upload_viewport_state_pointers(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
BEGIN_BATCH(4);
- OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
+ OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
GEN6_CC_VIEWPORT_MODIFY |
GEN6_SF_VIEWPORT_MODIFY |
GEN6_CLIP_VIEWPORT_MODIFY);
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e94d0c0ddb..e68c0ac261 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -47,14 +47,14 @@ upload_vs_state(struct brw_context *brw)
if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
+ OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
- int params_uploaded = 0;
+ int params_uploaded = 0, param_regs;
float *param;
if (brw->vertex_program->IsNVProgram)
@@ -88,20 +88,11 @@ upload_vs_state(struct brw_context *brw)
params_uploaded++;
}
- if (vp->use_const_buffer) {
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- memcpy(param + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- params_uploaded++;
- }
- }
- } else {
- for (i = 0; i < nr_params; i++) {
- memcpy(param, vp->program.Base.Parameters->ParameterValues[i],
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ memcpy(param + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
4 * sizeof(float));
- param += 4;
params_uploaded++;
}
}
@@ -117,13 +108,16 @@ upload_vs_state(struct brw_context *brw)
drm_intel_gem_bo_unmap_gtt(constant_bo);
+ param_regs = (params_uploaded + 1) / 2;
+ assert(param_regs <= 32);
+
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
+ OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
OUT_RELOC(constant_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- ALIGN(params_uploaded, 2) / 2 - 1);
+ param_regs - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -133,9 +127,10 @@ upload_vs_state(struct brw_context *brw)
}
BEGIN_BATCH(6);
- OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
+ OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+ GEN6_VS_FLOATING_POINT_MODE_ALT |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(0); /* scratch space base offset */
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index ea5418bacf..78901ecac5 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -66,6 +66,21 @@ prepare_wm_constants(struct brw_context *brw)
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
*brw->wm.prog_data->param[i]);
}
+
+ if (0) {
+ printf("WM constants:\n");
+ for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
+ if ((i & 7) == 0)
+ printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8);
+ printf("%8f ", constants[i]);
+ if ((i & 7) == 7)
+ printf("\n");
+ }
+ if ((i & 7) != 0)
+ printf("\n");
+ printf("\n");
+ }
+
drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo);
}
}
@@ -88,10 +103,11 @@ upload_wm_state(struct brw_context *brw)
brw_fragment_program_const(brw->fragment_program);
uint32_t dw2, dw4, dw5, dw6;
+ /* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));
+ OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -99,12 +115,13 @@ upload_wm_state(struct brw_context *brw)
ADVANCE_BATCH();
} else {
BEGIN_BATCH(5);
- OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 |
+ OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
OUT_RELOC(brw->wm.push_const_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1);
+ ALIGN(brw->wm.prog_data->nr_params,
+ brw->wm.prog_data->dispatch_width) / 8 - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -116,6 +133,9 @@ upload_wm_state(struct brw_context *brw)
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+ /* OpenGL non-ieee floating point mode */
+ dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
+
/* BRW_NEW_NR_WM_SURFACES */
dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
@@ -126,8 +146,8 @@ upload_wm_state(struct brw_context *brw)
dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- if (fp->isGLSL)
+ /* CACHE_NEW_WM_PROG */
+ if (brw->wm.prog_data->dispatch_width == 8)
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
else
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
@@ -161,7 +181,7 @@ upload_wm_state(struct brw_context *brw)
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
BEGIN_BATCH(9);
- OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2));
+ OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH(dw2);
OUT_BATCH(0); /* scratch space base offset */
@@ -176,13 +196,14 @@ upload_wm_state(struct brw_context *brw)
const struct brw_tracked_state gen6_wm_state = {
.dirty = {
.mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS |
- _NEW_PROGRAM_CONSTANTS),
+ _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON),
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_NR_WM_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_BATCH),
- .cache = CACHE_NEW_SAMPLER
+ .cache = (CACHE_NEW_SAMPLER |
+ CACHE_NEW_WM_PROG)
},
.emit = upload_wm_state,
};
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 4b498f8c5b..67ce8a4da0 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -92,9 +92,17 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used)
batch->ptr = NULL;
- if (!intel->no_hw) {
- drm_intel_bo_exec(batch->buf, used, NULL, 0,
- (x_off & 0xffff) | (y_off << 16));
+ if (!intel->intelScreen->no_hw) {
+ int ring;
+
+ if (intel->gen < 6 || !intel->batch->is_blit) {
+ ring = I915_EXEC_RENDER;
+ } else {
+ ring = I915_EXEC_BLT;
+ }
+
+ drm_intel_bo_mrb_exec(batch->buf, used, NULL, 0,
+ (x_off & 0xffff) | (y_off << 16), ring);
}
if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
@@ -242,10 +250,10 @@ intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch,
void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
- const void *data, GLuint bytes)
+ const void *data, GLuint bytes, bool is_blit)
{
assert((bytes & 3) == 0);
- intel_batchbuffer_require_space(batch, bytes);
+ intel_batchbuffer_require_space(batch, bytes, is_blit);
__memcpy(batch->ptr, data, bytes);
batch->ptr += bytes;
}
@@ -262,22 +270,32 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
struct intel_context *intel = batch->intel;
if (intel->gen >= 6) {
- BEGIN_BATCH(8);
-
- /* XXX workaround: issue any post sync != 0 before write cache flush = 1 */
- OUT_BATCH(_3DSTATE_PIPE_CONTROL);
- OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_BATCH(0); /* write address */
- OUT_BATCH(0); /* write data */
-
- OUT_BATCH(_3DSTATE_PIPE_CONTROL);
- OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
- PIPE_CONTROL_WRITE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_NO_WRITE);
- OUT_BATCH(0); /* write address */
- OUT_BATCH(0); /* write data */
- ADVANCE_BATCH();
+ if (intel->batch->is_blit) {
+ BEGIN_BATCH_BLT(4);
+ OUT_BATCH(MI_FLUSH_DW);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(8);
+ /* XXX workaround: issue any post sync != 0 before write
+ * cache flush = 1
+ */
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
+ OUT_BATCH(0); /* write address */
+ OUT_BATCH(0); /* write data */
+
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+ OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
+ PIPE_CONTROL_WRITE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_NO_WRITE);
+ OUT_BATCH(0); /* write address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
+ }
} else if (intel->gen >= 4) {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL |
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index 428c027c2f..635708587a 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -31,6 +31,7 @@ struct intel_batchbuffer
} emit;
#endif
+ bool is_blit;
GLuint dirty_state;
GLuint reserved_space;
};
@@ -55,7 +56,7 @@ void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
* intel_buffer_dword() calls.
*/
void intel_batchbuffer_data(struct intel_batchbuffer *batch,
- const void *data, GLuint bytes);
+ const void *data, GLuint bytes, bool is_blit);
void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
GLuint bytes);
@@ -114,8 +115,16 @@ intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f)
static INLINE void
intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
- GLuint sz)
+ GLuint sz, int is_blit)
{
+
+ if (batch->intel->gen >= 6 && batch->is_blit != is_blit &&
+ batch->ptr != batch->map) {
+ intel_batchbuffer_flush(batch);
+ }
+
+ batch->is_blit = is_blit;
+
#ifdef DEBUG
assert(sz < batch->size - 8);
#endif
@@ -124,9 +133,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
}
static INLINE void
-intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n)
+intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n, bool is_blit)
{
- intel_batchbuffer_require_space(batch, n * 4);
+ intel_batchbuffer_require_space(batch, n * 4, is_blit);
+
#ifdef DEBUG
assert(batch->map);
assert(batch->emit.start_ptr == NULL);
@@ -154,7 +164,8 @@ intel_batchbuffer_advance(struct intel_batchbuffer *batch)
*/
#define BATCH_LOCALS
-#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n)
+#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n, false)
+#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(intel->batch, n, true)
#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f)
#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index c2917e9b07..6232e479cb 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -38,6 +38,7 @@
#include "intel_reg.h"
#include "intel_regions.h"
#include "intel_batchbuffer.h"
+#include "intel_mipmap_tree.h"
#define FILE_DEBUG_FLAG DEBUG_BLIT
@@ -107,10 +108,6 @@ intelEmitCopyBlit(struct intel_context *intel,
drm_intel_bo *aper_array[3];
BATCH_LOCALS;
- /* Blits are in a different ringbuffer so we don't use them. */
- if (intel->gen >= 6)
- return GL_FALSE;
-
if (dst_tiling != I915_TILING_NONE) {
if (dst_offset & 4095)
return GL_FALSE;
@@ -140,7 +137,7 @@ intelEmitCopyBlit(struct intel_context *intel,
if (pass >= 2)
return GL_FALSE;
- intel_batchbuffer_require_space(intel->batch, 8 * 4);
+ intel_batchbuffer_require_space(intel->batch, 8 * 4, true);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__,
src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -181,7 +178,7 @@ intelEmitCopyBlit(struct intel_context *intel,
assert(dst_x < dst_x2);
assert(dst_y < dst_y2);
- BEGIN_BATCH(8);
+ BEGIN_BATCH_BLT(8);
OUT_BATCH(CMD);
OUT_BATCH(BR13 | (uint16_t)dst_pitch);
OUT_BATCH((dst_y << 16) | dst_x);
@@ -209,7 +206,7 @@ intelEmitCopyBlit(struct intel_context *intel,
* which we're clearing with triangles.
* \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear
*/
-void
+GLbitfield
intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
{
struct intel_context *intel = intel_context(ctx);
@@ -217,11 +214,9 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
GLuint clear_depth;
GLboolean all;
GLint cx, cy, cw, ch;
+ GLbitfield fail_mask = 0;
BATCH_LOCALS;
- /* Blits are in a different ringbuffer so we don't use them. */
- assert(intel->gen < 6);
-
/*
* Compute values for clearing the buffers.
*/
@@ -242,7 +237,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
ch = fb->_Ymax - fb->_Ymin;
if (cw == 0 || ch == 0)
- return;
+ return 0;
GLuint buf;
all = (cw == fb->Width && ch == fb->Height);
@@ -338,9 +333,9 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
clear[3], clear[3]);
break;
default:
- _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n",
- irb->Base.Format);
- clear_val = 0;
+ fail_mask |= bufBit;
+ mask &= ~bufBit;
+ continue;
}
}
@@ -356,7 +351,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
intel_batchbuffer_flush(intel->batch);
}
- BEGIN_BATCH(6);
+ BEGIN_BATCH_BLT(6);
OUT_BATCH(CMD);
OUT_BATCH(BR13);
OUT_BATCH((y1 << 16) | x1);
@@ -375,6 +370,8 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
else
mask &= ~bufBit; /* turn off bit, for faster loop exit */
}
+
+ return fail_mask;
}
GLboolean
@@ -393,10 +390,6 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
int dwords = ALIGN(src_size, 8) / 4;
uint32_t opcode, br13, blit_cmd;
- /* Blits are in a different ringbuffer so we don't use them. */
- if (intel->gen >= 6)
- return GL_FALSE;
-
if (dst_tiling != I915_TILING_NONE) {
if (dst_offset & 4095)
return GL_FALSE;
@@ -420,7 +413,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
intel_batchbuffer_require_space( intel->batch,
(8 * 4) +
(3 * 4) +
- dwords * 4 );
+ dwords * 4, true);
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
@@ -439,7 +432,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
if (dst_tiling != I915_TILING_NONE)
blit_cmd |= XY_DST_TILED;
- BEGIN_BATCH(8 + 3);
+ BEGIN_BATCH_BLT(8 + 3);
OUT_BATCH(opcode);
OUT_BATCH(br13);
OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
@@ -456,9 +449,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
OUT_BATCH(((y + h) << 16) | (x + w));
ADVANCE_BATCH();
- intel_batchbuffer_data( intel->batch,
- src_bits,
- dwords * 4 );
+ intel_batchbuffer_data(intel->batch,
+ src_bits,
+ dwords * 4, true);
intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -480,9 +473,6 @@ intel_emit_linear_blit(struct intel_context *intel,
GLuint pitch, height;
GLboolean ok;
- /* Blits are in a different ringbuffer so we don't use them. */
- assert(intel->gen < 6);
-
/* The pitch given to the GPU must be DWORD aligned, and
* we want width to match pitch. Max width is (1 << 15 - 1),
* rounding that down to the nearest DWORD is 1 << 15 - 4
@@ -514,3 +504,81 @@ intel_emit_linear_blit(struct intel_context *intel,
assert(ok);
}
}
+
+/**
+ * Used to initialize the alpha value of an ARGB8888 teximage after
+ * loading it from an XRGB8888 source, by emitting an XY_COLOR_BLT with
+ * XY_BLT_WRITE_ALPHA over the image's rectangle in its miptree region.
+ * This is very common with glCopyTexImage2D().
+ */
+void
+intel_set_teximage_alpha_to_one(struct gl_context *ctx,
+ struct intel_texture_image *intel_image)
+{
+ struct intel_context *intel = intel_context(ctx);
+ unsigned int image_x, image_y;
+ uint32_t x1, y1, x2, y2;
+ uint32_t BR13, CMD;
+ int pitch, cpp;
+ drm_intel_bo *aper_array[2];
+ struct intel_region *region = intel_image->mt->region;
+ BATCH_LOCALS;
+
+ assert(intel_image->base.TexFormat == MESA_FORMAT_ARGB8888); /* only ARGB8888 images are handled */
+
+ /* get dest x/y in destination texture */
+ intel_miptree_get_image_offset(intel_image->mt,
+ intel_image->level,
+ intel_image->face,
+ 0,
+ &image_x, &image_y);
+
+ x1 = image_x;
+ y1 = image_y;
+ x2 = image_x + intel_image->base.Width; /* rectangle spans the full image width */
+ y2 = image_y + intel_image->base.Height; /* ... and the full image height */
+
+ pitch = region->pitch;
+ cpp = region->cpp; /* NOTE(review): cpp is assigned but never read below; region->cpp is used directly */
+
+ DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
+ __FUNCTION__,
+ intel_image->mt->region->buffer, (pitch * region->cpp),
+ x1, y1, x2 - x1, y2 - y1);
+
+ BR13 = br13_for_cpp(region->cpp) | 0xf0 << 16; /* NOTE(review): 0xf0 in bits 16-23 is presumably the raster operation - confirm against the blitter docs */
+ CMD = XY_COLOR_BLT_CMD;
+ CMD |= XY_BLT_WRITE_ALPHA; /* the only channel-write flag set here, so RGB is left untouched */
+
+ assert(region->tiling != I915_TILING_Y); /* Y-tiled destinations are not handled by this path */
+
+#ifndef I915
+ if (region->tiling != I915_TILING_NONE) {
+ CMD |= XY_DST_TILED;
+ pitch /= 4; /* presumably the tiled pitch field is in dwords rather than bytes - TODO confirm */
+ }
+#endif
+ BR13 |= (pitch * region->cpp); /* destination pitch goes in the low bits of BR13 */
+
+ /* do space check before going any further */
+ aper_array[0] = intel->batch->buf;
+ aper_array[1] = region->buffer;
+
+ if (drm_intel_bufmgr_check_aperture_space(aper_array,
+ ARRAY_SIZE(aper_array)) != 0) {
+ intel_batchbuffer_flush(intel->batch); /* nonzero result: flush the current batch before emitting the blit */
+ }
+
+ BEGIN_BATCH_BLT(6); /* six entries follow: CMD, BR13, two corner pairs, destination reloc, color */
+ OUT_BATCH(CMD);
+ OUT_BATCH(BR13);
+ OUT_BATCH((y1 << 16) | x1);
+ OUT_BATCH((y2 << 16) | x2);
+ OUT_RELOC_FENCED(region->buffer,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
+ OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h
index 0163146573..88322c7b49 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.h
+++ b/src/mesa/drivers/dri/intel/intel_blit.h
@@ -33,7 +33,7 @@
extern void intelCopyBuffer(const __DRIdrawable * dpriv,
const drm_clip_rect_t * rect);
-extern void intelClearWithBlit(struct gl_context * ctx, GLbitfield mask);
+extern GLbitfield intelClearWithBlit(struct gl_context * ctx, GLbitfield mask);
GLboolean
intelEmitCopyBlit(struct intel_context *intel,
@@ -69,5 +69,7 @@ void intel_emit_linear_blit(struct intel_context *intel,
drm_intel_bo *src_bo,
unsigned int src_offset,
unsigned int size);
+void intel_set_teximage_alpha_to_one(struct gl_context *ctx,
+ struct intel_texture_image *intel_image);
#endif
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 87da60a771..d917161c4b 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -27,6 +27,7 @@
#include "main/imports.h"
+#include "main/mfeatures.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/bufferobj.h"
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index fa451f0045..82d29e7671 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -85,6 +85,8 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
GLbitfield blit_mask = 0;
GLbitfield swrast_mask = 0;
struct gl_framebuffer *fb = ctx->DrawBuffer;
+ struct intel_renderbuffer *irb;
+ int i;
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
intel->front_buffer_dirty = GL_TRUE;
@@ -93,6 +95,22 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
if (0)
fprintf(stderr, "%s\n", __FUNCTION__);
+ /* Get SW clears out of the way: Anything without an intel_renderbuffer */
+ for (i = 0; i < BUFFER_COUNT; i++) {
+ if (!(mask & (1 << i)))
+ continue;
+
+ irb = intel_get_renderbuffer(fb, i);
+ if (unlikely(!irb)) {
+ swrast_mask |= (1 << i);
+ mask &= ~(1 << i);
+ }
+ }
+ if (unlikely(swrast_mask)) {
+ debug_mask("swrast", swrast_mask);
+ _swrast_Clear(ctx, swrast_mask);
+ }
+
/* HW color buffers (front, back, aux, generic FBO, etc) */
if (colorMask == ~0) {
/* clear all R,G,B,A */
@@ -151,44 +169,18 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
}
}
- if (intel->gen >= 6) {
- /* Blits are in a different ringbuffer so we don't use them. */
- tri_mask |= blit_mask;
- blit_mask = 0;
- }
-
- /* SW fallback clearing */
- swrast_mask = mask & ~tri_mask & ~blit_mask;
-
- {
- /* look for non-Intel renderbuffers (clear them with swrast) */
- GLbitfield blit_or_tri = blit_mask | tri_mask;
- while (blit_or_tri) {
- GLuint i = _mesa_ffs(blit_or_tri) - 1;
- GLbitfield bufBit = 1 << i;
- if (!fb->Attachment[i].Renderbuffer->ClassID) {
- blit_mask &= ~bufBit;
- tri_mask &= ~bufBit;
- swrast_mask |= bufBit;
- }
- blit_or_tri ^= bufBit;
- }
- }
+ /* Anything left, just use tris */
+ tri_mask |= mask & ~blit_mask;
if (blit_mask) {
debug_mask("blit", blit_mask);
- intelClearWithBlit(ctx, blit_mask);
+ tri_mask |= intelClearWithBlit(ctx, blit_mask);
}
if (tri_mask) {
debug_mask("tri", tri_mask);
_mesa_meta_Clear(&intel->ctx, tri_mask);
}
-
- if (swrast_mask) {
- debug_mask("swrast", swrast_mask);
- _swrast_Clear(ctx, swrast_mask);
- }
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 152cdcaf37..2a5029964b 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -519,7 +519,6 @@ static const struct dri_debug_control debug_control[] = {
{ "sing", DEBUG_SINGLE_THREAD },
{ "thre", DEBUG_SINGLE_THREAD },
{ "wm", DEBUG_WM },
- { "glsl_force", DEBUG_GLSL_FORCE },
{ "urb", DEBUG_URB },
{ "vs", DEBUG_VS },
{ "clip", DEBUG_CLIP },
@@ -566,7 +565,8 @@ intel_glFlush(struct gl_context *ctx)
intel_flush(ctx);
intel_flush_front(ctx);
- intel->need_throttle = GL_TRUE;
+ if (intel->is_front_buffer_rendering)
+ intel->need_throttle = GL_TRUE;
}
void
@@ -683,6 +683,69 @@ intelInitContext(struct intel_context *intel,
}
}
+ memset(&ctx->TextureFormatSupported, 0,
+ sizeof(ctx->TextureFormatSupported));
+ ctx->TextureFormatSupported[MESA_FORMAT_ARGB8888] = GL_TRUE;
+ if (intel->has_xrgb_textures)
+ ctx->TextureFormatSupported[MESA_FORMAT_XRGB8888] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_ARGB4444] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_ARGB1555] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_RGB565] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_L8] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_A8] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_I8] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_AL88] = GL_TRUE;
+ if (intel->gen >= 4)
+ ctx->TextureFormatSupported[MESA_FORMAT_AL1616] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE;
+ /*
+ * This was disabled in initial FBO enabling to avoid combinations
+ * of depth+stencil that wouldn't work together. We since decided
+ * that it was OK, since it's up to the app to come up with the
+ * combo that actually works, so this can probably be re-enabled.
+ */
+ /*
+ ctx->TextureFormatSupported[MESA_FORMAT_Z16] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_Z24] = GL_TRUE;
+ */
+
+ /* ctx->Extensions.MESA_ycbcr_texture */
+ ctx->TextureFormatSupported[MESA_FORMAT_YCBCR] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_YCBCR_REV] = GL_TRUE;
+
+ /* GL_3DFX_texture_compression_FXT1 */
+ ctx->TextureFormatSupported[MESA_FORMAT_RGB_FXT1] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_RGBA_FXT1] = GL_TRUE;
+
+ /* GL_EXT_texture_compression_s3tc */
+ ctx->TextureFormatSupported[MESA_FORMAT_RGB_DXT1] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT1] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT3] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT5] = GL_TRUE;
+
+#ifndef I915
+ /* GL_ARB_texture_rg */
+ ctx->TextureFormatSupported[MESA_FORMAT_R8] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_R16] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_RG88] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_RG1616] = GL_TRUE;
+
+ ctx->TextureFormatSupported[MESA_FORMAT_DUDV8] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_SIGNED_RGBA8888_REV] = GL_TRUE;
+
+ /* GL_EXT_texture_sRGB */
+ ctx->TextureFormatSupported[MESA_FORMAT_SARGB8] = GL_TRUE;
+ if (intel->gen >= 5 || intel->is_g4x)
+ ctx->TextureFormatSupported[MESA_FORMAT_SRGB_DXT1] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT1] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT3] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT5] = GL_TRUE;
+ if (intel->has_luminance_srgb) {
+ ctx->TextureFormatSupported[MESA_FORMAT_SL8] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_SLA8] = GL_TRUE;
+ }
+#endif
+
driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
sPriv->myNum, (intel->gen >= 4) ? "i965" : "i915");
if (intelScreen->deviceID == PCI_CHIP_I865_G)
@@ -800,11 +863,6 @@ intelInitContext(struct intel_context *intel,
if (INTEL_DEBUG & DEBUG_BUFMGR)
dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE);
- /* XXX force SIMD8 kernel for Sandybridge before we fixed
- SIMD16 interpolation. */
- if (intel->gen == 6)
- INTEL_DEBUG |= DEBUG_GLSL_FORCE;
-
intel->batch = intel_batchbuffer_alloc(intel);
intel_fbo_init(intel);
@@ -838,11 +896,6 @@ intelInitContext(struct intel_context *intel,
intel->always_flush_cache = 1;
}
- /* Disable all hardware rendering (skip emitting batches and fences/waits
- * to the kernel)
- */
- intel->no_hw = getenv("INTEL_NO_HW") != NULL;
-
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 9d5139c000..fd3c3ba58f 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -29,7 +29,7 @@
#define INTELCONTEXT_INC
-
+#include <stdbool.h>
#include "main/mtypes.h"
#include "main/mm.h"
#include "dri_metaops.h"
@@ -149,6 +149,7 @@ struct intel_context
void (*assert_not_dirty) (struct intel_context *intel);
void (*debug_batch)(struct intel_context *intel);
+ bool (*render_target_supported)(gl_format format);
} vtbl;
struct dri_metaops meta;
@@ -207,7 +208,6 @@ struct intel_context
GLboolean hw_stipple;
GLboolean depth_buffer_is_float;
GLboolean no_rast;
- GLboolean no_hw;
GLboolean always_flush_batch;
GLboolean always_flush_cache;
@@ -362,7 +362,6 @@ extern int INTEL_DEBUG;
#define DEBUG_WM 0x800000
#define DEBUG_URB 0x1000000
#define DEBUG_VS 0x2000000
-#define DEBUG_GLSL_FORCE 0x4000000
#define DEBUG_CLIP 0x8000000
#define DBG(...) do { \
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 556a4195bd..fab533f39f 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -25,12 +25,15 @@
*
**************************************************************************/
+#include "main/mfeatures.h"
+
#include "intel_chipset.h"
#include "intel_context.h"
#include "intel_extensions.h"
#include "utils.h"
+#define need_GL_ARB_ES2_compatibility
#define need_GL_ARB_draw_elements_base_vertex
#define need_GL_ARB_framebuffer_object
#define need_GL_ARB_map_buffer_range
@@ -78,8 +81,10 @@
* i965_dri.
*/
static const struct dri_extension card_extensions[] = {
+ { "GL_ARB_ES2_compatibility", GL_ARB_ES2_compatibility_functions },
{ "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions },
{ "GL_ARB_explicit_attrib_location", NULL },
+ { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions},
{ "GL_ARB_half_float_pixel", NULL },
{ "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions },
{ "GL_ARB_multitexture", NULL },
@@ -161,7 +166,6 @@ static const struct dri_extension brw_extensions[] = {
{ "GL_ARB_fragment_program", NULL },
{ "GL_ARB_fragment_program_shadow", NULL },
{ "GL_ARB_fragment_shader", NULL },
- { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions},
{ "GL_ARB_half_float_vertex", NULL },
{ "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
{ "GL_ARB_point_sprite", NULL },
diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
index 71c86339c7..5ef6b0561d 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
@@ -26,6 +26,7 @@
**************************************************************************/
#include "main/extensions.h"
+#include "main/mfeatures.h"
#include "intel_extensions.h"
@@ -62,6 +63,7 @@ static const char *es2_extensions[] = {
"GL_EXT_blend_minmax",
"GL_EXT_blend_subtract",
"GL_EXT_stencil_wrap",
+ "GL_NV_blend_square",
/* Optional GLES2 */
"GL_ARB_framebuffer_object",
@@ -79,6 +81,17 @@ static const char *es2_extensions[] = {
};
/**
+ * \brief Extensions to disable.
+ *
+ * These extensions must be manually disabled because they may have been
+ * enabled by default.
+ */
+static const char* es2_extensions_disabled[] = {
+ "GL_OES_standard_derivatives",
+ NULL, /* sentinel: the disabling loop in intelInitExtensionsES2 stops at the first NULL entry */
+};
+
+/**
* Initializes potential list of extensions if ctx == NULL, or actually enables
* extensions for a context.
*/
@@ -92,4 +105,6 @@ intelInitExtensionsES2(struct gl_context *ctx)
for (i = 0; es2_extensions[i]; i++)
_mesa_enable_extension(ctx, es2_extensions[i]);
+ for (i = 0; es2_extensions_disabled[i]; i++)
+ _mesa_disable_extension(ctx, es2_extensions_disabled[i]);
}
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 862a13d2ea..0db5a491c8 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -28,6 +28,7 @@
#include "main/imports.h"
#include "main/macros.h"
+#include "main/mfeatures.h"
#include "main/mtypes.h"
#include "main/fbobject.h"
#include "main/framebuffer.h"
@@ -42,6 +43,11 @@
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
+#include "intel_tex.h"
+#include "intel_span.h"
+#ifndef I915
+#include "brw_context.h"
+#endif
#define FILE_DEBUG_FLAG DEBUG_FBO
@@ -107,79 +113,27 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
ASSERT(rb->Name != 0);
switch (internalFormat) {
- case GL_RED:
- case GL_R8:
- rb->Format = MESA_FORMAT_R8;
- rb->DataType = GL_UNSIGNED_BYTE;
- break;
- case GL_R16:
- rb->Format = MESA_FORMAT_R16;
- rb->DataType = GL_UNSIGNED_SHORT;
- break;
- case GL_RG:
- case GL_RG8:
- rb->Format = MESA_FORMAT_RG88;
- rb->DataType = GL_UNSIGNED_BYTE;
- break;
- case GL_RG16:
- rb->Format = MESA_FORMAT_RG1616;
- rb->DataType = GL_UNSIGNED_SHORT;
- break;
- case GL_R3_G3_B2:
- case GL_RGB4:
- case GL_RGB5:
- rb->Format = MESA_FORMAT_RGB565;
- rb->DataType = GL_UNSIGNED_BYTE;
- break;
- case GL_RGB:
- case GL_RGB8:
- case GL_RGB10:
- case GL_RGB12:
- case GL_RGB16:
- rb->Format = MESA_FORMAT_XRGB8888;
- rb->DataType = GL_UNSIGNED_BYTE;
- break;
- case GL_RGBA:
- case GL_RGBA2:
- case GL_RGBA4:
- case GL_RGB5_A1:
- case GL_RGBA8:
- case GL_RGB10_A2:
- case GL_RGBA12:
- case GL_RGBA16:
- rb->Format = MESA_FORMAT_ARGB8888;
- rb->DataType = GL_UNSIGNED_BYTE;
- break;
- case GL_ALPHA:
- case GL_ALPHA8:
- rb->Format = MESA_FORMAT_A8;
- rb->DataType = GL_UNSIGNED_BYTE;
- break;
- case GL_DEPTH_COMPONENT16:
- rb->Format = MESA_FORMAT_Z16;
- rb->DataType = GL_UNSIGNED_SHORT;
+ default:
+ /* Use the same format-choice logic as for textures.
+ * Renderbuffers aren't any different from textures for us,
+ * except they're less useful because you can't texture with
+ * them.
+ */
+ rb->Format = intel->ctx.Driver.ChooseTextureFormat(ctx, internalFormat,
+ GL_NONE, GL_NONE);
break;
case GL_STENCIL_INDEX:
case GL_STENCIL_INDEX1_EXT:
case GL_STENCIL_INDEX4_EXT:
case GL_STENCIL_INDEX8_EXT:
case GL_STENCIL_INDEX16_EXT:
- case GL_DEPTH_COMPONENT:
- case GL_DEPTH_COMPONENT24:
- case GL_DEPTH_COMPONENT32:
- case GL_DEPTH_STENCIL_EXT:
- case GL_DEPTH24_STENCIL8_EXT:
- /* alloc a depth+stencil buffer */
+ /* These aren't actual texture formats, so force them here. */
rb->Format = MESA_FORMAT_S8_Z24;
- rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
break;
- default:
- _mesa_problem(ctx,
- "Unexpected format in intel_alloc_renderbuffer_storage");
- return GL_FALSE;
}
rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
+ rb->DataType = intel_mesa_format_to_rb_datatype(rb->Format);
cpp = _mesa_get_format_bytes(rb->Format);
intel_flush(ctx);
@@ -195,10 +149,15 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
DBG("Allocating %d x %d Intel RBO\n", width, height);
tiling = I915_TILING_NONE;
+ if (intel->use_texture_tiling) {
+ GLenum base_format = _mesa_get_format_base_format(rb->Format);
- /* Gen6 requires depth must be tiling */
- if (intel->gen >= 6 && rb->Format == MESA_FORMAT_S8_Z24)
- tiling = I915_TILING_Y;
+ if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT ||
+ base_format == GL_DEPTH_STENCIL))
+ tiling = I915_TILING_Y;
+ else
+ tiling = I915_TILING_X;
+ }
irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp,
width, height, GL_TRUE);
@@ -334,53 +293,10 @@ intel_create_renderbuffer(gl_format format)
_mesa_init_renderbuffer(&irb->Base, 0);
irb->Base.ClassID = INTEL_RB_CLASS;
-
- switch (format) {
- case MESA_FORMAT_RGB565:
- irb->Base._BaseFormat = GL_RGB;
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- break;
- case MESA_FORMAT_XRGB8888:
- irb->Base._BaseFormat = GL_RGB;
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- break;
- case MESA_FORMAT_ARGB8888:
- irb->Base._BaseFormat = GL_RGBA;
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- break;
- case MESA_FORMAT_Z16:
- irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
- irb->Base.DataType = GL_UNSIGNED_SHORT;
- break;
- case MESA_FORMAT_X8_Z24:
- irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
- irb->Base.DataType = GL_UNSIGNED_INT;
- break;
- case MESA_FORMAT_S8_Z24:
- irb->Base._BaseFormat = GL_DEPTH_STENCIL;
- irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
- break;
- case MESA_FORMAT_A8:
- irb->Base._BaseFormat = GL_ALPHA;
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- break;
- case MESA_FORMAT_R8:
- irb->Base._BaseFormat = GL_RED;
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- break;
- case MESA_FORMAT_RG88:
- irb->Base._BaseFormat = GL_RG;
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- break;
- default:
- _mesa_problem(NULL,
- "Unexpected intFormat in intel_create_renderbuffer");
- free(irb);
- return NULL;
- }
-
+ irb->Base._BaseFormat = _mesa_get_format_base_format(format);
irb->Base.Format = format;
irb->Base.InternalFormat = irb->Base._BaseFormat;
+ irb->Base.DataType = intel_mesa_format_to_rb_datatype(format);
/* intel-specific methods */
irb->Base.Delete = intel_delete_renderbuffer;
@@ -457,66 +373,16 @@ static GLboolean
intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb,
struct gl_texture_image *texImage)
{
- if (texImage->TexFormat == MESA_FORMAT_ARGB8888) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to RGBA8 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_XRGB8888) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to XGBA8 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_SARGB8) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to SARGB8 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_RGB565) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to RGB5 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_ARGB1555) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to ARGB1555 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_ARGB4444) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to ARGB4444 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_A8) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to A8 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_R8) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to R8 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_RG88) {
- irb->Base.DataType = GL_UNSIGNED_BYTE;
- DBG("Render to RG88 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_R16) {
- irb->Base.DataType = GL_UNSIGNED_SHORT;
- DBG("Render to R8 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_RG1616) {
- irb->Base.DataType = GL_UNSIGNED_SHORT;
- DBG("Render to RG88 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_Z16) {
- irb->Base.DataType = GL_UNSIGNED_SHORT;
- DBG("Render to DEPTH16 texture OK\n");
- }
- else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) {
- irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
- DBG("Render to DEPTH_STENCIL texture OK\n");
- }
- else {
+ if (!intel_span_supports_format(texImage->TexFormat)) {
DBG("Render to texture BAD FORMAT %s\n",
_mesa_get_format_name(texImage->TexFormat));
return GL_FALSE;
+ } else {
+ DBG("Render to texture %s\n", _mesa_get_format_name(texImage->TexFormat));
}
irb->Base.Format = texImage->TexFormat;
-
+ irb->Base.DataType = intel_mesa_format_to_rb_datatype(texImage->TexFormat);
irb->Base.InternalFormat = texImage->InternalFormat;
irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat);
irb->Base.Width = texImage->Width;
@@ -558,6 +424,24 @@ intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage)
return irb;
}
+static void /* Records where intel_image's 2D slice (selected by zoffset) sits inside its miptree region. */
+intel_set_draw_offset_for_image(struct intel_texture_image *intel_image,
+ int zoffset)
+{
+ struct intel_mipmap_tree *mt = intel_image->mt;
+ unsigned int dst_x, dst_y;
+
+ /* compute offset of the particular 2D image within the texture region */
+ intel_miptree_get_image_offset(intel_image->mt,
+ intel_image->level,
+ intel_image->face,
+ zoffset,
+ &dst_x, &dst_y);
+
+ mt->region->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; /* linear form of (dst_x, dst_y); presumably pitch is in texels and cpp is bytes per texel - TODO confirm */
+ mt->region->draw_x = dst_x; /* the same position kept as x/y coordinates */
+ mt->region->draw_y = dst_y;
+}
/**
* Called by glFramebufferTexture[123]DEXT() (and other places) to
@@ -574,7 +458,6 @@ intel_render_texture(struct gl_context * ctx,
= att->Texture->Image[att->CubeMapFace][att->TextureLevel];
struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
struct intel_texture_image *intel_image;
- GLuint dst_x, dst_y;
(void) fb;
@@ -620,19 +503,53 @@ intel_render_texture(struct gl_context * ctx,
intel_region_reference(&irb->region, intel_image->mt->region);
}
- /* compute offset of the particular 2D image within the texture region */
- intel_miptree_get_image_offset(intel_image->mt,
- att->TextureLevel,
- att->CubeMapFace,
- att->Zoffset,
- &dst_x, &dst_y);
-
- intel_image->mt->region->draw_offset = (dst_y * intel_image->mt->region->pitch +
- dst_x) * intel_image->mt->cpp;
- intel_image->mt->region->draw_x = dst_x;
- intel_image->mt->region->draw_y = dst_y;
+ intel_set_draw_offset_for_image(intel_image, att->Zoffset);
intel_image->used_as_render_target = GL_TRUE;
+#ifndef I915
+ if (!brw_context(ctx)->has_surface_tile_offset &&
+ (intel_image->mt->region->draw_offset & 4095) != 0) {
+ /* Original gen4 hardware couldn't draw to a non-tile-aligned
+ * destination in a miptree unless you actually setup your
+ * renderbuffer as a miptree and used the fragile
+ * lod/array_index/etc. controls to select the image. So,
+ * instead, we just make a new single-level miptree and render
+ * into that.
+ */
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_mipmap_tree *old_mt = intel_image->mt;
+ struct intel_mipmap_tree *new_mt;
+ int comp_byte = 0, texel_bytes;
+
+ if (_mesa_is_format_compressed(intel_image->base.TexFormat))
+ comp_byte = intel_compressed_num_bytes(intel_image->base.TexFormat);
+
+ texel_bytes = _mesa_get_format_bytes(intel_image->base.TexFormat);
+
+ new_mt = intel_miptree_create(intel, newImage->TexObject->Target,
+ intel_image->base._BaseFormat,
+ intel_image->base.InternalFormat,
+ intel_image->level,
+ intel_image->level,
+ intel_image->base.Width,
+ intel_image->base.Height,
+ intel_image->base.Depth,
+ texel_bytes, comp_byte, GL_TRUE);
+
+ intel_miptree_image_copy(intel,
+ new_mt,
+ intel_image->face,
+ intel_image->level,
+ old_mt);
+
+ intel_miptree_release(intel, &intel_image->mt);
+ intel_image->mt = new_mt;
+ intel_set_draw_offset_for_image(intel_image, att->Zoffset);
+
+ intel_region_release(&irb->region);
+ intel_region_reference(&irb->region, intel_image->mt->region);
+ }
+#endif
/* update drawing region, etc */
intel_draw_buffer(ctx, fb);
}
@@ -655,7 +572,8 @@ intel_finish_render_texture(struct gl_context * ctx,
_glthread_GetID(), att->Texture->Name);
/* Flag that this image may now be validated into the object's miptree. */
- intel_image->used_as_render_target = GL_FALSE;
+ if (intel_image)
+ intel_image->used_as_render_target = GL_FALSE;
/* Since we've (probably) rendered to the texture and will (likely) use
* it in the texture domain later on in this batchbuffer, flush the
@@ -671,6 +589,7 @@ intel_finish_render_texture(struct gl_context * ctx,
static void
intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
{
+ struct intel_context *intel = intel_context(ctx);
const struct intel_renderbuffer *depthRb =
intel_get_renderbuffer(fb, BUFFER_DEPTH);
const struct intel_renderbuffer *stencilRb =
@@ -678,10 +597,10 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
int i;
if (depthRb && stencilRb && stencilRb != depthRb) {
- if (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE &&
- ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE &&
- (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Texture->Name ==
- ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Texture->Name)) {
+ if (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE &&
+ fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE &&
+ (fb->Attachment[BUFFER_DEPTH].Texture->Name ==
+ fb->Attachment[BUFFER_STENCIL].Texture->Name)) {
/* OK */
} else {
/* we only support combined depth/stencil buffers, not separate
@@ -694,33 +613,35 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
}
}
- for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
- struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ for (i = 0; i < Elements(fb->Attachment); i++) {
+ struct gl_renderbuffer *rb;
+ struct intel_renderbuffer *irb;
+
+ if (fb->Attachment[i].Type == GL_NONE)
+ continue;
- if (rb == NULL)
+ /* A supported attachment will have a Renderbuffer set either
+ * from being a Renderbuffer or being a texture that got the
+ * intel_wrap_texture() treatment.
+ */
+ rb = fb->Attachment[i].Renderbuffer;
+ if (rb == NULL) {
+ DBG("attachment without renderbuffer\n");
+ fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
continue;
+ }
+ irb = intel_renderbuffer(rb);
if (irb == NULL) {
DBG("software rendering renderbuffer\n");
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
continue;
}
- switch (irb->Base.Format) {
- case MESA_FORMAT_ARGB8888:
- case MESA_FORMAT_XRGB8888:
- case MESA_FORMAT_SARGB8:
- case MESA_FORMAT_RGB565:
- case MESA_FORMAT_ARGB1555:
- case MESA_FORMAT_ARGB4444:
- case MESA_FORMAT_A8:
- case MESA_FORMAT_R8:
- case MESA_FORMAT_R16:
- case MESA_FORMAT_RG88:
- case MESA_FORMAT_RG1616:
- break;
- default:
+ if (!intel_span_supports_format(irb->Base.Format) ||
+ !intel->vtbl.render_target_supported(irb->Base.Format)) {
+ DBG("Unsupported texture/renderbuffer format attached: %s\n",
+ _mesa_get_format_name(irb->Base.Format));
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
}
}
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 9c4e5c5ee8..a3409274fb 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -157,8 +157,6 @@ struct intel_mipmap_tree *
intel_miptree_create_for_region(struct intel_context *intel,
GLenum target,
GLenum internal_format,
- GLuint first_level,
- GLuint last_level,
struct intel_region *region,
GLuint depth0,
GLuint compress_byte)
@@ -166,7 +164,7 @@ intel_miptree_create_for_region(struct intel_context *intel,
struct intel_mipmap_tree *mt;
mt = intel_miptree_create_internal(intel, target, internal_format,
- first_level, last_level,
+ 0, 0,
region->width, region->height, 1,
region->cpp, compress_byte,
I915_TILING_NONE);
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
index 21db2f4d3b..760a8bce60 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -137,8 +137,6 @@ struct intel_mipmap_tree *
intel_miptree_create_for_region(struct intel_context *intel,
GLenum target,
GLenum internal_format,
- GLuint first_level,
- GLuint last_level,
struct intel_region *region,
GLuint depth0,
GLuint compress_byte);
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 955b100b21..5258699d3f 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -37,6 +37,8 @@
#define FLUSH_MAP_CACHE (1 << 0)
#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2)
+#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2)
+
/* Stalls command execution waiting for the given events to have occurred. */
#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23))
#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 061f0d278d..5d14bcd34c 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -31,23 +31,11 @@
#include "main/renderbuffer.h"
#include "main/hash.h"
#include "main/fbobject.h"
+#include "main/mfeatures.h"
#include "utils.h"
#include "xmlpool.h"
-#include "intel_batchbuffer.h"
-#include "intel_buffers.h"
-#include "intel_bufmgr.h"
-#include "intel_chipset.h"
-#include "intel_fbo.h"
-#include "intel_screen.h"
-#include "intel_tex.h"
-#include "intel_regions.h"
-
-#include "i915_drm.h"
-
-#define DRI_CONF_TEXTURE_TILING(def) \
-
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
@@ -92,6 +80,17 @@ DRI_CONF_END;
const GLuint __driNConfigOptions = 11;
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+#include "intel_bufmgr.h"
+#include "intel_chipset.h"
+#include "intel_fbo.h"
+#include "intel_screen.h"
+#include "intel_tex.h"
+#include "intel_regions.h"
+
+#include "i915_drm.h"
+
#ifdef USE_NEW_INTERFACE
static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
#endif /*USE_NEW_INTERFACE */
@@ -452,7 +451,7 @@ intelCreateContext(gl_api api,
return brwCreateContext(api, mesaVis,
driContextPriv, sharedContextPrivate);
#endif
- fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
+ fprintf(stderr, "Unrecognized deviceID 0x%x\n", intelScreen->deviceID);
return GL_FALSE;
}
@@ -462,7 +461,8 @@ intel_init_bufmgr(struct intel_screen *intelScreen)
__DRIscreen *spriv = intelScreen->driScrnPriv;
int num_fences = 0;
- intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
+ intelScreen->no_hw = (getenv("INTEL_NO_HW") != NULL ||
+ getenv("INTEL_DEVID_OVERRIDE") != NULL);
intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ);
if (intelScreen->bufmgr == NULL) {
@@ -497,6 +497,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
GLenum fb_format[3];
GLenum fb_type[3];
unsigned int api_mask;
+ char *devid_override;
static const GLenum back_buffer_modes[] = {
GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
@@ -523,6 +524,16 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
&intelScreen->deviceID))
return GL_FALSE;
+ /* Allow an override of the device ID (decimal, or hex with a 0x prefix)
+ * so the driver can produce dumps for debugging new chipset enablement.
+ * This implies INTEL_NO_HW, to avoid programming your actual GPU
+ * incorrectly.
+ */
+ devid_override = getenv("INTEL_DEVID_OVERRIDE");
+ if (devid_override) {
+ intelScreen->deviceID = strtol(devid_override, NULL, 0);
+ }
+
api_mask = (1 << __DRI_API_OPENGL);
#if FEATURE_ES1
api_mask |= (1 << __DRI_API_GLES);
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 104cadf0f9..1f41518535 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -25,6 +25,7 @@
*
**************************************************************************/
+#include <stdbool.h>
#include "main/glheader.h"
#include "main/macros.h"
#include "main/mtypes.h"
@@ -113,6 +114,26 @@ intel_set_span_functions(struct intel_context *intel,
#define TAG2(x,y) intel_##x##y##_A8
#include "spantmp2.h"
+#define SPANTMP_MESA_FMT MESA_FORMAT_R8
+#define TAG(x) intel_##x##_R8
+#define TAG2(x,y) intel_##x##y##_R8
+#include "spantmp2.h"
+
+#define SPANTMP_MESA_FMT MESA_FORMAT_RG88
+#define TAG(x) intel_##x##_RG88
+#define TAG2(x,y) intel_##x##y##_RG88
+#include "spantmp2.h"
+
+#define SPANTMP_MESA_FMT MESA_FORMAT_R16
+#define TAG(x) intel_##x##_R16
+#define TAG2(x,y) intel_##x##y##_R16
+#include "spantmp2.h"
+
+#define SPANTMP_MESA_FMT MESA_FORMAT_RG1616
+#define TAG(x) intel_##x##_RG1616
+#define TAG2(x,y) intel_##x##y##_RG1616
+#include "spantmp2.h"
+
#define LOCAL_DEPTH_VARS \
struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
const GLint yScale = rb->Name ? 1 : -1; \
@@ -339,6 +360,32 @@ intel_unmap_vertex_shader_textures(struct gl_context *ctx)
}
}
+typedef void (*span_init_func)(struct gl_renderbuffer *rb);
+
+static span_init_func intel_span_init_funcs[MESA_FORMAT_COUNT] =
+{
+ [MESA_FORMAT_A8] = intel_InitPointers_A8,
+ [MESA_FORMAT_RGB565] = intel_InitPointers_RGB565,
+ [MESA_FORMAT_ARGB4444] = intel_InitPointers_ARGB4444,
+ [MESA_FORMAT_ARGB1555] = intel_InitPointers_ARGB1555,
+ [MESA_FORMAT_XRGB8888] = intel_InitPointers_xRGB8888,
+ [MESA_FORMAT_ARGB8888] = intel_InitPointers_ARGB8888,
+ [MESA_FORMAT_SARGB8] = intel_InitPointers_ARGB8888,
+ [MESA_FORMAT_Z16] = intel_InitDepthPointers_z16,
+ [MESA_FORMAT_X8_Z24] = intel_InitDepthPointers_z24_s8,
+ [MESA_FORMAT_S8_Z24] = intel_InitDepthPointers_z24_s8,
+ [MESA_FORMAT_R8] = intel_InitPointers_R8,
+ [MESA_FORMAT_RG88] = intel_InitPointers_RG88,
+ [MESA_FORMAT_R16] = intel_InitPointers_R16,
+ [MESA_FORMAT_RG1616] = intel_InitPointers_RG1616,
+};
+
+bool
+intel_span_supports_format(gl_format format)
+{
+ return intel_span_init_funcs[format] != NULL;
+}
+
/**
* Plug in appropriate span read/write functions for the given renderbuffer.
* These are used for the software fallbacks.
@@ -349,37 +396,6 @@ intel_set_span_functions(struct intel_context *intel,
{
struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb;
- switch (irb->Base.Format) {
- case MESA_FORMAT_A8:
- intel_InitPointers_A8(rb);
- break;
- case MESA_FORMAT_RGB565:
- intel_InitPointers_RGB565(rb);
- break;
- case MESA_FORMAT_ARGB4444:
- intel_InitPointers_ARGB4444(rb);
- break;
- case MESA_FORMAT_ARGB1555:
- intel_InitPointers_ARGB1555(rb);
- break;
- case MESA_FORMAT_XRGB8888:
- intel_InitPointers_xRGB8888(rb);
- break;
- case MESA_FORMAT_ARGB8888:
- case MESA_FORMAT_SARGB8:
- intel_InitPointers_ARGB8888(rb);
- break;
- case MESA_FORMAT_Z16:
- intel_InitDepthPointers_z16(rb);
- break;
- case MESA_FORMAT_X8_Z24:
- case MESA_FORMAT_S8_Z24:
- intel_InitDepthPointers_z24_s8(rb);
- break;
- default:
- _mesa_problem(NULL,
- "Unexpected MesaFormat %d in intelSetSpanFunctions",
- irb->Base.Format);
- break;
- }
+ assert(intel_span_init_funcs[irb->Base.Format]);
+ intel_span_init_funcs[irb->Base.Format](rb);
}
diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h
index aa8d08e843..5a4c4e8e52 100644
--- a/src/mesa/drivers/dri/intel/intel_span.h
+++ b/src/mesa/drivers/dri/intel/intel_span.h
@@ -28,6 +28,9 @@
#ifndef _INTEL_SPAN_H
#define _INTEL_SPAN_H
+#include "main/formats.h"
+#include <stdbool.h>
+
extern void intelInitSpanFuncs(struct gl_context * ctx);
extern void intelSpanRenderFinish(struct gl_context * ctx);
@@ -38,5 +41,6 @@ void intel_renderbuffer_unmap(struct intel_context *intel,
struct gl_renderbuffer *rb);
void intel_map_vertex_shader_textures(struct gl_context *ctx);
void intel_unmap_vertex_shader_textures(struct gl_context *ctx);
+bool intel_span_supports_format(gl_format format);
#endif
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index 2c21ea0576..2c3eab20fd 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -113,7 +113,6 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target,
void
intelInitTextureFuncs(struct dd_function_table *functions)
{
- functions->ChooseTextureFormat = intelChooseTextureFormat;
functions->GenerateMipmap = intelGenerateMipmap;
functions->NewTextureObject = intelNewTextureObject;
diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h
index 7906554e45..6552ed0d33 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.h
+++ b/src/mesa/drivers/dri/intel/intel_tex.h
@@ -40,8 +40,7 @@ void intelInitTextureSubImageFuncs(struct dd_function_table *functions);
void intelInitTextureCopyImageFuncs(struct dd_function_table *functions);
-gl_format intelChooseTextureFormat(struct gl_context *ctx, GLint internalFormat,
- GLenum format, GLenum type);
+GLenum intel_mesa_format_to_rb_datatype(gl_format format);
void intelSetTexBuffer(__DRIcontext *pDRICtx,
GLint target, __DRIdrawable *pDraw);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 87b31bf078..a40011ab40 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -35,7 +35,6 @@
#include "intel_screen.h"
#include "intel_context.h"
-#include "intel_buffers.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_fbo.h"
@@ -50,44 +49,20 @@
* Do the best we can using the blitter. A future project is to use
* the texture engine and fragment programs for these copies.
*/
-static const struct intel_region *
-get_teximage_source(struct intel_context *intel, GLenum internalFormat)
+static struct intel_renderbuffer *
+get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat)
{
- struct intel_renderbuffer *irb;
-
DBG("%s %s\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(internalFormat));
switch (internalFormat) {
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT16:
- irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH);
- if (irb && irb->region && irb->region->cpp == 2)
- return irb->region;
- return NULL;
case GL_DEPTH24_STENCIL8_EXT:
case GL_DEPTH_STENCIL_EXT:
- irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH);
- if (irb && irb->region && irb->region->cpp == 4)
- return irb->region;
- return NULL;
- case 4:
- case GL_RGBA:
- case GL_RGBA8:
- irb = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer);
- /* We're required to set alpha to 1.0 in this case, but we can't
- * do that with the blitter, so fall back. We could use the 3D
- * engine or do two passes with the blitter, but it doesn't seem
- * worth it for this case. */
- if (irb->Base._BaseFormat == GL_RGB)
- return NULL;
- return irb->region;
- case 3:
- case GL_RGB:
- case GL_RGB8:
- return intel_readbuf_region(intel);
+ return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH);
default:
- return NULL;
+ return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer);
}
}
@@ -101,23 +76,34 @@ do_copy_texsubimage(struct intel_context *intel,
GLint x, GLint y, GLsizei width, GLsizei height)
{
struct gl_context *ctx = &intel->ctx;
- const struct intel_region *src = get_teximage_source(intel, internalFormat);
+ struct intel_renderbuffer *irb;
+ bool copy_supported_with_alpha_override = false;
+
+ intel_prepare_render(intel);
- if (!intelImage->mt || !src || !src->buffer) {
+ irb = get_teximage_readbuffer(intel, internalFormat);
+ if (!intelImage->mt || !irb || !irb->region) {
if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS))
fprintf(stderr, "%s fail %p %p (0x%08x)\n",
- __FUNCTION__, intelImage->mt, src, internalFormat);
+ __FUNCTION__, intelImage->mt, irb, internalFormat);
return GL_FALSE;
}
- if (intelImage->mt->cpp != src->cpp) {
- fallback_debug("%s fail %d vs %d cpp\n",
- __FUNCTION__, intelImage->mt->cpp, src->cpp);
+ if (irb->Base.Format == MESA_FORMAT_XRGB8888 &&
+ intelImage->base.TexFormat == MESA_FORMAT_ARGB8888) {
+ copy_supported_with_alpha_override = true;
+ }
+
+ if (intelImage->base.TexFormat != irb->Base.Format &&
+ !copy_supported_with_alpha_override) {
+ if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS))
+ fprintf(stderr, "%s mismatched formats %s, %s\n",
+ __FUNCTION__,
+ _mesa_get_format_name(intelImage->base.TexFormat),
+ _mesa_get_format_name(irb->Base.Format));
return GL_FALSE;
}
- /* intel_flush(ctx); */
- intel_prepare_render(intel);
{
drm_intel_bo *dst_bo = intel_region_buffer(intel,
intelImage->mt->region,
@@ -140,24 +126,24 @@ do_copy_texsubimage(struct intel_context *intel,
if (ctx->ReadBuffer->Name == 0) {
/* Flip vertical orientation for system framebuffers */
y = ctx->ReadBuffer->Height - (y + height);
- src_pitch = -src->pitch;
+ src_pitch = -irb->region->pitch;
} else {
/* reading from a FBO, y is already oriented the way we like */
- src_pitch = src->pitch;
+ src_pitch = irb->region->pitch;
}
/* blit from src buffer to texture */
if (!intelEmitCopyBlit(intel,
intelImage->mt->cpp,
src_pitch,
- src->buffer,
+ irb->region->buffer,
0,
- src->tiling,
+ irb->region->tiling,
intelImage->mt->region->pitch,
dst_bo,
0,
intelImage->mt->region->tiling,
- src->draw_x + x, src->draw_y + y,
+ irb->region->draw_x + x, irb->region->draw_y + y,
image_x + dstx, image_y + dsty,
width, height,
GL_COPY)) {
@@ -165,6 +151,9 @@ do_copy_texsubimage(struct intel_context *intel,
}
}
+ if (copy_supported_with_alpha_override)
+ intel_set_teximage_alpha_to_one(ctx, intelImage);
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
index 9d73a2fb37..87745bc66d 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -4,224 +4,35 @@
#include "main/formats.h"
/**
- * Choose hardware texture format given the user's glTexImage parameters.
- *
- * It works out that this function is fine for all the supported
- * hardware. However, there is still a need to map the formats onto
- * hardware descriptors.
- *
- * Note that the i915 can actually support many more formats than
- * these if we take the step of simply swizzling the colors
- * immediately after sampling...
+ * Returns the renderbuffer DataType for a MESA_FORMAT.
*/
-gl_format
-intelChooseTextureFormat(struct gl_context * ctx, GLint internalFormat,
- GLenum format, GLenum type)
+GLenum
+intel_mesa_format_to_rb_datatype(gl_format format)
{
- struct intel_context *intel = intel_context(ctx);
-
-#if 0
- printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
- __FUNCTION__, internalFormat, format, type);
-#endif
-
- switch (internalFormat) {
- case 4:
- case GL_RGBA:
- case GL_COMPRESSED_RGBA:
- if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV)
- return MESA_FORMAT_ARGB4444;
- else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV)
- return MESA_FORMAT_ARGB1555;
- else
- return MESA_FORMAT_ARGB8888;
-
- case 3:
- case GL_RGB:
- case GL_COMPRESSED_RGB:
- if (type == GL_UNSIGNED_SHORT_5_6_5)
- return MESA_FORMAT_RGB565;
- else if (intel->has_xrgb_textures)
- return MESA_FORMAT_XRGB8888;
- else
- return MESA_FORMAT_ARGB8888;
-
- case GL_RGBA8:
- case GL_RGB10_A2:
- case GL_RGBA12:
- case GL_RGBA16:
- return MESA_FORMAT_ARGB8888;
-
- case GL_RGBA4:
- case GL_RGBA2:
- return MESA_FORMAT_ARGB4444;
-
- case GL_RGB5_A1:
- return MESA_FORMAT_ARGB1555;
-
- case GL_RGB8:
- case GL_RGB10:
- case GL_RGB12:
- case GL_RGB16:
- if (intel->has_xrgb_textures)
- return MESA_FORMAT_XRGB8888;
- else
- return MESA_FORMAT_ARGB8888;
-
- case GL_RGB5:
- case GL_RGB4:
- case GL_R3_G3_B2:
- return MESA_FORMAT_RGB565;
-
- case GL_ALPHA:
- case GL_ALPHA4:
- case GL_ALPHA8:
- case GL_ALPHA12:
- case GL_ALPHA16:
- case GL_COMPRESSED_ALPHA:
- return MESA_FORMAT_A8;
-
- case 1:
- case GL_LUMINANCE:
- case GL_LUMINANCE4:
- case GL_LUMINANCE8:
- case GL_LUMINANCE12:
- case GL_LUMINANCE16:
- case GL_COMPRESSED_LUMINANCE:
- return MESA_FORMAT_L8;
-
- case GL_LUMINANCE12_ALPHA4:
- case GL_LUMINANCE12_ALPHA12:
- case GL_LUMINANCE16_ALPHA16:
- /* i915 could implement this mode using MT_32BIT_RG1616. However, this
- * would require an extra swizzle instruction in the fragment shader to
- * convert the { R, G, 1.0, 1.0 } to { R, R, R, G }.
- */
-#ifndef I915
- return MESA_FORMAT_AL1616;
-#else
- /* FALLTHROUGH */
-#endif
-
- case 2:
- case GL_LUMINANCE_ALPHA:
- case GL_LUMINANCE4_ALPHA4:
- case GL_LUMINANCE6_ALPHA2:
- case GL_LUMINANCE8_ALPHA8:
- case GL_COMPRESSED_LUMINANCE_ALPHA:
- return MESA_FORMAT_AL88;
-
- case GL_INTENSITY:
- case GL_INTENSITY4:
- case GL_INTENSITY8:
- case GL_INTENSITY12:
- case GL_INTENSITY16:
- case GL_COMPRESSED_INTENSITY:
- return MESA_FORMAT_I8;
-
- case GL_YCBCR_MESA:
- if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
- return MESA_FORMAT_YCBCR;
- else
- return MESA_FORMAT_YCBCR_REV;
-
- case GL_COMPRESSED_RGB_FXT1_3DFX:
- return MESA_FORMAT_RGB_FXT1;
- case GL_COMPRESSED_RGBA_FXT1_3DFX:
- return MESA_FORMAT_RGBA_FXT1;
-
- case GL_RGB_S3TC:
- case GL_RGB4_S3TC:
- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- return MESA_FORMAT_RGB_DXT1;
-
- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- return MESA_FORMAT_RGBA_DXT1;
-
- case GL_RGBA_S3TC:
- case GL_RGBA4_S3TC:
- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- return MESA_FORMAT_RGBA_DXT3;
-
- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- return MESA_FORMAT_RGBA_DXT5;
-
- case GL_DEPTH_COMPONENT:
- case GL_DEPTH_COMPONENT16:
- case GL_DEPTH_COMPONENT24:
- case GL_DEPTH_COMPONENT32:
-#if 0
- return MESA_FORMAT_Z16;
-#else
- /* fall-through.
- * 16bpp depth texture can't be paired with a stencil buffer so
- * always used combined depth/stencil format.
- */
-#endif
- case GL_DEPTH_STENCIL_EXT:
- case GL_DEPTH24_STENCIL8_EXT:
- return MESA_FORMAT_S8_Z24;
-
-#ifndef I915
- case GL_SRGB_EXT:
- case GL_SRGB8_EXT:
- case GL_SRGB_ALPHA_EXT:
- case GL_SRGB8_ALPHA8_EXT:
- case GL_COMPRESSED_SRGB_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_EXT:
- case GL_COMPRESSED_SLUMINANCE_EXT:
- case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
- return MESA_FORMAT_SARGB8;
- case GL_SLUMINANCE_EXT:
- case GL_SLUMINANCE8_EXT:
- if (intel->has_luminance_srgb)
- return MESA_FORMAT_SL8;
- else
- return MESA_FORMAT_SARGB8;
- case GL_SLUMINANCE_ALPHA_EXT:
- case GL_SLUMINANCE8_ALPHA8_EXT:
- if (intel->has_luminance_srgb)
- return MESA_FORMAT_SLA8;
- else
- return MESA_FORMAT_SARGB8;
- case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
- case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
- return MESA_FORMAT_SRGB_DXT1;
-
- /* i915 could also do this */
- case GL_DUDV_ATI:
- case GL_DU8DV8_ATI:
- return MESA_FORMAT_DUDV8;
- case GL_RGBA_SNORM:
- case GL_RGBA8_SNORM:
- return MESA_FORMAT_SIGNED_RGBA8888_REV;
-
- /* i915 can do a RG16, but it can't do any of the other RED or RG formats.
- * In addition, it only implements the broken D3D mode where undefined
- * components are read as 1.0. I'm not sure who thought reading
- * { R, G, 1.0, 1.0 } from a red-green texture would be useful.
- */
- case GL_RED:
- case GL_R8:
- return MESA_FORMAT_R8;
- case GL_R16:
- return MESA_FORMAT_R16;
- case GL_RG:
- case GL_RG8:
- return MESA_FORMAT_RG88;
- case GL_RG16:
- return MESA_FORMAT_RG1616;
-#endif
-
+ switch (format) {
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_SARGB8:
+ case MESA_FORMAT_R8:
+ case MESA_FORMAT_RG88:
+ case MESA_FORMAT_A8:
+ case MESA_FORMAT_AL88:
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_ARGB4444:
+ return GL_UNSIGNED_BYTE;
+ case MESA_FORMAT_R16:
+ case MESA_FORMAT_RG1616:
+ case MESA_FORMAT_Z16:
+ return GL_UNSIGNED_SHORT;
+ case MESA_FORMAT_X8_Z24:
+ return GL_UNSIGNED_INT;
+ case MESA_FORMAT_S8_Z24:
+ return GL_UNSIGNED_INT_24_8_EXT;
default:
- fprintf(stderr, "unexpected texture format %s in %s\n",
- _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
- return MESA_FORMAT_NONE;
+ _mesa_problem(NULL, "unexpected MESA_FORMAT for renderbuffer");
+ return GL_UNSIGNED_BYTE;
}
-
- return MESA_FORMAT_NONE; /* never get here */
}
int intel_compressed_num_bytes(GLuint mesaFormat)
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 41cdbfd2cb..cd8c4c22e5 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -1,6 +1,7 @@
#include "main/glheader.h"
#include "main/macros.h"
+#include "main/mfeatures.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/bufferobj.h"
@@ -55,11 +56,11 @@ logbase2(int n)
* 0)..(1x1). Consider pruning this tree at a validation if the
* saving is worth it.
*/
-static void
-guess_and_alloc_mipmap_tree(struct intel_context *intel,
- struct intel_texture_object *intelObj,
- struct intel_texture_image *intelImage,
- GLboolean expect_accelerated_upload)
+static struct intel_mipmap_tree *
+intel_miptree_create_for_teximage(struct intel_context *intel,
+ struct intel_texture_object *intelObj,
+ struct intel_texture_image *intelImage,
+ GLboolean expect_accelerated_upload)
{
GLuint firstLevel;
GLuint lastLevel;
@@ -72,70 +73,71 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel,
DBG("%s\n", __FUNCTION__);
if (intelImage->base.Border)
- return;
+ return NULL;
if (intelImage->level > intelObj->base.BaseLevel &&
(intelImage->base.Width == 1 ||
(intelObj->base.Target != GL_TEXTURE_1D &&
intelImage->base.Height == 1) ||
(intelObj->base.Target == GL_TEXTURE_3D &&
- intelImage->base.Depth == 1)))
- return;
-
- /* If this image disrespects BaseLevel, allocate from level zero.
- * Usually BaseLevel == 0, so it's unlikely to happen.
- */
- if (intelImage->level < intelObj->base.BaseLevel)
- firstLevel = 0;
- else
- firstLevel = intelObj->base.BaseLevel;
-
-
- /* Figure out image dimensions at start level.
- */
- for (i = intelImage->level; i > firstLevel; i--) {
- width <<= 1;
- if (height != 1)
- height <<= 1;
- if (depth != 1)
- depth <<= 1;
- }
+ intelImage->base.Depth == 1))) {
+ /* For this combination, we're at some lower mipmap level and
+ * some important dimension is 1. We can't extrapolate up to a
+ * likely base level width/height/depth for a full mipmap stack
+ * from this info, so just allocate this one level.
+ */
+ firstLevel = intelImage->level;
+ lastLevel = intelImage->level;
+ } else {
+ /* If this image disrespects BaseLevel, allocate from level zero.
+ * Usually BaseLevel == 0, so it's unlikely to happen.
+ */
+ if (intelImage->level < intelObj->base.BaseLevel)
+ firstLevel = 0;
+ else
+ firstLevel = intelObj->base.BaseLevel;
+
+ /* Figure out image dimensions at start level. */
+ for (i = intelImage->level; i > firstLevel; i--) {
+ width <<= 1;
+ if (height != 1)
+ height <<= 1;
+ if (depth != 1)
+ depth <<= 1;
+ }
- /* Guess a reasonable value for lastLevel. This is probably going
- * to be wrong fairly often and might mean that we have to look at
- * resizable buffers, or require that buffers implement lazy
- * pagetable arrangements.
- */
- if ((intelObj->base.MinFilter == GL_NEAREST ||
- intelObj->base.MinFilter == GL_LINEAR) &&
- intelImage->level == firstLevel &&
- (intel->gen < 4 || firstLevel == 0)) {
- lastLevel = firstLevel;
- }
- else {
- lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth));
+ /* Guess a reasonable value for lastLevel. This is probably going
+ * to be wrong fairly often and might mean that we have to look at
+ * resizable buffers, or require that buffers implement lazy
+ * pagetable arrangements.
+ */
+ if ((intelObj->base.MinFilter == GL_NEAREST ||
+ intelObj->base.MinFilter == GL_LINEAR) &&
+ intelImage->level == firstLevel &&
+ (intel->gen < 4 || firstLevel == 0)) {
+ lastLevel = firstLevel;
+ } else {
+ lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth));
+ }
}
- assert(!intelObj->mt);
if (_mesa_is_format_compressed(intelImage->base.TexFormat))
comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat);
texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat);
- intelObj->mt = intel_miptree_create(intel,
- intelObj->base.Target,
- intelImage->base._BaseFormat,
- intelImage->base.InternalFormat,
- firstLevel,
- lastLevel,
- width,
- height,
- depth,
- texelBytes,
- comp_byte,
- expect_accelerated_upload);
-
- DBG("%s - success\n", __FUNCTION__);
+ return intel_miptree_create(intel,
+ intelObj->base.Target,
+ intelImage->base._BaseFormat,
+ intelImage->base.InternalFormat,
+ firstLevel,
+ lastLevel,
+ width,
+ height,
+ depth,
+ texelBytes,
+ comp_byte,
+ expect_accelerated_upload);
}
@@ -343,41 +345,29 @@ intelTexImage(struct gl_context * ctx,
texImage->Data = NULL;
}
- if (!intelObj->mt) {
- guess_and_alloc_mipmap_tree(intel, intelObj, intelImage, pixels == NULL);
- if (!intelObj->mt) {
- DBG("guess_and_alloc_mipmap_tree: failed\n");
- }
- }
-
assert(!intelImage->mt);
if (intelObj->mt &&
intel_miptree_match_image(intelObj->mt, &intelImage->base)) {
-
+ /* Use an existing miptree when possible */
intel_miptree_reference(&intelImage->mt, intelObj->mt);
assert(intelImage->mt);
} else if (intelImage->base.Border == 0) {
- int comp_byte = 0;
- GLuint texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat);
- GLenum baseFormat = _mesa_get_format_base_format(intelImage->base.TexFormat);
- if (_mesa_is_format_compressed(intelImage->base.TexFormat)) {
- comp_byte =
- intel_compressed_num_bytes(intelImage->base.TexFormat);
- }
-
/* Didn't fit in the object miptree, but it's suitable for inclusion in
* a miptree, so create one just for our level and store it in the image.
* It'll get moved into the object miptree at validate time.
*/
- intelImage->mt = intel_miptree_create(intel, target,
- baseFormat,
- internalFormat,
- level, level,
- width, height, depth,
- texelBytes,
- comp_byte, pixels == NULL);
-
+ intelImage->mt = intel_miptree_create_for_teximage(intel, intelObj,
+ intelImage,
+ pixels == NULL);
+
+ /* Even if the object currently has a mipmap tree associated
+ * with it, this one is a more likely candidate to represent the
+ * whole object since our level didn't fit what was there
+ * before, and any lower levels would fit into our miptree.
+ */
+ if (intelImage->mt)
+ intel_miptree_reference(&intelObj->mt, intelImage->mt);
}
/* PBO fastpaths:
@@ -711,8 +701,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
}
mt = intel_miptree_create_for_region(intel, target,
- internalFormat,
- 0, 0, rb->region, 1, 0);
+ internalFormat, rb->region, 1, 0);
if (mt == NULL)
return;
@@ -777,7 +766,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
mt = intel_miptree_create_for_region(intel, target,
image->internal_format,
- 0, 0, image->region, 1, 0);
+ image->region, 1, 0);
if (mt == NULL)
return;
diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
index 5f60e0ea4f..e93ef4a472 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
@@ -32,11 +32,11 @@ struct intel_texture_object
{
struct gl_texture_object base; /* The "parent" object */
- /* The mipmap tree must include at least these levels once
- * validated:
+ /* This is a mirror of base._MaxLevel, updated at validate time,
+ * except that we don't bother with the non-base levels for
+ * non-mipmapped textures.
*/
- GLuint firstLevel;
- GLuint lastLevel;
+ unsigned int _MaxLevel;
/* Offset for firstLevel image:
*/
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index ed5c5d896b..8537e7f368 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -8,72 +8,21 @@
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
/**
- * Compute which mipmap levels that really need to be sent to the hardware.
- * This depends on the base image size, GL_TEXTURE_MIN_LOD,
- * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+ * When validating, we only care about the texture images that could
+ * be seen, so for non-mipmapped modes we want to ignore everything
+ * but BaseLevel.
*/
static void
-intel_calculate_first_last_level(struct intel_context *intel,
- struct intel_texture_object *intelObj)
+intel_update_max_level(struct intel_context *intel,
+ struct intel_texture_object *intelObj)
{
struct gl_texture_object *tObj = &intelObj->base;
- const struct gl_texture_image *const baseImage =
- tObj->Image[0][tObj->BaseLevel];
- /* These must be signed values. MinLod and MaxLod can be negative numbers,
- * and having firstLevel and lastLevel as signed prevents the need for
- * extra sign checks.
- */
- int firstLevel;
- int lastLevel;
-
- /* Yes, this looks overly complicated, but it's all needed.
- */
- switch (tObj->Target) {
- case GL_TEXTURE_1D:
- case GL_TEXTURE_2D:
- case GL_TEXTURE_3D:
- case GL_TEXTURE_CUBE_MAP:
- if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
- /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
- */
- firstLevel = lastLevel = tObj->BaseLevel;
- }
- else {
- if (intel->gen == 2) {
- firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
- firstLevel = MAX2(firstLevel, tObj->BaseLevel);
- firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
- lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
- lastLevel = MAX2(lastLevel, tObj->BaseLevel);
- lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
- lastLevel = MIN2(lastLevel, tObj->MaxLevel);
- lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
- } else {
- /* Min/max LOD are taken into account in sampler state. We don't
- * want to re-layout textures just because clamping has been applied
- * since it means a bunch of blitting around and probably no memory
- * savings (since we have to keep the other levels around anyway).
- */
- firstLevel = tObj->BaseLevel;
- lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2,
- tObj->MaxLevel);
- /* need at least one level */
- lastLevel = MAX2(firstLevel, lastLevel);
- }
- }
- break;
- case GL_TEXTURE_RECTANGLE_NV:
- case GL_TEXTURE_4D_SGIS:
- firstLevel = lastLevel = 0;
- break;
- default:
- return;
+ if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
+ intelObj->_MaxLevel = tObj->BaseLevel;
+ } else {
+ intelObj->_MaxLevel = tObj->_MaxLevel;
}
-
- /* save these values */
- intelObj->firstLevel = firstLevel;
- intelObj->lastLevel = lastLevel;
}
/**
@@ -135,8 +84,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
/* What levels must the tree include at a minimum?
*/
- intel_calculate_first_last_level(intel, intelObj);
- firstImage = intel_texture_image(tObj->Image[0][intelObj->firstLevel]);
+ intel_update_max_level(intel, intelObj);
+ firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]);
/* Fallback case:
*/
@@ -147,23 +96,6 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
return GL_FALSE;
}
-
- /* If both firstImage and intelObj have a tree which can contain
- * all active images, favour firstImage. Note that because of the
- * completeness requirement, we know that the image dimensions
- * will match.
- */
- if (firstImage->mt &&
- firstImage->mt != intelObj->mt &&
- firstImage->mt->first_level <= intelObj->firstLevel &&
- firstImage->mt->last_level >= intelObj->lastLevel) {
-
- if (intelObj->mt)
- intel_miptree_release(intel, &intelObj->mt);
-
- intel_miptree_reference(&intelObj->mt, firstImage->mt);
- }
-
if (_mesa_is_format_compressed(firstImage->base.TexFormat)) {
comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat);
cpp = comp_byte;
@@ -173,18 +105,17 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
/* Check tree can hold all active levels. Check tree matches
* target, imageFormat, etc.
- *
- * XXX: For some layouts (eg i945?), the test might have to be
- * first_level == firstLevel, as the tree isn't valid except at the
- * original start level. Hope to get around this by
- * programming minLod, maxLod, baseLevel into the hardware and
- * leaving the tree alone.
+ *
+ * For pre-gen4, we have to match first_level == tObj->BaseLevel,
+ * because we don't have the control that gen4 does to make min/mag
+ * determination happen at a nonzero (hardware) baselevel. Because
+ * of that, we just always relayout on baselevel change.
*/
if (intelObj->mt &&
(intelObj->mt->target != intelObj->base.Target ||
intelObj->mt->internal_format != firstImage->base.InternalFormat ||
- intelObj->mt->first_level != intelObj->firstLevel ||
- intelObj->mt->last_level != intelObj->lastLevel ||
+ intelObj->mt->first_level != tObj->BaseLevel ||
+ intelObj->mt->last_level < intelObj->_MaxLevel ||
intelObj->mt->width0 != firstImage->base.Width ||
intelObj->mt->height0 != firstImage->base.Height ||
intelObj->mt->depth0 != firstImage->base.Depth ||
@@ -201,8 +132,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
intelObj->base.Target,
firstImage->base._BaseFormat,
firstImage->base.InternalFormat,
- intelObj->firstLevel,
- intelObj->lastLevel,
+ tObj->BaseLevel,
+ intelObj->_MaxLevel,
firstImage->base.Width,
firstImage->base.Height,
firstImage->base.Depth,
@@ -215,11 +146,13 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
*/
nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
for (face = 0; face < nr_faces; face++) {
- for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
+ for (i = tObj->BaseLevel; i <= intelObj->_MaxLevel; i++) {
struct intel_texture_image *intelImage =
intel_texture_image(intelObj->base.Image[face][i]);
-
- /* Need to import images in main memory or held in other trees.
+ /* Stop at the first level with no image (mipmap too small to exist) */
+ if (intelImage == NULL)
+ break;
+ /* Need to import images in main memory or held in other trees.
* If it's a render target, then its data isn't needed to be in
* the object tree (otherwise we'd be FBO incomplete), and we need
* to keep track of the image's MT as needing to be pulled in still,
@@ -289,7 +222,7 @@ intel_tex_map_images(struct intel_context *intel,
DBG("%s\n", __FUNCTION__);
- for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++)
+ for (i = intelObj->base.BaseLevel; i <= intelObj->_MaxLevel; i++)
intel_tex_map_level_images(intel, intelObj, i);
}
@@ -299,6 +232,6 @@ intel_tex_unmap_images(struct intel_context *intel,
{
int i;
- for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++)
+ for (i = intelObj->base.BaseLevel; i <= intelObj->_MaxLevel; i++)
intel_tex_unmap_level_images(intel, intelObj, i);
}
diff --git a/src/mesa/drivers/dri/mach64/mach64_context.h b/src/mesa/drivers/dri/mach64/mach64_context.h
index 11e8f53b28..70bc0ae79d 100644
--- a/src/mesa/drivers/dri/mach64/mach64_context.h
+++ b/src/mesa/drivers/dri/mach64/mach64_context.h
@@ -295,11 +295,11 @@ extern GLboolean mach64UnbindContext( __DRIcontext *driContextPriv );
#define LE32_OUT( x, y ) do { *(GLuint *)(x) = (y); } while (0)
#define LE32_OUT_FLOAT( x, y ) do { *(GLfloat *)(x) = (y); } while (0)
#else
-#ifndef __OpenBSD__
-#include <byteswap.h>
-#else
+#if defined(__OpenBSD__) || defined(__NetBSD__)
#include <machine/endian.h>
#define bswap_32 bswap32
+#else
+#include <byteswap.h>
#endif
#define LE32_IN( x ) bswap_32( *(GLuint *)(x) )
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
index 27e2892f71..45630be7f6 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
@@ -24,6 +24,8 @@
*
*/
+#include "main/mfeatures.h"
+
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.h b/src/mesa/drivers/dri/nouveau/nouveau_driver.h
index 8036b18edc..c5ac1282d0 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.h
+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.h
@@ -38,7 +38,6 @@
#include <assert.h>
#include "nouveau_device.h"
-#include "nouveau_pushbuf.h"
#include "nouveau_grobj.h"
#include "nouveau_channel.h"
#include "nouveau_bo.h"
@@ -46,6 +45,7 @@
#include "nouveau_screen.h"
#include "nouveau_state.h"
#include "nouveau_surface.h"
+#include "nv04_pushbuf.h"
#define DRIVER_DATE "20091015"
#define DRIVER_AUTHOR "Nouveau"
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index 079b5d63e4..b36b578878 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -32,6 +32,7 @@
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/fbobject.h"
+#include "main/mfeatures.h"
static GLboolean
set_renderbuffer_format(struct gl_renderbuffer *rb, GLenum internalFormat)
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index 5abfc9dac5..9c045b73ac 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/simple_list.h"
#include "main/imports.h"
#include "main/extensions.h"
+#include "main/mfeatures.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
index 8a047e6419..b62290231b 100644
--- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c
+++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
@@ -200,6 +200,7 @@ void r200EmitArrays( struct gl_context *ctx, GLubyte *vimap_rev )
}
default:
assert(0);
+ emitsize = 0;
}
if (!rmesa->radeon.tcl.aos[nr].bo) {
rcommon_emit_vector( ctx,
diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
index 064324731b..092b757583 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.c
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
+#include "main/mfeatures.h"
#include "main/simple_list.h"
#include "main/texstore.h"
#include "main/teximage.h"
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 7b9c316794..e0d349b98c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -24,6 +24,7 @@
#include <stdio.h>
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
@@ -54,6 +55,8 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
struct rc_sub_instruction * inst = &rci->U.I;
+ unsigned i;
+ const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode);
if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
continue;
@@ -65,27 +68,12 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
continue;
}
- switch (inst->Opcode) {
- case RC_OPCODE_FRC:
- case RC_OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- break;
- case RC_OPCODE_ADD:
- case RC_OPCODE_MAX:
- case RC_OPCODE_MIN:
- case RC_OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
- break;
- case RC_OPCODE_CMP:
- case RC_OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]);
- break;
- default:
- // Scalar instructions needn't be reswizzled
- break;
+ if (!info->IsComponentwise) {
+ continue;
+ }
+
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]);
}
}
}
@@ -93,7 +81,6 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
int is_r500 = c->Base.is_r500;
- int kill_consts = c->Base.remove_unused_constants;
int opt = !c->Base.disable_optimizations;
/* Lists of instruction transformations. */
@@ -133,7 +120,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
- {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
+ {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
/* This pass makes it easier for the scheduler to group TEX
* instructions and reduces the chances of creating too
* many texture indirections.*/
@@ -150,9 +137,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{NULL, 0, 0, NULL, NULL}
};
+ c->Base.type = RC_FRAGMENT_PROGRAM;
c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
- rc_run_compiler(&c->Base, fs_list, "Fragment Program");
+ rc_run_compiler(&c->Base, fs_list);
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index bf8341f017..472029f63d 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -26,6 +26,7 @@
#include "../r300_reg.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
@@ -790,19 +791,14 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
if (!hwtemps[j])
break;
}
- if (j >= c->max_temp_regs) {
- rc_error(c, "Too many temporaries\n");
- return;
+ ta[orig].Allocated = 1;
+ if (last_inst_src_reladdr &&
+ last_inst_src_reladdr->IP > inst->IP) {
+ ta[orig].HwTemp = orig;
} else {
- ta[orig].Allocated = 1;
- if (last_inst_src_reladdr &&
- last_inst_src_reladdr->IP > inst->IP) {
- ta[orig].HwTemp = orig;
- } else {
- ta[orig].HwTemp = j;
- }
- hwtemps[ta[orig].HwTemp] = 1;
+ ta[orig].HwTemp = j;
}
+ hwtemps[ta[orig].HwTemp] = 1;
}
inst->U.I.DstReg.Index = ta[orig].HwTemp;
@@ -1018,7 +1014,6 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{
int is_r500 = c->Base.is_r500;
- int kill_consts = c->Base.remove_unused_constants;
int opt = !c->Base.disable_optimizations;
/* Lists of instruction transformations. */
@@ -1062,18 +1057,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{"dataflow optimize", 1, opt, rc_optimize, NULL},
/* This pass must be done after optimizations. */
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
- {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"register allocation", 1, opt, allocate_temporary_registers, NULL},
- {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
+ {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, 1, translate_vertex_program, NULL},
{"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},
{NULL, 0, 0, NULL, NULL}
};
+ c->Base.type = RC_VERTEX_PROGRAM;
c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
- rc_run_compiler(&c->Base, vs_list, "Vertex Program");
+ rc_run_compiler(&c->Base, vs_list);
c->code->InputsRead = c->Base.Program.InputsRead;
c->code->OutputsWritten = c->Base.Program.OutputsWritten;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 289bb87ae5..ef81be48f7 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -29,6 +29,7 @@
#include <stdio.h>
+#include "radeon_compiler_util.h"
#include "../r300_reg.h"
/**
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 4286baed0c..65548604bc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -29,6 +29,7 @@
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
void rc_init(struct radeon_compiler * c)
@@ -356,66 +357,92 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- unsigned int * max_reg = userdata;
+ int *max_reg = userdata;
if (file == RC_FILE_TEMPORARY)
- index > *max_reg ? *max_reg = index : 0;
+ (int)index > *max_reg ? *max_reg = index : 0;
}
-static void print_stats(struct radeon_compiler * c)
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
+ int max_reg = -1;
struct rc_instruction * tmp;
- unsigned max_reg, insts, fc, tex, alpha, rgb, presub;
- max_reg = insts = fc = tex = alpha = rgb = presub = 0;
+ memset(s, 0, sizeof(*s));
+
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
tmp = tmp->Next){
const struct rc_opcode_info * info;
rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
- presub++;
+ s->num_presub_ops++;
info = rc_get_opcode_info(tmp->U.I.Opcode);
} else {
if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
- presub++;
+ s->num_presub_ops++;
if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
- presub++;
+ s->num_presub_ops++;
/* Assuming alpha will never be a flow control or
* a tex instruction. */
if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
- alpha++;
+ s->num_alpha_insts++;
if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
- rgb++;
+ s->num_rgb_insts++;
info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
}
if (info->IsFlowControl)
- fc++;
+ s->num_fc_insts++;
if (info->HasTexture)
- tex++;
- insts++;
+ s->num_tex_insts++;
+ s->num_insts++;
}
- if (insts < 4)
- return;
- fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
- "~%4u Instructions\n"
- "~%4u Vector Instructions (RGB)\n"
- "~%4u Scalar Instructions (Alpha)\n"
- "~%4u Flow Control Instructions\n"
- "~%4u Texture Instructions\n"
- "~%4u Presub Operations\n"
- "~%4u Temporary Registers\n"
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
- insts, rgb, alpha, fc, tex, presub, max_reg + 1);
+ s->num_temp_regs = max_reg + 1;
}
-/* Executes a list of compiler passes given in the parameter 'list'. */
-void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list,
- const char *shader_name)
+static void print_stats(struct radeon_compiler * c)
{
- if (c->Debug & RC_DBG_LOG) {
- fprintf(stderr, "%s: before compilation\n", shader_name);
- rc_print_program(&c->Program);
+ struct rc_program_stats s;
+
+ rc_get_stats(c, &s);
+
+ if (s.num_insts < 4)
+ return;
+
+ switch (c->type) {
+ case RC_VERTEX_PROGRAM:
+ fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
+ "~%4u Instructions\n"
+ "~%4u Flow Control Instructions\n"
+ "~%4u Temporary Registers\n"
+ "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+ s.num_insts, s.num_fc_insts, s.num_temp_regs);
+ break;
+
+ case RC_FRAGMENT_PROGRAM:
+ fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
+ "~%4u Instructions\n"
+ "~%4u Vector Instructions (RGB)\n"
+ "~%4u Scalar Instructions (Alpha)\n"
+ "~%4u Flow Control Instructions\n"
+ "~%4u Texture Instructions\n"
+ "~%4u Presub Operations\n"
+ "~%4u Temporary Registers\n"
+ "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+ s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
+ s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
+ s.num_temp_regs);
+ break;
+ default:
+ assert(0);
}
+}
+static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
+ "Vertex Program",
+ "Fragment Program"
+};
+
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
for (unsigned i = 0; list[i].name; i++) {
if (list[i].predicate) {
list[i].run(c, list[i].user);
@@ -424,11 +451,23 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis
return;
if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
- fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name);
+ fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
rc_print_program(&c->Program);
}
}
}
+}
+
+/* Executes a list of compiler passes given in the parameter 'list'. */
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+ if (c->Debug & RC_DBG_LOG) {
+ fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
+ rc_print_program(&c->Program);
+ }
+
+ rc_run_compiler_passes(c, list);
+
if (c->Debug & RC_DBG_STATS)
print_stats(c);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 31fd469a04..e663339589 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -35,9 +35,16 @@
struct rc_swizzle_caps;
+enum rc_program_type {
+ RC_VERTEX_PROGRAM,
+ RC_FRAGMENT_PROGRAM,
+ RC_NUM_PROGRAM_TYPES
+};
+
struct radeon_compiler {
struct memory_pool Pool;
struct rc_program Program;
+ enum rc_program_type type;
unsigned Debug:2;
unsigned Error:1;
char * ErrorMsg;
@@ -140,9 +147,21 @@ struct radeon_compiler_pass {
void *user; /* Optional parameter which is passed to the run function. */
};
+struct rc_program_stats {
+ unsigned num_insts;
+ unsigned num_fc_insts;
+ unsigned num_tex_insts;
+ unsigned num_rgb_insts;
+ unsigned num_alpha_insts;
+ unsigned num_presub_ops;
+ unsigned num_temp_regs;
+};
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
+
/* Executes a list of compiler passes given in the parameter 'list'. */
-void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list,
- const char *shader_name);
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);
void rc_validate_final_shader(struct radeon_compiler *c, void *user);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
index 2b8d284ce9..2482fc68be 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@@ -48,6 +48,91 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz)
return mask;
}
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+/**
+ * @param mask Must be exactly one of RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W; any other value returns RC_SWIZZLE_UNUSED.
+ */
+rc_swizzle rc_mask_to_swizzle(unsigned int mask)
+{
+ switch (mask) {
+ case RC_MASK_X: return RC_SWIZZLE_X;
+ case RC_MASK_Y: return RC_SWIZZLE_Y;
+ case RC_MASK_Z: return RC_SWIZZLE_Z;
+ case RC_MASK_W: return RC_SWIZZLE_W;
+ }
+ return RC_SWIZZLE_UNUSED;
+}
+
+/* Reorder mask bits according to swizzle. */
+unsigned swizzle_mask(unsigned swizzle, unsigned mask)
+{
+ unsigned ret = 0;
+ for (unsigned chan = 0; chan < 4; ++chan) {
+ unsigned swz = GET_SWZ(swizzle, chan);
+ if (swz < 4)
+ ret |= GET_BIT(mask, swz) << chan;
+ }
+ return ret;
+}
+
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+ struct rc_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.Negate = 0;
+ for(i = 0; i < 4; ++i) {
+ rc_swizzle swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
+
+void reset_srcreg(struct rc_src_register* reg)
+{
+ memset(reg, 0, sizeof(struct rc_src_register));
+ reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
unsigned int rc_src_reads_dst_mask(
rc_register_file src_file,
unsigned int src_idx,
@@ -138,7 +223,6 @@ unsigned int rc_inst_can_use_presub(
{
struct can_use_presub_data d;
unsigned int num_presub_srcs;
- unsigned int presub_src_type = rc_source_type_mask(presub_writemask);
const struct rc_opcode_info * info =
rc_get_opcode_info(inst->U.I.Opcode);
@@ -168,13 +252,7 @@ unsigned int rc_inst_can_use_presub(
num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
- if ((presub_src_type & RC_SOURCE_RGB)
- && d.RGBCount + num_presub_srcs > 3) {
- return 0;
- }
-
- if ((presub_src_type & RC_SOURCE_ALPHA)
- && d.AlphaCount + num_presub_srcs > 3) {
+ if (d.RGBCount + num_presub_srcs > 3 || d.AlphaCount + num_presub_srcs > 3) {
return 0;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
index e50dfbd4fb..461ab9ffb1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
@@ -8,6 +8,22 @@ struct rc_src_register;
unsigned int rc_swizzle_to_writemask(unsigned int swz);
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);
+
+unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y,
+ rc_swizzle swz_z, rc_swizzle swz_w);
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+
+unsigned swizzle_mask(unsigned swizzle, unsigned mask);
+
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+void reset_srcreg(struct rc_src_register* reg);
+
unsigned int rc_src_reads_dst_mask(
rc_register_file src_file,
unsigned int src_idx,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
index a0f7bd8174..133a9f72ec 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -56,6 +56,7 @@ static void rewrite_source(struct radeon_compiler * c,
mov->U.I.DstReg.Index = tempreg;
mov->U.I.DstReg.WriteMask = split.Phase[phase];
mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+ mov->U.I.PreSub = inst->U.I.PreSub;
phase_refmask = 0;
for(unsigned int chan = 0; chan < 4; ++chan) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index da495a3afa..25afd272be 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -67,6 +67,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_CLAMP,
+ .Name = "CLAMP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
@@ -453,6 +460,7 @@ void rc_compute_sources_for_writemask(
srcmasks[1] |= RC_MASK_XY;
break;
case RC_OPCODE_DP3:
+ case RC_OPCODE_XPD:
srcmasks[0] |= RC_MASK_XYZ;
srcmasks[1] |= RC_MASK_XYZ;
break;
@@ -460,6 +468,10 @@ void rc_compute_sources_for_writemask(
srcmasks[0] |= RC_MASK_XYZW;
srcmasks[1] |= RC_MASK_XYZW;
break;
+ case RC_OPCODE_DPH:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZW;
+ break;
case RC_OPCODE_TXB:
case RC_OPCODE_TXP:
srcmasks[0] |= RC_MASK_W;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index d3f639c870..7e66610127 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -50,6 +50,9 @@ typedef enum {
/** vec4 instruction: dst.c = ceil(src0.c) */
RC_OPCODE_CEIL,
+ /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
+ RC_OPCODE_CLAMP,
+
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
RC_OPCODE_CMP,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 27b10ffbd6..44f4c0fbdc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -54,12 +54,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
combine.Negate = outer.Negate;
} else {
combine.Abs = inner.Abs;
- combine.Negate = 0;
- for(unsigned int chan = 0; chan < 4; ++chan) {
- unsigned int swz = GET_SWZ(outer.Swizzle, chan);
- if (swz < 4)
- combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
- }
+ combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
combine.Negate ^= outer.Negate;
}
combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 3f880c88fa..d53181e1f7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -217,9 +217,9 @@ static void compute_live_intervals(struct radeon_compiler *c,
* instruction is used as the end of the live interval and
* the BGNLOOP instruction is used as the beginning. */
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
- s->BeginLoop = inst->IP;
int loops = 1;
struct rc_instruction * tmp;
+ s->BeginLoop = inst->IP;
for(tmp = inst->Next;
tmp != &s->C->Program.Instructions;
tmp = tmp->Next) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index cbb5ef6237..9beb5d6357 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -701,7 +701,7 @@ static int convert_rgb_to_alpha(
get_reg_valuep(s,
RC_FILE_TEMPORARY,
pair_inst->RGB.DestIndex,
- rc_mask_to_swz(old_mask));
+ rc_mask_to_swizzle(old_mask));
new_index = i;
*new_regvalp = *old_regvalp;
*old_regvalp = NULL;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index d7bedc5729..fe5756ebc4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -71,27 +71,6 @@ void rc_local_transform(
}
}
-/**
- * Left multiplication of a register with a swizzle
- */
-struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
-{
- struct rc_src_register tmp = srcreg;
- int i;
- tmp.Swizzle = 0;
- tmp.Negate = 0;
- for(i = 0; i < 4; ++i) {
- rc_swizzle swz = GET_SWZ(swizzle, i);
- if (swz < 4) {
- tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
- tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
- } else {
- tmp.Swizzle |= swz << (i*3);
- }
- }
- return tmp;
-}
-
struct get_used_temporaries_data {
unsigned char * Used;
unsigned int UsedLength;
@@ -244,14 +223,3 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c)
return ip;
}
-
-rc_swizzle rc_mask_to_swizzle(unsigned int mask)
-{
- switch(mask) {
- case RC_MASK_X: return RC_SWIZZLE_X;
- case RC_MASK_Y: return RC_SWIZZLE_Y;
- case RC_MASK_Z: return RC_SWIZZLE_Z;
- case RC_MASK_W: return RC_SWIZZLE_W;
- default: return RC_SWIZZLE_UNUSED;
- }
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index be078b4f4f..df6c94b35f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -159,61 +159,6 @@ struct rc_program {
struct rc_constant_list Constants;
};
-static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
-{
- if (idx & 0x4)
- return idx;
- return GET_SWZ(swz, idx);
-}
-
-static inline unsigned int combine_swizzles4(unsigned int src,
- rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
-{
- unsigned int ret = 0;
-
- ret |= get_swz(src, swz_x);
- ret |= get_swz(src, swz_y) << 3;
- ret |= get_swz(src, swz_z) << 6;
- ret |= get_swz(src, swz_w) << 9;
-
- return ret;
-}
-
-static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
-{
- unsigned int ret = 0;
-
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
-
- return ret;
-}
-
-/**
- * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
- */
-static inline rc_swizzle rc_mask_to_swz(unsigned int mask)
-{
- switch (mask) {
- case RC_MASK_X: return RC_SWIZZLE_X;
- case RC_MASK_Y: return RC_SWIZZLE_Y;
- case RC_MASK_Z: return RC_SWIZZLE_Z;
- case RC_MASK_W: return RC_SWIZZLE_W;
- default: assert(0);
- }
- return RC_SWIZZLE_UNUSED;
-}
-struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
-
-static inline void reset_srcreg(struct rc_src_register* reg)
-{
- memset(reg, 0, sizeof(struct rc_src_register));
- reg->Swizzle = RC_SWIZZLE_XYZW;
-}
-
-
/**
* A transformation that can be passed to \ref rc_local_transform.
*
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 39408845d5..c8063171b8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -36,6 +36,7 @@
#include "radeon_program_alu.h"
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
static struct rc_instruction *emit1(
@@ -84,16 +85,6 @@ static struct rc_instruction *emit3(
return fpi;
}
-static struct rc_dst_register dstreg(int file, int index)
-{
- struct rc_dst_register dst;
- dst.File = file;
- dst.Index = index;
- dst.WriteMask = RC_MASK_XYZW;
- dst.RelAddr = 0;
- return dst;
-}
-
static struct rc_dst_register dstregtmpmask(int index, int mask)
{
struct rc_dst_register dst = {0};
@@ -186,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
return swizzle_smear(reg, RC_SWIZZLE_W);
}
+static int is_dst_safe_to_reuse(struct rc_instruction *inst)
+{
+ const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ assert(info->HasDstReg);
+
+ if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
+ return 0;
+
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index)
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ unsigned tmp;
+
+ if (is_dst_safe_to_reuse(inst))
+ tmp = inst->U.I.DstReg.Index;
+ else
+ tmp = rc_find_free_temporary(c);
+
+ return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
+}
+
static void transform_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -209,10 +232,26 @@ static void transform_CEIL(struct radeon_compiler* c,
* ceil(x) = x+frac(-x)
*/
- int tempreg = rc_find_free_temporary(c);
- emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
- inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg));
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
+ rc_remove_instruction(inst);
+}
+
+static void transform_CLAMP(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* CLAMP dst, src, min, max
+ * into:
+ * MIN tmp, src, max
+ * MAX dst, tmp, min
+ */
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
+ inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
+ emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
@@ -258,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c,
static void transform_FLR(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
- emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
- inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
@@ -351,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c,
static void transform_LRP(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg),
+ dst,
inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
inst->U.I.DstReg,
- inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
rc_remove_instruction(inst);
}
@@ -366,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c,
static void transform_POW(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
- struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
- struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
+ struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
+ struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
tempdst.WriteMask = RC_MASK_W;
tempsrc.Swizzle = RC_SWIZZLE_WWWW;
@@ -388,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c,
static void transform_SEQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -407,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c,
static void transform_SGE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -419,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c,
static void transform_SGT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -431,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c,
static void transform_SLE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -443,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c,
static void transform_SLT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -455,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c,
static void transform_SNE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -473,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c,
* CMP tmp1, x, 1, 0
* ADD result, tmp0, -tmp1;
*/
- unsigned tmp0, tmp1;
+ struct rc_dst_register dst0;
+ unsigned tmp1;
/* 0 < x */
- tmp0 = rc_find_free_temporary(c);
+ dst0 = try_to_reuse_dst(c, inst);
emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
- dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ dst0,
negate(inst->U.I.SrcReg[0]),
builtin_one,
builtin_zero);
@@ -495,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c,
/* result = tmp0 - tmp1 */
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tmp0),
+ srcreg(RC_FILE_TEMPORARY, dst0.Index),
negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
rc_remove_instruction(inst);
@@ -517,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c,
static void transform_XPD(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
- negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
@@ -553,6 +592,7 @@ int radeonTransformALU(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
@@ -592,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
{
/* There is no decent CMP available, so let's rig one up.
* CMP is defined as dst = src0 < 0.0 ? src1 : src2
- * The following sequence consumes two temps and two extra slots
+ * The following sequence consumes zero to two temps and two extra slots
* (the second temp and the second slot is consumed by transform_LRP),
* but should be equivalent:
*
@@ -600,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
* LRP dst, tmp0, src1, src2
*
* Yes, I know, I'm a mad scientist. ~ C. & M. */
- int tempreg0 = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
/* SLT tmp0, src0, 0.0 */
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg0),
+ dst,
inst->U.I.SrcReg[0], builtin_zero);
/* LRP dst, tmp0, src1, src2 */
transform_LRP(c,
emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
+ srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
rc_remove_instruction(inst);
}
@@ -642,24 +682,25 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c,
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
unsigned constant_swizzle;
int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
0.0000000000000000001,
&constant_swizzle);
/* MOV dst, src */
+ dst.WriteMask = RC_MASK_XYZW;
emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg),
+ dst,
inst->U.I.SrcReg[0]);
- /* MAX dst.z, src, 0.00...001 */
+ /* MAX dst.y, src, 0.00...001 */
emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
- dstregtmpmask(tempreg, RC_MASK_Y),
- srcreg(RC_FILE_TEMPORARY, tempreg),
+ dstregtmpmask(dst.Index, RC_MASK_Y),
+ srcreg(RC_FILE_TEMPORARY, dst.Index),
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
- inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
+ inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
}
static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
@@ -743,12 +784,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
* SLT tmp1, x, 0;
* ADD result, tmp0, -tmp1;
*/
- unsigned tmp0, tmp1;
+ struct rc_dst_register dst0 = try_to_reuse_dst(c, inst);
+ unsigned tmp1;
/* 0 < x */
- tmp0 = rc_find_free_temporary(c);
+ dst0 = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
- dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ dst0,
builtin_zero,
inst->U.I.SrcReg[0]);
@@ -763,7 +805,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
/* result = tmp0 - tmp1 */
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tmp0),
+ srcreg(RC_FILE_TEMPORARY, dst0.Index),
negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
rc_remove_instruction(inst);
@@ -781,6 +823,7 @@ int r300_transform_vertex_alu(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index 530afa5e08..f9d9f34b6a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -28,6 +28,8 @@
#include "radeon_program_tex.h"
+#include "radeon_compiler_util.h"
+
/* Series of transformations to be done on textures. */
static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
index 5f67f536f6..7d76585a59 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
@@ -87,8 +87,9 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
rc_for_all_reads_src(inst, mark_used, &d);
}
- /* Pass 2: If there is relative addressing, mark all externals as used. */
- if (has_rel_addr) {
+ /* Pass 2: If there is relative addressing or dead constant elimination
+ * is disabled, mark all externals as used. */
+ if (has_rel_addr || !c->remove_unused_constants) {
for (unsigned i = 0; i < c->Program.Constants.Count; i++)
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
const_used[i] = 1;
@@ -119,7 +120,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
/* is_identity ==> new_count == old_count
* !is_identity ==> new_count < old_count */
assert( is_identity || new_count < c->Program.Constants.Count);
- assert(!(has_rel_addr && are_externals_remapped));
+ assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
/* Pass 4: Redirect reads of all constants to their new locations. */
if (!is_identity) {
@@ -127,7 +128,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
inst != &c->Program.Instructions; inst = inst->Next) {
rc_remap_registers(inst, remap_regs, inv_remap_table);
}
-
}
/* Set the new constant count. Note that new_count may be less than
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
index 88165f7895..5bd19c0b9c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -51,6 +51,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user)
struct rc_reader_data reader_data;
unsigned char * used;
+ /* XXX Remove this once the register allocation works with flow control. */
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
+ return;
+ }
+
used_length = 2 * rc_recompute_ips(c);
used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
memset(used, 0, sizeof(unsigned char) * used_length);
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index c288834d24..0d8bd4fc70 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/extensions.h"
#include "main/bufferobj.h"
#include "main/texobj.h"
+#include "main/mfeatures.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index de66293999..f930b4d06b 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
+#include "main/mfeatures.h"
#include "main/mipmap.h"
#include "main/simple_list.h"
#include "main/texstore.h"
diff --git a/src/mesa/drivers/dri/r600/evergreen_blit.c b/src/mesa/drivers/dri/r600/evergreen_blit.c
index fc9fa9d22c..e07da8c15b 100644
--- a/src/mesa/drivers/dri/r600/evergreen_blit.c
+++ b/src/mesa/drivers/dri/r600/evergreen_blit.c
@@ -1406,9 +1406,95 @@ eg_set_default_state(context_t *context)
num_hs_stack_entries = 85;
num_ls_stack_entries = 85;
break;
+ case CHIP_FAMILY_PALM:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 96;
+ num_vs_threads = 16;
+ num_gs_threads = 16;
+ num_es_threads = 16;
+ num_hs_threads = 16;
+ num_ls_threads = 16;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
+ case CHIP_FAMILY_BARTS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 85;
+ num_vs_stack_entries = 85;
+ num_gs_stack_entries = 85;
+ num_es_stack_entries = 85;
+ num_hs_stack_entries = 85;
+ num_ls_stack_entries = 85;
+ break;
+ case CHIP_FAMILY_TURKS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 20;
+ num_gs_threads = 20;
+ num_es_threads = 20;
+ num_hs_threads = 20;
+ num_ls_threads = 20;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
+ case CHIP_FAMILY_CAICOS:
+ num_ps_gprs = 93;
+ num_vs_gprs = 46;
+ num_temp_gprs = 4;
+ num_gs_gprs = 31;
+ num_es_gprs = 31;
+ num_hs_gprs = 23;
+ num_ls_gprs = 23;
+ num_ps_threads = 128;
+ num_vs_threads = 10;
+ num_gs_threads = 10;
+ num_es_threads = 10;
+ num_hs_threads = 10;
+ num_ls_threads = 10;
+ num_ps_stack_entries = 42;
+ num_vs_stack_entries = 42;
+ num_gs_stack_entries = 42;
+ num_es_stack_entries = 42;
+ num_hs_stack_entries = 42;
+ num_ls_stack_entries = 42;
+ break;
}
- if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR)
+ if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CAICOS))
CLEARbit(sq_config, EG_SQ_CONFIG__VC_ENABLE_bit);
else
SETbit(sq_config, EG_SQ_CONFIG__VC_ENABLE_bit);
diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c
index 076a608573..648cda0078 100644
--- a/src/mesa/drivers/dri/r600/evergreen_state.c
+++ b/src/mesa/drivers/dri/r600/evergreen_state.c
@@ -1469,6 +1469,30 @@ static void evergreenInitSQConfig(struct gl_context * ctx)
uMaxThreads = 192;
uMaxStackEntries = 256;
break;
+ case CHIP_FAMILY_BARTS:
+ uSqNumCfInsts = 2;
+ bVC_ENABLE = GL_TRUE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 128;
+ uMaxThreads = 248;
+ uMaxStackEntries = 512;
+ break;
+ case CHIP_FAMILY_TURKS:
+ uSqNumCfInsts = 2;
+ bVC_ENABLE = GL_TRUE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 128;
+ uMaxThreads = 248;
+ uMaxStackEntries = 256;
+ break;
+ case CHIP_FAMILY_CAICOS:
+ uSqNumCfInsts = 1;
+ bVC_ENABLE = GL_FALSE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 128;
+ uMaxThreads = 192;
+ uMaxStackEntries = 256;
+ break;
default:
uSqNumCfInsts = 2;
bVC_ENABLE = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 057d98e0fc..00708be199 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/bufferobj.h"
#include "main/texobj.h"
#include "main/points.h"
+#include "main/mfeatures.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
@@ -259,7 +260,7 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen)
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
if( (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR)
- &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_PALM) )
+ &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_CAICOS) )
{
r700->bShaderUseMemConstant = GL_TRUE;
}
@@ -285,8 +286,13 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen)
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
ctx->Const.MaxTextureLodBias = 16.0;
- ctx->Const.MaxTextureLevels = 13; /* hw support 14 */
- ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */
+ if (screen->chip_family >= CHIP_FAMILY_CEDAR) {
+ ctx->Const.MaxTextureLevels = 15;
+ ctx->Const.MaxTextureRectSize = 16384;
+ } else {
+ ctx->Const.MaxTextureLevels = 14;
+ ctx->Const.MaxTextureRectSize = 8192;
+ }
ctx->Const.MinPointSize = 0x0001 / 8.0;
ctx->Const.MinPointSizeAA = 0x0001 / 8.0;
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index c3d68c41e5..2a99ded5d6 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
+#include "main/mfeatures.h"
#include "main/mipmap.h"
#include "main/simple_list.h"
#include "main/texstore.h"
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 2bf24096a0..024853c1be 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -481,6 +481,8 @@ unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3)
case EG_OP2_INST_FLT_TO_INT:
case EG_OP2_INST_SIN:
case EG_OP2_INST_COS:
+ case EG_OP2_INST_FLT_TO_INT_FLOOR:
+ case EG_OP2_INST_MOVA_INT:
return 1;
default: radeon_error(
@@ -1134,7 +1136,7 @@ GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
EG_VTX_WORD1__DST_SEL_W_shift,
EG_VTX_WORD1__DST_SEL_W_mask);
- SETfield(vfetch_instruction_ptr->m_Word1.val, 0, /* use format here, in r6/r7, format used set in const, need to use same */
+ SETfield(vfetch_instruction_ptr->m_Word1.val, 1,
EG_VTX_WORD1__UCF_shift,
EG_VTX_WORD1__UCF_bit);
SETfield(vfetch_instruction_ptr->m_Word1.val, data_format,
@@ -3297,23 +3299,76 @@ GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = 0;
- pAsm->D.dst.writex = 0;
- pAsm->D.dst.writey = 0;
- pAsm->D.dst.writez = 0;
- pAsm->D.dst.writew = 0;
-
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ if(8 == pAsm->unAsic)
{
- return GL_FALSE;
- }
+ /* Evergreen */
- if( GL_FALSE == next_ins(pAsm) )
+ /* Float to Signed Integer Using FLOOR */
+ pAsm->D.dst.opcode = EG_OP2_INST_FLT_TO_INT_FLOOR;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ /* Copy Signed Integer To Integer in AR and GPR */
+ pAsm->D.dst.opcode = EG_OP2_INST_MOVA_INT;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
{
- return GL_FALSE;
+ /* r6xx/r7xx */
+
+ /* Truncate floating-point to the nearest integer
+ in the range [-256, +255], and copy to AR and
+ to a GPR.
+ */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
}
return GL_TRUE;
@@ -3334,7 +3389,14 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+ }
pAsm->D.dst.op3 = 1;
tmp = (-1);
@@ -3416,8 +3478,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
checkop1(pAsm);
tmp = gethelpr(pAsm);
-
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -3457,7 +3525,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4742,7 +4817,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4782,7 +4864,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
{
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -5010,7 +5099,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
GLuint tmp = gethelpr(pAsm);
/* tmp = (src > 0 ? 1 : src) */
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ }
pAsm->D.dst.op3 = 1;
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
@@ -5033,7 +5129,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
}
/* dst = (-tmp > 0 ? -1 : tmp) */
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ }
pAsm->D.dst.op3 = 1;
if( GL_FALSE == assemble_dst(pAsm) )
diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
index 6c2648b6bd..60f1049602 100644
--- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
@@ -67,6 +67,9 @@ struct drm_radeon_info {
#define DRM_RADEON_INFO 0x1
#endif
+static inline void radeon_gem_get_kernel_name(struct radeon_bo *dummy, uint32_t *value)
+{
+}
static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
{
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index 82789cec5e..399052cbcb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -445,6 +445,45 @@
#define PCI_CHIP_PALM_9804 0x9804
#define PCI_CHIP_PALM_9805 0x9805
+#define PCI_CHIP_BARTS_6720 0x6720
+#define PCI_CHIP_BARTS_6721 0x6721
+#define PCI_CHIP_BARTS_6722 0x6722
+#define PCI_CHIP_BARTS_6723 0x6723
+#define PCI_CHIP_BARTS_6724 0x6724
+#define PCI_CHIP_BARTS_6725 0x6725
+#define PCI_CHIP_BARTS_6726 0x6726
+#define PCI_CHIP_BARTS_6727 0x6727
+#define PCI_CHIP_BARTS_6728 0x6728
+#define PCI_CHIP_BARTS_6729 0x6729
+#define PCI_CHIP_BARTS_6738 0x6738
+#define PCI_CHIP_BARTS_6739 0x6739
+
+#define PCI_CHIP_TURKS_6740 0x6740
+#define PCI_CHIP_TURKS_6741 0x6741
+#define PCI_CHIP_TURKS_6742 0x6742
+#define PCI_CHIP_TURKS_6743 0x6743
+#define PCI_CHIP_TURKS_6744 0x6744
+#define PCI_CHIP_TURKS_6745 0x6745
+#define PCI_CHIP_TURKS_6746 0x6746
+#define PCI_CHIP_TURKS_6747 0x6747
+#define PCI_CHIP_TURKS_6748 0x6748
+#define PCI_CHIP_TURKS_6749 0x6749
+#define PCI_CHIP_TURKS_6750 0x6750
+#define PCI_CHIP_TURKS_6758 0x6758
+#define PCI_CHIP_TURKS_6759 0x6759
+
+#define PCI_CHIP_CAICOS_6760 0x6760
+#define PCI_CHIP_CAICOS_6761 0x6761
+#define PCI_CHIP_CAICOS_6762 0x6762
+#define PCI_CHIP_CAICOS_6763 0x6763
+#define PCI_CHIP_CAICOS_6764 0x6764
+#define PCI_CHIP_CAICOS_6765 0x6765
+#define PCI_CHIP_CAICOS_6766 0x6766
+#define PCI_CHIP_CAICOS_6767 0x6767
+#define PCI_CHIP_CAICOS_6768 0x6768
+#define PCI_CHIP_CAICOS_6770 0x6770
+#define PCI_CHIP_CAICOS_6779 0x6779
+
enum {
CHIP_FAMILY_R100,
CHIP_FAMILY_RV100,
@@ -489,6 +528,9 @@ enum {
CHIP_FAMILY_CYPRESS,
CHIP_FAMILY_HEMLOCK,
CHIP_FAMILY_PALM,
+ CHIP_FAMILY_BARTS,
+ CHIP_FAMILY_TURKS,
+ CHIP_FAMILY_CAICOS,
CHIP_FAMILY_LAST
};
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index ca6ab46ca4..a1124483a6 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -100,6 +100,9 @@ static const char* get_chip_family_name(int chip_family)
case CHIP_FAMILY_CYPRESS: return "CYPRESS";
case CHIP_FAMILY_HEMLOCK: return "HEMLOCK";
case CHIP_FAMILY_PALM: return "PALM";
+ case CHIP_FAMILY_BARTS: return "BARTS";
+ case CHIP_FAMILY_TURKS: return "TURKS";
+ case CHIP_FAMILY_CAICOS: return "CAICOS";
default: return "unknown";
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index e3de534b5f..154a8815e4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/simple_list.h"
#include "main/imports.h"
#include "main/extensions.h"
+#include "main/mfeatures.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index a36a1dc94a..6656d391e0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -28,6 +28,7 @@
#include "main/imports.h"
#include "main/macros.h"
+#include "main/mfeatures.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/fbobject.h"
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
index 088f970172..a68a976877 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
@@ -49,7 +49,7 @@ struct _radeon_mipmap_level {
};
/* store the max possible in the miptree */
-#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13
+#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 15
/**
* A mipmap tree contains texture images in the layout that the hardware
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 94e56c2ade..a35fcfe9d7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -1163,6 +1163,54 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
+ case PCI_CHIP_BARTS_6720:
+ case PCI_CHIP_BARTS_6721:
+ case PCI_CHIP_BARTS_6722:
+ case PCI_CHIP_BARTS_6723:
+ case PCI_CHIP_BARTS_6724:
+ case PCI_CHIP_BARTS_6725:
+ case PCI_CHIP_BARTS_6726:
+ case PCI_CHIP_BARTS_6727:
+ case PCI_CHIP_BARTS_6728:
+ case PCI_CHIP_BARTS_6729:
+ case PCI_CHIP_BARTS_6738:
+ case PCI_CHIP_BARTS_6739:
+ screen->chip_family = CHIP_FAMILY_BARTS;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_TURKS_6740:
+ case PCI_CHIP_TURKS_6741:
+ case PCI_CHIP_TURKS_6742:
+ case PCI_CHIP_TURKS_6743:
+ case PCI_CHIP_TURKS_6744:
+ case PCI_CHIP_TURKS_6745:
+ case PCI_CHIP_TURKS_6746:
+ case PCI_CHIP_TURKS_6747:
+ case PCI_CHIP_TURKS_6748:
+ case PCI_CHIP_TURKS_6749:
+ case PCI_CHIP_TURKS_6750:
+ case PCI_CHIP_TURKS_6758:
+ case PCI_CHIP_TURKS_6759:
+ screen->chip_family = CHIP_FAMILY_TURKS;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_CAICOS_6760:
+ case PCI_CHIP_CAICOS_6761:
+ case PCI_CHIP_CAICOS_6762:
+ case PCI_CHIP_CAICOS_6763:
+ case PCI_CHIP_CAICOS_6764:
+ case PCI_CHIP_CAICOS_6765:
+ case PCI_CHIP_CAICOS_6766:
+ case PCI_CHIP_CAICOS_6767:
+ case PCI_CHIP_CAICOS_6768:
+ case PCI_CHIP_CAICOS_6770:
+ case PCI_CHIP_CAICOS_6779:
+ screen->chip_family = CHIP_FAMILY_CAICOS;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
default:
fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
device_id);
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index 83b1d1b1d7..8a35c7d2d2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
+#include "main/mfeatures.h"
#include "main/simple_list.h"
#include "main/texstore.h"
#include "main/teximage.h"
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index 8b1e34fe76..cf85a5bb57 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -33,6 +33,7 @@
#include "main/imports.h"
#include "main/context.h"
#include "main/enums.h"
+#include "main/mfeatures.h"
#include "main/mipmap.h"
#include "main/texcompress.h"
#include "main/texstore.h"
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index a1908c6bc7..538a07fbba 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -32,6 +32,7 @@
#define RADEON_TEXTURE_H
#include "main/formats.h"
+#include "main/mfeatures.h"
void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
GLuint numrows, GLuint rowsize);
diff --git a/src/mesa/drivers/dri/sis/server/sis_dri.h b/src/mesa/drivers/dri/sis/server/sis_dri.h
index f0171f3c0f..7d8f507115 100644
--- a/src/mesa/drivers/dri/sis/server/sis_dri.h
+++ b/src/mesa/drivers/dri/sis/server/sis_dri.h
@@ -72,13 +72,4 @@ typedef struct {
int dummy;
} SISDRIContextRec, *SISDRIContextPtr;
-#ifdef XFree86Server
-
-#include "screenint.h"
-
-Bool SISDRIScreenInit(ScreenPtr pScreen);
-void SISDRICloseScreen(ScreenPtr pScreen);
-Bool SISDRIFinishScreenInit(ScreenPtr pScreen);
-
-#endif
#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.h b/src/mesa/drivers/dri/tdfx/tdfx_context.h
index fb38419dcd..7e2f0e00a8 100644
--- a/src/mesa/drivers/dri/tdfx/tdfx_context.h
+++ b/src/mesa/drivers/dri/tdfx/tdfx_context.h
@@ -41,11 +41,7 @@
#include <sys/time.h>
#include "dri_util.h"
-#ifdef XFree86Server
-#include "GL/xf86glx.h"
-#else
#include "main/glheader.h"
-#endif
#if defined(__linux__)
#include <signal.h>
#endif
diff --git a/src/mesa/drivers/dri/unichrome/server/via_dri.h b/src/mesa/drivers/dri/unichrome/server/via_dri.h
index b47397d572..c6eed03c1c 100644
--- a/src/mesa/drivers/dri/unichrome/server/via_dri.h
+++ b/src/mesa/drivers/dri/unichrome/server/via_dri.h
@@ -35,9 +35,7 @@
#define VIA_DRIDDX_VERSION_MINOR 0
#define VIA_DRIDDX_VERSION_PATCH 0
-#ifndef XFree86Server
typedef int Bool;
-#endif
typedef struct {
drm_handle_t handle;