diff options
Diffstat (limited to 'src/mesa/drivers/dri')
131 files changed, 3200 insertions, 3250 deletions
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index a5b71bd40a..bf8cf6eec0 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -790,6 +790,9 @@ driCreateNewScreen(int scrn, static const __DRIextension *emptyExtensionList[] = { NULL }; __DRIscreen *psp; + if (driDriverAPI.InitScreen == NULL) + return NULL; + psp = calloc(1, sizeof *psp); if (!psp) return NULL; diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h index abd79562f9..f436d1398c 100644 --- a/src/mesa/drivers/dri/common/spantmp2.h +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -48,6 +48,15 @@ #define HW_WRITE_CLIPLOOP() HW_CLIPLOOP() #endif +#ifdef SPANTMP_MESA_FMT +#define SPANTMP_PIXEL_FMT GL_NONE +#define SPANTMP_PIXEL_TYPE GL_NONE +#endif + +#ifndef SPANTMP_MESA_FMT +#define SPANTMP_MESA_FMT MESA_FORMAT_COUNT +#endif + #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5) /** @@ -445,6 +454,118 @@ rgba[3] = p; \ } while (0) +#elif (SPANTMP_MESA_FMT == MESA_FORMAT_R8) + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLubyte *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLubyte *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + p = color[0] + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, r) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLubyte p = GET_VALUE(_x, _y); \ + rgba[0] = p; \ + rgba[1] = 0; \ + rgba[2] = 0; \ + rgba[3] = 0; \ + } while (0) + +#elif (SPANTMP_MESA_FMT == MESA_FORMAT_RG88) + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) * 2 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + PACK_COLOR_8888(color[0], color[1], 0, 0) + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, r) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = GET_VALUE(_x, _y); \ + rgba[0] = p & 0xff; \ + rgba[1] = (p >> 8) & 0xff; \ + rgba[2] = 0; \ + rgba[3] = 0; \ + } while (0) + +#elif (SPANTMP_MESA_FMT == MESA_FORMAT_R16) + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) * 2 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + p = color[0] + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, r) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = GET_VALUE(_x, _y); \ + rgba[0] = p; \ + rgba[1] = 0; \ + rgba[2] = 0; \ + rgba[3] = 0; \ + } while (0) + +#elif (SPANTMP_MESA_FMT == MESA_FORMAT_RG1616) + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + ((color[1] << 16) | (color[0])) + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, r) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLuint p = GET_VALUE(_x, _y); \ + rgba[0] = p & 0xffff; \ + rgba[1] = (p >> 16) & 0xffff; \ + rgba[2] = 0; \ + rgba[3] = 0; \ + } while (0) + #else #error SPANTMP_PIXEL_FMT must be set to a valid value! #endif @@ -914,3 +1035,4 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) #undef GET_PTR #undef SPANTMP_PIXEL_FMT #undef SPANTMP_PIXEL_TYPE +#undef SPANTMP_MESA_FMT diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index ae1317029a..99ee1bb4e9 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -585,6 +585,8 @@ #define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12) #define TM0S2_OUTPUT_CHAN_SHIFT 10 #define TM0S2_OUTPUT_CHAN_MASK (3<<10) +#define TM0S2_BASE_MIP_LEVEL_SHIFT 1 +#define TM0S2_LOD_PRECLAMP (1 << 0) #define TM0S3_MIP_FILTER_MASK (0x3<<30) #define TM0S3_MIP_FILTER_SHIFT 30 @@ -605,6 +607,8 @@ #define TM0S3_MAX_MIP_MASK (0xff<<9) #define TM0S3_MIN_MIP_SHIFT 3 #define TM0S3_MIN_MIP_MASK (0x3f<<3) +#define TM0S3_MIN_MIP_SHIFT_830 5 +#define TM0S3_MIN_MIP_MASK_830 (0x3f<<5) #define TM0S3_KILL_PIXEL (1<<2) #define TM0S3_KEYED_FILTER (1<<1) #define TM0S3_CHROMA_KEY (1<<0) diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index b3bb8837cc..c35b4b5ed0 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -28,13 +28,14 @@ #include "main/mtypes.h" #include "main/enums.h" #include "main/colormac.h" +#include "main/macros.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" #include "i830_context.h" #include "i830_reg.h" - +#include "intel_chipset.h" static GLuint @@ -139,9 +140,9 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ - firstImage = tObj->Image[0][intelObj->firstLevel]; + firstImage = tObj->Image[0][tObj->BaseLevel]; - intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0, + intel_miptree_get_image_offset(intelObj->mt, tObj->BaseLevel, 0, 0, &dst_x, &dst_y); drm_intel_bo_reference(intelObj->mt->region->buffer); @@ -189,6 +190,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { GLuint minFilt, mipFilt, magFilt; + float maxlod; + uint32_t minlod_fixed, maxlod_fixed; switch (tObj->MinFilter) { case GL_NEAREST: @@ -252,10 +255,24 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION; #endif - state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel - - intelObj->firstLevel) * - 4) << TM0S3_MIN_MIP_SHIFT; - + /* We get one field with fraction bits for the maximum + * addressable (smallest resolution) LOD. Use it to cover both + * MAX_LEVEL and MAX_LOD. + */ + minlod_fixed = U_FIXED(CLAMP(tObj->MinLod, 0.0, 11), 4); + maxlod = MIN2(tObj->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); + if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM || + intel->intelScreen->deviceID == PCI_CHIP_I865_G) { + maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2); + maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 3) >> 2); + state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT; + state[I830_TEXREG_TM0S2] |= TM0S2_LOD_PRECLAMP; + } else { + maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11), 0); + maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 15) >> 4); + state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT_830; + } + state[I830_TEXREG_TM0S3] |= minlod_fixed << TM0S3_MAX_MIP_SHIFT; state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) | (mipFilt << TM0S3_MIP_FILTER_SHIFT) | (magFilt << TM0S3_MAG_FILTER_SHIFT)); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index f7fdb78d05..ebdefeac87 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -364,7 +364,7 @@ i830_emit_invarient_state(struct intel_context *intel) #define emit( intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size ) + intel_batchbuffer_data(intel->batch, state, size, false) static GLuint get_dirty(struct i830_hw_state *state) @@ -429,7 +429,8 @@ i830_emit_state(struct intel_context *intel) * batchbuffer fills up. */ intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE); + get_state_size(state) + INTEL_PRIM_EMIT_SIZE, + false); count = 0; again: aper_count = 0; @@ -534,14 +535,9 @@ i830_emit_state(struct intel_context *intel) BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1); OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]); - if (state->tex_buffer[i]) { - OUT_RELOC(state->tex_buffer[i], - I915_GEM_DOMAIN_SAMPLER, 0, - state->tex_offset[i]); - } - else { - OUT_BATCH(state->tex_offset[i]); - } + OUT_RELOC(state->tex_buffer[i], + I915_GEM_DOMAIN_SAMPLER, 0, + state->tex_offset[i]); OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S1]); OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S2]); @@ -584,6 +580,27 @@ i830_destroy_context(struct intel_context *intel) _tnl_free_vertices(&intel->ctx); } +static uint32_t i830_render_target_format_for_mesa_format[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_ARGB8888] = DV_PF_8888, + [MESA_FORMAT_XRGB8888] = DV_PF_8888, + [MESA_FORMAT_RGB565] = DV_PF_565, + [MESA_FORMAT_ARGB1555] = DV_PF_1555, + [MESA_FORMAT_ARGB4444] = DV_PF_4444, +}; + +static bool +i830_render_target_supported(gl_format format) +{ + if (format == MESA_FORMAT_S8_Z24 || + format == MESA_FORMAT_X8_Z24 || + format == MESA_FORMAT_Z16) { + return true; + } + + return i830_render_target_format_for_mesa_format[format] != 0; +} + static void i830_set_draw_region(struct intel_context *intel, struct intel_region *color_regions[], @@ -623,24 +640,7 @@ i830_set_draw_region(struct intel_context *intel, DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */ if (irb != NULL) { - switch (irb->Base.Format) { - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - value |= DV_PF_8888; - break; - case MESA_FORMAT_RGB565: - value |= DV_PF_565; - break; - case MESA_FORMAT_ARGB1555: - value |= DV_PF_1555; - break; - case MESA_FORMAT_ARGB4444: - value |= DV_PF_4444; - break; - default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->Base.Format); - } + value |= i830_render_target_format_for_mesa_format[irb->Base.Format]; } if (depth_region && depth_region->cpp == 4) { @@ -728,4 +728,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; i830->intel.vtbl.finish_batch = intel_finish_vb; i830->intel.vtbl.invalidate_state = i830_invalidate_state; + i830->intel.vtbl.render_target_supported = i830_render_target_supported; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f943f81dd0..f32f3cf602 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -176,6 +176,7 @@ i915CreateContext(int api, ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = GL_TRUE; ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoIfs = GL_TRUE; ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoNoise = GL_TRUE; + ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoPow = GL_TRUE; ctx->Const.MaxDrawBuffers = 1; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index c00ee415b6..1c6e984517 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -569,10 +569,14 @@ upload_program(struct i915_fragment_program *p) if (inst->DstReg.CondMask == COND_TR) { tmp = i915_get_utemp(p); + /* The KIL instruction discards the fragment if any component of + * the source is < 0. Emit an immediate operand of {-1}.xywz. + */ i915_emit_texld(p, get_live_regs(p, inst), tmp, A0_DEST_CHANNEL_ALL, 0, /* use a dummy dest reg */ - swizzle(tmp, ONE, ONE, ONE, ONE), /* always */ + negate(swizzle(tmp, ONE, ONE, ONE, ONE), + 1, 1, 1, 1), T0_TEXKILL); } else { p->error = 1; @@ -1158,11 +1162,6 @@ translate_program(struct i915_fragment_program *p) fixup_depth_write(p); i915_fini_program(p); - if (INTEL_DEBUG & DEBUG_WM) { - printf("i915:\n"); - i915_disassemble_program(i915->state.Program, i915->state.ProgramSize); - } - p->translated = 1; } @@ -1423,6 +1422,11 @@ i915ValidateFragmentProgram(struct i915_context *i915) if (!p->on_hardware) i915_upload_program(i915, p); + + if (INTEL_DEBUG & DEBUG_WM) { + printf("i915:\n"); + i915_disassemble_program(i915->state.Program, i915->state.ProgramSize); + } } void diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index c724a21496..af140c85f5 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -156,7 +156,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ - firstImage = tObj->Image[0][intelObj->firstLevel]; + firstImage = tObj->Image[0][tObj->BaseLevel]; drm_intel_bo_reference(intelObj->mt->region->buffer); i915->state.tex_buffer[unit] = intelObj->mt->region->buffer; diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 59dfe08563..a94b957127 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -217,7 +217,7 @@ i915_emit_invarient_state(struct intel_context *intel) #define emit(intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size) + intel_batchbuffer_data(intel->batch, state, size, false) static GLuint get_dirty(struct i915_hw_state *state) @@ -300,7 +300,8 @@ i915_emit_state(struct intel_context *intel) * batchbuffer fills up. */ intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE); + get_state_size(state) + INTEL_PRIM_EMIT_SIZE, + false); count = 0; again: aper_count = 0; @@ -435,15 +436,9 @@ i915_emit_state(struct intel_context *intel) OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT); for (i = 0; i < I915_TEX_UNITS; i++) if (dirty & I915_UPLOAD_TEX(i)) { - - if (state->tex_buffer[i]) { - OUT_RELOC(state->tex_buffer[i], - I915_GEM_DOMAIN_SAMPLER, 0, - state->tex_offset[i]); - } - else { - OUT_BATCH(state->tex_offset[i]); - } + OUT_RELOC(state->tex_buffer[i], + I915_GEM_DOMAIN_SAMPLER, 0, + state->tex_offset[i]); OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]); OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]); @@ -523,6 +518,27 @@ i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, } } +static uint32_t i915_render_target_format_for_mesa_format[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_ARGB8888] = DV_PF_8888, + [MESA_FORMAT_XRGB8888] = DV_PF_8888, + [MESA_FORMAT_RGB565] = DV_PF_565 | DITHER_FULL_ALWAYS, + [MESA_FORMAT_ARGB1555] = DV_PF_1555 | DITHER_FULL_ALWAYS, + [MESA_FORMAT_ARGB4444] = DV_PF_4444 | DITHER_FULL_ALWAYS, +}; + +static bool +i915_render_target_supported(gl_format format) +{ + if (format == MESA_FORMAT_S8_Z24 || + format == MESA_FORMAT_X8_Z24 || + format == MESA_FORMAT_Z16) { + return true; + } + + return i915_render_target_format_for_mesa_format[format] != 0; +} + static void i915_set_draw_region(struct intel_context *intel, struct intel_region *color_regions[], @@ -562,24 +578,7 @@ i915_set_draw_region(struct intel_context *intel, DSTORG_VERT_BIAS(0x8) | /* .5 */ LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); if (irb != NULL) { - switch (irb->Base.Format) { - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - value |= DV_PF_8888; - break; - case MESA_FORMAT_RGB565: - value |= DV_PF_565 | DITHER_FULL_ALWAYS; - break; - case MESA_FORMAT_ARGB1555: - value |= DV_PF_1555 | DITHER_FULL_ALWAYS; - break; - case MESA_FORMAT_ARGB4444: - value |= DV_PF_4444 | DITHER_FULL_ALWAYS; - break; - default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->Base.Format); - } + value |= i915_render_target_format_for_mesa_format[irb->Base.Format]; } /* This isn't quite safe, thus being hidden behind an option. When changing @@ -686,4 +685,5 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.update_texture_state = i915UpdateTextureState; i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; i915->intel.vtbl.finish_batch = intel_finish_vb; + i915->intel.vtbl.render_target_supported = i915_render_target_supported; } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index e3ca863fe5..7c3ac0c14e 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -81,7 +81,6 @@ DRIVER_SOURCES = \ brw_wm_emit.c \ brw_wm_fp.c \ brw_wm_iz.c \ - brw_wm_glsl.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ brw_wm_pass2.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index d3a1233aac..d286c9dbdc 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -239,7 +239,7 @@ static void upload_blend_constant_color(struct brw_context *brw) struct brw_blend_constant_color bcc; memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.opcode = _3DSTATE_BLEND_CONSTANT_COLOR; bcc.header.length = sizeof(bcc)/4-2; bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index cb0a8b96c9..8fc322fd82 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api, (i == MESA_SHADER_FRAGMENT); ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp = (i == MESA_SHADER_FRAGMENT); - - if (intel->gen == 6) - ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX); } ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); @@ -154,6 +151,13 @@ GLboolean brwCreateContext( int api, MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, ctx->Const.FragmentProgram.MaxEnvParams); + /* Gen6 converts quads to polygon in beginning of 3D pipeline, + but we're not sure how it's actually done for vertex order, + that affect provoking vertex decision. Always use last vertex + convention for quad primitive which works as expected for now. */ + if (intel->gen == 6) + ctx->Const.QuadsFollowProvokingVertexConvention = GL_FALSE; + if (intel->is_g4x || intel->gen >= 5) { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 335339515a..7069724466 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -171,7 +171,6 @@ struct brw_vertex_program { struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ - GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -211,6 +210,7 @@ struct brw_wm_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; GLboolean error; + int dispatch_width; /* Pointer to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 7b823eb201..877b22fec1 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -242,21 +242,13 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (vp->use_const_buffer) { - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - } - } - } else { - for (i = 0; i < nr; i++) { - memcpy(buf + offset + i * 4, + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 239586a036..2f7dcc2dda 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -35,28 +35,6 @@ /* 3D state: */ -#define _3DOP_3DSTATE_PIPELINED 0x0 -#define _3DOP_3DSTATE_NONPIPELINED 0x1 -#define _3DOP_3DCONTROL 0x2 -#define _3DOP_3DPRIMITIVE 0x3 - -#define _3DSTATE_PIPELINED_POINTERS 0x00 -#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 -#define _3DSTATE_VERTEX_BUFFERS 0x08 -#define _3DSTATE_VERTEX_ELEMENTS 0x09 -#define _3DSTATE_INDEX_BUFFER 0x0A -#define _3DSTATE_VF_STATISTICS 0x0B -#define _3DSTATE_DRAWING_RECTANGLE 0x00 -#define _3DSTATE_CONSTANT_COLOR 0x01 -#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 -#define _3DSTATE_CHROMA_KEY 0x04 -#define _3DSTATE_DEPTH_BUFFER 0x05 -#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 -#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 -#define _3DSTATE_LINE_STIPPLE 0x08 -#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 -#define _3DCONTROL 0x00 - #define PIPE_CONTROL_NOWRITE 0x00 #define PIPE_CONTROL_WRITEIMMEDIATE 0x01 #define PIPE_CONTROL_WRITEDEPTH 0x02 @@ -389,6 +367,7 @@ #define BRW_SURFACEFORMAT_R8_SSCALED 0x149 #define BRW_SURFACEFORMAT_R8_USCALED 0x14A #define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C +#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180 #define BRW_SURFACEFORMAT_R1_UINT 0x181 #define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 #define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 @@ -462,6 +441,13 @@ #define BRW_COMPRESSION_2NDHALF 1 #define BRW_COMPRESSION_COMPRESSED 2 +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + #define BRW_CONDITIONAL_NONE 0 #define BRW_CONDITIONAL_Z 1 #define BRW_CONDITIONAL_NZ 2 @@ -837,7 +823,7 @@ # define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) # define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) -#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */ # define PS_SAMPLER_STATE_CHANGE (1 << 12) # define GS_SAMPLER_STATE_CHANGE (1 << 9) # define VS_SAMPLER_STATE_CHANGE (1 << 8) @@ -878,27 +864,29 @@ #define CMD_INDEX_BUFFER 0x780a #define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b -#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ +#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ -#define CMD_URB 0x7805 /* GEN6+ */ +#define _3DSTATE_URB 0x7805 /* GEN6+ */ # define GEN6_URB_VS_SIZE_SHIFT 16 # define GEN6_URB_VS_ENTRIES_SHIFT 0 # define GEN6_URB_GS_ENTRIES_SHIFT 8 # define GEN6_URB_GS_SIZE_SHIFT 0 -#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ # define GEN6_CC_VIEWPORT_MODIFY (1 << 12) # define GEN6_SF_VIEWPORT_MODIFY (1 << 11) # define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) -#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ +#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ -#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +#define _3DSTATE_VS 0x7810 /* GEN6+ */ /* DW2 */ # define GEN6_VS_SPF_MODE (1 << 31) # define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) # define GEN6_VS_SAMPLER_COUNT_SHIFT 27 # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) /* DW4 */ # define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 # define GEN6_VS_URB_READ_LENGTH_SHIFT 11 @@ -909,7 +897,7 @@ # define GEN6_VS_CACHE_DISABLE (1 << 1) # define GEN6_VS_ENABLE (1 << 0) -#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +#define _3DSTATE_GS 0x7811 /* GEN6+ */ /* DW2 */ # define GEN6_GS_SPF_MODE (1 << 31) # define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) @@ -927,7 +915,7 @@ /* DW6 */ # define GEN6_GS_ENABLE (1 << 15) -#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +#define _3DSTATE_CLIP 0x7812 /* GEN6+ */ /* DW1 */ # define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) /** @@ -957,7 +945,7 @@ # define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 # define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) -#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +#define _3DSTATE_SF 0x7813 /* GEN6+ */ /* DW1 */ # define GEN6_SF_NUM_OUTPUTS_SHIFT 22 # define GEN6_SF_SWIZZLE_ENABLE (1 << 21) @@ -1022,18 +1010,27 @@ # define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 # define ATTRIBUTE_0_SWIZZLE_SHIFT 6 # define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + /* DW16: Point sprite texture coordinate enables */ /* DW17: Constant interpolation enables */ /* DW18: attr 0-7 wrap shortest enables */ /* DW19: attr 8-16 wrap shortest enables */ -#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +#define _3DSTATE_WM 0x7814 /* GEN6+ */ /* DW1: kernel pointer */ /* DW2 */ # define GEN6_WM_SPF_MODE (1 << 31) # define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) # define GEN6_WM_SAMPLER_COUNT_SHIFT 27 # define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) /* DW3: scratch space */ /* DW4 */ # define GEN6_WM_STATISTICS_ENABLE (1 << 31) @@ -1088,34 +1085,34 @@ /* DW7: kernel 1 pointer */ /* DW8: kernel 2 pointer */ -#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ -#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ -#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */ +#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */ +#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */ # define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) # define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) # define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) # define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) -#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ +#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */ -#define CMD_DRAW_RECT 0x7900 -#define CMD_BLEND_CONSTANT_COLOR 0x7901 -#define CMD_CHROMA_KEY 0x7904 -#define CMD_DEPTH_BUFFER 0x7905 -#define CMD_POLY_STIPPLE_OFFSET 0x7906 -#define CMD_POLY_STIPPLE_PATTERN 0x7907 -#define CMD_LINE_STIPPLE_PATTERN 0x7908 -#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 -#define CMD_AA_LINE_PARAMETERS 0x790a +#define _3DSTATE_DRAWING_RECTANGLE 0x7900 +#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901 +#define _3DSTATE_CHROMA_KEY 0x7904 +#define _3DSTATE_DEPTH_BUFFER 0x7905 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907 +#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 +#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */ -#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */ /* DW1 */ # define SVB_INDEX_SHIFT 29 # define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ /* DW2: SVB index */ /* DW3: SVB maximum index */ -#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */ /* DW1 */ # define MS_PIXEL_LOCATION_CENTER (0 << 4) # define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) @@ -1123,7 +1120,10 @@ # define MS_NUMSAMPLES_4 (2 << 1) # define MS_NUMSAMPLES_8 (3 << 1) -#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */ +#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */ + +#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK+ */ # define DEPTH_CLEAR_VALID (1 << 15) /* DW1: depth clear value */ diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 962c04128b..111cb9974e 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) err |= dest (file, inst); } else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF || inst->header.opcode == BRW_OPCODE_ELSE || - inst->header.opcode == BRW_OPCODE_ENDIF)) { + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { format (file, " %d", inst->bits1.branch_gen6.jump_count); } @@ -972,7 +973,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_render_cache.send_commit_msg, inst->bits3.dp_render_cache.msg_length, inst->bits3.dp_render_cache.response_length); - } else if (gen >= 5) { + } else if (gen >= 5 /* FINISHME: || is_g4x */) { format (file, " (%d, %d, %d)", inst->bits3.dp_read_gen5.binding_table_index, inst->bits3.dp_read_gen5.msg_control, diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a1f403ca4e..7eb16b71f4 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -159,7 +159,7 @@ static void brw_emit_prim(struct brw_context *brw, } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, - sizeof(prim_packet)); + sizeof(prim_packet), false); } if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel->batch); @@ -351,7 +351,8 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, * an upper bound of how much we might emit in a single * brw_try_draw_prims(). */ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4); + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, + false); hw_prim = brw_set_prim(brw, &prim[i]); diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 2ff39e8e64..3b5c4c071e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -72,7 +72,37 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control ) { - p->current->header.compression_control = compression_control; + p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); + + if (p->brw->intel.gen >= 6) { + /* Since we don't use the 32-wide support in gen6, we translate + * the pre-gen6 compression control here. + */ + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1Q; + break; + case BRW_COMPRESSION_2NDHALF: + /* For 8-wide, this is "use the second set of 8 bits." */ + p->current->header.compression_control = GEN6_COMPRESSION_2Q; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For 16-wide instruction compression, use the first set of 16 bits + * since we don't do 32-wide dispatch. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + default: + assert(!"not reached"); + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + } + } else { + p->current->header.compression_control = compression_control; + } } void brw_set_mask_control( struct brw_compile *p, GLuint value ) @@ -95,6 +125,7 @@ void brw_push_insn_state( struct brw_compile *p ) { assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->compressed_stack[p->current - p->stack] = p->compressed; p->current++; } @@ -102,6 +133,7 @@ void brw_pop_insn_state( struct brw_compile *p ) { assert(p->current != p->stack); p->current--; + p->compressed = p->compressed_stack[p->current - p->stack]; } @@ -112,6 +144,7 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p ) p->brw = brw; p->nr_insn = 0; p->current = p->stack; + p->compressed = false; memset(p->current, 0, sizeof(p->current[0])); /* Some defaults? diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index b4538e6e8a..119ffc7237 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -33,6 +33,7 @@ #ifndef BRW_EU_H #define BRW_EU_H +#include <stdbool.h> #include "brw_structs.h" #include "brw_defines.h" #include "program/prog_instruction.h" @@ -106,10 +107,12 @@ struct brw_compile { /* Allow clients to push/pop instruction state: */ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; struct brw_instruction *current; GLuint flag_value; GLboolean single_program_flow; + bool compressed; struct brw_context *brw; struct brw_glsl_label *first_label; /**< linked list of labels */ @@ -858,7 +861,8 @@ void brw_fb_WRITE(struct brw_compile *p, GLuint binding_table_index, GLuint msg_length, GLuint response_length, - GLboolean eot); + GLboolean eot, + GLboolean header_present); void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, @@ -954,6 +958,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn); struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); /* Forward jumps: */ @@ -1009,6 +1015,7 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); +void brw_set_uip_jip(struct brw_compile *p); /* brw_optimize.c */ void brw_optimize(struct brw_compile *p); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9cb941dacf..88131c432e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -41,19 +41,20 @@ * Internal helper for constructing instructions */ -static void guess_execution_size( struct brw_instruction *insn, - struct brw_reg reg ) +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) { - if (reg.width == BRW_WIDTH_8 && - insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + if (reg.width == BRW_WIDTH_8 && p->compressed) insn->header.execution_size = BRW_EXECUTE_16; else insn->header.execution_size = reg.width; /* note - definitions are compatible */ } -static void brw_set_dest( struct brw_instruction *insn, - struct brw_reg dest ) +static void brw_set_dest(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -100,7 +101,7 @@ static void brw_set_dest( struct brw_instruction *insn, /* NEW: Set the execution size based on dest.width and * insn->compression_control: */ - guess_execution_size(insn, dest); + guess_execution_size(p, insn, dest); } extern int reg_type_size[]; @@ -535,6 +536,16 @@ brw_set_dp_read_message(struct brw_context *brw, insn->bits3.dp_read_gen5.end_of_thread = 0; insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; insn->bits2.send_gen5.end_of_thread = 0; + } else if (intel->is_g4x) { + insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ + insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ + insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/ + insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read_g4x.pad1 = 0; + insn->bits3.dp_read_g4x.end_of_thread = 0; } else { insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ @@ -629,7 +640,7 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p, struct brw_reg src ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); return insn; } @@ -641,7 +652,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, struct brw_reg src1 ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); return insn; @@ -680,7 +691,7 @@ void brw_##OP(struct brw_compile *p, \ { \ struct brw_instruction *rnd, *add; \ rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(rnd, dest); \ + brw_set_dest(p, rnd, dest); \ brw_set_src0(rnd, src); \ rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ \ @@ -779,7 +790,7 @@ struct brw_instruction *brw_MUL(struct brw_compile *p, void brw_NOP(struct brw_compile *p) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_ud(0x0)); } @@ -840,11 +851,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -870,7 +881,7 @@ brw_IF_gen6(struct brw_compile *p, uint32_t conditional, insn = next_insn(p, BRW_OPCODE_IF); - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->header.execution_size = BRW_EXECUTE_8; insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, src0); @@ -905,11 +916,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -965,11 +976,11 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); if (intel->gen < 6) { - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } @@ -1029,16 +1040,44 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); - brw_set_dest(insn, brw_ip_reg()); + if (intel->gen >= 6) { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + int br = 2; + + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); + + insn->bits3.break_cont.uip = br * (do_insn - insn); + insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = pop_count; return insn; } @@ -1046,7 +1085,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; @@ -1058,17 +1097,33 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) } /* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - if (p->single_program_flow) { + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6 || p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); /* Override the defaults for this instruction: */ - brw_set_dest(insn, brw_null_reg()); + brw_set_dest(p, insn, brw_null_reg()); brw_set_src0(insn, brw_null_reg()); brw_set_src1(insn, brw_null_reg()); @@ -1094,34 +1149,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, if (intel->gen >= 5) br = 2; - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else + if (intel->gen >= 6) { insn = next_insn(p, BRW_OPCODE_WHILE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = do_insn->header.execution_size; + assert(insn->header.execution_size == BRW_EXECUTE_8); + } else { + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = next_insn(p, BRW_OPCODE_WHILE); - insn->bits3.d = (do_insn - insn) * 16; - } else { - insn->header.execution_size = do_insn->header.execution_size; + assert(do_insn->header.opcode == BRW_OPCODE_DO); - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0)); -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; } @@ -1159,7 +1222,7 @@ void brw_CMP(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); insn->header.destreg__conditionalmod = conditional; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -1184,7 +1247,7 @@ void brw_WAIT (struct brw_compile *p) struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); struct brw_reg src = brw_notification_1_reg(); - brw_set_dest(insn, src); + brw_set_dest(p, insn, src); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); insn->header.execution_size = 0; /* must */ @@ -1219,6 +1282,10 @@ void brw_math( struct brw_compile *p, assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { assert(src.type == BRW_REGISTER_TYPE_F); @@ -1228,8 +1295,9 @@ void brw_math( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); } else { @@ -1242,7 +1310,7 @@ void brw_math( struct brw_compile *p, insn->header.predicate_control = 0; insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1284,12 +1352,18 @@ void brw_math2(struct brw_compile *p, assert(src1.type == BRW_REGISTER_TYPE_F); } + /* Source modifiers are ignored for extended math instructions. */ + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); } @@ -1318,8 +1392,13 @@ void brw_math_16( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; - brw_set_dest(insn, dest); + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); return; @@ -1334,7 +1413,7 @@ void brw_math_16( struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_SEND); insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1351,7 +1430,7 @@ void brw_math_16( struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_2NDHALF; insn->header.destreg__conditionalmod = msg_reg_nr+1; - brw_set_dest(insn, offset(dest,1)); + brw_set_dest(p, insn, offset(dest,1)); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1446,7 +1525,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, send_commit_msg = 1; } - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_write_message(p->brw, @@ -1516,7 +1595,7 @@ brw_oword_block_read_scratch(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = mrf.nr; - brw_set_dest(insn, dest); /* UW? */ + brw_set_dest(p, insn, dest); /* UW? */ brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, @@ -1569,7 +1648,7 @@ void brw_oword_block_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); if (intel->gen >= 6) { brw_set_src0(insn, mrf); } else { @@ -1614,7 +1693,7 @@ void brw_dword_scattered_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, @@ -1639,29 +1718,22 @@ void brw_dp_READ_4_vs(struct brw_compile *p, GLuint location, GLuint bind_table_index) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_reg_nr = 1; - struct brw_reg b; - /* - printf("vs const read msg, location %u, msg_reg_nr %d\n", - location, msg_reg_nr); - */ + if (intel->gen >= 6) + location /= 16; /* Setup MRF[1] with location/offset into const buffer */ brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - /* XXX I think we're setting all the dwords of MRF[1] to 'location'. - * when the docs say only dword[2] should be set. Hmmm. But it works. - */ - b = brw_message_reg(msg_reg_nr); - b = retype(b, BRW_REGISTER_TYPE_UD); - /*b = get_element_ud(b, 2);*/ - brw_MOV(p, b, brw_imm_ud(location)); - + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2), + BRW_REGISTER_TYPE_UD), + brw_imm_ud(location)); brw_pop_insn_state(p); insn = next_insn(p, BRW_OPCODE_SEND); @@ -1671,8 +1743,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(insn, brw_null_reg()); + } brw_set_dp_read_message(p->brw, insn, @@ -1699,6 +1775,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, /* Setup MRF[1] with offset into const buffer */ brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1706,7 +1783,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, /* M1.0 is block offset 0, M1.4 is block offset 1, all other * fields ignored. */ - brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), + brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D), addr_reg, brw_imm_d(offset)); brw_pop_insn_state(p); @@ -1717,7 +1794,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, insn->header.destreg__conditionalmod = 0; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_vec8_grf(0, 0)); if (intel->gen == 6) @@ -1747,12 +1824,12 @@ void brw_fb_WRITE(struct brw_compile *p, GLuint binding_table_index, GLuint msg_length, GLuint response_length, - GLboolean eot) + GLboolean eot, + GLboolean header_present) { struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_control, msg_type; - GLboolean header_present = GL_TRUE; if (intel->gen >= 6 && binding_table_index == 0) { insn = next_insn(p, BRW_OPCODE_SENDC); @@ -1764,9 +1841,6 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; if (intel->gen >= 6) { - if (msg_length == 4) - header_present = GL_FALSE; - /* headerless version, just submit color payload */ src0 = brw_message_reg(msg_reg_nr); @@ -1782,7 +1856,7 @@ void brw_fb_WRITE(struct brw_compile *p, else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_dp_write_message(p->brw, insn, @@ -1860,7 +1934,7 @@ void brw_SAMPLE(struct brw_compile *p, struct brw_reg m1 = brw_message_reg(msg_reg_nr); - guess_execution_size(p->current, dest); + guess_execution_size(p, p->current, dest); if (p->current->header.execution_size == BRW_EXECUTE_16) dispatch_16 = GL_TRUE; @@ -1871,7 +1945,8 @@ void brw_SAMPLE(struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, m1, brw_vec8_grf(0,0)); + brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), + retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); brw_pop_insn_state(p); @@ -1895,12 +1970,15 @@ void brw_SAMPLE(struct brw_compile *p, * and the first message register index comes from src0. */ if (intel->gen >= 6) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - /* m1 contains header? */ - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); - brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); + if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE || + src0.nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0); + brw_pop_insn_state(p); + } + src0 = brw_message_reg(msg_reg_nr); } insn = next_insn(p, BRW_OPCODE_SEND); @@ -1909,7 +1987,7 @@ void brw_SAMPLE(struct brw_compile *p, if (intel->gen < 6) insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, @@ -1929,7 +2007,8 @@ void brw_SAMPLE(struct brw_compile *p, */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, reg, reg); + brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD), + retype(reg, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); } @@ -1961,7 +2040,8 @@ void brw_urb_WRITE(struct brw_compile *p, if (intel->gen >= 6) { brw_push_insn_state(p); brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), + retype(src0, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); src0 = brw_message_reg(msg_reg_nr); } @@ -1970,7 +2050,7 @@ void brw_urb_WRITE(struct brw_compile *p, assert(msg_length < BRW_MAX_MRF); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); @@ -1989,6 +2069,80 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + if (ip + insn->bits1.branch_gen6.jump_count / br < start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int br = 2; + + if (intel->gen < 6) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1); + break; + case BRW_OPCODE_CONTINUE: + /* JIP is set at CONTINUE emit time, since that's when we + * know where the start of the loop is. + */ + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + void brw_ff_sync(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -2013,7 +2167,7 @@ void brw_ff_sync(struct brw_compile *p, } insn = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 6796fb208d..d0b0c22abf 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -36,8 +36,6 @@ #include "swrast/swrast.h" #include "tnl/tnl.h" #include "brw_context.h" -#include "intel_fbo.h" -#include "intel_regions.h" #define FILE_DEBUG_FLAG DEBUG_FALLBACKS @@ -63,49 +61,14 @@ static GLboolean do_check_fallback(struct brw_context *brw) for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; if (texUnit->_ReallyEnabled) { - struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current); - struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel]; + struct gl_texture_object *tex_obj = texUnit->_Current; + struct gl_texture_image *texImage = tex_obj->Image[0][tex_obj->BaseLevel]; if (texImage->Border) { DBG("FALLBACK: texture border\n"); return GL_TRUE; } } } - - /* _NEW_STENCIL - */ - if (ctx->Stencil._Enabled && - (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) { - DBG("FALLBACK: stencil\n"); - return GL_TRUE; - } - - /* _NEW_BUFFERS */ - if (!brw->has_surface_tile_offset) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - /* The original gen4 hardware couldn't set up WM surfaces pointing - * at an offset within a tile, which can happen when rendering to - * anything but the base level of a texture or the +X face/0 depth. - * This was fixed with the 4 Series hardware. - * - * For these original chips, you would have to make the depth and - * color destination surfaces include information on the texture - * type, LOD, face, and various limits to use them as a destination. - * I would have done this, but there's also a nasty requirement that - * the depth and the color surfaces all be of the same LOD, which - * may be a worse requirement than this alignment. (Also, we may - * want to just demote the texture to untiled, instead). - */ - if (irb->region && irb->region->tiling != I915_TILING_NONE && - (irb->region->draw_offset & 4095)) { - DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); - return GL_TRUE; - } - } - } return GL_FALSE; } @@ -117,7 +80,7 @@ static void check_fallback(struct brw_context *brw) const struct brw_tracked_state brw_check_fallback = { .dirty = { - .mesa = _NEW_BUFFERS | _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL, + .mesa = _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL, .brw = 0, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index edb02fabb2..a35687d599 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -48,6 +48,7 @@ extern "C" { #include "../glsl/ir_optimization.h" #include "../glsl/ir_print_visitor.h" +#define MAX_INSTRUCTION (1 << 30) static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); struct gl_shader * @@ -89,6 +90,9 @@ brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (shader != NULL) { @@ -107,7 +111,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2); + + /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, + * if-statements need to be flattened. + */ + if (intel->gen < 6) + lower_if_to_cond_assign(shader->ir, 16); + do_lower_texture_projection(shader->ir); + do_vec_index_to_cond_assign(shader->ir); brw_do_cubemap_normalize(shader->ir); do { @@ -474,8 +486,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) wpos.reg_offset++; /* gl_FragCoord.z */ - emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, - interp_reg(FRAG_ATTRIB_WPOS, 2))); + if (intel->gen >= 6) { + emit(fs_inst(BRW_OPCODE_MOV, wpos, + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); + } else { + emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 2))); + } wpos.reg_offset++; /* gl_FragCoord.w: Already set up in emit_interpolation */ @@ -518,25 +535,40 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) continue; } - for (unsigned int c = 0; c < type->vector_elements; c++) { - struct brw_reg interp = interp_reg(location, c); - emit(fs_inst(FS_OPCODE_LINTERP, - attr, - this->delta_x, - this->delta_y, - fs_reg(interp))); - attr.reg_offset++; - } - - if (intel->gen < 6) { - attr.reg_offset -= type->vector_elements; + if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 || + location == FRAG_ATTRIB_COL1)) { + /* Constant interpolation (flat shading) case. The SF has + * handed us defined values in only the constant offset + * field of the setup reg. + */ for (unsigned int c = 0; c < type->vector_elements; c++) { - emit(fs_inst(BRW_OPCODE_MUL, - attr, + struct brw_reg interp = interp_reg(location, c); + interp = suboffset(interp, 3); + emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp))); + attr.reg_offset++; + } + } else { + /* Perspective interpolation case. */ + for (unsigned int c = 0; c < type->vector_elements; c++) { + struct brw_reg interp = interp_reg(location, c); + emit(fs_inst(FS_OPCODE_LINTERP, attr, - this->pixel_w)); + this->delta_x, + this->delta_y, + fs_reg(interp))); attr.reg_offset++; } + + if (intel->gen < 6) { + attr.reg_offset -= type->vector_elements; + for (unsigned int c = 0; c < type->vector_elements; c++) { + emit(fs_inst(BRW_OPCODE_MUL, + attr, + attr, + this->pixel_w)); + attr.reg_offset++; + } + } } location++; } @@ -600,8 +632,13 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) * might be able to do better by doing execsize = 1 math and then * expanding that result out, but we would need to be careful with * masking. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ - if (intel->gen >= 6 && src.file == UNIFORM) { + if (intel->gen >= 6 && (src.file == UNIFORM || + src.abs || + src.negate)) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); src = expanded; @@ -765,6 +802,30 @@ fs_visitor::try_emit_saturate(ir_expression *ir) return true; } +static uint32_t +brw_conditional_for_comparison(unsigned int op) +{ + switch (op) { + case ir_binop_less: + return BRW_CONDITIONAL_L; + case ir_binop_greater: + return BRW_CONDITIONAL_G; + case ir_binop_lequal: + return BRW_CONDITIONAL_LE; + case ir_binop_gequal: + return BRW_CONDITIONAL_GE; + case ir_binop_equal: + case ir_binop_all_equal: /* same as equal for scalars */ + return BRW_CONDITIONAL_Z; + case ir_binop_nequal: + case ir_binop_any_nequal: /* same as nequal for scalars */ + return BRW_CONDITIONAL_NZ; + default: + assert(!"not reached: bad operation for comparison"); + return BRW_CONDITIONAL_NZ; + } +} + void fs_visitor::visit(ir_expression *ir) { @@ -814,6 +875,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_abs: op[0].abs = true; + op[0].negate = false; this->result = op[0]; break; case ir_unop_sign: @@ -880,35 +942,20 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_equal: - case ir_binop_all_equal: /* same as nequal for scalars */ - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; + case ir_binop_all_equal: case ir_binop_nequal: - case ir_binop_any_nequal: /* same as nequal for scalars */ - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + case ir_binop_any_nequal: + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); break; @@ -933,6 +980,10 @@ fs_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by lower_noise"); break; + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + case ir_unop_sqrt: emit_math(FS_OPCODE_SQRT, this->result, op[0]); break; @@ -949,7 +1000,12 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_f2b: case ir_unop_i2b: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); inst->conditional_mod = BRW_CONDITIONAL_NZ; inst = emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(1))); @@ -1423,28 +1479,70 @@ fs_visitor::visit(ir_discard *ir) void fs_visitor::visit(ir_constant *ir) { - fs_reg reg(this, ir->type); - this->result = reg; + /* Set this->result to reg at the bottom of the function because some code + * paths will cause this visitor to be applied to other fields. This will + * cause the value stored in this->result to be modified. + * + * Make reg constant so that it doesn't get accidentally modified along the + * way. Yes, I actually had this problem. :( + */ + const fs_reg reg(this, ir->type); + fs_reg dst_reg = reg; - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_UINT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_INT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_BOOL: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); - break; - default: - assert(!"Non-float/uint/int/bool constant"); + if (ir->type->is_array()) { + const unsigned size = type_size(ir->type->fields.array); + + for (unsigned i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else if (ir->type->is_record()) { + foreach_list(node, &ir->components) { + ir_instruction *const field = (ir_instruction *) node; + const unsigned size = type_size(field->type); + + field->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else { + const unsigned size = type_size(ir->type); + + for (unsigned i = 0; i < size; i++) { + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); + break; + case GLSL_TYPE_UINT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); + break; + case GLSL_TYPE_INT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]))); + break; + case GLSL_TYPE_BOOL: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + dst_reg.reg_offset++; } - reg.reg_offset++; } + + this->result = reg; } void @@ -1490,7 +1588,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f))); } else { - inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); + inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0])); } inst->conditional_mod = BRW_CONDITIONAL_NZ; break; @@ -1505,31 +1603,18 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) break; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - break; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - break; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - break; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - break; case ir_binop_equal: case ir_binop_all_equal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; case ir_binop_nequal: case ir_binop_any_nequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1])); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); break; + default: assert(!"not reached"); this->fail = true; @@ -1574,7 +1659,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) switch (expr->operation) { case ir_unop_logic_not: - inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1))); + inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); inst->conditional_mod = BRW_CONDITIONAL_Z; return; @@ -1608,30 +1693,16 @@ fs_visitor::emit_if_gen6(ir_if *ir) return; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - return; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - return; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - return; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - return; case ir_binop_equal: case ir_binop_all_equal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - return; case ir_binop_nequal: case ir_binop_any_nequal: inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); return; default: assert(!"not reached"); @@ -1713,32 +1784,9 @@ fs_visitor::visit(ir_loop *ir) this->base_ir = ir->to; ir->to->accept(this); - fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, + fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result)); - switch (ir->cmp) { - case ir_binop_equal: - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - case ir_binop_nequal: - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - case ir_binop_gequal: - inst->conditional_mod = BRW_CONDITIONAL_GE; - break; - case ir_binop_lequal: - inst->conditional_mod = BRW_CONDITIONAL_LE; - break; - case ir_binop_greater: - inst->conditional_mod = BRW_CONDITIONAL_G; - break; - case ir_binop_less: - inst->conditional_mod = BRW_CONDITIONAL_L; - break; - default: - assert(!"not reached: unknown loop condition"); - this->fail = true; - break; - } + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); inst = emit(fs_inst(BRW_OPCODE_BREAK)); inst->predicated = true; @@ -1951,7 +1999,7 @@ fs_visitor::emit_interpolation_setup_gen6() emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); this->current_annotation = "compute 1/pos.w"; - this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); + this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->pixel_w = fs_reg(this, glsl_type::float_type); emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); @@ -1979,17 +2027,17 @@ fs_visitor::emit_fb_writes() nr += 2; } - if (c->key.aa_dest_stencil_reg) { + if (c->aa_dest_stencil_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); + fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); } /* Reserve space for color. It'll be filled in per MRT below. */ int color_mrf = nr; nr += 4; - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { + if (c->source_depth_to_render_target) { + if (c->computes_depth) { /* Hand over gl_FragDepth. */ assert(this->frag_depth); fs_reg depth = *(variable_storage(this->frag_depth)); @@ -1998,20 +2046,22 @@ fs_visitor::emit_fb_writes() } else { /* Pass through the payload depth. */ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); } } - if (c->key.dest_depth_reg) { + if (c->dest_depth_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); } fs_reg color = reg_undef; if (this->frag_color) color = *(variable_storage(this->frag_color)); - else if (this->frag_data) + else if (this->frag_data) { color = *(variable_storage(this->frag_data)); + color.type = BRW_REGISTER_TYPE_F; + } for (int target = 0; target < c->key.nr_color_regions; target++) { this->current_annotation = talloc_asprintf(this->mem_ctx, @@ -2105,7 +2155,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) inst->target, inst->mlen, 0, - eot); + eot, + inst->header_present); } void @@ -2452,7 +2503,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) void fs_visitor::assign_curb_setup() { - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; + c->prog_data.first_curbe_grf = c->nr_payload_regs; c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; /* Map the offsets in the UNIFORM file to fixed HW regs. */ @@ -2522,12 +2573,15 @@ fs_visitor::assign_urb_setup() foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - if (inst->opcode != FS_OPCODE_LINTERP) - continue; - - assert(inst->src[2].file == FIXED_HW_REG); + if (inst->opcode == FS_OPCODE_LINTERP) { + assert(inst->src[2].file == FIXED_HW_REG); + inst->src[2].fixed_hw_reg.nr += urb_start; + } - inst->src[2].fixed_hw_reg.nr += urb_start; + if (inst->opcode == FS_OPCODE_CINTERP) { + assert(inst->src[0].file == FIXED_HW_REG); + inst->src[0].fixed_hw_reg.nr += urb_start; + } } this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; @@ -2618,6 +2672,7 @@ fs_visitor::split_virtual_grfs() } } } + this->live_intervals_valid = false; } /** @@ -2692,8 +2747,11 @@ fs_visitor::calculate_live_intervals() int loop_start = 0; int bb_header_ip = 0; + if (this->live_intervals_valid) + return; + for (int i = 0; i < num_vars; i++) { - def[i] = 1 << 30; + def[i] = MAX_INSTRUCTION; use[i] = -1; } @@ -2771,6 +2829,8 @@ fs_visitor::calculate_live_intervals() talloc_free(this->virtual_grf_use); this->virtual_grf_def = def; this->virtual_grf_use = use; + + this->live_intervals_valid = true; } /** @@ -2786,6 +2846,8 @@ fs_visitor::propagate_constants() { bool progress = false; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -2843,6 +2905,7 @@ fs_visitor::propagate_constants() /* Fit this constant in by commuting the operands */ scan_inst->src[0] = scan_inst->src[1]; scan_inst->src[1] = inst->src[0]; + progress = true; } break; case BRW_OPCODE_CMP: @@ -2863,6 +2926,9 @@ fs_visitor::propagate_constants() } } + if (progress) + this->live_intervals_valid = false; + return progress; } /** @@ -2877,6 +2943,8 @@ fs_visitor::dead_code_eliminate() bool progress = false; int pc = 0; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -2888,6 +2956,9 @@ fs_visitor::dead_code_eliminate() pc++; } + if (progress) + live_intervals_valid = false; + return progress; } @@ -2895,10 +2966,35 @@ bool fs_visitor::register_coalesce() { bool progress = false; + int if_depth = 0; + int loop_depth = 0; foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); + /* Make sure that we dominate the instructions we're going to + * scan for interfering with our coalescing, or we won't have + * scanned enough to see if anything interferes with our + * coalescing. We don't dominate the following instructions if + * we're in a loop or an if block. + */ + switch (inst->opcode) { + case BRW_OPCODE_DO: + loop_depth++; + break; + case BRW_OPCODE_WHILE: + loop_depth--; + break; + case BRW_OPCODE_IF: + if_depth++; + break; + case BRW_OPCODE_ENDIF: + if_depth--; + break; + } + if (loop_depth || if_depth) + continue; + if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || @@ -2916,14 +3012,6 @@ fs_visitor::register_coalesce() for (; scan_iter.has_next(); scan_iter.next()) { fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - if (scan_inst->opcode == BRW_OPCODE_DO || - scan_inst->opcode == BRW_OPCODE_WHILE || - scan_inst->opcode == BRW_OPCODE_ENDIF) { - interfered = true; - iter = scan_iter; - break; - } - if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || @@ -2943,10 +3031,6 @@ fs_visitor::register_coalesce() continue; } - /* Update live interval so we don't have to recalculate. */ - this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg], - virtual_grf_use[inst->dst.reg]); - /* Rewrite the later usage to point at the source of the move to * be removed. */ @@ -2971,6 +3055,9 @@ fs_visitor::register_coalesce() progress = true; } + if (progress) + live_intervals_valid = false; + return progress; } @@ -2981,6 +3068,8 @@ fs_visitor::compute_to_mrf() bool progress = false; int next_ip = 0; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -3184,15 +3273,16 @@ fs_visitor::virtual_grf_interferes(int a, int b) int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); - /* For dead code, just check if the def interferes with the other range. */ - if (this->virtual_grf_use[a] == -1) { - return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] && - this->virtual_grf_def[a] < this->virtual_grf_use[b]); - } - if (this->virtual_grf_use[b] == -1) { - return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] && - this->virtual_grf_def[b] < this->virtual_grf_use[a]); - } + /* We can't handle dead register writes here, without iterating + * over the whole instruction stream to find every single dead + * write to that register to compare to the live interval of the + * other register. Just assert that dead_code_eliminate() has been + * called. + */ + assert((this->virtual_grf_use[a] != -1 || + this->virtual_grf_def[a] == MAX_INSTRUCTION) && + (this->virtual_grf_use[b] != -1 || + this->virtual_grf_def[b] == MAX_INSTRUCTION)); return start < end; } @@ -3227,6 +3317,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) break; default: assert(!"not reached"); + brw_reg = brw_null_reg(); break; } break; @@ -3241,6 +3332,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) assert(!"not reached"); brw_reg = brw_null_reg(); break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; } if (reg->abs) brw_reg = brw_abs(brw_reg); @@ -3373,10 +3468,6 @@ fs_visitor::generate_code() break; case BRW_OPCODE_DO: - /* FINISHME: We need to write the loop instruction support still. */ - if (intel->gen >= 6) - this->fail = true; - loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); if_depth_in_loop[loop_stack_depth] = 0; break; @@ -3386,7 +3477,11 @@ fs_visitor::generate_code() brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: - brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; @@ -3400,16 +3495,18 @@ fs_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } } } } @@ -3425,6 +3522,9 @@ fs_visitor::generate_code() case FS_OPCODE_COS: generate_math(inst, dst, src); break; + case FS_OPCODE_CINTERP: + brw_MOV(p, dst, src[0]); + break; case FS_OPCODE_LINTERP: generate_linterp(inst, dst, src); break; @@ -3486,6 +3586,26 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } } GLboolean @@ -3553,7 +3673,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) progress = v.remove_duplicate_mrf_writes() || progress; - v.calculate_live_intervals(); progress = v.propagate_constants() || progress; progress = v.register_coalesce() || progress; progress = v.compute_to_mrf() || progress; @@ -3566,7 +3685,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) for (int i = 1; i < virtual_grf_count; i++) { v.spill_reg(i); } - v.calculate_live_intervals(); } if (0) @@ -3575,8 +3693,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) while (!v.assign_regs()) { if (v.fail) break; - - v.calculate_live_intervals(); } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index de7b15312a..82d96f6ac0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -68,6 +68,7 @@ enum fs_opcodes { FS_OPCODE_COS, FS_OPCODE_DDX, FS_OPCODE_DDY, + FS_OPCODE_CINTERP, FS_OPCODE_LINTERP, FS_OPCODE_TEX, FS_OPCODE_TXB, @@ -348,6 +349,23 @@ public: hash_table_pointer_hash, hash_table_pointer_compare); + /* There's a question that appears to be left open in the spec: + * How do implicit dst conversions interact with the CMP + * instruction or conditional mods? On gen6, the instruction: + * + * CMP null<d> src0<f> src1<f> + * + * will do src1 - src0 and compare that result as if it was an + * integer. On gen4, it will do src1 - src0 as float, convert + * the result to int, and compare as int. In between, it + * appears that it does src1 - src0 and does the compare in the + * execution type so dst type doesn't matter. + */ + if (this->intel->gen > 4) + this->reg_null_cmp = reg_null_d; + else + this->reg_null_cmp = reg_null_f; + this->frag_color = NULL; this->frag_data = NULL; this->frag_depth = NULL; @@ -361,6 +379,7 @@ public: this->virtual_grf_array_size = 0; this->virtual_grf_def = NULL; this->virtual_grf_use = NULL; + this->live_intervals_valid = false; this->kill_emitted = false; } @@ -462,6 +481,7 @@ public: int virtual_grf_array_size; int *virtual_grf_def; int *virtual_grf_use; + bool live_intervals_valid; struct hash_table *variable_ht; ir_variable *frag_color, *frag_data, *frag_depth; @@ -485,6 +505,7 @@ public: fs_reg pixel_w; fs_reg delta_x; fs_reg delta_y; + fs_reg reg_null_cmp; int grf_used; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 3b7b03a05b..20bfa4c3ea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -205,6 +205,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_round_even: case ir_unop_sin: case ir_unop_cos: + case ir_unop_sin_reduced: + case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdy: for (i = 0; i < vector_elements; i++) { @@ -328,6 +330,9 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_noise: assert(!"noise should have been broken down to function call"); break; + case ir_quadop_vector: + assert(!"should have been lowered"); + break; } ir->remove(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index bbb210cd44..078a349abd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -94,6 +94,8 @@ fs_visitor::assign_regs() int class_count = 0; int aligned_pair_class = -1; + calculate_live_intervals(); + /* Set up the register classes. * * The base registers store a scalar value. For texture samples, @@ -416,4 +418,6 @@ fs_visitor::spill_reg(int spill_reg) } } } + + this->live_intervals_valid = false; } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index b0c76f4094..70c451d071 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -96,6 +96,9 @@ static void compile_gs_prog( struct brw_context *brw, brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: + /* Gen6: LINELOOP is converted to LINESTRIP at the beginning of the 3D pipeline */ + if (intel->gen == 6) + return; brw_gs_lines( &c ); break; case GL_LINES: @@ -166,6 +169,9 @@ static void populate_key( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + int prim_gs_always; + memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ @@ -185,10 +191,14 @@ static void populate_key( struct brw_context *brw, key->pv_first = GL_TRUE; } - key->need_gs_prog = (key->hint_gs_always || - brw->primitive == GL_QUADS || + if (intel->gen == 6) + prim_gs_always = 0; + else + prim_gs_always = brw->primitive == GL_QUADS || brw->primitive == GL_QUAD_STRIP || - brw->primitive == GL_LINE_LOOP); + brw->primitive == GL_LINE_LOOP; + + key->need_gs_prog = (key->hint_gs_always || prim_gs_always); } /* Calculate interpolants for triangle and line rasterization. @@ -205,8 +215,10 @@ static void prepare_gs_prog(struct brw_context *brw) brw->gs.prog_active = key.need_gs_prog; } + drm_intel_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = NULL; + if (brw->gs.prog_active) { - drm_intel_bo_unreference(brw->gs.prog_bo); brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index a91b0528fa..79afe19deb 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -214,7 +214,7 @@ static void emit_depthbuffer(struct brw_context *brw) if (region == NULL) { BEGIN_BATCH(len); - OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); OUT_BATCH(0); @@ -251,7 +251,7 @@ static void emit_depthbuffer(struct brw_context *brw) assert(region->tiling != I915_TILING_NONE); BEGIN_BATCH(len); - OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | @@ -277,7 +277,7 @@ static void emit_depthbuffer(struct brw_context *brw) /* Initialize it for safety. */ if (intel->gen >= 6) { BEGIN_BATCH(2); - OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); OUT_BATCH(0); ADVANCE_BATCH(); } @@ -309,7 +309,7 @@ static void upload_polygon_stipple(struct brw_context *brw) return; memset(&bps, 0, sizeof(bps)); - bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; + bps.header.opcode = _3DSTATE_POLY_STIPPLE_PATTERN; bps.header.length = sizeof(bps)/4-2; /* Polygon stipple is provided in OpenGL order, i.e. bottom @@ -354,7 +354,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) return; memset(&bpso, 0, sizeof(bpso)); - bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.opcode = _3DSTATE_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0), @@ -401,7 +401,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) /* use legacy aa line coverage computation */ memset(&balp, 0, sizeof(balp)); - balp.header.opcode = CMD_AA_LINE_PARAMETERS; + balp.header.opcode = _3DSTATE_AA_LINE_PARAMETERS; balp.header.length = sizeof(balp) / 4 - 2; BRW_CACHED_BATCH_STRUCT(brw, &balp); @@ -431,7 +431,7 @@ static void upload_line_stipple(struct brw_context *brw) return; memset(&bls, 0, sizeof(bls)); - bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls.header.opcode = _3DSTATE_LINE_STIPPLE_PATTERN; bls.header.length = sizeof(bls)/4 - 2; bls.bits0.pattern = ctx->Line.StipplePattern; @@ -481,7 +481,7 @@ static void upload_invarient_state( struct brw_context *brw ) /* Disable depth offset clamping. */ - gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.opcode = _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP; gdo.header.length = sizeof(gdo)/4 - 2; gdo.depth_offset_clamp = 0.0; @@ -492,20 +492,20 @@ static void upload_invarient_state( struct brw_context *brw ) int i; BEGIN_BATCH(3); - OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2)); + OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (3 - 2)); OUT_BATCH(MS_PIXEL_LOCATION_CENTER | MS_NUMSAMPLES_1); OUT_BATCH(0); /* positions for 4/8-sample */ ADVANCE_BATCH(); BEGIN_BATCH(2); - OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); OUT_BATCH(1); ADVANCE_BATCH(); for (i = 0; i < 4; i++) { BEGIN_BATCH(4); - OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); OUT_BATCH(i << SVB_INDEX_SHIFT); OUT_BATCH(0); OUT_BATCH(0xffffffff); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 1367d81469..94efa79109 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -142,7 +142,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; - newFP->isGLSL = brw_wm_is_glsl(fprog); /* Don't reject fragment shaders for their Mesa IR state when we're * using the new FS backend. diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index f28f28663e..656aad630a 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -232,6 +232,12 @@ brw_prepare_query_begin(struct brw_context *brw) brw->query.bo = NULL; brw->query.bo = drm_intel_bo_alloc(intel->bufmgr, "query", 4096, 1); + + /* clear target buffer */ + drm_intel_bo_map(brw->query.bo, GL_TRUE); + memset((char *)brw->query.bo->virtual, 0, 4096); + drm_intel_bo_unmap(brw->query.bo); + brw->query.index = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 3beed16945..4bb93e7336 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -164,7 +164,8 @@ void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s))) +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(brw->intel.batch, (s), \ + sizeof(*(s)), false) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index be3989eb7d..a21af13caa 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz); + intel_batchbuffer_data(brw->intel.batch, data, sz, false); return GL_TRUE; } @@ -75,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz); + intel_batchbuffer_data(brw->intel.batch, data, sz, false); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 58ff528d44..7045888ad4 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -58,8 +58,6 @@ #include "main/imports.h" #include "brw_state.h" -#include "intel_batchbuffer.h" -#include "brw_wm.h" #define FILE_DEBUG_FLAG DEBUG_STATE diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 338f3876b3..eba4411ca7 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -129,7 +129,7 @@ const struct brw_tracked_state *gen6_atoms[] = &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ - &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */ + &gen6_wm_constants, /* Before wm_state */ &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8ce9af9c4f..6687a89e80 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1017,7 +1017,14 @@ struct brw_wm_unit_state GLuint enable_32_pix:1; GLuint enable_con_32_pix:1; GLuint enable_con_64_pix:1; - GLuint pad0:5; + GLuint pad0:1; + + /* These next four bits are for Ironlake+ */ + GLuint fast_span_coverage_enable:1; + GLuint depth_buffer_clear:1; + GLuint depth_buffer_resolve_enable:1; + GLuint hierarchical_depth_buffer_resolve_enable:1; + GLuint legacy_global_depth_bias:1; GLuint line_stipple:1; GLuint depth_offset:1; @@ -1064,6 +1071,15 @@ struct brw_sampler_default_color { GLfloat color[4]; }; +struct gen5_sampler_default_color { + uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + struct brw_sampler_state { @@ -1169,7 +1185,12 @@ struct brw_surface_state GLuint cube_neg_y:1; GLuint cube_pos_x:1; GLuint cube_neg_x:1; - GLuint pad:4; + GLuint pad:2; + /* Required on gen6 for surfaces accessed through render cache messages. + */ + GLuint render_cache_read_write:1; + /* Ironlake and newer: instead of replicating one of the texels */ + GLuint cube_corner_average:1; GLuint mipmap_layout_mode:1; GLuint vert_line_stride_ofs:1; GLuint vert_line_stride:1; @@ -1539,6 +1560,21 @@ struct brw_instruction GLuint pad0:12; } if_else; + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + struct { GLuint function:4; GLuint int_type:1; @@ -1636,6 +1672,18 @@ struct brw_instruction struct { GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_read_g4x; + + struct { + GLuint binding_table_index:8; GLuint msg_control:3; GLuint msg_type:3; GLuint target_cache:2; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 4a41c7a517..6ae75d22c1 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -99,8 +99,8 @@ static void do_vs_prog( struct brw_context *brw, (void) ctx; aux_size = sizeof(c.prog_data); - if (c.vp->use_const_buffer) - aux_size += c.vp->program.Base.Parameters->NumParameters; + /* constant_map */ + aux_size += c.vp->program.Base.Parameters->NumParameters; drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, @@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); + key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_POINT */ if (ctx->Point.PointSprite) { @@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT, + .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 9338a6b7db..0b88cc1ec7 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -44,6 +44,7 @@ struct brw_vs_prog_key { GLuint nr_userclip:4; GLuint copy_edgeflag:1; GLuint point_coord_replace:8; + GLuint two_side_color: 1; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a13c3cae0b..0411ce0b36 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -140,9 +140,13 @@ clear_current_const(struct brw_vs_compile *c) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { struct intel_context *intel = &c->func.brw->intel; - GLuint i, reg = 0, mrf; + GLuint i, reg = 0, mrf, j; int attributes_in_vue; int first_reladdr_output; + int max_constant; + int constant = 0; + int vert_result_reoder[VERT_RESULT_MAX]; + int bfc = 0; /* Determine whether to use a real constant buffer or use a block * of GRF registers for constants. The later is faster but only @@ -181,62 +185,81 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } - /* Vertex program parameters from curbe: + /* Assign some (probably all) of the vertex program constants to + * the push constant buffer/CURBE. + * + * There's an obvious limit to the numer of push constants equal to + * the number of register available, and that number is smaller + * than the minimum maximum number of vertex program parameters, so + * support for pull constants is required if we overflow. + * Additionally, on gen6 the number of push constants is even + * lower. + * + * When there's relative addressing, we don't know what range of + * Mesa IR registers can be accessed. And generally, when relative + * addressing is used we also have too many constants to load them + * all as push constants. So, we'll just support relative + * addressing out of the pull constant buffers, and try to load as + * many statically-accessed constants into the push constant buffer + * as we can. */ - if (c->vp->use_const_buffer) { - int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; - int constant = 0; - - /* We've got more constants than we can load with the push - * mechanism. This is often correlated with reladdr loads where - * we should probably be using a pull mechanism anyway to avoid - * excessive reading. However, the pull mechanism is slow in - * general. So, we try to allocate as many non-reladdr-loaded - * constants through the push buffer as we can before giving up. - */ - memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); - for (i = 0; - i < c->vp->program.Base.NumInstructions && constant < max_constant; - i++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; - int arg; - - for (arg = 0; arg < 3 && constant < max_constant; arg++) { - if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && - inst->SrcReg[arg].File != PROGRAM_CONSTANT && - inst->SrcReg[arg].File != PROGRAM_UNIFORM && - inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && - inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || - inst->SrcReg[arg].RelAddr) - continue; - - if (c->constant_map[inst->SrcReg[arg].Index] == -1) { - c->constant_map[inst->SrcReg[arg].Index] = constant++; - } + if (intel->gen >= 6) { + /* We can only load 32 regs of push constants. */ + max_constant = 32 * 2 - c->key.nr_userclip; + } else { + max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; + } + + /* constant_map maps from ParameterValues[] index to index in the + * push constant buffer, or -1 if it's only in the pull constant + * buffer. + */ + memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); + for (i = 0; + i < c->vp->program.Base.NumInstructions && constant < max_constant; + i++) { + struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; + int arg; + + for (arg = 0; arg < 3 && constant < max_constant; arg++) { + if (inst->SrcReg[arg].File != PROGRAM_STATE_VAR && + inst->SrcReg[arg].File != PROGRAM_CONSTANT && + inst->SrcReg[arg].File != PROGRAM_UNIFORM && + inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && + inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) { + continue; + } + + if (inst->SrcReg[arg].RelAddr) { + c->vp->use_const_buffer = GL_TRUE; + continue; } - } - for (i = 0; i < constant; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, - (i%2) * 4), - 0, 4, 1); + if (c->constant_map[inst->SrcReg[arg].Index] == -1) { + c->constant_map[inst->SrcReg[arg].Index] = constant++; + } } - reg += (constant + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - /* XXX 0 causes a bug elsewhere... */ - c->prog_data.nr_params = MAX2(constant * 4, 4); } - else { - /* use a section of the GRF for constants */ - GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = nr_params * 4; + /* If we ran out of push constant space, then we'll also upload all + * constants through the pull constant buffer so that they can be + * accessed no matter what. For relative addressing (the common + * case) we need them all in place anyway. + */ + if (constant == max_constant) + c->vp->use_const_buffer = GL_TRUE; + + for (i = 0; i < constant; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), + 0, 4, 1); } + reg += (constant + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = constant * 4; + /* XXX 0 causes a bug elsewhere... */ + if (intel->gen < 6 && c->prog_data.nr_params == 0) + c->prog_data.nr_params = 4; /* Allocate input regs: */ @@ -270,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf = 4; first_reladdr_output = get_first_reladdr_output(&c->vp->program); - for (i = 0; i < VERT_RESULT_MAX; i++) { + + for (i = 0; i < VERT_RESULT_MAX; i++) + vert_result_reoder[i] = i; + + /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */ + if (intel->gen >= 6 && c->key.two_side_color) { + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + + if (bfc) { + for (i = 0; i < bfc; i++) { + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i; + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i; + } + + for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) { + vert_result_reoder[i] = i - bfc; + } + } + } + + for (j = 0; j < VERT_RESULT_MAX; j++) { + i = vert_result_reoder[j]; + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); @@ -281,7 +333,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) else if (i == VERT_RESULT_PSIZ) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ } else { /* Two restrictions on our compute-to-MRF here. The @@ -607,6 +658,22 @@ static void emit_min( struct brw_compile *p, } } +static void emit_arl(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + struct brw_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F); + + brw_RNDD(p, dst_f, src); + brw_MOV(p, dst, dst_f); + } else { + brw_RNDD(p, dst, src); + } +} + static void emit_math1_gen4(struct brw_vs_compile *c, GLuint function, struct brw_reg dst, @@ -698,7 +765,7 @@ emit_math1(struct brw_vs_compile *c, emit_math1_gen4(c, function, dst, arg0, precision); } -static void emit_math2( struct brw_vs_compile *c, +static void emit_math2_gen4( struct brw_vs_compile *c, GLuint function, struct brw_reg dst, struct brw_reg arg0, @@ -706,14 +773,11 @@ static void emit_math2( struct brw_vs_compile *c, GLuint precision) { struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; GLboolean need_tmp = GL_FALSE; - if (dst.file != BRW_GENERAL_REGISTER_FILE) - need_tmp = GL_TRUE; - - if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf) + if (dst.file != BRW_GENERAL_REGISTER_FILE || + dst.dw1.bits.writemask != 0xf) need_tmp = GL_TRUE; if (need_tmp) @@ -736,6 +800,53 @@ static void emit_math2( struct brw_vs_compile *c, } } +static void emit_math2_gen6( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp_src0, tmp_src1, tmp_dst; + + tmp_src0 = get_tmp(c); + tmp_src1 = get_tmp(c); + tmp_dst = get_tmp(c); + + brw_MOV(p, tmp_src0, arg0); + brw_MOV(p, tmp_src1, arg1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + tmp_dst, + function, + tmp_src0, + tmp_src1); + brw_set_access_mode(p, BRW_ALIGN_16); + + brw_MOV(p, dst, tmp_dst); + + release_tmp(c, tmp_src0); + release_tmp(c, tmp_src1); + release_tmp(c, tmp_dst); +} + +static void emit_math2( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) + emit_math2_gen6(c, function, dst, arg0, arg1, precision); + else + emit_math2_gen4(c, function, dst, arg0, arg1, precision); +} static void emit_exp_noalias( struct brw_vs_compile *c, struct brw_reg dst, @@ -1008,8 +1119,6 @@ get_constant(struct brw_vs_compile *c, assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - if (c->current_const[argIndex].index != src->Index) { /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; @@ -1027,7 +1136,7 @@ get_constant(struct brw_vs_compile *c, } /* replicate lower four floats into upper half (to get XYZWXYZW) */ - const_reg = stride(const_reg, 0, 4, 0); + const_reg = stride(const_reg, 0, 4, 1); const_reg.subnr = 0; return const_reg; @@ -1040,14 +1149,14 @@ get_reladdr_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; struct brw_reg const_reg = c->current_const[argIndex].reg; - struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; - struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + uint32_t offset; assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - /* Can't reuse a reladdr constant load. */ c->current_const[argIndex].index = -1; @@ -1056,15 +1165,21 @@ get_reladdr_constant(struct brw_vs_compile *c, src->Index, argIndex, c->current_const[argIndex].reg.nr); #endif - brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16)); + if (intel->gen >= 6) { + offset = src->Index; + } else { + struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16)); + addr_reg = byte_addr_reg; + offset = 16 * src->Index; + } /* fetch the first vec4 */ brw_dp_READ_4_vs_relative(p, - const_reg, /* writeback dest */ - byte_addr_reg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ - ); + const_reg, + addr_reg, + offset, + SURF_INDEX_VERT_CONST_BUFFER); return const_reg; } @@ -1259,22 +1374,18 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_UNIFORM: case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: - if (c->vp->use_const_buffer) { - if (!relAddr && c->constant_map[index] != -1) { - assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); - return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; - } else if (relAddr) + if (!relAddr && c->constant_map[index] != -1) { + /* Take from the push constant buffer if possible. */ + assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); + return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; + } else { + /* Must be in the pull constant buffer then .*/ + assert(c->vp->use_const_buffer); + if (relAddr) return get_reladdr_constant(c, inst, argIndex); else return get_constant(c, inst, argIndex); } - else if (relAddr) { - return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16); - } - else { - assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); - return c->regs[PROGRAM_STATE_VAR][index]; - } case PROGRAM_ADDRESS: assert(index == 0); return c->regs[file][index]; @@ -1315,11 +1426,10 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, GET_SWZ(src->Swizzle, 1), GET_SWZ(src->Swizzle, 2), GET_SWZ(src->Swizzle, 3)); - } - /* Note this is ok for non-swizzle instructions: - */ - reg.negate = src->Negate ? 1 : 0; + /* Note this is ok for non-swizzle ARB_vp instructions */ + reg.negate = src->Negate ? 1 : 0; + } return reg; } @@ -1603,6 +1713,8 @@ static void emit_vertex_write( struct brw_vs_compile *c) break; if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i))) continue; + if (i == VERT_RESULT_PSIZ) + continue; if (i >= VERT_RESULT_TEX0 && c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) { @@ -1830,6 +1942,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) switch (inst->Opcode) { case OPCODE_ABS: + args[0].negate = false; brw_MOV(p, dst, brw_abs(args[0])); break; case OPCODE_ADD: @@ -1866,7 +1979,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_ARL: - brw_RNDD(p, dst, args[0]); + emit_arl(p, dst, args[0]); break; case OPCODE_FLR: brw_RNDD(p, dst, args[0]); @@ -1913,7 +2026,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, brw_abs(args[0]), BRW_MATH_PRECISION_FULL); break; case OPCODE_SEQ: @@ -1987,35 +2100,42 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); - brw_CONT(p, if_depth_in_loop[loop_depth]); + if (intel->gen >= 6) { + brw_CONT_gen6(p, loop_inst[loop_depth - 1]); + } else { + brw_CONT(p, if_depth_in_loop[loop_depth]); + } brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: - { - clear_current_const(c); - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - loop_depth--; - - if (intel->gen == 5) - br = 2; - - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && + + case OPCODE_ENDLOOP: { + clear_current_const(c); + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + loop_depth--; + + if (intel->gen == 5) + br = 2; + + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } break; + case OPCODE_BRA: brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); @@ -2106,6 +2226,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) } brw_resolve_cals(p); + brw_set_uip_jip(p); brw_optimize(p); diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index eabac51160..b0b05445eb 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -82,6 +82,15 @@ prepare_vs_constants(struct brw_context *brw) params->ParameterValues[i], 4 * sizeof(float)); } + + if (0) { + for (i = 0; i < params->NumParameters; i++) { + float *row = (float *)brw->vs.const_bo->virtual + i * 4; + printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n", + i, row[0], row[1], row[2], row[3]); + } + } + drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo); brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 3d7a98c981..100a21b59d 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -203,4 +203,5 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.debug_batch = brw_debug_batch; + brw->intel.vtbl.render_target_supported = brw_render_target_supported; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index ccdc18e0b8..656501b4f7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_emit(c); } +static void +brw_wm_payload_setup(struct brw_context *brw, + struct brw_wm_compile *c) +{ + struct intel_context *intel = &brw->intel; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; + + if (intel->gen >= 6) { + /* R0-1: masks, pixel X/Y coordinates. */ + c->nr_payload_regs = 2; + /* R2: only for 32-pixel dispatch.*/ + /* R3-4: perspective pixel location barycentric */ + c->nr_payload_regs += 2; + /* R5-6: perspective pixel location bary for dispatch width != 8 */ + if (c->dispatch_width == 16) { + c->nr_payload_regs += 2; + } + /* R7-10: perspective centroid barycentric */ + /* R11-14: perspective sample barycentric */ + /* R15-18: linear pixel location barycentric */ + /* R19-22: linear centroid barycentric */ + /* R23-26: linear sample barycentric */ + + /* R27: interpolated depth if uses source depth */ + if (uses_depth) { + c->source_depth_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R28: interpolated depth if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. + */ + if (uses_depth) { + c->source_w_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R30: interpolated W if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R31: MSAA position offsets. */ + /* R32-: bary for 32-pixel. */ + /* R58-59: interp W for 32-pixel. */ + + if (c->fp->program.Base.OutputsWritten & + BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + c->source_depth_to_render_target = GL_TRUE; + c->computes_depth = GL_TRUE; + } + } else { + brw_wm_lookup_iz(intel, c); + } +} /** * All Mesa program -> GPU code generation goes through this function. @@ -167,23 +223,18 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); - /* temporary sanity check assertion */ - ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + brw_wm_payload_setup(brw, c); if (!brw_wm_fs_emit(brw, c)) { /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. */ - if (fp->isGLSL) { - c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); - } - else { - c->dispatch_width = 16; - brw_wm_non_glsl_emit(brw, c); - } + c->dispatch_width = 16; + brw_wm_payload_setup(brw, c); + brw_wm_non_glsl_emit(brw, c); } + c->prog_data.dispatch_width = c->dispatch_width; /* Scratch space is used for register spilling */ if (c->last_scratch) { @@ -220,12 +271,10 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { - struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -285,57 +334,9 @@ static void brw_wm_populate_key( struct brw_context *brw, } } - if (intel->gen >= 6) { - /* R0-1: masks, pixel X/Y coordinates. */ - key->nr_payload_regs = 2; - /* R2: only for 32-pixel dispatch.*/ - /* R3-4: perspective pixel location barycentric */ - key->nr_payload_regs += 2; - /* R5-6: perspective pixel location bary for dispatch width != 8 */ - if (!fp->isGLSL) { /* dispatch_width != 8 */ - key->nr_payload_regs += 2; - } - /* R7-10: perspective centroid barycentric */ - /* R11-14: perspective sample barycentric */ - /* R15-18: linear pixel location barycentric */ - /* R19-22: linear centroid barycentric */ - /* R23-26: linear sample barycentric */ - - /* R27: interpolated depth if uses source depth */ - if (uses_depth) { - key->source_depth_reg = key->nr_payload_regs; - key->nr_payload_regs++; - if (!fp->isGLSL) { /* dispatch_width != 8 */ - /* R28: interpolated depth if not 8-wide. */ - key->nr_payload_regs++; - } - } - /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. - */ - if (uses_depth) { - key->source_w_reg = key->nr_payload_regs; - key->nr_payload_regs++; - if (!fp->isGLSL) { /* dispatch_width != 8 */ - /* R30: interpolated W if not 8-wide. */ - key->nr_payload_regs++; - } - } - /* R31: MSAA position offsets. */ - /* R32-: bary for 32-pixel. */ - /* R58-59: interp W for 32-pixel. */ - - if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - key->source_depth_to_render_target = GL_TRUE; - key->computes_depth = GL_TRUE; - } - - } else { - brw_wm_lookup_iz(intel, - line_aa, - lookup, - uses_depth, - key); - } + key->iz_lookup = lookup; + key->line_aa = line_aa; + key->stats_wm = brw->intel.stats_wm; /* BRW_NEW_WM_INPUT_DIMENSIONS */ key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; @@ -377,6 +378,10 @@ static void brw_wm_populate_key( struct brw_context *brw, swizzles[2] = SWIZZLE_ZERO; } else if (t->DepthMode == GL_LUMINANCE) { swizzles[3] = SWIZZLE_ONE; + } else if (t->DepthMode == GL_RED) { + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ZERO; } } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 2ca685784f..d9cae75ab5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -59,16 +59,9 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { - GLuint source_depth_reg:3; - GLuint source_w_reg:3; - GLuint aa_dest_stencil_reg:3; - GLuint dest_depth_reg:3; - GLuint nr_payload_regs:4; - GLuint computes_depth:1; /* could be derived from program string */ - GLuint source_depth_to_render_target:1; + GLuint stats_wm:1; GLuint flat_shade:1; GLuint linear_color:1; /**< linear interpolation vs perspective interp */ - GLuint runtime_check_aads_emit:1; GLuint nr_color_regions:5; GLuint render_to_fbo:1; @@ -81,6 +74,8 @@ struct brw_wm_prog_key { GLushort drawable_height; GLbitfield64 vp_outputs_written; + GLuint iz_lookup; + GLuint line_aa; GLuint program_string_id:32; }; @@ -204,6 +199,15 @@ struct brw_wm_compile { PASS2_DONE } state; + GLuint source_depth_reg:3; + GLuint source_w_reg:3; + GLuint aa_dest_stencil_reg:3; + GLuint dest_depth_reg:3; + GLuint nr_payload_regs:4; + GLuint computes_depth:1; /* could be derived from program string */ + GLuint source_depth_to_render_target:1; + GLuint runtime_check_aads_emit:1; + /* Initial pass - translate fp instructions to fp instructions, * simplifying and adding instructions for interpolation and * framebuffer writes. @@ -306,14 +310,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c, void brw_wm_print_program( struct brw_wm_compile *c, const char *stage ); -void brw_wm_lookup_iz( struct intel_context *intel, - GLuint line_aa, - GLuint lookup, - GLboolean ps_uses_depth, - struct brw_wm_prog_key *key ); +void brw_wm_lookup_iz(struct intel_context *intel, + struct brw_wm_compile *c); -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); /* brw_wm_emit.c */ @@ -381,7 +380,6 @@ void emit_fb_write(struct brw_wm_compile *c, void emit_frontfacing(struct brw_compile *p, const struct brw_reg *dst, GLuint mask); -void emit_kil_nv(struct brw_wm_compile *c); void emit_linterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -476,5 +474,6 @@ struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint typ struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name); bool brw_color_buffer_write_enabled(struct brw_context *brw); +bool brw_render_target_supported(gl_format format); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 96fecc97ee..2336e27c1e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -219,43 +219,45 @@ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *arg0) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W); + struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W); if (mask & WRITEMASK_X) { + if (intel->gen >= 6) { + struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F); + brw_MOV(p, delta_x_f, delta_x); + delta_x = delta_x_f; + } + if (c->fp->program.PixelCenterInteger) { /* X' = X */ - brw_MOV(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W)); + brw_MOV(p, dst[0], delta_x); } else { /* X' = X + 0.5 */ - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_f(0.5)); + brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5)); } } if (mask & WRITEMASK_Y) { + if (intel->gen >= 6) { + struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F); + brw_MOV(p, delta_y_f, delta_y); + delta_y = delta_y_f; + } + if (c->fp->program.OriginUpperLeft) { if (c->fp->program.PixelCenterInteger) { /* Y' = Y */ - brw_MOV(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_W)); + brw_MOV(p, dst[1], delta_y); } else { - /* Y' = Y + 0.5 */ - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_W), - brw_imm_f(0.5)); + brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5)); } } else { float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5; /* Y' = (height - 1) - Y + center */ - brw_ADD(p, - dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_ADD(p, dst[1], negate(delta_y), brw_imm_f(c->key.drawable_height - 1 + center_offset)); } } @@ -896,10 +898,14 @@ void emit_math1(struct brw_wm_compile *c, BRW_MATH_SATURATE_NONE); struct brw_reg src; - if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || - arg0[0].file != BRW_GENERAL_REGISTER_FILE)) { + if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || + arg0[0].file != BRW_GENERAL_REGISTER_FILE) || + arg0[0].negate || arg0[0].abs)) { /* Gen6 math requires that source and dst horizontal stride be 1, * and that the argument be in the GRF. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ src = dst[dst_chan]; brw_MOV(p, src, arg0[0]); @@ -967,34 +973,23 @@ void emit_math2(struct brw_wm_compile *c, struct brw_reg temp_dst = dst[dst_chan]; if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - /* Both scalar arguments. Do scalar calc. */ - src0.hstride = BRW_HORIZONTAL_STRIDE_1; - src1.hstride = BRW_HORIZONTAL_STRIDE_1; - temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1; - temp_dst.width = BRW_WIDTH_1; - - if (arg0[0].subnr != 0) { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - - /* Ouch. We've used the temp as a dst, and we still - * need a temp to store arg1 in, because src and dst - * offsets have to be equal. Leaving this up to - * glsl2-965 to handle correctly. - */ - assert(arg1[0].subnr == 0); - } else if (arg1[0].subnr != 0) { - brw_MOV(p, temp_dst, src1); - src1 = temp_dst; - } - } else { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - } - } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - brw_MOV(p, temp_dst, src1); - src1 = temp_dst; + brw_MOV(p, temp_dst, src0); + src0 = temp_dst; + } + + if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + /* This is a heinous hack to get a temporary register for use + * in case both arg0 and arg1 are constants. Why you're + * doing exponentiation on constant values in the shader, we + * don't know. + * + * max_wm_grf is almost surely less than the maximum GRF, and + * gen6 doesn't care about the number of GRFs used in a + * shader like pre-gen6 did. + */ + struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0); + brw_MOV(p, temp, src1); + src1 = temp; } brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); @@ -1012,14 +1007,6 @@ void emit_math2(struct brw_wm_compile *c, sechalf(src0), sechalf(src1)); } - - /* Splat a scalar result into all the channels. */ - if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 && - arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0; - temp_dst.vstride = BRW_VERTICAL_STRIDE_0; - brw_MOV(p, dst[dst_chan], temp_dst); - } } else { GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : @@ -1301,9 +1288,15 @@ static void emit_kil( struct brw_wm_compile *c, struct brw_reg *arg0) { struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + struct intel_context *intel = &p->brw->intel; + struct brw_reg pixelmask; GLuint i, j; + if (intel->gen >= 6) + pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); + else + pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + for (i = 0; i < 4; i++) { /* Check if we've already done the comparison for this reg * -- common when someone does KIL TEMP.wwww. @@ -1319,26 +1312,11 @@ static void emit_kil( struct brw_wm_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_AND(p, r0uw, brw_flag_reg(), r0uw); + brw_AND(p, pixelmask, brw_flag_reg(), pixelmask); brw_pop_insn_state(p); } } -/* KIL_NV kills the pixels that are currently executing, not based on a test - * of the arguments. - */ -void emit_kil_nv( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ - brw_AND(p, r0uw, c->emit_mask_reg, r0uw); - brw_pop_insn_state(p); -} - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -1355,9 +1333,11 @@ static void fire_fb_write( struct brw_wm_compile *c, dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); /* Pass through control information: + * + * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case. */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - if (intel->gen < 6) /* gen6, use headerless for fb write */ + if (intel->gen < 6) { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ @@ -1378,7 +1358,8 @@ static void fire_fb_write( struct brw_wm_compile *c, target, nr, 0, - eot); + eot, + GL_TRUE); } @@ -1387,8 +1368,8 @@ static void emit_aa( struct brw_wm_compile *c, GLuint reg ) { struct brw_compile *p = &c->func; - GLuint comp = c->key.aa_dest_stencil_reg / 2; - GLuint off = c->key.aa_dest_stencil_reg % 2; + GLuint comp = c->aa_dest_stencil_reg / 2; + GLuint off = c->aa_dest_stencil_reg % 2; struct brw_reg aa = offset(arg1[comp], off); brw_push_insn_state(p); @@ -1416,11 +1397,10 @@ void emit_fb_write(struct brw_wm_compile *c, struct intel_context *intel = &brw->intel; GLuint nr = 2; GLuint channel; - int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */ /* Reserve a space for AA - may not be needed: */ - if (c->key.aa_dest_stencil_reg) + if (c->aa_dest_stencil_reg) nr += 1; /* I don't really understand how this achieves the color interleave @@ -1428,11 +1408,6 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); - if (intel->gen >= 6) - base_reg = nr; - else - base_reg = 0; - for (channel = 0; channel < 4; channel++) { if (intel->gen >= 6) { /* gen6 SIMD16 single source DP write looks like: @@ -1493,9 +1468,9 @@ void emit_fb_write(struct brw_wm_compile *c, brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) + if (c->source_depth_to_render_target) { - if (c->key.computes_depth) + if (c->computes_depth) brw_MOV(p, brw_message_reg(nr), arg2[2]); else brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ @@ -1503,10 +1478,10 @@ void emit_fb_write(struct brw_wm_compile *c, nr += 2; } - if (c->key.dest_depth_reg) + if (c->dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + GLuint comp = c->dest_depth_reg / 2; + GLuint off = c->dest_depth_reg % 2; if (off != 0) { brw_push_insn_state(p); @@ -1524,15 +1499,28 @@ void emit_fb_write(struct brw_wm_compile *c, } if (intel->gen >= 6) { - /* Subtract off the message header, since we send headerless. */ - nr -= 2; + /* Load the message header. There's no implied move from src0 + * to the base mrf on gen6. + */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state(p); + + if (target != 0) { + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + 0, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(target)); + } } - if (!c->key.runtime_check_aads_emit) { - if (c->key.aa_dest_stencil_reg) + if (!c->runtime_check_aads_emit) { + if (c->aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, base_reg, nr, target, eot); + fire_fb_write(c, 0, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -1897,10 +1885,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_kil(c, args[0]); break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - default: printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 2cae698880..4759b289a0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -338,11 +338,13 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) { - /* This is only called for producing 1/w in pre-gen6 interp. for - * gen6, the interp opcodes don't use this argument. + /* This is called for producing 1/w in pre-gen6 interp. for gen6, + * the interp opcodes don't use this argument. But to keep the + * nr_args = 3 expectations of pinterp happy, just stuff delta_xy + * into the slot. */ if (c->func.brw->intel.gen >= 6) - return src_undef(); + return c->delta_xy; if (src_is_undef(c->pixel_w)) { struct prog_dst_register pixel_w = get_temp(c); @@ -373,11 +375,7 @@ static void emit_interp( struct brw_wm_compile *c, struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); struct prog_src_register deltas; - if (c->func.brw->intel.gen < 6) { - deltas = get_delta_xy(c); - } else { - deltas = src_undef(); - } + deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those @@ -1133,6 +1131,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) precalc_lit(c, inst); break; + case OPCODE_RSQ: + out = emit_scalar_insn(c, inst); + out->SrcReg[0].Abs = GL_TRUE; + break; + case OPCODE_TEX: precalc_tex(c, inst); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c deleted file mode 100644 index 7fe8ab1f33..0000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ /dev/null @@ -1,1035 +0,0 @@ -#include "main/macros.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" - -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint component); - -/** - * Determine if the given fragment program uses GLSL features such - * as flow conditionals, loops, subroutines. - * Some GLSL shaders may use these features, others might not. - */ -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) -{ - int i; - - if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE)) - return GL_TRUE; - - for (i = 0; i < fp->Base.NumInstructions; i++) { - const struct prog_instruction *inst = &fp->Base.Instructions[i]; - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_IF: - case OPCODE_ENDIF: - case OPCODE_CAL: - case OPCODE_BRK: - case OPCODE_RET: - case OPCODE_BGNLOOP: - return GL_TRUE; - default: - break; - } - } - return GL_FALSE; -} - - - -static void -reclaim_temps(struct brw_wm_compile *c); - - -/** Mark GRF register as used. */ -static void -prealloc_grf(struct brw_wm_compile *c, int r) -{ - c->used_grf[r] = GL_TRUE; -} - - -/** Mark given GRF register as not in use. */ -static void -release_grf(struct brw_wm_compile *c, int r) -{ - /*assert(c->used_grf[r]);*/ - c->used_grf[r] = GL_FALSE; - c->first_free_grf = MIN2(c->first_free_grf, r); -} - - -/** Return index of a free GRF, mark it as used. */ -static int -alloc_grf(struct brw_wm_compile *c) -{ - GLuint r; - for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { - if (!c->used_grf[r]) { - c->used_grf[r] = GL_TRUE; - c->first_free_grf = r + 1; /* a guess */ - return r; - } - } - - /* no free temps, try to reclaim some */ - reclaim_temps(c); - c->first_free_grf = 0; - - /* try alloc again */ - for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { - if (!c->used_grf[r]) { - c->used_grf[r] = GL_TRUE; - c->first_free_grf = r + 1; /* a guess */ - return r; - } - } - - for (r = 0; r < BRW_WM_MAX_GRF; r++) { - assert(c->used_grf[r]); - } - - /* really, no free GRF regs found */ - if (!c->out_of_regs) { - /* print warning once per compilation */ - _mesa_warning(NULL, "i965: ran out of registers for fragment program"); - c->out_of_regs = GL_TRUE; - } - - return -1; -} - - -/** Return number of GRF registers used */ -static int -num_grf_used(const struct brw_wm_compile *c) -{ - int r; - for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) - if (c->used_grf[r]) - return r + 1; - return 0; -} - - - -/** - * Record the mapping of a Mesa register to a hardware register. - */ -static void set_reg(struct brw_wm_compile *c, int file, int index, - int component, struct brw_reg reg) -{ - c->wm_regs[file][index][component].reg = reg; - c->wm_regs[file][index][component].inited = GL_TRUE; -} - -static struct brw_reg alloc_tmp(struct brw_wm_compile *c) -{ - struct brw_reg reg; - - /* if we need to allocate another temp, grow the tmp_regs[] array */ - if (c->tmp_index == c->tmp_max) { - int r = alloc_grf(c); - if (r < 0) { - /*printf("Out of temps in %s\n", __FUNCTION__);*/ - r = 50; /* XXX random register! */ - } - c->tmp_regs[ c->tmp_max++ ] = r; - } - - /* form the GRF register */ - reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); - /*printf("alloc_temp %d\n", reg.nr);*/ - assert(reg.nr < BRW_WM_MAX_GRF); - return reg; - -} - -/** - * Save current temp register info. - * There must be a matching call to release_tmps(). - */ -static int mark_tmps(struct brw_wm_compile *c) -{ - return c->tmp_index; -} - -static void release_tmps(struct brw_wm_compile *c, int mark) -{ - c->tmp_index = mark; -} - -/** - * Convert Mesa src register to brw register. - * - * Since we're running in SOA mode each Mesa register corresponds to four - * hardware registers. We allocate the hardware registers as needed here. - * - * \param file register file, one of PROGRAM_x - * \param index register number - * \param component src component (X=0, Y=1, Z=2, W=3) - * \param nr not used?!? - * \param neg negate value? - * \param abs take absolute value? - */ -static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, - int nr, GLuint neg, GLuint abs) -{ - struct brw_reg reg; - switch (file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - file = PROGRAM_STATE_VAR; - break; - case PROGRAM_UNDEFINED: - return brw_null_reg(); - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - case PROGRAM_PAYLOAD: - break; - default: - _mesa_problem(NULL, "Unexpected file in get_reg()"); - return brw_null_reg(); - } - - assert(index < 256); - assert(component < 4); - - /* see if we've already allocated a HW register for this Mesa register */ - if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; - } - else { - /* no, allocate new register */ - int grf = alloc_grf(c); - /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ - if (grf < 0) { - /* totally out of temps */ - grf = 51; /* XXX random register! */ - } - - reg = brw_vec8_grf(grf, 0); - /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - - set_reg(c, file, index, component, reg); - } - - if (neg & (1 << component)) { - reg = negate(reg); - } - if (abs) - reg = brw_abs(reg); - return reg; -} - - - -/** - * This is called if we run out of GRF registers. Examine the live intervals - * of temp regs in the program and free those which won't be used again. - */ -static void -reclaim_temps(struct brw_wm_compile *c) -{ - GLint intBegin[MAX_PROGRAM_TEMPS]; - GLint intEnd[MAX_PROGRAM_TEMPS]; - int index; - - /*printf("Reclaim temps:\n");*/ - - _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, - intBegin, intEnd); - - for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { - if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { - /* program temp[i] can be freed */ - int component; - /*printf(" temp[%d] is dead\n", index);*/ - for (component = 0; component < 4; component++) { - if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { - int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; - release_grf(c, r); - /* - printf(" Reclaim temp %d, reg %d at inst %d\n", - index, r, c->cur_inst); - */ - c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; - } - } - } - } -} - - - - -/** - * Preallocate registers. This sets up the Mesa to hardware register - * mapping for certain registers, such as constants (uniforms/state vars) - * and shader inputs. - */ -static void prealloc_reg(struct brw_wm_compile *c) -{ - struct intel_context *intel = &c->func.brw->intel; - int i, j; - struct brw_reg reg; - int urb_read_length = 0; - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; - GLuint reg_index = 0; - - memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); - c->first_free_grf = 0; - - for (i = 0; i < 4; i++) { - if (i < (c->key.nr_payload_regs + 1) / 2) - reg = brw_vec8_grf(i * 2, 0); - else - reg = brw_vec8_grf(0, 0); - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); - } - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0, - brw_vec8_grf(c->key.source_w_reg, 0)); - reg_index += c->key.nr_payload_regs; - - /* constants */ - { - const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; - const GLuint nr_temps = c->fp->program.Base.NumTemporaries; - - /* use a real constant buffer, or just use a section of the GRF? */ - /* XXX this heuristic may need adjustment... */ - if ((nr_params + nr_temps) * 4 + reg_index > 80) { - for (i = 0; i < nr_params; i++) { - float *pv = c->fp->program.Base.Parameters->ParameterValues[i]; - for (j = 0; j < 4; j++) { - c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j]; - c->prog_data.nr_pull_params++; - } - } - - c->prog_data.nr_params = 0; - } - /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ - - if (!c->prog_data.nr_pull_params) { - const struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - - /* number of float constants in CURBE */ - c->prog_data.nr_params = 4 * nr_params; - - /* loop over program constants (float[4]) */ - for (i = 0; i < nr_params; i++) { - /* loop over XYZW channels */ - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(reg_index + index / 8, index % 8); - /* Save pointer to parameter/constant value. - * Constants will be copied in prepare_constant_buffer() - */ - c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - /* number of constant regs used (each reg is float[8]) */ - c->nr_creg = ALIGN(nr_params, 2) / 2; - reg_index += c->nr_creg; - } - } - - /* fragment shader inputs: One 2-reg pair of interpolation - * coefficients for each vec4 to be set up. - */ - if (intel->gen >= 6) { - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i))) - continue; - - reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) { - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - } - reg_index += 2; - } - urb_read_length = reg_index; - } else { - for (i = 0; i < VERT_RESULT_MAX; i++) { - int fp_input; - - if (i >= VERT_RESULT_VAR0) - fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; - else if (i <= VERT_RESULT_TEX7) - fp_input = i; - else - fp_input = -1; - - if (fp_input >= 0 && inputs & (1 << fp_input)) { - urb_read_length = reg_index; - reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); - } - if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { - reg_index += 2; - } - } - } - - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; - c->prog_data.urb_read_length = urb_read_length; - c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); - reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); - reg_index += 2; - - /* mark GRF regs [0..reg_index-1] as in-use */ - for (i = 0; i < reg_index; i++) - prealloc_grf(c, i); - - /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ - prealloc_grf(c, 126); - prealloc_grf(c, 127); - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - struct brw_reg dst[4]; - - switch (inst->Opcode) { - case OPCODE_TEX: - case OPCODE_TXB: - /* Allocate the channels of texture results contiguously, - * since they are written out that way by the sampler unit. - */ - for (j = 0; j < 4; j++) { - dst[j] = get_dst_reg(c, inst, j); - if (j != 0) - assert(dst[j].nr == dst[j - 1].nr + 1); - } - break; - default: - break; - } - } - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - - switch (inst->Opcode) { - case WM_DELTAXY: - /* Allocate WM_DELTAXY destination on G45/GM45 to an - * even-numbered GRF if possible so that we can use the PLN - * instruction. - */ - if (inst->DstReg.WriteMask == WRITEMASK_XY && - !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && - !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && - (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { - int grf; - - for (grf = c->first_free_grf & ~1; - grf < BRW_WM_MAX_GRF; - grf += 2) - { - if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { - c->used_grf[grf] = GL_TRUE; - c->used_grf[grf + 1] = GL_TRUE; - c->first_free_grf = grf + 2; /* a guess */ - - set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, - brw_vec8_grf(grf, 0)); - set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, - brw_vec8_grf(grf + 1, 0)); - break; - } - } - } - default: - break; - } - } - - /* An instruction may reference up to three constants. - * They'll be found in these registers. - * XXX alloc these on demand! - */ - if (c->prog_data.nr_pull_params) { - for (i = 0; i < 3; i++) { - c->current_const[i].index = -1; - c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); - } - } -#if 0 - printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); -#endif -} - - -/** - * Check if any of the instruction's src registers are constants, uniforms, - * or statevars. If so, fetch any constants that we don't already have in - * the three GRF slots. - */ -static void fetch_constants(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint i; - - /* loop over instruction src regs */ - for (i = 0; i < 3; i++) { - const struct prog_src_register *src = &inst->SrcReg[i]; - if (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM) { - c->current_const[i].index = src->Index; - -#if 0 - printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, i, c->current_const[i].reg.nr); -#endif - - /* need to fetch the constant now */ - brw_oword_block_read(p, - c->current_const[i].reg, - brw_message_reg(1), - 16 * src->Index, - SURF_INDEX_FRAG_CONST_BUFFER); - } - } -} - - -/** - * Convert Mesa dst register to brw register. - */ -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint component) -{ - const int nr = 1; - return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, - 0, 0); -} - - -static struct brw_reg -get_src_reg_const(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint component) -{ - /* We should have already fetched the constant from the constant - * buffer in fetch_constants(). Now we just have to return a - * register description that extracts the needed component and - * smears it across all eight vector components. - */ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - struct brw_reg const_reg; - - assert(component < 4); - assert(srcRegIndex < 3); - assert(c->current_const[srcRegIndex].index != -1); - const_reg = c->current_const[srcRegIndex].reg; - - /* extract desired float from the const_reg, and smear */ - const_reg = stride(const_reg, 0, 1, 0); - const_reg.subnr = component * 4; - - if (src->Negate & (1 << component)) - const_reg = negate(const_reg); - if (src->Abs) - const_reg = brw_abs(const_reg); - -#if 0 - printf(" form const[%d].%d for arg %d, reg %d\n", - c->current_const[srcRegIndex].index, - component, - srcRegIndex, - const_reg.nr); -#endif - - return const_reg; -} - - -/** - * Convert Mesa src register to brw register. - */ -static struct brw_reg get_src_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint channel) -{ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - const GLuint nr = 1; - const GLuint component = GET_SWZ(src->Swizzle, channel); - - /* Only one immediate value can be used per native opcode, and it - * has be in the src1 slot, so not all Mesa instructions will get - * to take advantage of immediate constants. - */ - if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) { - const struct gl_program_parameter_list *params; - - params = c->fp->program.Base.Parameters; - - /* Extended swizzle terms */ - if (component == SWIZZLE_ZERO) { - return brw_imm_f(0.0F); - } else if (component == SWIZZLE_ONE) { - if (src->Negate) - return brw_imm_f(-1.0F); - else - return brw_imm_f(1.0F); - } - - if (src->File == PROGRAM_CONSTANT) { - float f = params->ParameterValues[src->Index][component]; - - if (src->Abs) - f = fabs(f); - if (src->Negate) - f = -f; - - return brw_imm_f(f); - } - } - - if (c->prog_data.nr_pull_params && - (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM)) { - return get_src_reg_const(c, inst, srcRegIndex, component); - } - else { - /* other type of source register */ - return get_reg(c, src->File, src->Index, component, nr, - src->Negate, src->Abs); - } -} - -static void emit_arl(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, addr_reg; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ADDRESS, 0); - src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ - brw_MOV(p, addr_reg, src0); - brw_set_saturate(p, 0); -} - -static INLINE struct brw_reg high_words( struct brw_reg reg ) -{ - return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), - 0, 8, 2 ); -} - -static INLINE struct brw_reg low_words( struct brw_reg reg ) -{ - return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); -} - -static INLINE struct brw_reg even_bytes( struct brw_reg reg ) -{ - return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); -} - -static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) -{ - return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), - 0, 16, 2 ); -} - -/** - * Resolve subroutine calls after code emit is done. - */ -static void post_wm_emit( struct brw_wm_compile *c ) -{ - brw_resolve_cals(&c->func); -} - -static void -get_argument_regs(struct brw_wm_compile *c, - const struct prog_instruction *inst, - int index, - struct brw_reg *dst, - struct brw_reg *regs, - int mask) -{ - struct brw_compile *p = &c->func; - int i, j; - - for (i = 0; i < 4; i++) { - if (mask & (1 << i)) { - regs[i] = get_src_reg(c, inst, index, i); - - /* Unalias destination registers from our sources. */ - if (regs[i].file == BRW_GENERAL_REGISTER_FILE) { - for (j = 0; j < 4; j++) { - if (memcmp(®s[i], &dst[j], sizeof(regs[0])) == 0) { - struct brw_reg tmp = alloc_tmp(c); - brw_MOV(p, tmp, regs[i]); - regs[i] = tmp; - break; - } - } - } - } - } -} - -static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) -{ - struct intel_context *intel = &brw->intel; -#define MAX_IF_DEPTH 32 -#define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; - int if_depth_in_loop[MAX_LOOP_DEPTH]; - GLuint i, if_depth = 0, loop_depth = 0; - struct brw_compile *p = &c->func; - struct brw_indirect stack_index = brw_indirect(0, 0); - - c->out_of_regs = GL_FALSE; - - if_depth_in_loop[loop_depth] = 0; - - prealloc_reg(c); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - - if (intel->gen >= 6) - brw_set_acc_write_control(p, 1); - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - int dst_flags; - struct brw_reg args[3][4], dst[4]; - int j; - int mark = mark_tmps( c ); - - c->cur_inst = i; - -#if 0 - printf("Inst %d: ", i); - _mesa_print_instruction(inst); -#endif - - /* fetch any constants that this instruction needs */ - if (c->prog_data.nr_pull_params) - fetch_constants(c, inst); - - if (inst->Opcode != OPCODE_ARL) { - for (j = 0; j < 4; j++) { - if (inst->DstReg.WriteMask & (1 << j)) - dst[j] = get_dst_reg(c, inst, j); - else - dst[j] = brw_null_reg(); - } - } - for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) - get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW); - - dst_flags = inst->DstReg.WriteMask; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - dst_flags |= SATURATE; - - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - - switch (inst->Opcode) { - case WM_PIXELXY: - emit_pixel_xy(c, dst, dst_flags); - break; - case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0]); - break; - case WM_PIXELW: - emit_pixel_w(c, dst, dst_flags, args[0], args[1]); - break; - case WM_LINTERP: - emit_linterp(p, dst, dst_flags, args[0], args[1]); - break; - case WM_PINTERP: - emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case WM_CINTERP: - emit_cinterp(p, dst, dst_flags, args[0]); - break; - case WM_WPOSXY: - emit_wpos_xy(c, dst, dst_flags, args[0]); - break; - case WM_FB_WRITE: - emit_fb_write(c, args[0], args[1], args[2], - INST_AUX_GET_TARGET(inst->Aux), - inst->Aux & INST_AUX_EOT); - break; - case WM_FRONTFACING: - emit_frontfacing(p, dst, dst_flags); - break; - case OPCODE_ADD: - emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_ARL: - emit_arl(c, inst); - break; - case OPCODE_FRC: - emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); - break; - case OPCODE_FLR: - emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); - break; - case OPCODE_LRP: - emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_TRUNC: - emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); - break; - case OPCODE_MOV: - case OPCODE_SWZ: - emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); - break; - case OPCODE_DP2: - emit_dp2(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DP3: - emit_dp3(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DP4: - emit_dp4(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_XPD: - emit_xpd(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DPH: - emit_dph(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_RCP: - emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); - break; - case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); - break; - case OPCODE_SIN: - emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); - break; - case OPCODE_COS: - emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); - break; - case OPCODE_EX2: - emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); - break; - case OPCODE_LG2: - emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); - break; - case OPCODE_CMP: - emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_MIN: - emit_min(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_MAX: - emit_max(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DDX: - case OPCODE_DDY: - emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), - args[0]); - break; - case OPCODE_SLT: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_L, args[0], args[1]); - break; - case OPCODE_SLE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_LE, args[0], args[1]); - break; - case OPCODE_SGT: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_G, args[0], args[1]); - break; - case OPCODE_SGE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_GE, args[0], args[1]); - break; - case OPCODE_SEQ: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_EQ, args[0], args[1]); - break; - case OPCODE_SNE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_NEQ, args[0], args[1]); - break; - case OPCODE_SSG: - emit_sign(p, dst, dst_flags, args[0]); - break; - case OPCODE_MUL: - emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_POW: - emit_math2(c, BRW_MATH_FUNCTION_POW, - dst, dst_flags, args[0], args[1]); - break; - case OPCODE_MAD: - emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_TEX: - emit_tex(c, dst, dst_flags, args[0], - get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, - 0, 1, 0, 0), - inst->TexSrcTarget, - inst->TexSrcUnit, - (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); - break; - case OPCODE_TXB: - emit_txb(c, dst, dst_flags, args[0], - get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, - 0, 1, 0, 0), - inst->TexSrcTarget, - c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); - break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - case OPCODE_IF: - assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); - if_depth_in_loop[loop_depth]++; - break; - case OPCODE_ELSE: - assert(if_depth > 0); - if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); - break; - case OPCODE_ENDIF: - assert(if_depth > 0); - brw_ENDIF(p, if_inst[--if_depth]); - if_depth_in_loop[loop_depth]--; - break; - case OPCODE_BGNSUB: - brw_save_label(p, inst->Comment, p->nr_insn); - break; - case OPCODE_ENDSUB: - /* no-op */ - break; - case OPCODE_CAL: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(&c->func, inst->Comment, p->nr_insn); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_pop_insn_state(p); - break; - - case OPCODE_RET: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_pop_insn_state(p); - - break; - case OPCODE_BGNLOOP: - /* XXX may need to invalidate the current_constant regs */ - loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); - if_depth_in_loop[loop_depth] = 0; - break; - case OPCODE_BRK: - brw_BREAK(p, if_depth_in_loop[loop_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_CONT: - brw_CONT(p, if_depth_in_loop[loop_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_ENDLOOP: - { - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - if (intel->gen == 5) - br = 2; - - assert(loop_depth > 0); - loop_depth--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } - break; - default: - printf("unsupported opcode %d (%s) in fragment shader\n", - inst->Opcode, inst->Opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->Opcode) : "unknown"); - } - - /* Release temporaries containing any unaliased source regs. */ - release_tmps( c, mark ); - - if (inst->CondUpdate) - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - else - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - post_wm_emit(c); - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("wm-native:\n"); - for (i = 0; i < p->nr_insn; i++) - brw_disasm(stdout, &p->store[i], intel->gen); - printf("\n"); - } -} - -/** - * Do GPU code generation for shaders that use GLSL features such as - * flow control. Other shaders will be compiled with the - */ -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) -{ - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("brw_wm_glsl_emit:\n"); - } - - /* initial instruction translation/simplification */ - brw_wm_pass_fp(c); - - /* actual code generation */ - brw_wm_emit_glsl(brw, c); - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - brw_wm_print_program(c, "brw_wm_glsl_emit done"); - } - - c->prog_data.total_grf = num_grf_used(c); - c->prog_data.total_scratch = 0; -} diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index 62e556698b..471ea1c18d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -120,14 +120,14 @@ const struct { * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES * \param lookup bitmask of IZ_* flags */ -void brw_wm_lookup_iz( struct intel_context *intel, - GLuint line_aa, - GLuint lookup, - GLboolean ps_uses_depth, - struct brw_wm_prog_key *key ) +void brw_wm_lookup_iz(struct intel_context *intel, + struct brw_wm_compile *c) { GLuint reg = 2; GLboolean kill_stats_promoted_workaround = GL_FALSE; + int lookup = c->key.iz_lookup; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; assert (lookup < IZ_BIT_MAX); @@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel, * statistics are enabled..." paragraph of 11.5.3.2: Early Depth * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec. */ - if (intel->stats_wm && + if (c->key.stats_wm && (lookup & IZ_PS_KILL_ALPHATEST_BIT) && wm_iz_table[lookup].mode == P) { kill_stats_promoted_workaround = GL_TRUE; } if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) - key->computes_depth = 1; + c->computes_depth = 1; - if (wm_iz_table[lookup].sd_present || ps_uses_depth || + if (wm_iz_table[lookup].sd_present || uses_depth || kill_stats_promoted_workaround) { - key->source_depth_reg = reg; + c->source_depth_reg = reg; reg += 2; } if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround) - key->source_depth_to_render_target = 1; + c->source_depth_to_render_target = 1; - if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { - key->aa_dest_stencil_reg = reg; - key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && - line_aa == AA_SOMETIMES); + if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) { + c->aa_dest_stencil_reg = reg; + c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + c->key.line_aa == AA_SOMETIMES); reg++; } if (wm_iz_table[lookup].dd_present) { - key->dest_depth_reg = reg; + c->dest_depth_reg = reg; reg+=2; } - key->nr_payload_regs = reg; + c->nr_payload_regs = reg; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 83152526b3..f78bdc3186 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) GLuint i; for (i = 0; i < 4; i++) { - GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i; + GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i; pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, &c->payload.depth[j] ); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 3a2874b6dd..7d6a3fa9f1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) if (inst->opcode == WM_FB_WRITE) { track_arg(c, inst, 0, WRITEMASK_XYZW); track_arg(c, inst, 1, WRITEMASK_XYZW); - if (c->key.source_depth_to_render_target && - c->key.computes_depth) + if (c->source_depth_to_render_target && c->computes_depth) track_arg(c, inst, 2, WRITEMASK_Z); else track_arg(c, inst, 2, 0); @@ -281,7 +280,6 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_DST: case WM_FRONTFACING: - case OPCODE_KIL_NV: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 44e3953814..8c2b9e7020 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -69,6 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { + struct brw_context *brw = c->func.brw; + struct intel_context *intel = &brw->intel; GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -76,32 +78,41 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->grf_limit; j++) c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; - for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++) + for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++) prealloc_reg(c, &c->payload.depth[j], i++); for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < VERT_RESULT_MAX; j++) { - if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { - int fp_index; - - if (j >= VERT_RESULT_VAR0) - fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); - else if (j <= VERT_RESULT_TEX7) - fp_index = j; - else - fp_index = -1; - - nr_interp_regs++; - if (fp_index >= 0) - prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + if (intel->gen >= 6) { + for (unsigned int j = 0; j < FRAG_ATTRIB_MAX; j++) { + if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(j)) { + nr_interp_regs++; + prealloc_reg(c, &c->payload.input_interp[j], i++); + } + } + } else { + for (j = 0; j < VERT_RESULT_MAX; j++) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; + else + fp_index = -1; + + nr_interp_regs++; + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + } } + assert(nr_interp_regs >= 1); } - assert(nr_interp_regs >= 1); - c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2); + c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2); c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg * 2; diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index fea96d3538..30672b4251 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap ) static drm_intel_bo *upload_default_color( struct brw_context *brw, const GLfloat *color ) { - struct brw_sampler_default_color sdc; + struct intel_context *intel = &brw->intel; - COPY_4V(sdc.color, color); - - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); + if (intel->gen >= 5) { + struct gen5_sampler_default_color sdc; + + memset(&sdc, 0, sizeof(sdc)); + + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]); + + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]); + + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]); + + /* XXX: Fill in half floats */ + /* XXX: Fill in signed bytes */ + + COPY_4V(sdc.f, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } else { + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } } @@ -245,9 +276,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw, struct wm_sampler_entry *entry = &key->sampler[unit]; struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *texObj = texUnit->_Current; - struct intel_texture_object *intelObj = intel_texture_object(texObj); struct gl_texture_image *firstImage = - texObj->Image[0][intelObj->firstLevel]; + texObj->Image[0][texObj->BaseLevel]; memset(last_entry_end, 0, (char*)entry - last_entry_end + sizeof(*entry)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 76de7b7b6f..e9ef635bca 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -87,7 +87,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { struct gl_context *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; - const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -132,7 +131,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = bfp->isGLSL; /* If using the fragment shader backend, the program is always * 8-wide. diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 76fc94df1f..233fe3b731 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -42,7 +42,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" - +#include "brw_wm.h" static GLuint translate_tex_target( GLenum target ) { @@ -68,104 +68,72 @@ static GLuint translate_tex_target( GLenum target ) } } +static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM, + [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM, + [MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM, + [MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM, + [MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM, + [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM, + [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM, + [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM, + [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM, + [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM, + [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM, + [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM, + [MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM, + [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM, + [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL, + [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY, + [MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1, + [MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1, + [MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB, + [MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM, + [MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM, + [MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM, + [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB, + [MESA_FORMAT_SRGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB, + [MESA_FORMAT_SRGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB, + [MESA_FORMAT_SRGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB, + [MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB, + [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB, + [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB, + [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM, + [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM, +}; + +bool +brw_render_target_supported(gl_format format) +{ + if (format == MESA_FORMAT_S8_Z24 || + format == MESA_FORMAT_X8_Z24 || + format == MESA_FORMAT_Z16) { + return true; + } + + /* Not exactly true, as some of those formats are not renderable. + * But at least we know how to translate them. + */ + return brw_format_for_mesa_format[format] != 0; +} static GLuint translate_tex_format( gl_format mesa_format, GLenum internal_format, GLenum depth_mode ) { switch( mesa_format ) { - case MESA_FORMAT_L8: - return BRW_SURFACEFORMAT_L8_UNORM; - - case MESA_FORMAT_I8: - return BRW_SURFACEFORMAT_I8_UNORM; - - case MESA_FORMAT_A8: - return BRW_SURFACEFORMAT_A8_UNORM; - - case MESA_FORMAT_AL88: - return BRW_SURFACEFORMAT_L8A8_UNORM; - - case MESA_FORMAT_AL1616: - return BRW_SURFACEFORMAT_L16A16_UNORM; - - case MESA_FORMAT_R8: - return BRW_SURFACEFORMAT_R8_UNORM; - - case MESA_FORMAT_R16: - return BRW_SURFACEFORMAT_R16_UNORM; - - case MESA_FORMAT_RG88: - return BRW_SURFACEFORMAT_R8G8_UNORM; - - case MESA_FORMAT_RG1616: - return BRW_SURFACEFORMAT_R16G16_UNORM; - - case MESA_FORMAT_RGB888: - assert(0); /* not supported for sampling */ - return BRW_SURFACEFORMAT_R8G8B8_UNORM; - - case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - - case MESA_FORMAT_XRGB8888: - return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - - case MESA_FORMAT_RGBA8888_REV: - _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()"); - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - - case MESA_FORMAT_RGB565: - return BRW_SURFACEFORMAT_B5G6R5_UNORM; - - case MESA_FORMAT_ARGB1555: - return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - - case MESA_FORMAT_ARGB4444: - return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - - case MESA_FORMAT_YCBCR_REV: - return BRW_SURFACEFORMAT_YCRCB_NORMAL; - - case MESA_FORMAT_YCBCR: - return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - return BRW_SURFACEFORMAT_FXT1; case MESA_FORMAT_Z16: if (depth_mode == GL_INTENSITY) return BRW_SURFACEFORMAT_I16_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A16_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R16_UNORM; else return BRW_SURFACEFORMAT_L16_UNORM; - case MESA_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; - - case MESA_FORMAT_RGBA_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM; - - case MESA_FORMAT_RGBA_DXT3: - return BRW_SURFACEFORMAT_BC2_UNORM; - - case MESA_FORMAT_RGBA_DXT5: - return BRW_SURFACEFORMAT_BC3_UNORM; - - case MESA_FORMAT_SARGB8: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - - case MESA_FORMAT_SLA8: - return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; - - case MESA_FORMAT_SL8: - return BRW_SURFACEFORMAT_L8_UNORM_SRGB; - - case MESA_FORMAT_SRGB_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; - case MESA_FORMAT_S8_Z24: /* XXX: these different surface formats don't seem to * make any difference for shadow sampler/compares. @@ -174,18 +142,14 @@ static GLuint translate_tex_format( gl_format mesa_format, return BRW_SURFACEFORMAT_I24X8_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A24X8_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS; else return BRW_SURFACEFORMAT_L24X8_UNORM; - case MESA_FORMAT_DUDV8: - return BRW_SURFACEFORMAT_R8G8_SNORM; - - case MESA_FORMAT_SIGNED_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; - default: - assert(0); - return 0; + assert(brw_format_for_mesa_format[mesa_format] != 0); + return brw_format_for_mesa_format[mesa_format]; } } @@ -214,7 +178,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) struct brw_context *brw = brw_context(ctx); struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; const GLuint surf_index = SURF_INDEX_TEXTURE(unit); struct brw_surface_state surf; void *map; @@ -232,7 +196,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ surf.ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */ - surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; + surf.ss2.mip_count = intelObj->_MaxLevel - tObj->BaseLevel; surf.ss2.width = firstImage->Width - 1; surf.ss2.height = firstImage->Height - 1; brw_set_surface_tiling(&surf, intelObj->mt->region->tiling); @@ -274,6 +238,7 @@ brw_create_constant_surface(struct brw_context *brw, drm_intel_bo **out_bo, uint32_t *out_offset) { + struct intel_context *intel = &brw->intel; const GLint w = width - 1; struct brw_surface_state surf; void *map; @@ -284,6 +249,9 @@ brw_create_constant_surface(struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + if (intel->gen >= 6) + surf.ss0.render_cache_read_write = 1; + assert(bo); surf.ss1.base_addr = bo->offset; /* reloc */ @@ -404,6 +372,38 @@ const struct brw_tracked_state brw_wm_constant_surface = { .emit = upload_wm_constant_surface, }; +static void +brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) +{ + struct intel_context *intel = &brw->intel; + struct brw_surface_state surf; + void *map; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.surface_type = BRW_SURFACE_NULL; + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + surf.ss1.base_addr = 0; + + surf.ss2.width = 0; + surf.ss2.height = 0; + brw_set_surface_tiling(&surf, I915_TILING_NONE); + surf.ss3.pitch = 0; + + if (intel->gen < 6) { + /* _NEW_COLOR */ + surf.ss0.color_blend = 0; + surf.ss0.writedisable_red = 1; + surf.ss0.writedisable_green = 1; + surf.ss0.writedisable_blue = 1; + surf.ss0.writedisable_alpha = 1; + } + + map = brw_state_batch(brw, sizeof(surf), 32, + &brw->wm.surf_bo[unit], + &brw->wm.surf_offset[unit]); + memcpy(map, &surf, sizeof(surf)); +} /** * Sets up a surface state structure to point at the given region. @@ -417,123 +417,53 @@ brw_update_renderbuffer_surface(struct brw_context *brw, { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; - drm_intel_bo *region_bo = NULL; struct intel_renderbuffer *irb = intel_renderbuffer(rb); - struct intel_region *region = irb ? irb->region : NULL; - struct { - unsigned int surface_type; - unsigned int surface_format; - unsigned int width, height, pitch, cpp; - GLubyte color_mask[4]; - GLboolean color_blend; - uint32_t tiling; - uint32_t draw_x; - uint32_t draw_y; - } key; + struct intel_region *region = irb->region; struct brw_surface_state surf; void *map; - memset(&key, 0, sizeof(key)); - - if (region != NULL) { - region_bo = region->buffer; - - key.surface_type = BRW_SURFACE_2D; - switch (irb->Base.Format) { - /* XRGB and ARGB are treated the same here because the chips in this - * family cannot render to XRGB targets. This means that we have to - * mask writes to alpha (ala glColorMask) and reconfigure the alpha - * blending hardware to use GL_ONE (or GL_ZERO) for cases where - * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used. - */ - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - case MESA_FORMAT_SARGB8: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - break; - case MESA_FORMAT_RGB565: - key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - case MESA_FORMAT_ARGB1555: - key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - break; - case MESA_FORMAT_ARGB4444: - key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - break; - case MESA_FORMAT_A8: - key.surface_format = BRW_SURFACEFORMAT_A8_UNORM; - break; - case MESA_FORMAT_R8: - key.surface_format = BRW_SURFACEFORMAT_R8_UNORM; - break; - case MESA_FORMAT_R16: - key.surface_format = BRW_SURFACEFORMAT_R16_UNORM; - break; - case MESA_FORMAT_RG88: - key.surface_format = BRW_SURFACEFORMAT_R8G8_UNORM; - break; - case MESA_FORMAT_RG1616: - key.surface_format = BRW_SURFACEFORMAT_R16G16_UNORM; - break; - default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format); - } - key.tiling = region->tiling; - key.width = rb->Width; - key.height = rb->Height; - key.pitch = region->pitch; - key.cpp = region->cpp; - key.draw_x = region->draw_x; - key.draw_y = region->draw_y; - } else { - key.surface_type = BRW_SURFACE_NULL; - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiling = I915_TILING_X; - key.width = 1; - key.height = 1; - key.cpp = 4; - key.draw_x = 0; - key.draw_y = 0; - } - - if (intel->gen < 6) { - /* _NEW_COLOR */ - memcpy(key.color_mask, ctx->Color.ColorMask[unit], - sizeof(key.color_mask)); + memset(&surf, 0, sizeof(surf)); - /* As mentioned above, disable writes to the alpha component when the - * renderbuffer is XRGB. + switch (irb->Base.Format) { + case MESA_FORMAT_XRGB8888: + /* XRGB is handled as ARGB because the chips in this family + * cannot render to XRGB targets. This means that we have to + * mask writes to alpha (ala glColorMask) and reconfigure the + * alpha blending hardware to use GL_ONE (or GL_ZERO) for + * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is + * used. */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) - key.color_mask[3] = GL_FALSE; - - key.color_blend = (!ctx->Color._LogicOpEnabled && - (ctx->Color.BlendEnabled & (1 << unit))); + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + break; + case MESA_FORMAT_SARGB8: + /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB + surfaces to the blend/update as sRGB */ + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + break; + default: + surf.ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format]; + assert(surf.ss0.surface_format != 0); } - memset(&surf, 0, sizeof(surf)); - - surf.ss0.surface_format = key.surface_format; - surf.ss0.surface_type = key.surface_type; - if (key.tiling == I915_TILING_NONE) { - surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp; + surf.ss0.surface_type = BRW_SURFACE_2D; + if (region->tiling == I915_TILING_NONE) { + surf.ss1.base_addr = (region->draw_x + + region->draw_y * region->pitch) * region->cpp; } else { uint32_t tile_base, tile_x, tile_y; - uint32_t pitch = key.pitch * key.cpp; + uint32_t pitch = region->pitch * region->cpp; - if (key.tiling == I915_TILING_X) { - tile_x = key.draw_x % (512 / key.cpp); - tile_y = key.draw_y % 8; - tile_base = ((key.draw_y / 8) * (8 * pitch)); - tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096; + if (region->tiling == I915_TILING_X) { + tile_x = region->draw_x % (512 / region->cpp); + tile_y = region->draw_y % 8; + tile_base = ((region->draw_y / 8) * (8 * pitch)); + tile_base += (region->draw_x - tile_x) / (512 / region->cpp) * 4096; } else { /* Y */ - tile_x = key.draw_x % (128 / key.cpp); - tile_y = key.draw_y % 32; - tile_base = ((key.draw_y / 32) * (32 * pitch)); - tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096; + tile_x = region->draw_x % (128 / region->cpp); + tile_y = region->draw_y % 32; + tile_base = ((region->draw_y / 32) * (32 * pitch)); + tile_base += (region->draw_x - tile_x) / (128 / region->cpp) * 4096; } assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); assert(tile_x % 4 == 0); @@ -545,21 +475,27 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss5.x_offset = tile_x / 4; surf.ss5.y_offset = tile_y / 2; } - if (region_bo != NULL) - surf.ss1.base_addr += region_bo->offset; /* reloc */ + surf.ss1.base_addr += region->buffer->offset; /* reloc */ - surf.ss2.width = key.width - 1; - surf.ss2.height = key.height - 1; - brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.pitch * key.cpp) - 1; + surf.ss2.width = rb->Width - 1; + surf.ss2.height = rb->Height - 1; + brw_set_surface_tiling(&surf, region->tiling); + surf.ss3.pitch = (region->pitch * region->cpp) - 1; if (intel->gen < 6) { /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; + surf.ss0.color_blend = (!ctx->Color._LogicOpEnabled && + (ctx->Color.BlendEnabled & (1 << unit))); + surf.ss0.writedisable_red = !ctx->Color.ColorMask[unit][0]; + surf.ss0.writedisable_green = !ctx->Color.ColorMask[unit][1]; + surf.ss0.writedisable_blue = !ctx->Color.ColorMask[unit][2]; + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. + */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) + surf.ss0.writedisable_alpha = 1; + else + surf.ss0.writedisable_alpha = !ctx->Color.ColorMask[unit][3]; } map = brw_state_batch(brw, sizeof(surf), 32, @@ -567,15 +503,13 @@ brw_update_renderbuffer_surface(struct brw_context *brw, &brw->wm.surf_offset[unit]); memcpy(map, &surf, sizeof(surf)); - if (region_bo != NULL) { - drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], - brw->wm.surf_offset[unit] + - offsetof(struct brw_surface_state, ss1), - region_bo, - surf.ss1.base_addr - region_bo->offset, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - } + drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], + brw->wm.surf_offset[unit] + + offsetof(struct brw_surface_state, ss1), + region->buffer, + surf.ss1.base_addr - region->buffer->offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); } static void @@ -635,12 +569,16 @@ upload_wm_surfaces(struct brw_context *brw) /* Update surfaces for drawing buffers */ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - brw_update_renderbuffer_surface(brw, - ctx->DrawBuffer->_ColorDrawBuffers[i], - i); + if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i); + } else { + brw_update_null_renderbuffer_surface(brw, i); + } } } else { - brw_update_renderbuffer_surface(brw, NULL, 0); + brw_update_null_renderbuffer_surface(brw, 0); } /* Update surfaces for textures */ diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 800a255521..dbcdc5b869 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -35,6 +35,7 @@ struct gen6_blend_state_key { GLboolean color_blend, alpha_enabled; GLboolean dither; + GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4]; GLenum logic_op; @@ -54,6 +55,9 @@ blend_state_populate_key(struct brw_context *brw, memset(key, 0, sizeof(*key)); /* _NEW_COLOR */ + memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask)); + + /* _NEW_COLOR */ if (ctx->Color._LogicOpEnabled) key->logic_op = ctx->Color.LogicOp; else @@ -87,54 +91,62 @@ static drm_intel_bo * blend_state_create_from_key(struct brw_context *brw, struct gen6_blend_state_key *key) { - struct gen6_blend_state blend; + struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS]; drm_intel_bo *bo; + int b; memset(&blend, 0, sizeof(blend)); - if (key->logic_op != GL_COPY) { - blend.blend1.logic_op_enable = 1; - blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) { - srcRGB = dstRGB = GL_ONE; - } - - if (eqA == GL_MIN || eqA == GL_MAX) { - srcA = dstA = GL_ONE; + for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) { + if (key->logic_op != GL_COPY) { + blend[b].blend1.logic_op_enable = 1; + blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend & (1 << b)) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + blend[b].blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); + blend[b].blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); + blend[b].blend0.blend_func = brw_translate_blend_equation(eqRGB); + + blend[b].blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + blend[b].blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); + blend[b].blend0.ia_blend_func = brw_translate_blend_equation(eqA); + + blend[b].blend0.blend_enable = 1; + blend[b].blend0.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); } - blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); - blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); - blend.blend0.blend_func = brw_translate_blend_equation(eqRGB); - - blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); - blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA); + if (key->alpha_enabled) { + blend[b].blend1.alpha_test_enable = 1; + blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); - blend.blend0.blend_enable = 1; - blend.blend0.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - if (key->alpha_enabled) { - blend.blend1.alpha_test_enable = 1; - blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + } - } + if (key->dither) { + blend[b].blend1.dither_enable = 1; + blend[b].blend1.y_dither_offset = 0; + blend[b].blend1.x_dither_offset = 0; + } - if (key->dither) { - blend.blend1.dither_enable = 1; - blend.blend1.y_dither_offset = 0; - blend.blend1.x_dither_offset = 0; + blend[b].blend1.write_disable_r = !key->color_mask[b][0]; + blend[b].blend1.write_disable_g = !key->color_mask[b][1]; + blend[b].blend1.write_disable_b = !key->color_mask[b][2]; + blend[b].blend1.write_disable_a = !key->color_mask[b][3]; } bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, @@ -172,7 +184,7 @@ const struct brw_tracked_state gen6_blend_state = { }; struct gen6_color_calc_state_key { - GLubyte blend_constant_color[4]; + float blend_constant_color[4]; GLclampf alpha_ref; GLubyte stencil_ref[2]; }; @@ -266,7 +278,7 @@ static void upload_cc_state_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(4); - OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index c65b41e2b6..38c98f30ef 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -43,7 +43,10 @@ upload_clip_state(struct brw_context *brw) depth_clamp = GEN6_CLIP_Z_TEST; if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { - provoking = 0; + provoking = + (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | + (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | + (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } else { provoking = (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | @@ -55,7 +58,7 @@ upload_clip_state(struct brw_context *brw) userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1; BEGIN_BATCH(4); - OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); OUT_BATCH(GEN6_CLIP_ENABLE | GEN6_CLIP_API_OGL | @@ -64,7 +67,9 @@ upload_clip_state(struct brw_context *brw) userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | depth_clamp | provoking); - OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX); + OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | + U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | + GEN6_CLIP_FORCE_ZERO_RTAINDEX); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index 6127b9197a..7296c7cd1b 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -37,7 +37,7 @@ upload_gs_state(struct brw_context *brw) /* Disable all the constant buffers. */ BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2)); + OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -46,7 +46,7 @@ upload_gs_state(struct brw_context *brw) if (brw->gs.prog_bo) { BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH(GEN6_GS_SPF_MODE | (0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | @@ -62,7 +62,7 @@ upload_gs_state(struct brw_context *brw) ADVANCE_BATCH(); } else { BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index fc5d391c3c..f65c651bdf 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -36,7 +36,7 @@ upload_sampler_state_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(4); - OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 | + OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 | VS_SAMPLER_STATE_CHANGE | GS_SAMPLER_STATE_CHANGE | PS_SAMPLER_STATE_CHANGE | diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index b57126c793..12b65826ae 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -92,7 +92,7 @@ static void upload_scissor_state_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(2); - OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 471067e8f0..f27782935d 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -33,9 +33,10 @@ #include "intel_batchbuffer.h" static uint32_t -get_attr_override(struct brw_context *brw, int fs_attr) +get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color) { int attr_index = 0, i, vs_attr; + int bfc = 0; if (fs_attr <= FRAG_ATTRIB_TEX7) vs_attr = fs_attr; @@ -57,6 +58,30 @@ get_attr_override(struct brw_context *brw, int fs_attr) attr_index++; } + assert(attr_index < 32); + + if (two_side_color) { + if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + } + + if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) { + if (fs_attr == FRAG_ATTRIB_COL0) + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) { + attr_index++; + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + } else { + attr_index += bfc; + } + } + return attr_index; } @@ -67,13 +92,15 @@ upload_sf_state(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; /* CACHE_NEW_VS_PROG */ uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); + /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); - uint32_t dw1, dw2, dw3, dw4, dw16; + uint32_t dw1, dw2, dw3, dw4, dw16, dw17; int i; /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; int urb_start; + int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) @@ -91,6 +118,7 @@ upload_sf_state(struct brw_context *brw) dw3 = 0; dw4 = 0; dw16 = 0; + dw17 = 0; /* _NEW_POLYGON */ if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) @@ -99,6 +127,48 @@ upload_sf_state(struct brw_context *brw) if (ctx->Polygon.OffsetFill) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; + if (ctx->Polygon.OffsetLine) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; + + if (ctx->Polygon.OffsetPoint) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; + + switch (ctx->Polygon.FrontMode) { + case GL_FILL: + dw2 |= GEN6_SF_FRONT_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_FRONT_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_FRONT_POINT; + break; + + default: + assert(0); + break; + } + + switch (ctx->Polygon.BackMode) { + case GL_FILL: + dw2 |= GEN6_SF_BACK_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_BACK_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_BACK_POINT; + break; + + default: + assert(0); + break; + } + /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) dw3 |= GEN6_SF_SCISSOR_ENABLE; @@ -160,8 +230,14 @@ upload_sf_state(struct brw_context *brw) } } + /* flat shading */ + if (ctx->Light.ShadeModel == GL_FLAT) { + dw17 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >> + ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); + } + BEGIN_BATCH(20); - OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); + OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); OUT_BATCH(dw3); @@ -174,7 +250,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr); + attr_overrides |= get_attr_override(brw, attr, two_side_color); attr++; break; } @@ -182,7 +258,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr) << 16; + attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; attr++; break; } @@ -190,7 +266,7 @@ upload_sf_state(struct brw_context *brw) OUT_BATCH(attr_overrides); } OUT_BATCH(dw16); /* point sprite texcoord bitmask */ - OUT_BATCH(0); /* constant interp bitmask */ + OUT_BATCH(dw17); /* constant interp bitmask */ OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); @@ -205,7 +281,8 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_BUFFERS | _NEW_POINT | _NEW_TRANSFORM), - .brw = BRW_NEW_CONTEXT, + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index a34123478f..fc46c4cb79 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -60,7 +60,7 @@ upload_urb(struct brw_context *brw) assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); BEGIN_BATCH(3); - OUT_BATCH(CMD_URB << 16 | (3 - 2)); + OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | @@ -72,7 +72,7 @@ const struct brw_tracked_state gen6_urb = { .dirty = { .mesa = 0, .brw = BRW_NEW_CONTEXT, - .cache = CACHE_NEW_VS_PROG, + .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG), }, .prepare = prepare_urb, .emit = upload_urb, diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index d691bbebc8..cd7d209e3e 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -117,7 +117,7 @@ static void upload_viewport_state_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(4); - OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | + OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | GEN6_CC_VIEWPORT_MODIFY | GEN6_SF_VIEWPORT_MODIFY | GEN6_CLIP_VIEWPORT_MODIFY); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e94d0c0ddb..e68c0ac261 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -47,14 +47,14 @@ upload_vs_state(struct brw_context *brw) if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } else { - int params_uploaded = 0; + int params_uploaded = 0, param_regs; float *param; if (brw->vertex_program->IsNVProgram) @@ -88,20 +88,11 @@ upload_vs_state(struct brw_context *brw) params_uploaded++; } - if (vp->use_const_buffer) { - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; - } - } - } else { - for (i = 0; i < nr_params; i++) { - memcpy(param, vp->program.Base.Parameters->ParameterValues[i], + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); - param += 4; params_uploaded++; } } @@ -117,13 +108,16 @@ upload_vs_state(struct brw_context *brw) drm_intel_gem_bo_unmap_gtt(constant_bo); + param_regs = (params_uploaded + 1) / 2; + assert(param_regs <= 32); + BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); OUT_RELOC(constant_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(params_uploaded, 2) / 2 - 1); + param_regs - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -133,9 +127,10 @@ upload_vs_state(struct brw_context *brw) } BEGIN_BATCH(6); - OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | + GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index ea5418bacf..78901ecac5 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -66,6 +66,21 @@ prepare_wm_constants(struct brw_context *brw) constants[i] = convert_param(brw->wm.prog_data->param_convert[i], *brw->wm.prog_data->param[i]); } + + if (0) { + printf("WM constants:\n"); + for (i = 0; i < brw->wm.prog_data->nr_params; i++) { + if ((i & 7) == 0) + printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8); + printf("%8f ", constants[i]); + if ((i & 7) == 7) + printf("\n"); + } + if ((i & 7) != 0) + printf("\n"); + printf("\n"); + } + drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo); } } @@ -88,10 +103,11 @@ upload_wm_state(struct brw_context *brw) brw_fragment_program_const(brw->fragment_program); uint32_t dw2, dw4, dw5, dw6; + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2)); + OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -99,12 +115,13 @@ upload_wm_state(struct brw_context *brw) ADVANCE_BATCH(); } else { BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | + OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); OUT_RELOC(brw->wm.push_const_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1); + ALIGN(brw->wm.prog_data->nr_params, + brw->wm.prog_data->dispatch_width) / 8 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -116,6 +133,9 @@ upload_wm_state(struct brw_context *brw) dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; + /* OpenGL non-ieee floating point mode */ + dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT; + /* BRW_NEW_NR_WM_SURFACES */ dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT; @@ -126,8 +146,8 @@ upload_wm_state(struct brw_context *brw) dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; - /* BRW_NEW_FRAGMENT_PROGRAM */ - if (fp->isGLSL) + /* CACHE_NEW_WM_PROG */ + if (brw->wm.prog_data->dispatch_width == 8) dw5 |= GEN6_WM_8_DISPATCH_ENABLE; else dw5 |= GEN6_WM_16_DISPATCH_ENABLE; @@ -161,7 +181,7 @@ upload_wm_state(struct brw_context *brw) GEN6_WM_NUM_SF_OUTPUTS_SHIFT; BEGIN_BATCH(9); - OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2)); + OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH(dw2); OUT_BATCH(0); /* scratch space base offset */ @@ -176,13 +196,14 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen6_wm_state = { .dirty = { .mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS | - _NEW_PROGRAM_CONSTANTS), + _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), - .cache = CACHE_NEW_SAMPLER + .cache = (CACHE_NEW_SAMPLER | + CACHE_NEW_WM_PROG) }, .emit = upload_wm_state, }; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 4b498f8c5b..67ce8a4da0 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -92,9 +92,17 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) batch->ptr = NULL; - if (!intel->no_hw) { - drm_intel_bo_exec(batch->buf, used, NULL, 0, - (x_off & 0xffff) | (y_off << 16)); + if (!intel->intelScreen->no_hw) { + int ring; + + if (intel->gen < 6 || !intel->batch->is_blit) { + ring = I915_EXEC_RENDER; + } else { + ring = I915_EXEC_BLT; + } + + drm_intel_bo_mrb_exec(batch->buf, used, NULL, 0, + (x_off & 0xffff) | (y_off << 16), ring); } if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { @@ -242,10 +250,10 @@ intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes) + const void *data, GLuint bytes, bool is_blit) { assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes); + intel_batchbuffer_require_space(batch, bytes, is_blit); __memcpy(batch->ptr, data, bytes); batch->ptr += bytes; } @@ -262,22 +270,32 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) struct intel_context *intel = batch->intel; if (intel->gen >= 6) { - BEGIN_BATCH(8); - - /* XXX workaround: issue any post sync != 0 before write cache flush = 1 */ - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | - PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_NO_WRITE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + if (intel->batch->is_blit) { + BEGIN_BATCH_BLT(4); + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(8); + /* XXX workaround: issue any post sync != 0 before write + * cache flush = 1 + */ + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_NO_WRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } } else if (intel->gen >= 4) { BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 428c027c2f..635708587a 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -31,6 +31,7 @@ struct intel_batchbuffer } emit; #endif + bool is_blit; GLuint dirty_state; GLuint reserved_space; }; @@ -55,7 +56,7 @@ void intel_batchbuffer_reset(struct intel_batchbuffer *batch); * intel_buffer_dword() calls. */ void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes); + const void *data, GLuint bytes, bool is_blit); void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLuint bytes); @@ -114,8 +115,16 @@ intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f) static INLINE void intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz) + GLuint sz, int is_blit) { + + if (batch->intel->gen >= 6 && batch->is_blit != is_blit && + batch->ptr != batch->map) { + intel_batchbuffer_flush(batch); + } + + batch->is_blit = is_blit; + #ifdef DEBUG assert(sz < batch->size - 8); #endif @@ -124,9 +133,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, } static INLINE void -intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n) +intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n, bool is_blit) { - intel_batchbuffer_require_space(batch, n * 4); + intel_batchbuffer_require_space(batch, n * 4, is_blit); + #ifdef DEBUG assert(batch->map); assert(batch->emit.start_ptr == NULL); @@ -154,7 +164,8 @@ intel_batchbuffer_advance(struct intel_batchbuffer *batch) */ #define BATCH_LOCALS -#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n) +#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n, false) +#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(intel->batch, n, true) #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) #define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index c2917e9b07..6232e479cb 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -38,6 +38,7 @@ #include "intel_reg.h" #include "intel_regions.h" #include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -107,10 +108,6 @@ intelEmitCopyBlit(struct intel_context *intel, drm_intel_bo *aper_array[3]; BATCH_LOCALS; - /* Blits are in a different ringbuffer so we don't use them. */ - if (intel->gen >= 6) - return GL_FALSE; - if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -140,7 +137,7 @@ intelEmitCopyBlit(struct intel_context *intel, if (pass >= 2) return GL_FALSE; - intel_batchbuffer_require_space(intel->batch, 8 * 4); + intel_batchbuffer_require_space(intel->batch, 8 * 4, true); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -181,7 +178,7 @@ intelEmitCopyBlit(struct intel_context *intel, assert(dst_x < dst_x2); assert(dst_y < dst_y2); - BEGIN_BATCH(8); + BEGIN_BATCH_BLT(8); OUT_BATCH(CMD); OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); @@ -209,7 +206,7 @@ intelEmitCopyBlit(struct intel_context *intel, * which we're clearing with triangles. * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear */ -void +GLbitfield intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) { struct intel_context *intel = intel_context(ctx); @@ -217,11 +214,9 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) GLuint clear_depth; GLboolean all; GLint cx, cy, cw, ch; + GLbitfield fail_mask = 0; BATCH_LOCALS; - /* Blits are in a different ringbuffer so we don't use them. */ - assert(intel->gen < 6); - /* * Compute values for clearing the buffers. */ @@ -242,7 +237,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) ch = fb->_Ymax - fb->_Ymin; if (cw == 0 || ch == 0) - return; + return 0; GLuint buf; all = (cw == fb->Width && ch == fb->Height); @@ -338,9 +333,9 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) clear[3], clear[3]); break; default: - _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", - irb->Base.Format); - clear_val = 0; + fail_mask |= bufBit; + mask &= ~bufBit; + continue; } } @@ -356,7 +351,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) intel_batchbuffer_flush(intel->batch); } - BEGIN_BATCH(6); + BEGIN_BATCH_BLT(6); OUT_BATCH(CMD); OUT_BATCH(BR13); OUT_BATCH((y1 << 16) | x1); @@ -375,6 +370,8 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) else mask &= ~bufBit; /* turn off bit, for faster loop exit */ } + + return fail_mask; } GLboolean @@ -393,10 +390,6 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13, blit_cmd; - /* Blits are in a different ringbuffer so we don't use them. */ - if (intel->gen >= 6) - return GL_FALSE; - if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -420,7 +413,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, intel_batchbuffer_require_space( intel->batch, (8 * 4) + (3 * 4) + - dwords * 4 ); + dwords * 4, true); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) @@ -439,7 +432,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, if (dst_tiling != I915_TILING_NONE) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3); + BEGIN_BATCH_BLT(8 + 3); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ @@ -456,9 +449,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH(((y + h) << 16) | (x + w)); ADVANCE_BATCH(); - intel_batchbuffer_data( intel->batch, - src_bits, - dwords * 4 ); + intel_batchbuffer_data(intel->batch, + src_bits, + dwords * 4, true); intel_batchbuffer_emit_mi_flush(intel->batch); @@ -480,9 +473,6 @@ intel_emit_linear_blit(struct intel_context *intel, GLuint pitch, height; GLboolean ok; - /* Blits are in a different ringbuffer so we don't use them. */ - assert(intel->gen < 6); - /* The pitch given to the GPU must be DWORD aligned, and * we want width to match pitch. Max width is (1 << 15 - 1), * rounding that down to the nearest DWORD is 1 << 15 - 4 @@ -514,3 +504,81 @@ intel_emit_linear_blit(struct intel_context *intel, assert(ok); } } + +/** + * Used to initialize the alpha value of an ARGB8888 teximage after + * loading it from an XRGB8888 source. + * + * This is very common with glCopyTexImage2D(). + */ +void +intel_set_teximage_alpha_to_one(struct gl_context *ctx, + struct intel_texture_image *intel_image) +{ + struct intel_context *intel = intel_context(ctx); + unsigned int image_x, image_y; + uint32_t x1, y1, x2, y2; + uint32_t BR13, CMD; + int pitch, cpp; + drm_intel_bo *aper_array[2]; + struct intel_region *region = intel_image->mt->region; + BATCH_LOCALS; + + assert(intel_image->base.TexFormat == MESA_FORMAT_ARGB8888); + + /* get dest x/y in destination texture */ + intel_miptree_get_image_offset(intel_image->mt, + intel_image->level, + intel_image->face, + 0, + &image_x, &image_y); + + x1 = image_x; + y1 = image_y; + x2 = image_x + intel_image->base.Width; + y2 = image_y + intel_image->base.Height; + + pitch = region->pitch; + cpp = region->cpp; + + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, + intel_image->mt->region->buffer, (pitch * region->cpp), + x1, y1, x2 - x1, y2 - y1); + + BR13 = br13_for_cpp(region->cpp) | 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + CMD |= XY_BLT_WRITE_ALPHA; + + assert(region->tiling != I915_TILING_Y); + +#ifndef I915 + if (region->tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + pitch /= 4; + } +#endif + BR13 |= (pitch * region->cpp); + + /* do space check before going any further */ + aper_array[0] = intel->batch->buf; + aper_array[1] = region->buffer; + + if (drm_intel_bufmgr_check_aperture_space(aper_array, + ARRAY_SIZE(aper_array)) != 0) { + intel_batchbuffer_flush(intel->batch); + } + + BEGIN_BATCH_BLT(6); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + OUT_RELOC_FENCED(region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index 0163146573..88322c7b49 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -33,7 +33,7 @@ extern void intelCopyBuffer(const __DRIdrawable * dpriv, const drm_clip_rect_t * rect); -extern void intelClearWithBlit(struct gl_context * ctx, GLbitfield mask); +extern GLbitfield intelClearWithBlit(struct gl_context * ctx, GLbitfield mask); GLboolean intelEmitCopyBlit(struct intel_context *intel, @@ -69,5 +69,7 @@ void intel_emit_linear_blit(struct intel_context *intel, drm_intel_bo *src_bo, unsigned int src_offset, unsigned int size); +void intel_set_teximage_alpha_to_one(struct gl_context *ctx, + struct intel_texture_image *intel_image); #endif diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 87da60a771..d917161c4b 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -27,6 +27,7 @@ #include "main/imports.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/macros.h" #include "main/bufferobj.h" diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index fa451f0045..82d29e7671 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -85,6 +85,8 @@ intelClear(struct gl_context *ctx, GLbitfield mask) GLbitfield blit_mask = 0; GLbitfield swrast_mask = 0; struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_renderbuffer *irb; + int i; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { intel->front_buffer_dirty = GL_TRUE; @@ -93,6 +95,22 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (0) fprintf(stderr, "%s\n", __FUNCTION__); + /* Get SW clears out of the way: Anything without an intel_renderbuffer */ + for (i = 0; i < BUFFER_COUNT; i++) { + if (!(mask & (1 << i))) + continue; + + irb = intel_get_renderbuffer(fb, i); + if (unlikely(!irb)) { + swrast_mask |= (1 << i); + mask &= ~(1 << i); + } + } + if (unlikely(swrast_mask)) { + debug_mask("swrast", swrast_mask); + _swrast_Clear(ctx, swrast_mask); + } + /* HW color buffers (front, back, aux, generic FBO, etc) */ if (colorMask == ~0) { /* clear all R,G,B,A */ @@ -151,44 +169,18 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } } - if (intel->gen >= 6) { - /* Blits are in a different ringbuffer so we don't use them. */ - tri_mask |= blit_mask; - blit_mask = 0; - } - - /* SW fallback clearing */ - swrast_mask = mask & ~tri_mask & ~blit_mask; - - { - /* look for non-Intel renderbuffers (clear them with swrast) */ - GLbitfield blit_or_tri = blit_mask | tri_mask; - while (blit_or_tri) { - GLuint i = _mesa_ffs(blit_or_tri) - 1; - GLbitfield bufBit = 1 << i; - if (!fb->Attachment[i].Renderbuffer->ClassID) { - blit_mask &= ~bufBit; - tri_mask &= ~bufBit; - swrast_mask |= bufBit; - } - blit_or_tri ^= bufBit; - } - } + /* Anything left, just use tris */ + tri_mask |= mask & ~blit_mask; if (blit_mask) { debug_mask("blit", blit_mask); - intelClearWithBlit(ctx, blit_mask); + tri_mask |= intelClearWithBlit(ctx, blit_mask); } if (tri_mask) { debug_mask("tri", tri_mask); _mesa_meta_Clear(&intel->ctx, tri_mask); } - - if (swrast_mask) { - debug_mask("swrast", swrast_mask); - _swrast_Clear(ctx, swrast_mask); - } } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 152cdcaf37..2a5029964b 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -519,7 +519,6 @@ static const struct dri_debug_control debug_control[] = { { "sing", DEBUG_SINGLE_THREAD }, { "thre", DEBUG_SINGLE_THREAD }, { "wm", DEBUG_WM }, - { "glsl_force", DEBUG_GLSL_FORCE }, { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, { "clip", DEBUG_CLIP }, @@ -566,7 +565,8 @@ intel_glFlush(struct gl_context *ctx) intel_flush(ctx); intel_flush_front(ctx); - intel->need_throttle = GL_TRUE; + if (intel->is_front_buffer_rendering) + intel->need_throttle = GL_TRUE; } void @@ -683,6 +683,69 @@ intelInitContext(struct intel_context *intel, } } + memset(&ctx->TextureFormatSupported, 0, + sizeof(ctx->TextureFormatSupported)); + ctx->TextureFormatSupported[MESA_FORMAT_ARGB8888] = GL_TRUE; + if (intel->has_xrgb_textures) + ctx->TextureFormatSupported[MESA_FORMAT_XRGB8888] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_ARGB4444] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_ARGB1555] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGB565] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_L8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_A8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_I8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_AL88] = GL_TRUE; + if (intel->gen >= 4) + ctx->TextureFormatSupported[MESA_FORMAT_AL1616] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE; + /* + * This was disabled in initial FBO enabling to avoid combinations + * of depth+stencil that wouldn't work together. We since decided + * that it was OK, since it's up to the app to come up with the + * combo that actually works, so this can probably be re-enabled. + */ + /* + ctx->TextureFormatSupported[MESA_FORMAT_Z16] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_Z24] = GL_TRUE; + */ + + /* ctx->Extensions.MESA_ycbcr_texture */ + ctx->TextureFormatSupported[MESA_FORMAT_YCBCR] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_YCBCR_REV] = GL_TRUE; + + /* GL_3DFX_texture_compression_FXT1 */ + ctx->TextureFormatSupported[MESA_FORMAT_RGB_FXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_FXT1] = GL_TRUE; + + /* GL_EXT_texture_compression_s3tc */ + ctx->TextureFormatSupported[MESA_FORMAT_RGB_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT3] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT5] = GL_TRUE; + +#ifndef I915 + /* GL_ARB_texture_rg */ + ctx->TextureFormatSupported[MESA_FORMAT_R8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_R16] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RG88] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RG1616] = GL_TRUE; + + ctx->TextureFormatSupported[MESA_FORMAT_DUDV8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SIGNED_RGBA8888_REV] = GL_TRUE; + + /* GL_EXT_texture_sRGB */ + ctx->TextureFormatSupported[MESA_FORMAT_SARGB8] = GL_TRUE; + if (intel->gen >= 5 || intel->is_g4x) + ctx->TextureFormatSupported[MESA_FORMAT_SRGB_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT3] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT5] = GL_TRUE; + if (intel->has_luminance_srgb) { + ctx->TextureFormatSupported[MESA_FORMAT_SL8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SLA8] = GL_TRUE; + } +#endif + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, sPriv->myNum, (intel->gen >= 4) ? "i965" : "i915"); if (intelScreen->deviceID == PCI_CHIP_I865_G) @@ -800,11 +863,6 @@ intelInitContext(struct intel_context *intel, if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); - /* XXX force SIMD8 kernel for Sandybridge before we fixed - SIMD16 interpolation. */ - if (intel->gen == 6) - INTEL_DEBUG |= DEBUG_GLSL_FORCE; - intel->batch = intel_batchbuffer_alloc(intel); intel_fbo_init(intel); @@ -838,11 +896,6 @@ intelInitContext(struct intel_context *intel, intel->always_flush_cache = 1; } - /* Disable all hardware rendering (skip emitting batches and fences/waits - * to the kernel) - */ - intel->no_hw = getenv("INTEL_NO_HW") != NULL; - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 9d5139c000..fd3c3ba58f 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -29,7 +29,7 @@ #define INTELCONTEXT_INC - +#include <stdbool.h> #include "main/mtypes.h" #include "main/mm.h" #include "dri_metaops.h" @@ -149,6 +149,7 @@ struct intel_context void (*assert_not_dirty) (struct intel_context *intel); void (*debug_batch)(struct intel_context *intel); + bool (*render_target_supported)(gl_format format); } vtbl; struct dri_metaops meta; @@ -207,7 +208,6 @@ struct intel_context GLboolean hw_stipple; GLboolean depth_buffer_is_float; GLboolean no_rast; - GLboolean no_hw; GLboolean always_flush_batch; GLboolean always_flush_cache; @@ -362,7 +362,6 @@ extern int INTEL_DEBUG; #define DEBUG_WM 0x800000 #define DEBUG_URB 0x1000000 #define DEBUG_VS 0x2000000 -#define DEBUG_GLSL_FORCE 0x4000000 #define DEBUG_CLIP 0x8000000 #define DBG(...) do { \ diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 556a4195bd..fab533f39f 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -25,12 +25,15 @@ * **************************************************************************/ +#include "main/mfeatures.h" + #include "intel_chipset.h" #include "intel_context.h" #include "intel_extensions.h" #include "utils.h" +#define need_GL_ARB_ES2_compatibility #define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_framebuffer_object #define need_GL_ARB_map_buffer_range @@ -78,8 +81,10 @@ * i965_dri. */ static const struct dri_extension card_extensions[] = { + { "GL_ARB_ES2_compatibility", GL_ARB_ES2_compatibility_functions }, { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, { "GL_ARB_explicit_attrib_location", NULL }, + { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, { "GL_ARB_half_float_pixel", NULL }, { "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions }, { "GL_ARB_multitexture", NULL }, @@ -161,7 +166,6 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_fragment_program", NULL }, { "GL_ARB_fragment_program_shadow", NULL }, { "GL_ARB_fragment_shader", NULL }, - { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, { "GL_ARB_half_float_vertex", NULL }, { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c index 71c86339c7..5ef6b0561d 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c +++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "main/extensions.h" +#include "main/mfeatures.h" #include "intel_extensions.h" @@ -62,6 +63,7 @@ static const char *es2_extensions[] = { "GL_EXT_blend_minmax", "GL_EXT_blend_subtract", "GL_EXT_stencil_wrap", + "GL_NV_blend_square", /* Optional GLES2 */ "GL_ARB_framebuffer_object", @@ -79,6 +81,17 @@ static const char *es2_extensions[] = { }; /** + * \brief Extensions to disable. + * + * These extensions must be manually disabled because they may have been + * enabled by default. + */ +static const char* es2_extensions_disabled[] = { + "GL_OES_standard_derivatives", + NULL, +}; + +/** * Initializes potential list of extensions if ctx == NULL, or actually enables * extensions for a context. */ @@ -92,4 +105,6 @@ intelInitExtensionsES2(struct gl_context *ctx) for (i = 0; es2_extensions[i]; i++) _mesa_enable_extension(ctx, es2_extensions[i]); + for (i = 0; es2_extensions_disabled[i]; i++) + _mesa_disable_extension(ctx, es2_extensions_disabled[i]); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 862a13d2ea..0db5a491c8 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -28,6 +28,7 @@ #include "main/imports.h" #include "main/macros.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/fbobject.h" #include "main/framebuffer.h" @@ -42,6 +43,11 @@ #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" +#include "intel_tex.h" +#include "intel_span.h" +#ifndef I915 +#include "brw_context.h" +#endif #define FILE_DEBUG_FLAG DEBUG_FBO @@ -107,79 +113,27 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer ASSERT(rb->Name != 0); switch (internalFormat) { - case GL_RED: - case GL_R8: - rb->Format = MESA_FORMAT_R8; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_R16: - rb->Format = MESA_FORMAT_R16; - rb->DataType = GL_UNSIGNED_SHORT; - break; - case GL_RG: - case GL_RG8: - rb->Format = MESA_FORMAT_RG88; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RG16: - rb->Format = MESA_FORMAT_RG1616; - rb->DataType = GL_UNSIGNED_SHORT; - break; - case GL_R3_G3_B2: - case GL_RGB4: - case GL_RGB5: - rb->Format = MESA_FORMAT_RGB565; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RGB: - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - rb->Format = MESA_FORMAT_XRGB8888; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RGBA: - case GL_RGBA2: - case GL_RGBA4: - case GL_RGB5_A1: - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - rb->Format = MESA_FORMAT_ARGB8888; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_ALPHA: - case GL_ALPHA8: - rb->Format = MESA_FORMAT_A8; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_DEPTH_COMPONENT16: - rb->Format = MESA_FORMAT_Z16; - rb->DataType = GL_UNSIGNED_SHORT; + default: + /* Use the same format-choice logic as for textures. + * Renderbuffers aren't any different from textures for us, + * except they're less useful because you can't texture with + * them. + */ + rb->Format = intel->ctx.Driver.ChooseTextureFormat(ctx, internalFormat, + GL_NONE, GL_NONE); break; case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1_EXT: case GL_STENCIL_INDEX4_EXT: case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: - case GL_DEPTH_STENCIL_EXT: - case GL_DEPTH24_STENCIL8_EXT: - /* alloc a depth+stencil buffer */ + /* These aren't actual texture formats, so force them here. */ rb->Format = MESA_FORMAT_S8_Z24; - rb->DataType = GL_UNSIGNED_INT_24_8_EXT; break; - default: - _mesa_problem(ctx, - "Unexpected format in intel_alloc_renderbuffer_storage"); - return GL_FALSE; } rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); + rb->DataType = intel_mesa_format_to_rb_datatype(rb->Format); cpp = _mesa_get_format_bytes(rb->Format); intel_flush(ctx); @@ -195,10 +149,15 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer DBG("Allocating %d x %d Intel RBO\n", width, height); tiling = I915_TILING_NONE; + if (intel->use_texture_tiling) { + GLenum base_format = _mesa_get_format_base_format(rb->Format); - /* Gen6 requires depth must be tiling */ - if (intel->gen >= 6 && rb->Format == MESA_FORMAT_S8_Z24) - tiling = I915_TILING_Y; + if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL)) + tiling = I915_TILING_Y; + else + tiling = I915_TILING_X; + } irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp, width, height, GL_TRUE); @@ -334,53 +293,10 @@ intel_create_renderbuffer(gl_format format) _mesa_init_renderbuffer(&irb->Base, 0); irb->Base.ClassID = INTEL_RB_CLASS; - - switch (format) { - case MESA_FORMAT_RGB565: - irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_XRGB8888: - irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_ARGB8888: - irb->Base._BaseFormat = GL_RGBA; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_Z16: - irb->Base._BaseFormat = GL_DEPTH_COMPONENT; - irb->Base.DataType = GL_UNSIGNED_SHORT; - break; - case MESA_FORMAT_X8_Z24: - irb->Base._BaseFormat = GL_DEPTH_COMPONENT; - irb->Base.DataType = GL_UNSIGNED_INT; - break; - case MESA_FORMAT_S8_Z24: - irb->Base._BaseFormat = GL_DEPTH_STENCIL; - irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; - break; - case MESA_FORMAT_A8: - irb->Base._BaseFormat = GL_ALPHA; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_R8: - irb->Base._BaseFormat = GL_RED; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_RG88: - irb->Base._BaseFormat = GL_RG; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - default: - _mesa_problem(NULL, - "Unexpected intFormat in intel_create_renderbuffer"); - free(irb); - return NULL; - } - + irb->Base._BaseFormat = _mesa_get_format_base_format(format); irb->Base.Format = format; irb->Base.InternalFormat = irb->Base._BaseFormat; + irb->Base.DataType = intel_mesa_format_to_rb_datatype(format); /* intel-specific methods */ irb->Base.Delete = intel_delete_renderbuffer; @@ -457,66 +373,16 @@ static GLboolean intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { - if (texImage->TexFormat == MESA_FORMAT_ARGB8888) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RGBA8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_XRGB8888) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to XGBA8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_SARGB8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to SARGB8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_RGB565) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RGB5 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_ARGB1555) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to ARGB1555 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_ARGB4444) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to ARGB4444 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_A8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to A8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_R8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to R8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_RG88) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RG88 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_R16) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to R8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_RG1616) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to RG88 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_Z16) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to DEPTH16 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) { - irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; - DBG("Render to DEPTH_STENCIL texture OK\n"); - } - else { + if (!intel_span_supports_format(texImage->TexFormat)) { DBG("Render to texture BAD FORMAT %s\n", _mesa_get_format_name(texImage->TexFormat)); return GL_FALSE; + } else { + DBG("Render to texture %s\n", _mesa_get_format_name(texImage->TexFormat)); } irb->Base.Format = texImage->TexFormat; - + irb->Base.DataType = intel_mesa_format_to_rb_datatype(texImage->TexFormat); irb->Base.InternalFormat = texImage->InternalFormat; irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat); irb->Base.Width = texImage->Width; @@ -558,6 +424,24 @@ intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage) return irb; } +static void +intel_set_draw_offset_for_image(struct intel_texture_image *intel_image, + int zoffset) +{ + struct intel_mipmap_tree *mt = intel_image->mt; + unsigned int dst_x, dst_y; + + /* compute offset of the particular 2D image within the texture region */ + intel_miptree_get_image_offset(intel_image->mt, + intel_image->level, + intel_image->face, + zoffset, + &dst_x, &dst_y); + + mt->region->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; + mt->region->draw_x = dst_x; + mt->region->draw_y = dst_y; +} /** * Called by glFramebufferTexture[123]DEXT() (and other places) to @@ -574,7 +458,6 @@ intel_render_texture(struct gl_context * ctx, = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); struct intel_texture_image *intel_image; - GLuint dst_x, dst_y; (void) fb; @@ -620,19 +503,53 @@ intel_render_texture(struct gl_context * ctx, intel_region_reference(&irb->region, intel_image->mt->region); } - /* compute offset of the particular 2D image within the texture region */ - intel_miptree_get_image_offset(intel_image->mt, - att->TextureLevel, - att->CubeMapFace, - att->Zoffset, - &dst_x, &dst_y); - - intel_image->mt->region->draw_offset = (dst_y * intel_image->mt->region->pitch + - dst_x) * intel_image->mt->cpp; - intel_image->mt->region->draw_x = dst_x; - intel_image->mt->region->draw_y = dst_y; + intel_set_draw_offset_for_image(intel_image, att->Zoffset); intel_image->used_as_render_target = GL_TRUE; +#ifndef I915 + if (!brw_context(ctx)->has_surface_tile_offset && + (intel_image->mt->region->draw_offset & 4095) != 0) { + /* Original gen4 hardware couldn't draw to a non-tile-aligned + * destination in a miptree unless you actually setup your + * renderbuffer as a miptree and used the fragile + * lod/array_index/etc. controls to select the image. So, + * instead, we just make a new single-level miptree and render + * into that. + */ + struct intel_context *intel = intel_context(ctx); + struct intel_mipmap_tree *old_mt = intel_image->mt; + struct intel_mipmap_tree *new_mt; + int comp_byte = 0, texel_bytes; + + if (_mesa_is_format_compressed(intel_image->base.TexFormat)) + comp_byte = intel_compressed_num_bytes(intel_image->base.TexFormat); + + texel_bytes = _mesa_get_format_bytes(intel_image->base.TexFormat); + + new_mt = intel_miptree_create(intel, newImage->TexObject->Target, + intel_image->base._BaseFormat, + intel_image->base.InternalFormat, + intel_image->level, + intel_image->level, + intel_image->base.Width, + intel_image->base.Height, + intel_image->base.Depth, + texel_bytes, comp_byte, GL_TRUE); + + intel_miptree_image_copy(intel, + new_mt, + intel_image->face, + intel_image->level, + old_mt); + + intel_miptree_release(intel, &intel_image->mt); + intel_image->mt = new_mt; + intel_set_draw_offset_for_image(intel_image, att->Zoffset); + + intel_region_release(&irb->region); + intel_region_reference(&irb->region, intel_image->mt->region); + } +#endif /* update drawing region, etc */ intel_draw_buffer(ctx, fb); } @@ -655,7 +572,8 @@ intel_finish_render_texture(struct gl_context * ctx, _glthread_GetID(), att->Texture->Name); /* Flag that this image may now be validated into the object's miptree. */ - intel_image->used_as_render_target = GL_FALSE; + if (intel_image) + intel_image->used_as_render_target = GL_FALSE; /* Since we've (probably) rendered to the texture and will (likely) use * it in the texture domain later on in this batchbuffer, flush the @@ -671,6 +589,7 @@ intel_finish_render_texture(struct gl_context * ctx, static void intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) { + struct intel_context *intel = intel_context(ctx); const struct intel_renderbuffer *depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); const struct intel_renderbuffer *stencilRb = @@ -678,10 +597,10 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) int i; if (depthRb && stencilRb && stencilRb != depthRb) { - if (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE && - ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE && - (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Texture->Name == - ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Texture->Name)) { + if (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE && + fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE && + (fb->Attachment[BUFFER_DEPTH].Texture->Name == + fb->Attachment[BUFFER_STENCIL].Texture->Name)) { /* OK */ } else { /* we only support combined depth/stencil buffers, not separate @@ -694,33 +613,35 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) } } - for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); + for (i = 0; i < Elements(fb->Attachment); i++) { + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; + + if (fb->Attachment[i].Type == GL_NONE) + continue; - if (rb == NULL) + /* A supported attachment will have a Renderbuffer set either + * from being a Renderbuffer or being a texture that got the + * intel_wrap_texture() treatment. + */ + rb = fb->Attachment[i].Renderbuffer; + if (rb == NULL) { + DBG("attachment without renderbuffer\n"); + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; continue; + } + irb = intel_renderbuffer(rb); if (irb == NULL) { DBG("software rendering renderbuffer\n"); fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; continue; } - switch (irb->Base.Format) { - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - case MESA_FORMAT_SARGB8: - case MESA_FORMAT_RGB565: - case MESA_FORMAT_ARGB1555: - case MESA_FORMAT_ARGB4444: - case MESA_FORMAT_A8: - case MESA_FORMAT_R8: - case MESA_FORMAT_R16: - case MESA_FORMAT_RG88: - case MESA_FORMAT_RG1616: - break; - default: + if (!intel_span_supports_format(irb->Base.Format) || + !intel->vtbl.render_target_supported(irb->Base.Format)) { + DBG("Unsupported texture/renderbuffer format attached: %s\n", + _mesa_get_format_name(irb->Base.Format)); fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 9c4e5c5ee8..a3409274fb 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -157,8 +157,6 @@ struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, GLenum internal_format, - GLuint first_level, - GLuint last_level, struct intel_region *region, GLuint depth0, GLuint compress_byte) @@ -166,7 +164,7 @@ intel_miptree_create_for_region(struct intel_context *intel, struct intel_mipmap_tree *mt; mt = intel_miptree_create_internal(intel, target, internal_format, - first_level, last_level, + 0, 0, region->width, region->height, 1, region->cpp, compress_byte, I915_TILING_NONE); diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 21db2f4d3b..760a8bce60 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -137,8 +137,6 @@ struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, GLenum internal_format, - GLuint first_level, - GLuint last_level, struct intel_region *region, GLuint depth0, GLuint compress_byte); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 955b100b21..5258699d3f 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -37,6 +37,8 @@ #define FLUSH_MAP_CACHE (1 << 0) #define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) +#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2) + /* Stalls command execution waiting for the given events to have occurred. */ #define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 061f0d278d..5d14bcd34c 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -31,23 +31,11 @@ #include "main/renderbuffer.h" #include "main/hash.h" #include "main/fbobject.h" +#include "main/mfeatures.h" #include "utils.h" #include "xmlpool.h" -#include "intel_batchbuffer.h" -#include "intel_buffers.h" -#include "intel_bufmgr.h" -#include "intel_chipset.h" -#include "intel_fbo.h" -#include "intel_screen.h" -#include "intel_tex.h" -#include "intel_regions.h" - -#include "i915_drm.h" - -#define DRI_CONF_TEXTURE_TILING(def) \ - PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE @@ -92,6 +80,17 @@ DRI_CONF_END; const GLuint __driNConfigOptions = 11; +#include "intel_batchbuffer.h" +#include "intel_buffers.h" +#include "intel_bufmgr.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_screen.h" +#include "intel_tex.h" +#include "intel_regions.h" + +#include "i915_drm.h" + #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; #endif /*USE_NEW_INTERFACE */ @@ -452,7 +451,7 @@ intelCreateContext(gl_api api, return brwCreateContext(api, mesaVis, driContextPriv, sharedContextPrivate); #endif - fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID); + fprintf(stderr, "Unrecognized deviceID 0x%x\n", intelScreen->deviceID); return GL_FALSE; } @@ -462,7 +461,8 @@ intel_init_bufmgr(struct intel_screen *intelScreen) __DRIscreen *spriv = intelScreen->driScrnPriv; int num_fences = 0; - intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; + intelScreen->no_hw = (getenv("INTEL_NO_HW") != NULL || + getenv("INTEL_DEVID_OVERRIDE") != NULL); intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ); if (intelScreen->bufmgr == NULL) { @@ -497,6 +497,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) GLenum fb_format[3]; GLenum fb_type[3]; unsigned int api_mask; + char *devid_override; static const GLenum back_buffer_modes[] = { GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML @@ -523,6 +524,16 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) &intelScreen->deviceID)) return GL_FALSE; + /* Allow an override of the device ID for the purpose of making the + * driver produce dumps for debugging of new chipset enablement. + * This implies INTEL_NO_HW, to avoid programming your actual GPU + * incorrectly. + */ + devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + intelScreen->deviceID = strtod(devid_override, NULL); + } + api_mask = (1 << __DRI_API_OPENGL); #if FEATURE_ES1 api_mask |= (1 << __DRI_API_GLES); diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 104cadf0f9..1f41518535 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include <stdbool.h> #include "main/glheader.h" #include "main/macros.h" #include "main/mtypes.h" @@ -113,6 +114,26 @@ intel_set_span_functions(struct intel_context *intel, #define TAG2(x,y) intel_##x##y##_A8 #include "spantmp2.h" +#define SPANTMP_MESA_FMT MESA_FORMAT_R8 +#define TAG(x) intel_##x##_R8 +#define TAG2(x,y) intel_##x##y##_R8 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_RG88 +#define TAG(x) intel_##x##_RG88 +#define TAG2(x,y) intel_##x##y##_RG88 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_R16 +#define TAG(x) intel_##x##_R16 +#define TAG2(x,y) intel_##x##y##_R16 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_RG1616 +#define TAG(x) intel_##x##_RG1616 +#define TAG2(x,y) intel_##x##y##_RG1616 +#include "spantmp2.h" + #define LOCAL_DEPTH_VARS \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = rb->Name ? 1 : -1; \ @@ -339,6 +360,32 @@ intel_unmap_vertex_shader_textures(struct gl_context *ctx) } } +typedef void (*span_init_func)(struct gl_renderbuffer *rb); + +static span_init_func intel_span_init_funcs[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_A8] = intel_InitPointers_A8, + [MESA_FORMAT_RGB565] = intel_InitPointers_RGB565, + [MESA_FORMAT_ARGB4444] = intel_InitPointers_ARGB4444, + [MESA_FORMAT_ARGB1555] = intel_InitPointers_ARGB1555, + [MESA_FORMAT_XRGB8888] = intel_InitPointers_xRGB8888, + [MESA_FORMAT_ARGB8888] = intel_InitPointers_ARGB8888, + [MESA_FORMAT_SARGB8] = intel_InitPointers_ARGB8888, + [MESA_FORMAT_Z16] = intel_InitDepthPointers_z16, + [MESA_FORMAT_X8_Z24] = intel_InitDepthPointers_z24_s8, + [MESA_FORMAT_S8_Z24] = intel_InitDepthPointers_z24_s8, + [MESA_FORMAT_R8] = intel_InitPointers_R8, + [MESA_FORMAT_RG88] = intel_InitPointers_RG88, + [MESA_FORMAT_R16] = intel_InitPointers_R16, + [MESA_FORMAT_RG1616] = intel_InitPointers_RG1616, +}; + +bool +intel_span_supports_format(gl_format format) +{ + return intel_span_init_funcs[format] != NULL; +} + /** * Plug in appropriate span read/write functions for the given renderbuffer. * These are used for the software fallbacks. @@ -349,37 +396,6 @@ intel_set_span_functions(struct intel_context *intel, { struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; - switch (irb->Base.Format) { - case MESA_FORMAT_A8: - intel_InitPointers_A8(rb); - break; - case MESA_FORMAT_RGB565: - intel_InitPointers_RGB565(rb); - break; - case MESA_FORMAT_ARGB4444: - intel_InitPointers_ARGB4444(rb); - break; - case MESA_FORMAT_ARGB1555: - intel_InitPointers_ARGB1555(rb); - break; - case MESA_FORMAT_XRGB8888: - intel_InitPointers_xRGB8888(rb); - break; - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_SARGB8: - intel_InitPointers_ARGB8888(rb); - break; - case MESA_FORMAT_Z16: - intel_InitDepthPointers_z16(rb); - break; - case MESA_FORMAT_X8_Z24: - case MESA_FORMAT_S8_Z24: - intel_InitDepthPointers_z24_s8(rb); - break; - default: - _mesa_problem(NULL, - "Unexpected MesaFormat %d in intelSetSpanFunctions", - irb->Base.Format); - break; - } + assert(intel_span_init_funcs[irb->Base.Format]); + intel_span_init_funcs[irb->Base.Format](rb); } diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index aa8d08e843..5a4c4e8e52 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -28,6 +28,9 @@ #ifndef _INTEL_SPAN_H #define _INTEL_SPAN_H +#include "main/formats.h" +#include <stdbool.h> + extern void intelInitSpanFuncs(struct gl_context * ctx); extern void intelSpanRenderFinish(struct gl_context * ctx); @@ -38,5 +41,6 @@ void intel_renderbuffer_unmap(struct intel_context *intel, struct gl_renderbuffer *rb); void intel_map_vertex_shader_textures(struct gl_context *ctx); void intel_unmap_vertex_shader_textures(struct gl_context *ctx); +bool intel_span_supports_format(gl_format format); #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 2c21ea0576..2c3eab20fd 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -113,7 +113,6 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target, void intelInitTextureFuncs(struct dd_function_table *functions) { - functions->ChooseTextureFormat = intelChooseTextureFormat; functions->GenerateMipmap = intelGenerateMipmap; functions->NewTextureObject = intelNewTextureObject; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 7906554e45..6552ed0d33 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -40,8 +40,7 @@ void intelInitTextureSubImageFuncs(struct dd_function_table *functions); void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); -gl_format intelChooseTextureFormat(struct gl_context *ctx, GLint internalFormat, - GLenum format, GLenum type); +GLenum intel_mesa_format_to_rb_datatype(gl_format format); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 87b31bf078..a40011ab40 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -35,7 +35,6 @@ #include "intel_screen.h" #include "intel_context.h" -#include "intel_buffers.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_fbo.h" @@ -50,44 +49,20 @@ * Do the best we can using the blitter. A future project is to use * the texture engine and fragment programs for these copies. */ -static const struct intel_region * -get_teximage_source(struct intel_context *intel, GLenum internalFormat) +static struct intel_renderbuffer * +get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat) { - struct intel_renderbuffer *irb; - DBG("%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat)); switch (internalFormat) { case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: - irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - if (irb && irb->region && irb->region->cpp == 2) - return irb->region; - return NULL; case GL_DEPTH24_STENCIL8_EXT: case GL_DEPTH_STENCIL_EXT: - irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - if (irb && irb->region && irb->region->cpp == 4) - return irb->region; - return NULL; - case 4: - case GL_RGBA: - case GL_RGBA8: - irb = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - /* We're required to set alpha to 1.0 in this case, but we can't - * do that with the blitter, so fall back. We could use the 3D - * engine or do two passes with the blitter, but it doesn't seem - * worth it for this case. */ - if (irb->Base._BaseFormat == GL_RGB) - return NULL; - return irb->region; - case 3: - case GL_RGB: - case GL_RGB8: - return intel_readbuf_region(intel); + return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); default: - return NULL; + return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); } } @@ -101,23 +76,34 @@ do_copy_texsubimage(struct intel_context *intel, GLint x, GLint y, GLsizei width, GLsizei height) { struct gl_context *ctx = &intel->ctx; - const struct intel_region *src = get_teximage_source(intel, internalFormat); + struct intel_renderbuffer *irb; + bool copy_supported_with_alpha_override = false; + + intel_prepare_render(intel); - if (!intelImage->mt || !src || !src->buffer) { + irb = get_teximage_readbuffer(intel, internalFormat); + if (!intelImage->mt || !irb || !irb->region) { if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", - __FUNCTION__, intelImage->mt, src, internalFormat); + __FUNCTION__, intelImage->mt, irb, internalFormat); return GL_FALSE; } - if (intelImage->mt->cpp != src->cpp) { - fallback_debug("%s fail %d vs %d cpp\n", - __FUNCTION__, intelImage->mt->cpp, src->cpp); + if (irb->Base.Format == MESA_FORMAT_XRGB8888 && + intelImage->base.TexFormat == MESA_FORMAT_ARGB8888) { + copy_supported_with_alpha_override = true; + } + + if (intelImage->base.TexFormat != irb->Base.Format && + !copy_supported_with_alpha_override) { + if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) + fprintf(stderr, "%s mismatched formats %s, %s\n", + __FUNCTION__, + _mesa_get_format_name(intelImage->base.TexFormat), + _mesa_get_format_name(irb->Base.Format)); return GL_FALSE; } - /* intel_flush(ctx); */ - intel_prepare_render(intel); { drm_intel_bo *dst_bo = intel_region_buffer(intel, intelImage->mt->region, @@ -140,24 +126,24 @@ do_copy_texsubimage(struct intel_context *intel, if (ctx->ReadBuffer->Name == 0) { /* Flip vertical orientation for system framebuffers */ y = ctx->ReadBuffer->Height - (y + height); - src_pitch = -src->pitch; + src_pitch = -irb->region->pitch; } else { /* reading from a FBO, y is already oriented the way we like */ - src_pitch = src->pitch; + src_pitch = irb->region->pitch; } /* blit from src buffer to texture */ if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_pitch, - src->buffer, + irb->region->buffer, 0, - src->tiling, + irb->region->tiling, intelImage->mt->region->pitch, dst_bo, 0, intelImage->mt->region->tiling, - src->draw_x + x, src->draw_y + y, + irb->region->draw_x + x, irb->region->draw_y + y, image_x + dstx, image_y + dsty, width, height, GL_COPY)) { @@ -165,6 +151,9 @@ do_copy_texsubimage(struct intel_context *intel, } } + if (copy_supported_with_alpha_override) + intel_set_teximage_alpha_to_one(ctx, intelImage); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 9d73a2fb37..87745bc66d 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -4,224 +4,35 @@ #include "main/formats.h" /** - * Choose hardware texture format given the user's glTexImage parameters. - * - * It works out that this function is fine for all the supported - * hardware. However, there is still a need to map the formats onto - * hardware descriptors. - * - * Note that the i915 can actually support many more formats than - * these if we take the step of simply swizzling the colors - * immediately after sampling... + * Returns the renderbuffer DataType for a MESA_FORMAT. */ -gl_format -intelChooseTextureFormat(struct gl_context * ctx, GLint internalFormat, - GLenum format, GLenum type) +GLenum +intel_mesa_format_to_rb_datatype(gl_format format) { - struct intel_context *intel = intel_context(ctx); - -#if 0 - printf("%s intFmt=0x%x format=0x%x type=0x%x\n", - __FUNCTION__, internalFormat, format, type); -#endif - - switch (internalFormat) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) - return MESA_FORMAT_ARGB4444; - else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) - return MESA_FORMAT_ARGB1555; - else - return MESA_FORMAT_ARGB8888; - - case 3: - case GL_RGB: - case GL_COMPRESSED_RGB: - if (type == GL_UNSIGNED_SHORT_5_6_5) - return MESA_FORMAT_RGB565; - else if (intel->has_xrgb_textures) - return MESA_FORMAT_XRGB8888; - else - return MESA_FORMAT_ARGB8888; - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return MESA_FORMAT_ARGB8888; - - case GL_RGBA4: - case GL_RGBA2: - return MESA_FORMAT_ARGB4444; - - case GL_RGB5_A1: - return MESA_FORMAT_ARGB1555; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - if (intel->has_xrgb_textures) - return MESA_FORMAT_XRGB8888; - else - return MESA_FORMAT_ARGB8888; - - case GL_RGB5: - case GL_RGB4: - case GL_R3_G3_B2: - return MESA_FORMAT_RGB565; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return MESA_FORMAT_A8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return MESA_FORMAT_L8; - - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - /* i915 could implement this mode using MT_32BIT_RG1616. However, this - * would require an extra swizzle instruction in the fragment shader to - * convert the { R, G, 1.0, 1.0 } to { R, R, R, G }. - */ -#ifndef I915 - return MESA_FORMAT_AL1616; -#else - /* FALLTHROUGH */ -#endif - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return MESA_FORMAT_AL88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return MESA_FORMAT_I8; - - case GL_YCBCR_MESA: - if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE) - return MESA_FORMAT_YCBCR; - else - return MESA_FORMAT_YCBCR_REV; - - case GL_COMPRESSED_RGB_FXT1_3DFX: - return MESA_FORMAT_RGB_FXT1; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return MESA_FORMAT_RGBA_FXT1; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return MESA_FORMAT_RGB_DXT1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return MESA_FORMAT_RGBA_DXT1; - - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return MESA_FORMAT_RGBA_DXT3; - - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return MESA_FORMAT_RGBA_DXT5; - - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: -#if 0 - return MESA_FORMAT_Z16; -#else - /* fall-through. - * 16bpp depth texture can't be paired with a stencil buffer so - * always used combined depth/stencil format. - */ -#endif - case GL_DEPTH_STENCIL_EXT: - case GL_DEPTH24_STENCIL8_EXT: - return MESA_FORMAT_S8_Z24; - -#ifndef I915 - case GL_SRGB_EXT: - case GL_SRGB8_EXT: - case GL_SRGB_ALPHA_EXT: - case GL_SRGB8_ALPHA8_EXT: - case GL_COMPRESSED_SRGB_EXT: - case GL_COMPRESSED_SRGB_ALPHA_EXT: - case GL_COMPRESSED_SLUMINANCE_EXT: - case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: - return MESA_FORMAT_SARGB8; - case GL_SLUMINANCE_EXT: - case GL_SLUMINANCE8_EXT: - if (intel->has_luminance_srgb) - return MESA_FORMAT_SL8; - else - return MESA_FORMAT_SARGB8; - case GL_SLUMINANCE_ALPHA_EXT: - case GL_SLUMINANCE8_ALPHA8_EXT: - if (intel->has_luminance_srgb) - return MESA_FORMAT_SLA8; - else - return MESA_FORMAT_SARGB8; - case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - return MESA_FORMAT_SRGB_DXT1; - - /* i915 could also do this */ - case GL_DUDV_ATI: - case GL_DU8DV8_ATI: - return MESA_FORMAT_DUDV8; - case GL_RGBA_SNORM: - case GL_RGBA8_SNORM: - return MESA_FORMAT_SIGNED_RGBA8888_REV; - - /* i915 can do a RG16, but it can't do any of the other RED or RG formats. - * In addition, it only implements the broken D3D mode where undefined - * components are read as 1.0. I'm not sure who thought reading - * { R, G, 1.0, 1.0 } from a red-green texture would be useful. - */ - case GL_RED: - case GL_R8: - return MESA_FORMAT_R8; - case GL_R16: - return MESA_FORMAT_R16; - case GL_RG: - case GL_RG8: - return MESA_FORMAT_RG88; - case GL_RG16: - return MESA_FORMAT_RG1616; -#endif - + switch (format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_SARGB8: + case MESA_FORMAT_R8: + case MESA_FORMAT_RG88: + case MESA_FORMAT_A8: + case MESA_FORMAT_AL88: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB4444: + return GL_UNSIGNED_BYTE; + case MESA_FORMAT_R16: + case MESA_FORMAT_RG1616: + case MESA_FORMAT_Z16: + return GL_UNSIGNED_SHORT; + case MESA_FORMAT_X8_Z24: + return GL_UNSIGNED_INT; + case MESA_FORMAT_S8_Z24: + return GL_UNSIGNED_INT_24_8_EXT; default: - fprintf(stderr, "unexpected texture format %s in %s\n", - _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__); - return MESA_FORMAT_NONE; + _mesa_problem(NULL, "unexpected MESA_FORMAT for renderbuffer"); + return GL_UNSIGNED_BYTE; } - - return MESA_FORMAT_NONE; /* never get here */ } int intel_compressed_num_bytes(GLuint mesaFormat) diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 41cdbfd2cb..cd8c4c22e5 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -1,6 +1,7 @@ #include "main/glheader.h" #include "main/macros.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/enums.h" #include "main/bufferobj.h" @@ -55,11 +56,11 @@ logbase2(int n) * 0)..(1x1). Consider pruning this tree at a validation if the * saving is worth it. */ -static void -guess_and_alloc_mipmap_tree(struct intel_context *intel, - struct intel_texture_object *intelObj, - struct intel_texture_image *intelImage, - GLboolean expect_accelerated_upload) +static struct intel_mipmap_tree * +intel_miptree_create_for_teximage(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage, + GLboolean expect_accelerated_upload) { GLuint firstLevel; GLuint lastLevel; @@ -72,70 +73,71 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, DBG("%s\n", __FUNCTION__); if (intelImage->base.Border) - return; + return NULL; if (intelImage->level > intelObj->base.BaseLevel && (intelImage->base.Width == 1 || (intelObj->base.Target != GL_TEXTURE_1D && intelImage->base.Height == 1) || (intelObj->base.Target == GL_TEXTURE_3D && - intelImage->base.Depth == 1))) - return; - - /* If this image disrespects BaseLevel, allocate from level zero. - * Usually BaseLevel == 0, so it's unlikely to happen. - */ - if (intelImage->level < intelObj->base.BaseLevel) - firstLevel = 0; - else - firstLevel = intelObj->base.BaseLevel; - - - /* Figure out image dimensions at start level. - */ - for (i = intelImage->level; i > firstLevel; i--) { - width <<= 1; - if (height != 1) - height <<= 1; - if (depth != 1) - depth <<= 1; - } + intelImage->base.Depth == 1))) { + /* For this combination, we're at some lower mipmap level and + * some important dimension is 1. We can't extrapolate up to a + * likely base level width/height/depth for a full mipmap stack + * from this info, so just allocate this one level. + */ + firstLevel = intelImage->level; + lastLevel = intelImage->level; + } else { + /* If this image disrespects BaseLevel, allocate from level zero. + * Usually BaseLevel == 0, so it's unlikely to happen. + */ + if (intelImage->level < intelObj->base.BaseLevel) + firstLevel = 0; + else + firstLevel = intelObj->base.BaseLevel; + + /* Figure out image dimensions at start level. */ + for (i = intelImage->level; i > firstLevel; i--) { + width <<= 1; + if (height != 1) + height <<= 1; + if (depth != 1) + depth <<= 1; + } - /* Guess a reasonable value for lastLevel. This is probably going - * to be wrong fairly often and might mean that we have to look at - * resizable buffers, or require that buffers implement lazy - * pagetable arrangements. - */ - if ((intelObj->base.MinFilter == GL_NEAREST || - intelObj->base.MinFilter == GL_LINEAR) && - intelImage->level == firstLevel && - (intel->gen < 4 || firstLevel == 0)) { - lastLevel = firstLevel; - } - else { - lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth)); + /* Guess a reasonable value for lastLevel. This is probably going + * to be wrong fairly often and might mean that we have to look at + * resizable buffers, or require that buffers implement lazy + * pagetable arrangements. + */ + if ((intelObj->base.MinFilter == GL_NEAREST || + intelObj->base.MinFilter == GL_LINEAR) && + intelImage->level == firstLevel && + (intel->gen < 4 || firstLevel == 0)) { + lastLevel = firstLevel; + } else { + lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth)); + } } - assert(!intelObj->mt); if (_mesa_is_format_compressed(intelImage->base.TexFormat)) comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat); texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); - intelObj->mt = intel_miptree_create(intel, - intelObj->base.Target, - intelImage->base._BaseFormat, - intelImage->base.InternalFormat, - firstLevel, - lastLevel, - width, - height, - depth, - texelBytes, - comp_byte, - expect_accelerated_upload); - - DBG("%s - success\n", __FUNCTION__); + return intel_miptree_create(intel, + intelObj->base.Target, + intelImage->base._BaseFormat, + intelImage->base.InternalFormat, + firstLevel, + lastLevel, + width, + height, + depth, + texelBytes, + comp_byte, + expect_accelerated_upload); } @@ -343,41 +345,29 @@ intelTexImage(struct gl_context * ctx, texImage->Data = NULL; } - if (!intelObj->mt) { - guess_and_alloc_mipmap_tree(intel, intelObj, intelImage, pixels == NULL); - if (!intelObj->mt) { - DBG("guess_and_alloc_mipmap_tree: failed\n"); - } - } - assert(!intelImage->mt); if (intelObj->mt && intel_miptree_match_image(intelObj->mt, &intelImage->base)) { - + /* Use an existing miptree when possible */ intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); } else if (intelImage->base.Border == 0) { - int comp_byte = 0; - GLuint texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); - GLenum baseFormat = _mesa_get_format_base_format(intelImage->base.TexFormat); - if (_mesa_is_format_compressed(intelImage->base.TexFormat)) { - comp_byte = - intel_compressed_num_bytes(intelImage->base.TexFormat); - } - /* Didn't fit in the object miptree, but it's suitable for inclusion in * a miptree, so create one just for our level and store it in the image. * It'll get moved into the object miptree at validate time. */ - intelImage->mt = intel_miptree_create(intel, target, - baseFormat, - internalFormat, - level, level, - width, height, depth, - texelBytes, - comp_byte, pixels == NULL); - + intelImage->mt = intel_miptree_create_for_teximage(intel, intelObj, + intelImage, + pixels == NULL); + + /* Even if the object currently has a mipmap tree associated + * with it, this one is a more likely candidate to represent the + * whole object since our level didn't fit what was there + * before, and any lower levels would fit into our miptree. + */ + if (intelImage->mt) + intel_miptree_reference(&intelObj->mt, intelImage->mt); } /* PBO fastpaths: @@ -711,8 +701,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, } mt = intel_miptree_create_for_region(intel, target, - internalFormat, - 0, 0, rb->region, 1, 0); + internalFormat, rb->region, 1, 0); if (mt == NULL) return; @@ -777,7 +766,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, mt = intel_miptree_create_for_region(intel, target, image->internal_format, - 0, 0, image->region, 1, 0); + image->region, 1, 0); if (mt == NULL) return; diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index 5f60e0ea4f..e93ef4a472 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -32,11 +32,11 @@ struct intel_texture_object { struct gl_texture_object base; /* The "parent" object */ - /* The mipmap tree must include at least these levels once - * validated: + /* This is a mirror of base._MaxLevel, updated at validate time, + * except that we don't bother with the non-base levels for + * non-mipmapped textures. */ - GLuint firstLevel; - GLuint lastLevel; + unsigned int _MaxLevel; /* Offset for firstLevel image: */ diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index ed5c5d896b..8537e7f368 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -8,72 +8,21 @@ #define FILE_DEBUG_FLAG DEBUG_TEXTURE /** - * Compute which mipmap levels that really need to be sent to the hardware. - * This depends on the base image size, GL_TEXTURE_MIN_LOD, - * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. + * When validating, we only care about the texture images that could + * be seen, so for non-mipmapped modes we want to ignore everything + * but BaseLevel. */ static void -intel_calculate_first_last_level(struct intel_context *intel, - struct intel_texture_object *intelObj) +intel_update_max_level(struct intel_context *intel, + struct intel_texture_object *intelObj) { struct gl_texture_object *tObj = &intelObj->base; - const struct gl_texture_image *const baseImage = - tObj->Image[0][tObj->BaseLevel]; - /* These must be signed values. MinLod and MaxLod can be negative numbers, - * and having firstLevel and lastLevel as signed prevents the need for - * extra sign checks. - */ - int firstLevel; - int lastLevel; - - /* Yes, this looks overly complicated, but it's all needed. - */ - switch (tObj->Target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP: - if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { - /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. - */ - firstLevel = lastLevel = tObj->BaseLevel; - } - else { - if (intel->gen == 2) { - firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5); - firstLevel = MAX2(firstLevel, tObj->BaseLevel); - firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5); - lastLevel = MAX2(lastLevel, tObj->BaseLevel); - lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = MIN2(lastLevel, tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ - } else { - /* Min/max LOD are taken into account in sampler state. We don't - * want to re-layout textures just because clamping has been applied - * since it means a bunch of blitting around and probably no memory - * savings (since we have to keep the other levels around anyway). - */ - firstLevel = tObj->BaseLevel; - lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, - tObj->MaxLevel); - /* need at least one level */ - lastLevel = MAX2(firstLevel, lastLevel); - } - } - break; - case GL_TEXTURE_RECTANGLE_NV: - case GL_TEXTURE_4D_SGIS: - firstLevel = lastLevel = 0; - break; - default: - return; + if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { + intelObj->_MaxLevel = tObj->BaseLevel; + } else { + intelObj->_MaxLevel = tObj->_MaxLevel; } - - /* save these values */ - intelObj->firstLevel = firstLevel; - intelObj->lastLevel = lastLevel; } /** @@ -135,8 +84,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* What levels must the tree include at a minimum? */ - intel_calculate_first_last_level(intel, intelObj); - firstImage = intel_texture_image(tObj->Image[0][intelObj->firstLevel]); + intel_update_max_level(intel, intelObj); + firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]); /* Fallback case: */ @@ -147,23 +96,6 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) return GL_FALSE; } - - /* If both firstImage and intelObj have a tree which can contain - * all active images, favour firstImage. Note that because of the - * completeness requirement, we know that the image dimensions - * will match. - */ - if (firstImage->mt && - firstImage->mt != intelObj->mt && - firstImage->mt->first_level <= intelObj->firstLevel && - firstImage->mt->last_level >= intelObj->lastLevel) { - - if (intelObj->mt) - intel_miptree_release(intel, &intelObj->mt); - - intel_miptree_reference(&intelObj->mt, firstImage->mt); - } - if (_mesa_is_format_compressed(firstImage->base.TexFormat)) { comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat); cpp = comp_byte; @@ -173,18 +105,17 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* Check tree can hold all active levels. Check tree matches * target, imageFormat, etc. - * - * XXX: For some layouts (eg i945?), the test might have to be - * first_level == firstLevel, as the tree isn't valid except at the - * original start level. Hope to get around this by - * programming minLod, maxLod, baseLevel into the hardware and - * leaving the tree alone. + * + * For pre-gen4, we have to match first_level == tObj->BaseLevel, + * because we don't have the control that gen4 does to make min/mag + * determination happen at a nonzero (hardware) baselevel. Because + * of that, we just always relayout on baselevel change. */ if (intelObj->mt && (intelObj->mt->target != intelObj->base.Target || intelObj->mt->internal_format != firstImage->base.InternalFormat || - intelObj->mt->first_level != intelObj->firstLevel || - intelObj->mt->last_level != intelObj->lastLevel || + intelObj->mt->first_level != tObj->BaseLevel || + intelObj->mt->last_level < intelObj->_MaxLevel || intelObj->mt->width0 != firstImage->base.Width || intelObj->mt->height0 != firstImage->base.Height || intelObj->mt->depth0 != firstImage->base.Depth || @@ -201,8 +132,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) intelObj->base.Target, firstImage->base._BaseFormat, firstImage->base.InternalFormat, - intelObj->firstLevel, - intelObj->lastLevel, + tObj->BaseLevel, + intelObj->_MaxLevel, firstImage->base.Width, firstImage->base.Height, firstImage->base.Depth, @@ -215,11 +146,13 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) */ nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; for (face = 0; face < nr_faces; face++) { - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) { + for (i = tObj->BaseLevel; i <= intelObj->_MaxLevel; i++) { struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][i]); - - /* Need to import images in main memory or held in other trees. + /* skip too small size mipmap */ + if (intelImage == NULL) + break; + /* Need to import images in main memory or held in other trees. * If it's a render target, then its data isn't needed to be in * the object tree (otherwise we'd be FBO incomplete), and we need * to keep track of the image's MT as needing to be pulled in still, @@ -289,7 +222,7 @@ intel_tex_map_images(struct intel_context *intel, DBG("%s\n", __FUNCTION__); - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) + for (i = intelObj->base.BaseLevel; i <= intelObj->_MaxLevel; i++) intel_tex_map_level_images(intel, intelObj, i); } @@ -299,6 +232,6 @@ intel_tex_unmap_images(struct intel_context *intel, { int i; - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) + for (i = intelObj->base.BaseLevel; i <= intelObj->_MaxLevel; i++) intel_tex_unmap_level_images(intel, intelObj, i); } diff --git a/src/mesa/drivers/dri/mach64/mach64_context.h b/src/mesa/drivers/dri/mach64/mach64_context.h index 11e8f53b28..70bc0ae79d 100644 --- a/src/mesa/drivers/dri/mach64/mach64_context.h +++ b/src/mesa/drivers/dri/mach64/mach64_context.h @@ -295,11 +295,11 @@ extern GLboolean mach64UnbindContext( __DRIcontext *driContextPriv ); #define LE32_OUT( x, y ) do { *(GLuint *)(x) = (y); } while (0) #define LE32_OUT_FLOAT( x, y ) do { *(GLfloat *)(x) = (y); } while (0) #else -#ifndef __OpenBSD__ -#include <byteswap.h> -#else +#if defined(__OpenBSD__) || defined(__NetBSD__) #include <machine/endian.h> #define bswap_32 bswap32 +#else +#include <byteswap.h> #endif #define LE32_IN( x ) bswap_32( *(GLuint *)(x) ) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c index 27e2892f71..45630be7f6 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c @@ -24,6 +24,8 @@ * */ +#include "main/mfeatures.h" + #include "nouveau_driver.h" #include "nouveau_context.h" #include "nouveau_fbo.h" diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.h b/src/mesa/drivers/dri/nouveau/nouveau_driver.h index 8036b18edc..c5ac1282d0 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.h @@ -38,7 +38,6 @@ #include <assert.h> #include "nouveau_device.h" -#include "nouveau_pushbuf.h" #include "nouveau_grobj.h" #include "nouveau_channel.h" #include "nouveau_bo.h" @@ -46,6 +45,7 @@ #include "nouveau_screen.h" #include "nouveau_state.h" #include "nouveau_surface.h" +#include "nv04_pushbuf.h" #define DRIVER_DATE "20091015" #define DRIVER_AUTHOR "Nouveau" diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index 079b5d63e4..b36b578878 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -32,6 +32,7 @@ #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/fbobject.h" +#include "main/mfeatures.h" static GLboolean set_renderbuffer_format(struct gl_renderbuffer *rb, GLenum internalFormat) diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 5abfc9dac5..9c045b73ac 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/simple_list.h" #include "main/imports.h" #include "main/extensions.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c index 8a047e6419..b62290231b 100644 --- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c +++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c @@ -200,6 +200,7 @@ void r200EmitArrays( struct gl_context *ctx, GLubyte *vimap_rev ) } default: assert(0); + emitsize = 0; } if (!rmesa->radeon.tcl.aos[nr].bo) { rcommon_emit_vector( ctx, diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 064324731b..092b757583 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/simple_list.h" #include "main/texstore.h" #include "main/teximage.h" diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 7b9c316794..e0d349b98c 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -24,6 +24,7 @@ #include <stdio.h> +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" @@ -54,6 +55,8 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { struct rc_sub_instruction * inst = &rci->U.I; + unsigned i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; @@ -65,27 +68,12 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) continue; } - switch (inst->Opcode) { - case RC_OPCODE_FRC: - case RC_OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case RC_OPCODE_ADD: - case RC_OPCODE_MAX: - case RC_OPCODE_MIN: - case RC_OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case RC_OPCODE_CMP: - case RC_OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; + if (!info->IsComponentwise) { + continue; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); } } } @@ -93,7 +81,6 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { int is_r500 = c->Base.is_r500; - int kill_consts = c->Base.remove_unused_constants; int opt = !c->Base.disable_optimizations; /* Lists of instruction transformations. */ @@ -133,7 +120,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, {"dataflow optimize", 1, opt, rc_optimize, NULL}, {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, - {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too * many texture indirections.*/ @@ -150,9 +137,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {NULL, 0, 0, NULL, NULL} }; + c->Base.type = RC_FRAGMENT_PROGRAM; c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; - rc_run_compiler(&c->Base, fs_list, "Fragment Program"); + rc_run_compiler(&c->Base, fs_list); rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index bf8341f017..472029f63d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -26,6 +26,7 @@ #include "../r300_reg.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "radeon_swizzle.h" @@ -790,19 +791,14 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) if (!hwtemps[j]) break; } - if (j >= c->max_temp_regs) { - rc_error(c, "Too many temporaries\n"); - return; + ta[orig].Allocated = 1; + if (last_inst_src_reladdr && + last_inst_src_reladdr->IP > inst->IP) { + ta[orig].HwTemp = orig; } else { - ta[orig].Allocated = 1; - if (last_inst_src_reladdr && - last_inst_src_reladdr->IP > inst->IP) { - ta[orig].HwTemp = orig; - } else { - ta[orig].HwTemp = j; - } - hwtemps[ta[orig].HwTemp] = 1; + ta[orig].HwTemp = j; } + hwtemps[ta[orig].HwTemp] = 1; } inst->U.I.DstReg.Index = ta[orig].HwTemp; @@ -1018,7 +1014,6 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) { int is_r500 = c->Base.is_r500; - int kill_consts = c->Base.remove_unused_constants; int opt = !c->Base.disable_optimizations; /* Lists of instruction transformations. */ @@ -1062,18 +1057,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) {"dataflow optimize", 1, opt, rc_optimize, NULL}, /* This pass must be done after optimizations. */ {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, - {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, {"register allocation", 1, opt, allocate_temporary_registers, NULL}, - {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, 1, translate_vertex_program, NULL}, {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, {NULL, 0, 0, NULL, NULL} }; + c->Base.type = RC_VERTEX_PROGRAM; c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; - rc_run_compiler(&c->Base, vs_list, "Vertex Program"); + rc_run_compiler(&c->Base, vs_list); c->code->InputsRead = c->Base.Program.InputsRead; c->code->OutputsWritten = c->Base.Program.OutputsWritten; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 289bb87ae5..ef81be48f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -29,6 +29,7 @@ #include <stdio.h> +#include "radeon_compiler_util.h" #include "../r300_reg.h" /** diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 4286baed0c..65548604bc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -29,6 +29,7 @@ #include "radeon_dataflow.h" #include "radeon_program.h" #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" void rc_init(struct radeon_compiler * c) @@ -356,66 +357,92 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) static void reg_count_callback(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { - unsigned int * max_reg = userdata; + int *max_reg = userdata; if (file == RC_FILE_TEMPORARY) - index > *max_reg ? *max_reg = index : 0; + (int)index > *max_reg ? *max_reg = index : 0; } -static void print_stats(struct radeon_compiler * c) +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) { + int max_reg = -1; struct rc_instruction * tmp; - unsigned max_reg, insts, fc, tex, alpha, rgb, presub; - max_reg = insts = fc = tex = alpha = rgb = presub = 0; + memset(s, 0, sizeof(*s)); + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; tmp = tmp->Next){ const struct rc_opcode_info * info; rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); if (tmp->Type == RC_INSTRUCTION_NORMAL) { if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) - presub++; + s->num_presub_ops++; info = rc_get_opcode_info(tmp->U.I.Opcode); } else { if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) - presub++; + s->num_presub_ops++; if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) - presub++; + s->num_presub_ops++; /* Assuming alpha will never be a flow control or * a tex instruction. */ if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) - alpha++; + s->num_alpha_insts++; if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) - rgb++; + s->num_rgb_insts++; info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); } if (info->IsFlowControl) - fc++; + s->num_fc_insts++; if (info->HasTexture) - tex++; - insts++; + s->num_tex_insts++; + s->num_insts++; } - if (insts < 4) - return; - fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" - "~%4u Instructions\n" - "~%4u Vector Instructions (RGB)\n" - "~%4u Scalar Instructions (Alpha)\n" - "~%4u Flow Control Instructions\n" - "~%4u Texture Instructions\n" - "~%4u Presub Operations\n" - "~%4u Temporary Registers\n" - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - insts, rgb, alpha, fc, tex, presub, max_reg + 1); + s->num_temp_regs = max_reg + 1; } -/* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, - const char *shader_name) +static void print_stats(struct radeon_compiler * c) { - if (c->Debug & RC_DBG_LOG) { - fprintf(stderr, "%s: before compilation\n", shader_name); - rc_print_program(&c->Program); + struct rc_program_stats s; + + rc_get_stats(c, &s); + + if (s.num_insts < 4) + return; + + switch (c->type) { + case RC_VERTEX_PROGRAM: + fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Flow Control Instructions\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_fc_insts, s.num_temp_regs); + break; + + case RC_FRAGMENT_PROGRAM: + fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); + break; + default: + assert(0); } +} +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { + "Vertex Program", + "Fragment Program" +}; + +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ for (unsigned i = 0; list[i].name; i++) { if (list[i].predicate) { list[i].run(c, list[i].user); @@ -424,11 +451,23 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis return; if ((c->Debug & RC_DBG_LOG) && list[i].dump) { - fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name); + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); rc_print_program(&c->Program); } } } +} + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } + + rc_run_compiler_passes(c, list); + if (c->Debug & RC_DBG_STATS) print_stats(c); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 31fd469a04..e663339589 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -35,9 +35,16 @@ struct rc_swizzle_caps; +enum rc_program_type { + RC_VERTEX_PROGRAM, + RC_FRAGMENT_PROGRAM, + RC_NUM_PROGRAM_TYPES +}; + struct radeon_compiler { struct memory_pool Pool; struct rc_program Program; + enum rc_program_type type; unsigned Debug:2; unsigned Error:1; char * ErrorMsg; @@ -140,9 +147,21 @@ struct radeon_compiler_pass { void *user; /* Optional parameter which is passed to the run function. */ }; +struct rc_program_stats { + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; +}; + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); + /* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, - const char *shader_name); +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); void rc_validate_final_shader(struct radeon_compiler *c, void *user); #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 2b8d284ce9..2482fc68be 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -48,6 +48,91 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz) return mask; } +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; +} + +/* Reorder mask bits according to swizzle. */ +unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + unsigned int rc_src_reads_dst_mask( rc_register_file src_file, unsigned int src_idx, @@ -138,7 +223,6 @@ unsigned int rc_inst_can_use_presub( { struct can_use_presub_data d; unsigned int num_presub_srcs; - unsigned int presub_src_type = rc_source_type_mask(presub_writemask); const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); @@ -168,13 +252,7 @@ unsigned int rc_inst_can_use_presub( num_presub_srcs = rc_presubtract_src_reg_count(presub_op); - if ((presub_src_type & RC_SOURCE_RGB) - && d.RGBCount + num_presub_srcs > 3) { - return 0; - } - - if ((presub_src_type & RC_SOURCE_ALPHA) - && d.AlphaCount + num_presub_srcs > 3) { + if (d.RGBCount + num_presub_srcs > 3 || d.AlphaCount + num_presub_srcs > 3) { return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index e50dfbd4fb..461ab9ffb1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -8,6 +8,22 @@ struct rc_src_register; unsigned int rc_swizzle_to_writemask(unsigned int swz); +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); + +unsigned int combine_swizzles(unsigned int src, unsigned int swz); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); + +unsigned swizzle_mask(unsigned swizzle, unsigned mask); + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +void reset_srcreg(struct rc_src_register* reg); + unsigned int rc_src_reads_dst_mask( rc_register_file src_file, unsigned int src_idx, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c index a0f7bd8174..133a9f72ec 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -56,6 +56,7 @@ static void rewrite_source(struct radeon_compiler * c, mov->U.I.DstReg.Index = tempreg; mov->U.I.DstReg.WriteMask = split.Phase[phase]; mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; phase_refmask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index da495a3afa..25afd272be 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -67,6 +67,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_CMP, .Name = "CMP", .NumSrcRegs = 3, @@ -453,6 +460,7 @@ void rc_compute_sources_for_writemask( srcmasks[1] |= RC_MASK_XY; break; case RC_OPCODE_DP3: + case RC_OPCODE_XPD: srcmasks[0] |= RC_MASK_XYZ; srcmasks[1] |= RC_MASK_XYZ; break; @@ -460,6 +468,10 @@ void rc_compute_sources_for_writemask( srcmasks[0] |= RC_MASK_XYZW; srcmasks[1] |= RC_MASK_XYZW; break; + case RC_OPCODE_DPH: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZW; + break; case RC_OPCODE_TXB: case RC_OPCODE_TXP: srcmasks[0] |= RC_MASK_W; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index d3f639c870..7e66610127 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -50,6 +50,9 @@ typedef enum { /** vec4 instruction: dst.c = ceil(src0.c) */ RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 27b10ffbd6..44f4c0fbdc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -54,12 +54,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct combine.Negate = outer.Negate; } else { combine.Abs = inner.Abs; - combine.Negate = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(outer.Swizzle, chan); - if (swz < 4) - combine.Negate |= GET_BIT(inner.Negate, swz) << chan; - } + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); combine.Negate ^= outer.Negate; } combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 3f880c88fa..d53181e1f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -217,9 +217,9 @@ static void compute_live_intervals(struct radeon_compiler *c, * instruction is used as the end of the live interval and * the BGNLOOP instruction is used as the beginning. */ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { - s->BeginLoop = inst->IP; int loops = 1; struct rc_instruction * tmp; + s->BeginLoop = inst->IP; for(tmp = inst->Next; tmp != &s->C->Program.Instructions; tmp = tmp->Next) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index cbb5ef6237..9beb5d6357 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -701,7 +701,7 @@ static int convert_rgb_to_alpha( get_reg_valuep(s, RC_FILE_TEMPORARY, pair_inst->RGB.DestIndex, - rc_mask_to_swz(old_mask)); + rc_mask_to_swizzle(old_mask)); new_index = i; *new_regvalp = *old_regvalp; *old_regvalp = NULL; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index d7bedc5729..fe5756ebc4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -71,27 +71,6 @@ void rc_local_transform( } } -/** - * Left multiplication of a register with a swizzle - */ -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) -{ - struct rc_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = 0; - for(i = 0; i < 4; ++i) { - rc_swizzle swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - struct get_used_temporaries_data { unsigned char * Used; unsigned int UsedLength; @@ -244,14 +223,3 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c) return ip; } - -rc_swizzle rc_mask_to_swizzle(unsigned int mask) -{ - switch(mask) { - case RC_MASK_X: return RC_SWIZZLE_X; - case RC_MASK_Y: return RC_SWIZZLE_Y; - case RC_MASK_Z: return RC_SWIZZLE_Z; - case RC_MASK_W: return RC_SWIZZLE_W; - default: return RC_SWIZZLE_UNUSED; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index be078b4f4f..df6c94b35f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -159,61 +159,6 @@ struct rc_program { struct rc_constant_list Constants; }; -static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) -{ - if (idx & 0x4) - return idx; - return GET_SWZ(swz, idx); -} - -static inline unsigned int combine_swizzles4(unsigned int src, - rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) -{ - unsigned int ret = 0; - - ret |= get_swz(src, swz_x); - ret |= get_swz(src, swz_y) << 3; - ret |= get_swz(src, swz_z) << 6; - ret |= get_swz(src, swz_w) << 9; - - return ret; -} - -static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) -{ - unsigned int ret = 0; - - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; - - return ret; -} - -/** - * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W - */ -static inline rc_swizzle rc_mask_to_swz(unsigned int mask) -{ - switch (mask) { - case RC_MASK_X: return RC_SWIZZLE_X; - case RC_MASK_Y: return RC_SWIZZLE_Y; - case RC_MASK_Z: return RC_SWIZZLE_Z; - case RC_MASK_W: return RC_SWIZZLE_W; - default: assert(0); - } - return RC_SWIZZLE_UNUSED; -} -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); - -static inline void reset_srcreg(struct rc_src_register* reg) -{ - memset(reg, 0, sizeof(struct rc_src_register)); - reg->Swizzle = RC_SWIZZLE_XYZW; -} - - /** * A transformation that can be passed to \ref rc_local_transform. * diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 39408845d5..c8063171b8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -36,6 +36,7 @@ #include "radeon_program_alu.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" static struct rc_instruction *emit1( @@ -84,16 +85,6 @@ static struct rc_instruction *emit3( return fpi; } -static struct rc_dst_register dstreg(int file, int index) -{ - struct rc_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = RC_MASK_XYZW; - dst.RelAddr = 0; - return dst; -} - static struct rc_dst_register dstregtmpmask(int index, int mask) { struct rc_dst_register dst = {0}; @@ -186,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg) return swizzle_smear(reg, RC_SWIZZLE_W); } +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -209,10 +232,26 @@ static void transform_CEIL(struct radeon_compiler* c, * ceil(x) = x+frac(-x) */ - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); + rc_remove_instruction(inst); +} + +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -258,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c, static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -351,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c, static void transform_LRP(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -366,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c, static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); - struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); tempdst.WriteMask = RC_MASK_W; tempsrc.Swizzle = RC_SWIZZLE_WWWW; @@ -388,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c, static void transform_SEQ(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -407,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c, static void transform_SGE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -419,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c, static void transform_SGT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -431,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c, static void transform_SLE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -443,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c, static void transform_SLT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -455,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SNE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -473,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c, * CMP tmp1, x, 1, 0 * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0; + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit3(c, inst->Prev, RC_OPCODE_CMP, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, negate(inst->U.I.SrcReg[0]), builtin_one, builtin_zero); @@ -495,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -517,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c, static void transform_XPD(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -553,6 +592,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; @@ -592,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 - * The following sequence consumes two temps and two extra slots + * The following sequence consumes zero to two temps and two extra slots * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * @@ -600,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, * LRP dst, tmp0, src1, src2 * * Yes, I know, I'm a mad scientist. ~ C. & M. */ - int tempreg0 = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstreg(RC_FILE_TEMPORARY, tempreg0), + dst, inst->U.I.SrcReg[0], builtin_zero); /* LRP dst, tmp0, src1, src2 */ transform_LRP(c, emit3(c, inst->Prev, RC_OPCODE_LRP, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); } @@ -642,24 +682,25 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c, static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); unsigned constant_swizzle; int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001, &constant_swizzle); /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; emit1(c, inst->Prev, RC_OPCODE_MOV, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[0]); - /* MAX dst.z, src, 0.00...001 */ + /* MAX dst.y, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, - dstregtmpmask(tempreg, RC_MASK_Y), - srcreg(RC_FILE_TEMPORARY, tempreg), + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg); + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); } static void transform_r300_vertex_SEQ(struct radeon_compiler *c, @@ -743,12 +784,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, * SLT tmp1, x, 0; * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, builtin_zero, inst->U.I.SrcReg[0]); @@ -763,7 +805,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -781,6 +823,7 @@ int r300_transform_vertex_alu( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 530afa5e08..f9d9f34b6a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -28,6 +28,8 @@ #include "radeon_program_tex.h" +#include "radeon_compiler_util.h" + /* Series of transformations to be done on textures. */ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c index 5f67f536f6..7d76585a59 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -87,8 +87,9 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) rc_for_all_reads_src(inst, mark_used, &d); } - /* Pass 2: If there is relative addressing, mark all externals as used. */ - if (has_rel_addr) { + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (has_rel_addr || !c->remove_unused_constants) { for (unsigned i = 0; i < c->Program.Constants.Count; i++) if (constants[i].Type == RC_CONSTANT_EXTERNAL) const_used[i] = 1; @@ -119,7 +120,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) /* is_identity ==> new_count == old_count * !is_identity ==> new_count < old_count */ assert( is_identity || new_count < c->Program.Constants.Count); - assert(!(has_rel_addr && are_externals_remapped)); + assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); /* Pass 4: Redirect reads of all constants to their new locations. */ if (!is_identity) { @@ -127,7 +128,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) inst != &c->Program.Instructions; inst = inst->Next) { rc_remap_registers(inst, remap_regs, inv_remap_table); } - } /* Set the new constant count. Note that new_count may be less than diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 88165f7895..5bd19c0b9c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -51,6 +51,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user) struct rc_reader_data reader_data; unsigned char * used; + /* XXX Remove this once the register allocation works with flow control. */ + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + return; + } + used_length = 2 * rc_recompute_ips(c); used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); memset(used, 0, sizeof(unsigned char) * used_length); diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index c288834d24..0d8bd4fc70 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/extensions.h" #include "main/bufferobj.h" #include "main/texobj.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index de66293999..f930b4d06b 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/mipmap.h" #include "main/simple_list.h" #include "main/texstore.h" diff --git a/src/mesa/drivers/dri/r600/evergreen_blit.c b/src/mesa/drivers/dri/r600/evergreen_blit.c index fc9fa9d22c..e07da8c15b 100644 --- a/src/mesa/drivers/dri/r600/evergreen_blit.c +++ b/src/mesa/drivers/dri/r600/evergreen_blit.c @@ -1406,9 +1406,95 @@ eg_set_default_state(context_t *context) num_hs_stack_entries = 85; num_ls_stack_entries = 85; break; + case CHIP_FAMILY_PALM: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_BARTS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_FAMILY_TURKS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_CAICOS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 10; + num_gs_threads = 10; + num_es_threads = 10; + num_hs_threads = 10; + num_ls_threads = 10; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; } - if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CAICOS)) CLEARbit(sq_config, EG_SQ_CONFIG__VC_ENABLE_bit); else SETbit(sq_config, EG_SQ_CONFIG__VC_ENABLE_bit); diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c index 076a608573..648cda0078 100644 --- a/src/mesa/drivers/dri/r600/evergreen_state.c +++ b/src/mesa/drivers/dri/r600/evergreen_state.c @@ -1469,6 +1469,30 @@ static void evergreenInitSQConfig(struct gl_context * ctx) uMaxThreads = 192; uMaxStackEntries = 256; break; + case CHIP_FAMILY_BARTS: + uSqNumCfInsts = 2; + bVC_ENABLE = GL_TRUE; + uMaxGPRs = 256; + uPSThreadCount = 128; + uMaxThreads = 248; + uMaxStackEntries = 512; + break; + case CHIP_FAMILY_TURKS: + uSqNumCfInsts = 2; + bVC_ENABLE = GL_TRUE; + uMaxGPRs = 256; + uPSThreadCount = 128; + uMaxThreads = 248; + uMaxStackEntries = 256; + break; + case CHIP_FAMILY_CAICOS: + uSqNumCfInsts = 1; + bVC_ENABLE = GL_FALSE; + uMaxGPRs = 256; + uPSThreadCount = 128; + uMaxThreads = 192; + uMaxStackEntries = 256; + break; default: uSqNumCfInsts = 2; bVC_ENABLE = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 057d98e0fc..00708be199 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/bufferobj.h" #include "main/texobj.h" #include "main/points.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -259,7 +260,7 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen) R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); if( (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) - &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_PALM) ) + &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_CAICOS) ) { r700->bShaderUseMemConstant = GL_TRUE; } @@ -285,8 +286,13 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen) ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ - ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ + if (screen->chip_family >= CHIP_FAMILY_CEDAR) { + ctx->Const.MaxTextureLevels = 15; + ctx->Const.MaxTextureRectSize = 16384; + } else { + ctx->Const.MaxTextureLevels = 14; + ctx->Const.MaxTextureRectSize = 8192; + } ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index c3d68c41e5..2a99ded5d6 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/mipmap.h" #include "main/simple_list.h" #include "main/texstore.h" diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2bf24096a0..024853c1be 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -481,6 +481,8 @@ unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3) case EG_OP2_INST_FLT_TO_INT: case EG_OP2_INST_SIN: case EG_OP2_INST_COS: + case EG_OP2_INST_FLT_TO_INT_FLOOR: + case EG_OP2_INST_MOVA_INT: return 1; default: radeon_error( @@ -1134,7 +1136,7 @@ GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm, EG_VTX_WORD1__DST_SEL_W_shift, EG_VTX_WORD1__DST_SEL_W_mask); - SETfield(vfetch_instruction_ptr->m_Word1.val, 0, /* use format here, in r6/r7, format used set in const, need to use same */ + SETfield(vfetch_instruction_ptr->m_Word1.val, 1, EG_VTX_WORD1__UCF_shift, EG_VTX_WORD1__UCF_bit); SETfield(vfetch_instruction_ptr->m_Word1.val, data_format, @@ -3297,23 +3299,76 @@ GLboolean assemble_ARL(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = 0; - pAsm->D.dst.writex = 0; - pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; - - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + if(8 == pAsm->unAsic) { - return GL_FALSE; - } + /* Evergreen */ - if( GL_FALSE == next_ins(pAsm) ) + /* Float to Signed Integer Using FLOOR */ + pAsm->D.dst.opcode = EG_OP2_INST_FLT_TO_INT_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* Copy Signed Integer To Integer in AR and GPR */ + pAsm->D.dst.opcode = EG_OP2_INST_MOVA_INT; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + else { - return GL_FALSE; + /* r6xx/r7xx */ + + /* Truncate floating-point to the nearest integer + in the range [-256, +255], and copy to AR and + to a GPR. + */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } } return GL_TRUE; @@ -3334,7 +3389,14 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGE; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + } pAsm->D.dst.op3 = 1; tmp = (-1); @@ -3416,8 +3478,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) checkop1(pAsm); tmp = gethelpr(pAsm); - - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -3457,7 +3525,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4742,7 +4817,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4782,7 +4864,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -5010,7 +5099,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm) GLuint tmp = gethelpr(pAsm); /* tmp = (src > 0 ? 1 : src) */ - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + } pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -5033,7 +5129,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm) } /* dst = (-tmp > 0 ? -1 : tmp) */ - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + } pAsm->D.dst.op3 = 1; if( GL_FALSE == assemble_dst(pAsm) ) diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index 6c2648b6bd..60f1049602 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -67,6 +67,9 @@ struct drm_radeon_info { #define DRM_RADEON_INFO 0x1 #endif +static inline void radeon_gem_get_kernel_name(struct radeon_bo *dummy, uint32_t *value) +{ +} static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy) { diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 82789cec5e..399052cbcb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -445,6 +445,45 @@ #define PCI_CHIP_PALM_9804 0x9804 #define PCI_CHIP_PALM_9805 0x9805 +#define PCI_CHIP_BARTS_6720 0x6720 +#define PCI_CHIP_BARTS_6721 0x6721 +#define PCI_CHIP_BARTS_6722 0x6722 +#define PCI_CHIP_BARTS_6723 0x6723 +#define PCI_CHIP_BARTS_6724 0x6724 +#define PCI_CHIP_BARTS_6725 0x6725 +#define PCI_CHIP_BARTS_6726 0x6726 +#define PCI_CHIP_BARTS_6727 0x6727 +#define PCI_CHIP_BARTS_6728 0x6728 +#define PCI_CHIP_BARTS_6729 0x6729 +#define PCI_CHIP_BARTS_6738 0x6738 +#define PCI_CHIP_BARTS_6739 0x6739 + +#define PCI_CHIP_TURKS_6740 0x6740 +#define PCI_CHIP_TURKS_6741 0x6741 +#define PCI_CHIP_TURKS_6742 0x6742 +#define PCI_CHIP_TURKS_6743 0x6743 +#define PCI_CHIP_TURKS_6744 0x6744 +#define PCI_CHIP_TURKS_6745 0x6745 +#define PCI_CHIP_TURKS_6746 0x6746 +#define PCI_CHIP_TURKS_6747 0x6747 +#define PCI_CHIP_TURKS_6748 0x6748 +#define PCI_CHIP_TURKS_6749 0x6749 +#define PCI_CHIP_TURKS_6750 0x6750 +#define PCI_CHIP_TURKS_6758 0x6758 +#define PCI_CHIP_TURKS_6759 0x6759 + +#define PCI_CHIP_CAICOS_6760 0x6760 +#define PCI_CHIP_CAICOS_6761 0x6761 +#define PCI_CHIP_CAICOS_6762 0x6762 +#define PCI_CHIP_CAICOS_6763 0x6763 +#define PCI_CHIP_CAICOS_6764 0x6764 +#define PCI_CHIP_CAICOS_6765 0x6765 +#define PCI_CHIP_CAICOS_6766 0x6766 +#define PCI_CHIP_CAICOS_6767 0x6767 +#define PCI_CHIP_CAICOS_6768 0x6768 +#define PCI_CHIP_CAICOS_6770 0x6770 +#define PCI_CHIP_CAICOS_6779 0x6779 + enum { CHIP_FAMILY_R100, CHIP_FAMILY_RV100, @@ -489,6 +528,9 @@ enum { CHIP_FAMILY_CYPRESS, CHIP_FAMILY_HEMLOCK, CHIP_FAMILY_PALM, + CHIP_FAMILY_BARTS, + CHIP_FAMILY_TURKS, + CHIP_FAMILY_CAICOS, CHIP_FAMILY_LAST }; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index ca6ab46ca4..a1124483a6 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -100,6 +100,9 @@ static const char* get_chip_family_name(int chip_family) case CHIP_FAMILY_CYPRESS: return "CYPRESS"; case CHIP_FAMILY_HEMLOCK: return "HEMLOCK"; case CHIP_FAMILY_PALM: return "PALM"; + case CHIP_FAMILY_BARTS: return "BARTS"; + case CHIP_FAMILY_TURKS: return "TURKS"; + case CHIP_FAMILY_CAICOS: return "CAICOS"; default: return "unknown"; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index e3de534b5f..154a8815e4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/simple_list.h" #include "main/imports.h" #include "main/extensions.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index a36a1dc94a..6656d391e0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -28,6 +28,7 @@ #include "main/imports.h" #include "main/macros.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/enums.h" #include "main/fbobject.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h index 088f970172..a68a976877 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h @@ -49,7 +49,7 @@ struct _radeon_mipmap_level { }; /* store the max possible in the miptree */ -#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13 +#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 15 /** * A mipmap tree contains texture images in the layout that the hardware diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 94e56c2ade..a35fcfe9d7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1163,6 +1163,54 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_BARTS_6720: + case PCI_CHIP_BARTS_6721: + case PCI_CHIP_BARTS_6722: + case PCI_CHIP_BARTS_6723: + case PCI_CHIP_BARTS_6724: + case PCI_CHIP_BARTS_6725: + case PCI_CHIP_BARTS_6726: + case PCI_CHIP_BARTS_6727: + case PCI_CHIP_BARTS_6728: + case PCI_CHIP_BARTS_6729: + case PCI_CHIP_BARTS_6738: + case PCI_CHIP_BARTS_6739: + screen->chip_family = CHIP_FAMILY_BARTS; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_TURKS_6740: + case PCI_CHIP_TURKS_6741: + case PCI_CHIP_TURKS_6742: + case PCI_CHIP_TURKS_6743: + case PCI_CHIP_TURKS_6744: + case PCI_CHIP_TURKS_6745: + case PCI_CHIP_TURKS_6746: + case PCI_CHIP_TURKS_6747: + case PCI_CHIP_TURKS_6748: + case PCI_CHIP_TURKS_6749: + case PCI_CHIP_TURKS_6750: + case PCI_CHIP_TURKS_6758: + case PCI_CHIP_TURKS_6759: + screen->chip_family = CHIP_FAMILY_TURKS; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_CAICOS_6760: + case PCI_CHIP_CAICOS_6761: + case PCI_CHIP_CAICOS_6762: + case PCI_CHIP_CAICOS_6763: + case PCI_CHIP_CAICOS_6764: + case PCI_CHIP_CAICOS_6765: + case PCI_CHIP_CAICOS_6766: + case PCI_CHIP_CAICOS_6767: + case PCI_CHIP_CAICOS_6768: + case PCI_CHIP_CAICOS_6770: + case PCI_CHIP_CAICOS_6779: + screen->chip_family = CHIP_FAMILY_CAICOS; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", device_id); diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 83b1d1b1d7..8a35c7d2d2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/simple_list.h" #include "main/texstore.h" #include "main/teximage.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 8b1e34fe76..cf85a5bb57 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -33,6 +33,7 @@ #include "main/imports.h" #include "main/context.h" #include "main/enums.h" +#include "main/mfeatures.h" #include "main/mipmap.h" #include "main/texcompress.h" #include "main/texstore.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index a1908c6bc7..538a07fbba 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -32,6 +32,7 @@ #define RADEON_TEXTURE_H #include "main/formats.h" +#include "main/mfeatures.h" void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride, GLuint numrows, GLuint rowsize); diff --git a/src/mesa/drivers/dri/sis/server/sis_dri.h b/src/mesa/drivers/dri/sis/server/sis_dri.h index f0171f3c0f..7d8f507115 100644 --- a/src/mesa/drivers/dri/sis/server/sis_dri.h +++ b/src/mesa/drivers/dri/sis/server/sis_dri.h @@ -72,13 +72,4 @@ typedef struct { int dummy; } SISDRIContextRec, *SISDRIContextPtr; -#ifdef XFree86Server - -#include "screenint.h" - -Bool SISDRIScreenInit(ScreenPtr pScreen); -void SISDRICloseScreen(ScreenPtr pScreen); -Bool SISDRIFinishScreenInit(ScreenPtr pScreen); - -#endif #endif diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.h b/src/mesa/drivers/dri/tdfx/tdfx_context.h index fb38419dcd..7e2f0e00a8 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_context.h +++ b/src/mesa/drivers/dri/tdfx/tdfx_context.h @@ -41,11 +41,7 @@ #include <sys/time.h> #include "dri_util.h" -#ifdef XFree86Server -#include "GL/xf86glx.h" -#else #include "main/glheader.h" -#endif #if defined(__linux__) #include <signal.h> #endif diff --git a/src/mesa/drivers/dri/unichrome/server/via_dri.h b/src/mesa/drivers/dri/unichrome/server/via_dri.h index b47397d572..c6eed03c1c 100644 --- a/src/mesa/drivers/dri/unichrome/server/via_dri.h +++ b/src/mesa/drivers/dri/unichrome/server/via_dri.h @@ -35,9 +35,7 @@ #define VIA_DRIDDX_VERSION_MINOR 0 #define VIA_DRIDDX_VERSION_PATCH 0 -#ifndef XFree86Server typedef int Bool; -#endif typedef struct { drm_handle_t handle; |