diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
44 files changed, 2589 insertions, 420 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 7758a792fd..a242580273 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -84,11 +84,21 @@ DRIVER_SOURCES = \ brw_wm_pass2.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ - brw_wm_surface_state.c + brw_wm_surface_state.c \ + gen6_cc.c \ + gen6_clip_state.c \ + gen6_depthstencil.c \ + gen6_gs_state.c \ + gen6_sampler_state.c \ + gen6_scissor_state.c \ + gen6_sf_state.c \ + gen6_urb.c \ + gen6_viewport_state.c \ + gen6_vs_state.c \ + gen6_wm_state.c C_SOURCES = \ $(COMMON_SOURCES) \ - $(MINIGLX_SOURCES) \ $(DRIVER_SOURCES) ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 65f51be341..a512896f31 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -78,7 +78,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, GLcontext *ctx = &intel->ctx; if (!brw) { - _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); + printf("%s: failed to alloc context\n", __FUNCTION__); return GL_FALSE; } @@ -87,7 +87,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, if (!intelInitContext( intel, mesaVis, driContextPriv, sharedContextPrivate, &functions )) { - _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); + printf("%s: failed to init intel context\n", __FUNCTION__); FREE(brw); return GL_FALSE; } @@ -150,7 +150,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, ctx->Const.FragmentProgram.MaxEnvParams); - if (intel->is_ironlake || intel->is_g4x) { + if (intel->is_ironlake || intel->is_g4x || intel->gen >= 6) { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; brw->has_surface_tile_offset = GL_TRUE; @@ -170,7 +170,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->urb.size = 384; brw->vs_max_threads = 32; brw->wm_max_threads = 10 * 5; - } else { + } else if (intel->gen < 6) { brw->urb.size = 256; brw->vs_max_threads = 16; brw->wm_max_threads = 8 * 4; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 21c4cd38a7..d6fc37e4d8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -282,6 +282,9 @@ struct brw_vs_ouput_sizes { enum brw_cache_id { + BRW_BLEND_STATE, + BRW_DEPTH_STENCIL_STATE, + BRW_COLOR_CALC_STATE, BRW_CC_VP, BRW_CC_UNIT, BRW_WM_PROG, @@ -290,7 +293,7 @@ enum brw_cache_id { BRW_WM_UNIT, BRW_SF_PROG, BRW_SF_VP, - BRW_SF_UNIT, + BRW_SF_UNIT, /* scissor state on gen6 */ BRW_VS_UNIT, BRW_VS_PROG, BRW_GS_UNIT, @@ -354,6 +357,9 @@ struct brw_tracked_state { /* Flags for brw->state.cache. */ +#define CACHE_NEW_BLEND_STATE (1<<BRW_BLEND_STATE) +#define CACHE_NEW_DEPTH_STENCIL_STATE (1<<BRW_DEPTH_STENCIL_STATE) +#define CACHE_NEW_COLOR_CALC_STATE (1<<BRW_COLOR_CALC_STATE) #define CACHE_NEW_CC_VP (1<<BRW_CC_VP) #define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) #define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) @@ -538,7 +544,8 @@ struct brw_context GLuint nr_sf_entries; GLuint nr_cs_entries; -/* GLuint vs_size; */ + /* gen6 */ + GLuint vs_size; /* GLuint gs_size; */ /* GLuint clip_size; */ /* GLuint sf_size; */ @@ -643,9 +650,16 @@ struct brw_context struct { + /* gen4 */ dri_bo *prog_bo; - dri_bo *state_bo; dri_bo *vp_bo; + + /* gen6 */ + dri_bo *blend_state_bo; + dri_bo *depth_stencil_state_bo; + dri_bo *color_calc_state_bo; + + dri_bo *state_bo; } cc; struct { diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 6cb8edb611..4e78b08cfe 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -114,13 +114,13 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->curbe.total_size = reg; if (0) - _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", - brw->curbe.wm_start, - brw->curbe.wm_size, - brw->curbe.clip_start, - brw->curbe.clip_size, - brw->curbe.vs_start, - brw->curbe.vs_size ); + printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } @@ -179,7 +179,6 @@ static GLfloat fixed_plane[6][4] = { */ static void prepare_constant_buffer(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; GLcontext *ctx = &brw->intel.ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); @@ -199,7 +198,7 @@ static void prepare_constant_buffer(struct brw_context *brw) return; } - buf = (GLfloat *) _mesa_calloc(bufsz); + buf = (GLfloat *) calloc(1, bufsz); /* fragment shader constants */ if (brw->curbe.wm_size) { @@ -280,13 +279,13 @@ static void prepare_constant_buffer(struct brw_context *brw) if (0) { for (i = 0; i < sz*16; i+=4) - _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, - buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", - brw->curbe.last_buf, buf, - bufsz, brw->curbe.last_bufsz, - brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } if (brw->curbe.curbe_bo != NULL && @@ -294,12 +293,12 @@ static void prepare_constant_buffer(struct brw_context *brw) bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ - _mesa_free(buf); + free(buf); } else { /* constants have changed */ if (brw->curbe.last_buf) - _mesa_free(brw->curbe.last_buf); + free(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; @@ -307,7 +306,7 @@ static void prepare_constant_buffer(struct brw_context *brw) if (brw->curbe.curbe_bo != NULL && brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) { - intel_bo_unmap_gtt_preferred(intel, brw->curbe.curbe_bo); + drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); dri_bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } @@ -319,7 +318,7 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", 4096, 1 << 6); brw->curbe.curbe_next_offset = 0; - intel_bo_map_gtt_preferred(intel, brw->curbe.curbe_bo, GL_TRUE); + drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo); } brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index ea0d7e05d4..bb1b5f5ef0 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -530,6 +530,7 @@ #define BRW_OPCODE_POP 47 #define BRW_OPCODE_WAIT 48 #define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_MATH 56 #define BRW_OPCODE_ADD 64 #define BRW_OPCODE_MUL 65 #define BRW_OPCODE_AVG 66 @@ -727,7 +728,8 @@ #define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ #define BRW_MATH_FUNCTION_COS 7 /* was 8 */ #define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ #define BRW_MATH_FUNCTION_POW 10 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 @@ -778,17 +780,33 @@ #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 +# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10) + +#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ #define CMD_VERTEX_BUFFER 0x7808 # define BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 # define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) # define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) # define BRW_VB0_PITCH_SHIFT 0 #define CMD_VERTEX_ELEMENT 0x7809 # define BRW_VE0_INDEX_SHIFT 27 +# define GEN6_VE0_INDEX_SHIFT 26 # define BRW_VE0_FORMAT_SHIFT 16 # define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) # define BRW_VE0_SRC_OFFSET_SHIFT 0 # define BRW_VE1_COMPONENT_NOSTORE 0 # define BRW_VE1_COMPONENT_STORE_SRC 1 @@ -805,8 +823,219 @@ # define BRW_VE1_DST_OFFSET_SHIFT 0 #define CMD_INDEX_BUFFER 0x780a -#define CMD_VF_STATISTICS_965 0x780b +#define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b +#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ + +#define CMD_URB 0x7805 /* GEN6+ */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_SIZE_SHIFT 8 +# define GEN6_URB_GS_ENTRIES_SHIFT 0 + +#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +/* DW6 */ +# define GEN6_GS_ENABLE (1 << 15) + +#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 + +#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +/* DW1 */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 +/* DW16: Point sprite texture coordinate enables */ +/* DW17: Constant interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-16 wrap shortest enables */ + +#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 12) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ +#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ +#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -818,6 +1047,25 @@ #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 #define CMD_AA_LINE_PARAMETERS 0x790a +#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index */ +/* DW3: SVB maximum index */ + +#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +# define DEPTH_CLEAR_VALID (1 << 15) +/* DW1: depth clear value */ + #define CMD_PIPE_CONTROL 0x7a00 #define CMD_3D_PRIM 0x7b00 diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index d510d767f9..e348d4686b 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -82,7 +82,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) GLcontext *ctx = &brw->intel.ctx; if (INTEL_DEBUG & DEBUG_PRIMS) - _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); + printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); /* Slight optimization to avoid the GS program when not needed: */ @@ -125,7 +125,7 @@ static void brw_emit_prim(struct brw_context *brw, struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) - _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), + printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), prim->start, prim->count); prim_packet.header.opcode = CMD_3D_PRIM; @@ -337,6 +337,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, * so can't access it earlier. */ + intel_prepare_render(intel); + for (i = 0; i < nr_prims; i++) { uint32_t hw_prim; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index c46b9ba89c..71a43577bf 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -169,7 +169,7 @@ static GLuint get_surface_type( GLenum type, GLuint size, GLenum format, GLboolean normalized ) { if (INTEL_DEBUG & DEBUG_VERTS) - _mesa_printf("type %s size %d normalized %d\n", + printf("type %s size %d normalized %d\n", _mesa_lookup_enum_by_nr(type), size, normalized); if (normalized) { @@ -276,7 +276,6 @@ copy_array_to_vbo_array( struct brw_context *brw, struct brw_vertex_element *element, GLuint dst_stride) { - struct intel_context *intel = &brw->intel; GLuint size = element->count * dst_stride; get_space(brw, size, &element->bo, &element->offset); @@ -289,52 +288,26 @@ copy_array_to_vbo_array( struct brw_context *brw, } if (dst_stride == element->glarray->StrideB) { - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - memcpy((char *)element->bo->virtual + element->offset, - element->glarray->Ptr, size); - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); - } + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char *)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); } else { char *dest; const unsigned char *src = element->glarray->Ptr; int i; - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - dest = element->bo->virtual; - dest += element->offset; - - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - void *data; - - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; - dri_bo_subdata(element->bo, - element->offset, - size, - data); - - _mesa_free(data); + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; } + + drm_intel_gem_bo_unmap_gtt(element->bo); } } @@ -355,7 +328,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* First build an array of pointers to ve's in vb.inputs_read */ if (0) - _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; @@ -503,10 +476,17 @@ static void brw_emit_vertices(struct brw_context *brw) if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); - OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + if (IS_GEN6(intel->intelScreen->deviceID)) { + OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } else { + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -527,14 +507,22 @@ static void brw_emit_vertices(struct brw_context *brw) for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t dw0; + + if (intel->gen >= 6) { + dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (i << GEN6_VB0_INDEX_SHIFT); + } else { + dw0 = BRW_VB0_ACCESS_VERTEXDATA | + (i << BRW_VB0_INDEX_SHIFT); + } - OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | - BRW_VB0_ACCESS_VERTEXDATA | + OUT_BATCH(dw0 | (input->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->offset); - if (intel->is_ironlake) { + if (intel->is_ironlake || intel->gen >= 6) { OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->bo->size - 1); @@ -565,12 +553,19 @@ static void brw_emit_vertices(struct brw_context *brw) break; } - OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (format << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + if (IS_GEN6(intel->intelScreen->deviceID)) { + OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } else { + OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } - if (intel->is_ironlake) + if (intel->is_ironlake || intel->gen >= 6) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -624,13 +619,9 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); - } else { - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - } + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); } else { offset = (GLuint) (unsigned long) index_buffer->ptr; brw->ib.start_vertex_offset = 0; diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 1df561386e..4e7c1226ad 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -237,7 +237,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_call *call, *next; for (call = c->first_call; call; call = next) { next = call->next; - _mesa_free(call); + free(call); } c->first_call = NULL; } @@ -247,7 +247,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_label *label, *next; for (label = c->first_label; label; label = next) { next = label->next; - _mesa_free(label); + free(label); } c->first_label = NULL; } diff --git a/src/mesa/drivers/dri/i965/brw_eu_debug.c b/src/mesa/drivers/dri/i965/brw_eu_debug.c index 29f3f6d02f..99453afdca 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_debug.c +++ b/src/mesa/drivers/dri/i965/brw_eu_debug.c @@ -54,9 +54,9 @@ void brw_print_reg( struct brw_reg hwreg ) "f" }; - _mesa_printf("%s%s", - hwreg.abs ? "abs/" : "", - hwreg.negate ? "-" : ""); + printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.nr % 2 == 0 && @@ -66,7 +66,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { /* vector register */ - _mesa_printf("vec%d", hwreg.nr); + printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.vstride == BRW_VERTICAL_STRIDE_0 && @@ -74,13 +74,13 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { /* "scalar" register */ - _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } else if (hwreg.file == BRW_IMMEDIATE_VALUE) { - _mesa_printf("imm %f", hwreg.dw1.f); + printf("imm %f", hwreg.dw1.f); } else { - _mesa_printf("%s%d.%d<%d;%d,%d>:%s", + printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], hwreg.nr, hwreg.subnr / type_sz(hwreg.type), diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 8d6ac00839..f69d529613 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -102,8 +102,6 @@ static void brw_set_dest( struct brw_instruction *insn, static void brw_set_src0( struct brw_instruction *insn, struct brw_reg reg ) { - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -323,7 +321,7 @@ static void brw_set_urb_message( struct brw_context *brw, struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (intel->is_ironlake) { + if (intel->is_ironlake || intel->gen >= 6) { insn->bits3.urb_igdng.opcode = 0; /* ? */ insn->bits3.urb_igdng.offset = offset; insn->bits3.urb_igdng.swizzle_control = swizzle_control; @@ -334,8 +332,16 @@ static void brw_set_urb_message( struct brw_context *brw, insn->bits3.urb_igdng.response_length = response_length; insn->bits3.urb_igdng.msg_length = msg_length; insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (intel->gen >= 6) { + /* For SNB, the SFID bits moved to the condmod bits, and + * EOT stayed in bits3 above. Does the EOT bit setting + * below on Ironlake even do anything? + */ + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; + } else { + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } } else { insn->bits3.urb.opcode = 0; /* ? */ insn->bits3.urb.offset = offset; @@ -917,26 +923,40 @@ void brw_math( struct brw_compile *p, GLuint data_type, GLuint precision ) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; - GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + struct intel_context *intel = &p->brw->intel; - /* Example code doesn't set predicate_control for send - * instructions. - */ - insn->header.predicate_control = 0; - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen >= 6) { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(p->brw, - insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - data_type); + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_src1(insn, brw_null_reg()); + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); + } } /** @@ -1272,7 +1292,7 @@ void brw_SAMPLE(struct brw_compile *p, GLboolean need_stall = 0; if (writemask == 0) { - /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + /*printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1304,7 +1324,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; - /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); @@ -1377,7 +1397,18 @@ void brw_urb_WRITE(struct brw_compile *p, GLuint offset, GLuint swizzle) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); assert(msg_length < BRW_MAX_MRF); @@ -1385,7 +1416,8 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(p->brw, insn, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index f708ee0063..d030ed41f4 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -136,6 +136,41 @@ const struct brw_tracked_state brw_binding_table_pointers = { .emit = upload_binding_table_pointers, }; +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is 0. + */ +static void upload_gen6_binding_table_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | + GEN6_BINDING_TABLE_MODIFY_VS | + GEN6_BINDING_TABLE_MODIFY_GS | + GEN6_BINDING_TABLE_MODIFY_PS | + (4 - 2)); + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); + OUT_BATCH(0); /* gs */ + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + ADVANCE_BATCH(); +} + +const struct brw_tracked_state gen6_binding_table_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SURF_BIND, + }, + .prepare = prepare_binding_table_pointers, + .emit = upload_gen6_binding_table_pointers, +}; /** * Upload pointers to the per-stage state. @@ -209,7 +244,14 @@ static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - unsigned int len = (intel->is_g4x || intel->is_ironlake) ? 6 : 5; + unsigned int len; + + if (intel->gen >= 6) + len = 7; + else if (intel->is_g4x || intel->is_ironlake) + len = 6; + else + len = 5; if (region == NULL) { BEGIN_BATCH(len); @@ -220,9 +262,12 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (intel->is_g4x || intel->is_ironlake) + if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6) OUT_BATCH(0); + if (intel->gen >= 6) + OUT_BATCH(0); + ADVANCE_BATCH(); } else { unsigned int format; @@ -243,6 +288,8 @@ static void emit_depthbuffer(struct brw_context *brw) } assert(region->tiling != I915_TILING_X); + if (IS_GEN6(intel->intelScreen->deviceID)) + assert(region->tiling != I915_TILING_NONE); BEGIN_BATCH(len); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); @@ -259,9 +306,20 @@ static void emit_depthbuffer(struct brw_context *brw) ((region->height - 1) << 19)); OUT_BATCH(0); - if (intel->is_g4x || intel->is_ironlake) + if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6) OUT_BATCH(0); + if (intel->gen >= 6) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } + + /* Initialize it for safety. */ + if (intel->gen >= 6) { + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); ADVANCE_BATCH(); } } @@ -435,6 +493,8 @@ const struct brw_tracked_state brw_line_stipple = { static void upload_invarient_state( struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; + { /* 0x61040000 Pipeline Select */ /* PipelineSelect : 0 */ @@ -446,7 +506,7 @@ static void upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &ps); } - { + if (intel->gen < 6) { struct brw_global_depth_offset_clamp gdo; memset(&gdo, 0, sizeof(gdo)); @@ -459,6 +519,32 @@ static void upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &gdo); } + intel_batchbuffer_emit_mi_flush(intel->batch); + + if (intel->gen >= 6) { + int i; + + BEGIN_BATCH(3); + OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + MS_NUMSAMPLES_1); + OUT_BATCH(0); /* positions for 4/8-sample */ + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(1); + ADVANCE_BATCH(); + + for (i = 0; i < 4; i++) { + BEGIN_BATCH(4); + OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(i << SVB_INDEX_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0xffffffff); + ADVANCE_BATCH(); + } + } /* 0x61020000 State Instruction Pointer */ { @@ -509,7 +595,20 @@ static void upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. */ - if (intel->is_ironlake) { + if (intel->gen >= 6) { + BEGIN_BATCH(10); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Dynamic state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Dynamic state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else if (intel->is_ironlake) { BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 5399a74244..6cce7e5089 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -73,7 +73,7 @@ brw_new_query_object(GLcontext *ctx, GLuint id) { struct brw_query_object *query; - query = _mesa_calloc(sizeof(struct brw_query_object)); + query = calloc(1, sizeof(struct brw_query_object)); query->Base.Id = id; query->Base.Result = 0; @@ -89,7 +89,7 @@ brw_delete_query(GLcontext *ctx, struct gl_query_object *q) struct brw_query_object *query = (struct brw_query_object *)q; dri_bo_unreference(query->bo); - _mesa_free(query); + free(query); } static void diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 8e6839b812..57d1c29ade 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -46,7 +46,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { - GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -69,20 +68,14 @@ static void compile_sf_prog( struct brw_context *brw, /* Construct map from attribute number to position in the vertex. */ - for (i = idx = 0; i < VERT_RESULT_MAX; i++) + for (i = idx = 0; i < VERT_RESULT_MAX; i++) { if (c.key.attrs & BITFIELD64_BIT(i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; - if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { - c.point_attrs[i].CoordReplace = - ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; - } - else { - c.point_attrs[i].CoordReplace = GL_FALSE; - } idx++; } - + } + /* Which primitive? Or all three? */ switch (key->primitive) { @@ -162,6 +155,14 @@ static void upload_sf_prog(struct brw_context *brw) } key.do_point_sprite = ctx->Point.PointSprite; + if (key.do_point_sprite) { + int i; + + for (i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key.point_sprite_coord_replace |= (1 << i); + } + } key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT); /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 0ba731fac9..a0680a56f2 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -46,6 +46,7 @@ struct brw_sf_prog_key { GLbitfield64 attrs; + uint8_t point_sprite_coord_replace; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; @@ -56,10 +57,6 @@ struct brw_sf_prog_key { GLuint pad:24; }; -struct brw_sf_point_tex { - GLboolean CoordReplace; -}; - struct brw_sf_compile { struct brw_compile func; struct brw_sf_prog_key key; @@ -100,7 +97,6 @@ struct brw_sf_compile { GLubyte attr_to_idx[VERT_RESULT_MAX]; GLubyte idx_to_attr[VERT_RESULT_MAX]; - struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index bb08055e3b..56f7c986e7 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -354,6 +354,33 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, return is_last_attr; } +/* Calculates the predicate control for which channels of a reg + * (containing 2 attrs) to do point sprite coordinate replacement on. + */ +static uint16_t +calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg) +{ + int attr1, attr2; + uint16_t pc = 0; + + attr1 = c->idx_to_attr[reg * 2]; + if (attr1 >= VERT_RESULT_TEX0 && attr1 <= VERT_RESULT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (attr1 - VERT_RESULT_TEX0))) + pc |= 0x0f; + } + + if (reg * 2 + 1 < c->nr_setup_attrs) { + attr2 = c->idx_to_attr[reg * 2 + 1]; + if (attr2 >= VERT_RESULT_TEX0 && attr2 <= VERT_RESULT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (attr2 - + VERT_RESULT_TEX0))) + pc |= 0xf0; + } + } + + return pc; +} + void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) @@ -529,22 +556,27 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { - struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; struct brw_reg a0 = offset(c->vert[0], i); - GLushort pc, pc_persp, pc_linear; + GLushort pc, pc_persp, pc_linear, pc_coord_replace; GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - if (!tex->CoordReplace) { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } + + pc_coord_replace = calculate_point_sprite_mask(c, i); + pc_persp &= ~pc_coord_replace; + + if (pc_persp) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); } - if (tex->CoordReplace) { - /* Caculate 1.0/PointWidth */ - brw_math(&c->func, + /* Point sprite coordinate replacement: A texcoord with this + * enabled gets replaced with the value (x, y, 0, 1) where x and + * y vary from 0 to 1 across the horizontal and vertical of the + * point. + */ + if (pc_coord_replace) { + brw_set_predicate_control_flag_value(p, pc_coord_replace); + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, @@ -553,50 +585,51 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } else { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } - } else { - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); - } + brw_set_access_mode(p, BRW_ALIGN_16); - { - brw_set_predicate_control_flag_value(p, pc); - if (tex->CoordReplace) { - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); - brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); - } - else - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + /* dA/dx, dA/dy */ + brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); + brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); + brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); } else { - brw_MOV(p, c->m3C0, a0); /* constant value */ + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); } - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + /* attribute constant offset */ + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); + } else { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); + } + + brw_set_access_mode(p, BRW_ALIGN_1); } + + if (pc & ~pc_coord_replace) { + brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace); + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + + brw_set_predicate_control_flag_value(p, pc); + /* Copy m0..m3 to URB. */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); } } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 536fe8b249..f790cfabe2 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -90,6 +90,23 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; const struct brw_tracked_state brw_index_buffer; +const struct brw_tracked_state gen6_binding_table_pointers; +const struct brw_tracked_state gen6_blend_state; +const struct brw_tracked_state gen6_cc_state_pointers; +const struct brw_tracked_state gen6_cc_vp; +const struct brw_tracked_state gen6_clip_state; +const struct brw_tracked_state gen6_clip_vp; +const struct brw_tracked_state gen6_color_calc_state; +const struct brw_tracked_state gen6_depth_stencil_state; +const struct brw_tracked_state gen6_gs_state; +const struct brw_tracked_state gen6_sampler_state; +const struct brw_tracked_state gen6_scissor_state; +const struct brw_tracked_state gen6_sf_state; +const struct brw_tracked_state gen6_sf_vp; +const struct brw_tracked_state gen6_urb; +const struct brw_tracked_state gen6_viewport_state; +const struct brw_tracked_state gen6_vs_state; +const struct brw_tracked_state gen6_wm_state; /** * Use same key for WM and VS surfaces. diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index ed8120d617..39019412fd 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -57,8 +57,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) return GL_FALSE; if (item->sz != sz) { - _mesa_free(item->header); - item->header = _mesa_malloc(sz); + free(item->header); + item->header = malloc(sz); item->sz = sz; } goto emit; @@ -68,7 +68,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, assert(!item); item = CALLOC_STRUCT(brw_cached_batch_item); - item->header = _mesa_malloc(sz); + item->header = malloc(sz); item->sz = sz; item->next = brw->cached_batch_items; brw->cached_batch_items = item; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 1369d97e21..c08cb45b75 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -150,7 +150,7 @@ rehash(struct brw_cache *cache) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); + items = (struct brw_cache_item**) calloc(1, size * sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -237,7 +237,7 @@ brw_upload_cache_with_auxdata(struct brw_cache *cache, /* Set up the memory containing the key, aux_data, and reloc_bufs */ - tmp = _mesa_malloc(key_size + aux_size + relocs_size); + tmp = malloc(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); memcpy(tmp + key_size, aux, aux_size); @@ -266,7 +266,7 @@ brw_upload_cache_with_auxdata(struct brw_cache *cache, } if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to cache id %d\n", + printf("upload %s: %d bytes to cache id %d\n", cache->name[cache_id], data_size, cache_id); @@ -366,7 +366,7 @@ brw_init_non_surface_cache(struct brw_context *brw) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + calloc(1, cache->size * sizeof(struct brw_cache_item)); brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); @@ -390,6 +390,7 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); + brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE); } @@ -403,7 +404,7 @@ brw_init_surface_cache(struct brw_context *brw) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + calloc(1, cache->size * sizeof(struct brw_cache_item)); brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE); brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND); @@ -425,7 +426,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -444,7 +445,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; if (brw->curbe.last_buf) { - _mesa_free(brw->curbe.last_buf); + free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; } @@ -465,7 +466,7 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (prev = &cache->items[i]; *prev;) { @@ -493,7 +494,7 @@ void brw_state_cache_check_size(struct brw_context *brw) { if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. @@ -512,7 +513,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) GLuint i; if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + printf("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 0ecbef1ef9..9e54f29f0f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -35,8 +35,15 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_buffers.h" +#include "intel_chipset.h" -static const struct brw_tracked_state *atoms[] = +/* This is used to initialize brw->state.atoms[]. We could use this + * list directly except for a single atom, brw_constant_buffer, which + * has a .dirty value which changes according to the parameters of the + * current fragment and vertex programs, and so cannot be a static + * value. + */ +static const struct brw_tracked_state *gen4_atoms[] = { &brw_check_fallback, @@ -95,6 +102,63 @@ static const struct brw_tracked_state *atoms[] = &brw_constant_buffer }; +const struct brw_tracked_state *gen6_atoms[] = +{ + &brw_check_fallback, + + &brw_wm_input_sizes, + &brw_vs_prog, + &brw_gs_prog, + &brw_wm_prog, + + &gen6_clip_vp, + &gen6_sf_vp, + &gen6_cc_vp, + + /* Command packets: */ + &brw_invarient_state, + + &gen6_viewport_state, /* must do after *_vp stages */ + + &gen6_urb, + &gen6_blend_state, /* must do before cc unit */ + &gen6_color_calc_state, /* must do before cc unit */ + &gen6_depth_stencil_state, /* must do before cc unit */ + &gen6_cc_state_pointers, + + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ + + &brw_wm_samplers, + &gen6_sampler_state, + + &gen6_vs_state, + &gen6_gs_state, + &gen6_clip_state, + &gen6_sf_state, + &gen6_wm_state, + + &gen6_scissor_state, + + &brw_state_base_address, + + &gen6_binding_table_pointers, + + &brw_depthbuffer, + + &brw_polygon_stipple, + &brw_polygon_stipple_offset, + + &brw_line_stipple, + &brw_aa_line_parameters, + + &brw_drawing_rect, + + &brw_indices, + &brw_index_buffer, + &brw_vertices, +}; void brw_init_state( struct brw_context *brw ) { @@ -211,6 +275,7 @@ static struct dirty_bit_map brw_bits[] = { }; static struct dirty_bit_map cache_bits[] = { + DEFINE_BIT(CACHE_NEW_BLEND_STATE), DEFINE_BIT(CACHE_NEW_CC_VP), DEFINE_BIT(CACHE_NEW_CC_UNIT), DEFINE_BIT(CACHE_NEW_WM_PROG), @@ -270,6 +335,8 @@ void brw_validate_state( struct brw_context *brw ) struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; GLuint i; + const struct brw_tracked_state **atoms; + int num_atoms; brw_clear_validated_bos(brw); @@ -278,6 +345,14 @@ void brw_validate_state( struct brw_context *brw ) brw_add_validated_bo(brw, intel->batch->buf); + if (IS_GEN6(intel->intelScreen->deviceID)) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } + if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; @@ -305,7 +380,7 @@ void brw_validate_state( struct brw_context *brw ) brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */ /* do prepare stage for all atoms */ - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) @@ -337,9 +412,20 @@ void brw_validate_state( struct brw_context *brw ) void brw_upload_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; int i; static int dirty_count = 0; + const struct brw_tracked_state **atoms; + int num_atoms; + + if (IS_GEN6(intel->intelScreen->deviceID)) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } brw_clear_validated_bos(brw); @@ -349,10 +435,10 @@ void brw_upload_state(struct brw_context *brw) * state atoms are ordered correctly in the list. */ struct brw_state_flags examined, prev; - _mesa_memset(&examined, 0, sizeof(examined)); + memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; @@ -381,7 +467,7 @@ void brw_upload_state(struct brw_context *brw) } } else { - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 66d4127271..3c2adfc87d 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -658,7 +658,105 @@ struct brw_clip_unit_state GLfloat viewport_ymax; }; +struct gen6_blend_state +{ + struct { + GLuint dest_blend_factor:5; + GLuint source_blend_factor:5; + GLuint pad3:1; + GLuint blend_func:3; + GLuint pad2:1; + GLuint ia_dest_blend_factor:5; + GLuint ia_source_blend_factor:5; + GLuint pad1:1; + GLuint ia_blend_func:3; + GLuint pad0:1; + GLuint ia_blend_enable:1; + GLuint blend_enable:1; + } blend0; + + struct { + GLuint post_blend_clamp_enable:1; + GLuint pre_blend_clamp_enable:1; + GLuint clamp_range:2; + GLuint pad0:4; + GLuint x_dither_offset:2; + GLuint y_dither_offset:2; + GLuint dither_enable:1; + GLuint alpha_test_func:3; + GLuint alpha_test_enable:1; + GLuint pad1:1; + GLuint logic_op_func:4; + GLuint logic_op_enable:1; + GLuint pad2:1; + GLuint write_disable_b:1; + GLuint write_disable_g:1; + GLuint write_disable_r:1; + GLuint write_disable_a:1; + GLuint pad3:1; + GLuint alpha_to_coverage_dither:1; + GLuint alpha_to_one:1; + GLuint alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + GLuint alpha_test_format:1; + GLuint pad0:14; + GLuint round_disable:1; + GLuint bf_stencil_ref:8; + GLuint stencil_ref:8; + } cc0; + union { + GLfloat alpha_ref_f; + struct { + GLuint ui:8; + GLuint pad0:24; + } alpha_ref_fi; + } cc1; + + GLfloat constant_r; + GLfloat constant_g; + GLfloat constant_b; + GLfloat constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } ds0; + + struct { + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + } ds1; + + struct { + GLuint pad0:25; + GLuint depth_write_enable:1; + GLuint depth_test_func:3; + GLuint pad1:1; + GLuint depth_test_enable:1; + } ds2; +}; struct brw_cc_unit_state { @@ -752,8 +850,6 @@ struct brw_cc_unit_state } cc7; }; - - struct brw_sf_unit_state { struct thread0 thread0; @@ -813,6 +909,11 @@ struct brw_sf_unit_state }; +struct gen6_scissor_state +{ + GLuint ymin, xmin; + GLuint ymax, xmax; +}; struct brw_gs_unit_state { @@ -1043,6 +1144,15 @@ struct brw_sf_viewport } scissor; }; +struct gen6_sf_viewport { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; +}; + /* Documented in the subsystem/shared-functions/sampler chapter... */ struct brw_surface_state diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index f2cdb203b8..4f6b9002ad 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -186,17 +186,17 @@ static void recalculate_urb_fence( struct brw_context *brw ) * entries and the values for minimum nr of entries * provided above. */ - _mesa_printf("couldn't calculate URB layout!\n"); + printf("couldn't calculate URB layout!\n"); exit(1); } if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) - _mesa_printf("URB CONSTRAINED\n"); + printf("URB CONSTRAINED\n"); } done: if (INTEL_DEBUG & DEBUG_URB) - _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", brw->urb.vs_start, brw->urb.gs_start, brw->urb.clip_start, diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 4f4eef85e8..a7c4b58972 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -180,10 +180,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (intel->is_ironlake) - mrf = 8; + if (intel->gen >= 6) + mrf = 6; + else if (intel->is_ironlake) + mrf = 8; else - mrf = 4; + mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { @@ -279,17 +281,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (intel->is_ironlake) - c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + if (intel->gen >= 6) + c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8; + else if (intel->is_ironlake) + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); - _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); - _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + printf("%s reg = %d\n", __FUNCTION__, reg); } } @@ -479,9 +483,11 @@ static void emit_math1( struct brw_vs_compile *c, * whether that turns out to be a simulator bug or not: */ struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; - GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); + GLboolean need_tmp = (intel->gen < 6 && + (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE)); if (need_tmp) tmp = get_tmp(c); @@ -510,9 +516,11 @@ static void emit_math2( struct brw_vs_compile *c, GLuint precision) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; - GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); + GLboolean need_tmp = (intel->gen < 6 && + (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE)); if (need_tmp) tmp = get_tmp(c); @@ -1191,7 +1199,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; int eot; - GLuint len_vertext_header = 2; + GLuint len_vertex_header = 2; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1199,12 +1207,14 @@ static void emit_vertex_write( struct brw_vs_compile *c) get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG)); } - /* Build ndc coords */ - ndc = get_tmp(c); - /* ndc = 1.0 / pos.w */ - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - /* ndc.xyz = pos * ndc */ - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + if (intel->gen < 6) { + /* Build ndc coords */ + ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + } /* Update the header for point size, user clipping flags, and -ve rhw * workaround. @@ -1267,21 +1277,41 @@ static void emit_vertex_write( struct brw_vs_compile *c) * of zeros followed by two sets of NDC coordinates: */ brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, offset(m0, 2), ndc); - - if (intel->is_ironlake) { - /* There are 20 DWs (D0-D19) in VUE vertex header on Ironlake */ - brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ - /* m4, m5 contain the distances from vertex to the user clip planeXXX. - * Seems it is useless for us. - * m6 is used for aligning, so that the remainder of vertex element is - * reg-aligned. - */ - brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ - len_vertext_header = 6; + + if (intel->gen >= 6) { + /* There are 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the 4D space position + * dword 8-15 (m3,m4) of the vertex header is the user clip distance. + * m5 is the first vertex data we fill, which is the vertex position. + */ + brw_MOV(p, offset(m0, 2), pos); + brw_MOV(p, offset(m0, 5), pos); + len_vertex_header = 4; + } else if (intel->is_ironlake) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill, which is the vertex position. + */ + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + brw_MOV(p, offset(m0, 7), pos); + len_vertex_header = 6; } else { - brw_MOV(p, offset(m0, 3), pos); - len_vertext_header = 2; + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. + * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data, which we always have be the + * vertex position. + */ + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + len_vertex_header = 2; } eot = (c->first_overflow_output == 0); @@ -1292,7 +1322,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ + MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ eot, /* eot */ eot, /* writes complete */ @@ -1444,9 +1474,9 @@ void brw_vs_emit(struct brw_vs_compile *c ) GLuint file; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-mesa:\n"); + printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); - _mesa_printf("\n"); + printf("\n"); } brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -1796,9 +1826,9 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (INTEL_DEBUG & DEBUG_VS) { int i; - _mesa_printf("vs-native:\n"); + printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index ead623fc0e..4007b5a15c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -67,13 +67,13 @@ brw_vs_update_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); - intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE); + drm_intel_gem_bo_map_gtt(const_buffer); for (i = 0; i < params->NumParameters; i++) { memcpy(const_buffer->virtual + i * 4 * sizeof(float), params->ParameterValues[i], 4 * sizeof(float)); } - intel_bo_unmap_gtt_preferred(intel, const_buffer); + drm_intel_gem_bo_unmap_gtt(const_buffer); return const_buffer; } @@ -104,7 +104,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (vp->const_buffer == 0) { + if (vp->const_buffer == NULL) { drm_intel_bo_unreference(brw->vs.surf_bo[surf]); brw->vs.surf_bo[surf] = NULL; return; @@ -132,7 +132,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, + &key.bo, 1, NULL); if (brw->vs.surf_bo[surf] == NULL) { brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 681319dedf..96a44bfbec 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -67,11 +67,11 @@ static void brw_destroy_context( struct intel_context *intel ) brw_draw_destroy( brw ); brw_clear_validated_bos(brw); if (brw->wm.compile_data) { - _mesa_free(brw->wm.compile_data->instruction); - _mesa_free(brw->wm.compile_data->vreg); - _mesa_free(brw->wm.compile_data->refs); - _mesa_free(brw->wm.compile_data->prog_instructions); - _mesa_free(brw->wm.compile_data); + free(brw->wm.compile_data->instruction); + free(brw->wm.compile_data->vreg); + free(brw->wm.compile_data->refs); + free(brw->wm.compile_data->prog_instructions); + free(brw->wm.compile_data); } for (i = 0; i < brw->state.nr_color_regions; i++) @@ -102,6 +102,9 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->cc.prog_bo); dri_bo_release(&brw->cc.state_bo); dri_bo_release(&brw->cc.vp_bo); + dri_bo_release(&brw->cc.blend_state_bo); + dri_bo_release(&brw->cc.depth_stencil_state_bo); + dri_bo_release(&brw->cc.color_calc_state_bo); } @@ -141,7 +144,7 @@ static void brw_finish_batch(struct intel_context *intel) brw_emit_query_end(brw); if (brw->curbe.curbe_bo) { - intel_bo_unmap_gtt_preferred(intel, brw->curbe.curbe_bo); + drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); drm_intel_bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 9191c81755..991e1b964b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -151,11 +151,11 @@ static void do_wm_prog( struct brw_context *brw, */ return; } - c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); - c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * + c->instruction = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction)); + c->prog_instructions = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->prog_instructions)); - c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); - c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); + c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg)); + c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs)); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c index 220821087c..a78cc8b54e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_debug.c +++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c @@ -41,21 +41,21 @@ void brw_wm_print_value( struct brw_wm_compile *c, if (c->state >= PASS2_DONE) brw_print_reg(value->hw_reg); else if( value == &c->undef_value ) - _mesa_printf("undef"); + printf("undef"); else if( value - c->vreg >= 0 && value - c->vreg < BRW_WM_MAX_VREG) - _mesa_printf("r%d", value - c->vreg); + printf("r%d", value - c->vreg); else if (value - c->creg >= 0 && value - c->creg < BRW_WM_MAX_PARAM) - _mesa_printf("c%d", value - c->creg); + printf("c%d", value - c->creg); else if (value - c->payload.input_interp >= 0 && value - c->payload.input_interp < FRAG_ATTRIB_MAX) - _mesa_printf("i%d", value - c->payload.input_interp); + printf("i%d", value - c->payload.input_interp); else if (value - c->payload.depth >= 0 && value - c->payload.depth < FRAG_ATTRIB_MAX) - _mesa_printf("d%d", value - c->payload.depth); + printf("d%d", value - c->payload.depth); else - _mesa_printf("?"); + printf("?"); } void brw_wm_print_ref( struct brw_wm_compile *c, @@ -64,16 +64,16 @@ void brw_wm_print_ref( struct brw_wm_compile *c, struct brw_reg hw_reg = ref->hw_reg; if (ref->unspill_reg) - _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot); + printf("UNSPILL(%x)/", ref->value->spill_slot); if (c->state >= PASS2_DONE) brw_print_reg(ref->hw_reg); else { - _mesa_printf("%s", hw_reg.negate ? "-" : ""); - _mesa_printf("%s", hw_reg.abs ? "abs/" : ""); + printf("%s", hw_reg.negate ? "-" : ""); + printf("%s", hw_reg.abs ? "abs/" : ""); brw_wm_print_value(c, ref->value); if ((hw_reg.nr&1) || hw_reg.subnr) { - _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); + printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); } } } @@ -84,22 +84,22 @@ void brw_wm_print_insn( struct brw_wm_compile *c, GLuint i, arg; GLuint nr_args = brw_wm_nr_args(inst->opcode); - _mesa_printf("["); + printf("["); for (i = 0; i < 4; i++) { if (inst->dst[i]) { brw_wm_print_value(c, inst->dst[i]); if (inst->dst[i]->spill_slot) - _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot); + printf("/SPILL(%x)",inst->dst[i]->spill_slot); } else - _mesa_printf("#"); + printf("#"); if (i < 3) - _mesa_printf(","); + printf(","); } - _mesa_printf("]"); + printf("]"); if (inst->writemask != WRITEMASK_XYZW) - _mesa_printf(".%s%s%s%s", + printf(".%s%s%s%s", GET_BIT(inst->writemask, 0) ? "x" : "", GET_BIT(inst->writemask, 1) ? "y" : "", GET_BIT(inst->writemask, 2) ? "z" : "", @@ -107,58 +107,58 @@ void brw_wm_print_insn( struct brw_wm_compile *c, switch (inst->opcode) { case WM_PIXELXY: - _mesa_printf(" = PIXELXY"); + printf(" = PIXELXY"); break; case WM_DELTAXY: - _mesa_printf(" = DELTAXY"); + printf(" = DELTAXY"); break; case WM_PIXELW: - _mesa_printf(" = PIXELW"); + printf(" = PIXELW"); break; case WM_WPOSXY: - _mesa_printf(" = WPOSXY"); + printf(" = WPOSXY"); break; case WM_PINTERP: - _mesa_printf(" = PINTERP"); + printf(" = PINTERP"); break; case WM_LINTERP: - _mesa_printf(" = LINTERP"); + printf(" = LINTERP"); break; case WM_CINTERP: - _mesa_printf(" = CINTERP"); + printf(" = CINTERP"); break; case WM_FB_WRITE: - _mesa_printf(" = FB_WRITE"); + printf(" = FB_WRITE"); break; case WM_FRONTFACING: - _mesa_printf(" = FRONTFACING"); + printf(" = FRONTFACING"); break; default: - _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode)); + printf(" = %s", _mesa_opcode_string(inst->opcode)); break; } if (inst->saturate) - _mesa_printf("_SAT"); + printf("_SAT"); for (arg = 0; arg < nr_args; arg++) { - _mesa_printf(" ["); + printf(" ["); for (i = 0; i < 4; i++) { if (inst->src[arg][i]) { brw_wm_print_ref(c, inst->src[arg][i]); } else - _mesa_printf("%%"); + printf("%%"); if (i < 3) - _mesa_printf(","); + printf(","); else - _mesa_printf("]"); + printf("]"); } } - _mesa_printf("\n"); + printf("\n"); } void brw_wm_print_program( struct brw_wm_compile *c, @@ -166,9 +166,9 @@ void brw_wm_print_program( struct brw_wm_compile *c, { GLuint insn; - _mesa_printf("%s:\n", stage); + printf("%s:\n", stage); for (insn = 0; insn < c->nr_insns; insn++) brw_wm_print_insn(c, &c->instruction[insn]); - _mesa_printf("\n"); + printf("\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index fa0898c586..9315bca315 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1622,10 +1622,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; default: - _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", - inst->opcode, inst->opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->opcode) : - "unknown"); + printf("Unsupported opcode %i (%s) in fragment shader\n", + inst->opcode, inst->opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->opcode) : + "unknown"); } for (i = 0; i < 4; i++) @@ -1638,9 +1638,9 @@ void brw_wm_emit( struct brw_wm_compile *c ) if (INTEL_DEBUG & DEBUG_WM) { int i; - _mesa_printf("wm-native:\n"); + printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 3737faf26f..d73c391582 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -159,7 +159,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) int bit = _mesa_ffs( ~c->fp_temp ); if (!bit) { - _mesa_printf("%s: out of temporaries\n", __FILE__); + printf("%s: out of temporaries\n", __FILE__); exit(1); } @@ -1034,7 +1034,7 @@ static void print_insns( const struct prog_instruction *insn, { GLuint i; for (i = 0; i < nr; i++, insn++) { - _mesa_printf("%3d: ", i); + printf("%3d: ", i); if (insn->Opcode < MAX_OPCODE) _mesa_print_instruction(insn); else if (insn->Opcode < MAX_WM_OPCODE) { @@ -1045,7 +1045,7 @@ static void print_insns( const struct prog_instruction *insn, 3); } else - _mesa_printf("965 Opcode %d\n", insn->Opcode); + printf("965 Opcode %d\n", insn->Opcode); } } @@ -1060,9 +1060,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) GLuint insn; if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pre-fp:\n"); + printf("pre-fp:\n"); _mesa_print_program(&fp->program.Base); - _mesa_printf("\n"); + printf("\n"); } c->pixel_xy = src_undef(); @@ -1168,9 +1168,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); + printf("pass_fp:\n"); print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index fde83eea62..562608e2ec 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1849,7 +1849,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) c->cur_inst = i; #if 0 - _mesa_printf("Inst %d: ", i); + printf("Inst %d: ", i); _mesa_print_instruction(inst); #endif @@ -2115,7 +2115,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - _mesa_printf("unsupported IR in fragment shader %d\n", + printf("unsupported IR in fragment shader %d\n", inst->Opcode); } @@ -2127,10 +2127,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) post_wm_emit(c); if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("wm-native:\n"); + printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + printf("\n"); } } @@ -2141,7 +2141,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("brw_wm_glsl_emit:\n"); + printf("brw_wm_glsl_emit:\n"); } /* initial instruction translation/simplification */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index ff4c082d5e..60bd92ed22 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -105,7 +105,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, GLuint i = c->prog_data.nr_params++; if (i >= BRW_WM_MAX_PARAM) { - _mesa_printf("%s: out of params\n", __FUNCTION__); + printf("%s: out of params\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -154,7 +154,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, return c->constref[i].ref; } else { - _mesa_printf("%s: out of constrefs\n", __FUNCTION__); + printf("%s: out of constrefs\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 87387b1e2d..d7650af3d9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -89,7 +89,6 @@ struct wm_sampler_key { float max_aniso; GLenum minfilter, magfilter; GLenum comparemode, comparefunc; - dri_bo *sdc_bo; /** If target is cubemap, take context setting. */ @@ -105,7 +104,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, dri_bo *sdc_bo, struct brw_sampler_state *sampler) { - _mesa_memset(sampler, 0, sizeof(*sampler)); + memset(sampler, 0, sizeof(*sampler)); switch (key->minfilter) { case GL_NEAREST: @@ -230,7 +229,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, GLcontext *ctx = &brw->intel.ctx; int unit; - memset(key, 0, sizeof(*key)); + key->sampler_count = 0; for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { if (ctx->Texture.Unit[unit]._ReallyEnabled) { @@ -241,6 +240,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw, struct gl_texture_image *firstImage = texObj->Image[0][intelObj->firstLevel]; + memset(entry, 0, sizeof(*entry)); + entry->tex_target = texObj->Target; entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) @@ -289,7 +290,7 @@ static void upload_wm_samplers( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct wm_sampler_key key; - int i; + int i, sampler_key_size; brw_wm_sampler_populate_key(brw, &key); @@ -303,8 +304,11 @@ static void upload_wm_samplers( struct brw_context *brw ) if (brw->wm.sampler_count == 0) return; + /* Only include the populated portion of the key in the search. */ + sampler_key_size = offsetof(struct wm_sampler_key, + sampler[key.sampler_count]); brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), + &key, sampler_key_size, brw->wm.sdc_bo, key.sampler_count, NULL); @@ -324,7 +328,7 @@ static void upload_wm_samplers( struct brw_context *brw ) } brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), + &key, sampler_key_size, brw->wm.sdc_bo, key.sampler_count, &sampler, sizeof(sampler)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 1db438ae7b..ce0bf0b97d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -207,33 +207,14 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - if (key->bo) { - surf.ss0.surface_format = translate_tex_format(key->format, - key->internal_format, - key->depthmode); - } - else { - switch (key->depth) { - case 32: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - default: - case 24: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - break; - case 16: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - } - } + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); /* This is ok for all textures with channel width 8bit or less: */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ surf.ss2.mip_count = key->last_level - key->first_level; surf.ss2.width = key->width - 1; @@ -255,17 +236,14 @@ brw_create_texture_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, + &key->bo, 1, &surf, sizeof(surf)); - if (key->bo) { - /* Emit relocation to surface contents */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } + /* Emit relocation to surface contents */ + drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), + key->bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); + return bo; } @@ -281,19 +259,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) memset(&key, 0, sizeof(key)); - if (intelObj->imageOverride) { - key.pitch = intelObj->pitchOverride / intelObj->mt->cpp; - key.depth = intelObj->depthOverride; - key.bo = NULL; - key.offset = intelObj->textureOffset; - } else { - key.format = firstImage->TexFormat; - key.internal_format = firstImage->InternalFormat; - key.pitch = intelObj->mt->pitch; - key.depth = firstImage->Depth; - key.bo = intelObj->mt->region->buffer; - key.offset = 0; - } + key.format = firstImage->TexFormat; + key.internal_format = firstImage->InternalFormat; + key.pitch = intelObj->mt->pitch; + key.depth = firstImage->Depth; + key.bo = intelObj->mt->region->buffer; + key.offset = 0; key.target = tObj->Target; key.depthmode = tObj->DepthMode; @@ -308,7 +279,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, + &key.bo, 1, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); @@ -336,10 +307,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; assert(key->bo); - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ @@ -349,20 +317,16 @@ brw_create_constant_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, + &key->bo, 1, &surf, sizeof(surf)); - if (key->bo) { - /* Emit relocation to surface contents. Section 5.1.1 of the gen4 - * bspec ("Data Cache") says that the data cache does not exist as - * a separate cache and is just the sampler cache. - */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } + /* Emit relocation to surface contents. Section 5.1.1 of the gen4 + * bspec ("Data Cache") says that the data cache does not exist as + * a separate cache and is just the sampler cache. + */ + drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), + key->bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); return bo; } @@ -420,7 +384,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (fp->const_buffer == 0) { + if (fp->const_buffer == NULL) { drm_intel_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; return; @@ -448,7 +412,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, + &key.bo, 1, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); @@ -509,7 +473,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, unsigned int unit) { - struct intel_context *intel = &brw->intel;; + struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; dri_bo *region_bo = NULL; struct intel_renderbuffer *irb = intel_renderbuffer(rb); @@ -576,18 +540,21 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.draw_x = 0; key.draw_y = 0; } - /* _NEW_COLOR */ - memcpy(key.color_mask, ctx->Color.ColorMask[unit], - sizeof(key.color_mask)); - /* As mentioned above, disable writes to the alpha component when the - * renderbuffer is XRGB. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) - key.color_mask[3] = GL_FALSE; + if (intel->gen < 6) { + /* _NEW_COLOR */ + memcpy(key.color_mask, ctx->Color.ColorMask[unit], + sizeof(key.color_mask)); - key.color_blend = (!ctx->Color._LogicOpEnabled && - (ctx->Color.BlendEnabled & (1 << unit))); + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. + */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) + key.color_mask[3] = GL_FALSE; + + key.color_blend = (!ctx->Color._LogicOpEnabled && + (ctx->Color.BlendEnabled & (1 << unit))); + } dri_bo_unreference(brw->wm.surf_bo[unit]); brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, @@ -639,12 +606,14 @@ brw_update_renderbuffer_surface(struct brw_context *brw, brw_set_surface_tiling(&surf, key.tiling); surf.ss3.pitch = (key.pitch * key.cpp) - 1; - /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; + if (intel->gen < 6) { + /* _NEW_COLOR */ + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; + } /* Key size will never match key size for textures, so we're safe. */ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c new file mode 100644 index 0000000000..f7acad6912 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -0,0 +1,296 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "intel_batchbuffer.h" +#include "main/macros.h" + +struct gen6_blend_state_key { + GLboolean color_blend, alpha_enabled; + GLboolean dither; + + GLenum logic_op; + + GLenum blend_eq_rgb, blend_eq_a; + GLenum blend_src_rgb, blend_src_a; + GLenum blend_dst_rgb, blend_dst_a; + + GLenum alpha_func; +}; + +static void +blend_state_populate_key(struct brw_context *brw, + struct gen6_blend_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* _NEW_COLOR */ + if (ctx->Color._LogicOpEnabled) + key->logic_op = ctx->Color.LogicOp; + else + key->logic_op = GL_COPY; + + /* _NEW_COLOR */ + key->color_blend = ctx->Color.BlendEnabled; + if (key->color_blend) { + key->blend_eq_rgb = ctx->Color.BlendEquationRGB; + key->blend_eq_a = ctx->Color.BlendEquationA; + key->blend_src_rgb = ctx->Color.BlendSrcRGB; + key->blend_dst_rgb = ctx->Color.BlendDstRGB; + key->blend_src_a = ctx->Color.BlendSrcA; + key->blend_dst_a = ctx->Color.BlendDstA; + } + + /* _NEW_COLOR */ + key->alpha_enabled = ctx->Color.AlphaEnabled; + if (key->alpha_enabled) { + key->alpha_func = ctx->Color.AlphaFunc; + } + + /* _NEW_COLOR */ + key->dither = ctx->Color.DitherFlag; +} + +/** + * Creates the state cache entry for the given CC unit key. + */ +static drm_intel_bo * +blend_state_create_from_key(struct brw_context *brw, + struct gen6_blend_state_key *key) +{ + struct gen6_blend_state blend; + drm_intel_bo *bo; + + memset(&blend, 0, sizeof(blend)); + + if (key->logic_op != GL_COPY) { + blend.blend1.logic_op_enable = 1; + blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); + blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); + blend.blend0.blend_func = brw_translate_blend_equation(eqRGB); + + blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); + blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA); + + blend.blend0.blend_enable = 1; + blend.blend0.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + if (key->alpha_enabled) { + blend.blend1.alpha_test_enable = 1; + blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + + } + + if (key->dither) { + blend.blend1.dither_enable = 1; + blend.blend1.y_dither_offset = 0; + blend.blend1.x_dither_offset = 0; + } + + bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, + key, sizeof(*key), + NULL, 0, + &blend, sizeof(blend)); + + return bo; +} + +static void +prepare_blend_state(struct brw_context *brw) +{ + struct gen6_blend_state_key key; + + blend_state_populate_key(brw, &key); + + drm_intel_bo_unreference(brw->cc.blend_state_bo); + brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.blend_state_bo == NULL) + brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_blend_state = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_blend_state, +}; + +struct gen6_color_calc_state_key { + GLubyte blend_constant_color[4]; + GLclampf alpha_ref; + GLubyte stencil_ref[2]; +}; + +static void +color_calc_state_populate_key(struct brw_context *brw, + struct gen6_color_calc_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* _NEW_STENCIL */ + if (ctx->Stencil._Enabled) { + const unsigned back = ctx->Stencil._BackFace; + + key->stencil_ref[0] = ctx->Stencil.Ref[0]; + if (ctx->Stencil._TestTwoSide) + key->stencil_ref[1] = ctx->Stencil.Ref[back]; + } + + /* _NEW_COLOR */ + if (ctx->Color.AlphaEnabled) + key->alpha_ref = ctx->Color.AlphaRef; + + key->blend_constant_color[0] = ctx->Color.BlendColor[0]; + key->blend_constant_color[1] = ctx->Color.BlendColor[1]; + key->blend_constant_color[2] = ctx->Color.BlendColor[2]; + key->blend_constant_color[3] = ctx->Color.BlendColor[3]; +} + +/** + * Creates the state cache entry for the given CC state key. + */ +static drm_intel_bo * +color_calc_state_create_from_key(struct brw_context *brw, + struct gen6_color_calc_state_key *key) +{ + struct gen6_color_calc_state cc; + drm_intel_bo *bo; + + memset(&cc, 0, sizeof(cc)); + + cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref); + + cc.cc0.stencil_ref = key->stencil_ref[0]; + cc.cc0.bf_stencil_ref = key->stencil_ref[1]; + + cc.constant_r = key->blend_constant_color[0]; + cc.constant_g = key->blend_constant_color[1]; + cc.constant_b = key->blend_constant_color[2]; + cc.constant_a = key->blend_constant_color[3]; + + bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE, + key, sizeof(*key), + NULL, 0, + &cc, sizeof(cc)); + + return bo; +} + +static void +prepare_color_calc_state(struct brw_context *brw) +{ + struct gen6_color_calc_state_key key; + + color_calc_state_populate_key(brw, &key); + + drm_intel_bo_unreference(brw->cc.state_bo); + brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.state_bo == NULL) + brw->cc.state_bo = color_calc_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_color_calc_state = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_color_calc_state, +}; + +static void upload_cc_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void prepare_cc_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->cc.state_bo); + brw_add_validated_bo(brw, brw->cc.blend_state_bo); + brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo); +} + +const struct brw_tracked_state gen6_cc_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = (CACHE_NEW_BLEND_STATE | + CACHE_NEW_COLOR_CALC_STATE | + CACHE_NEW_DEPTH_STENCIL_STATE) + }, + .prepare = prepare_cc_state_pointers, + .emit = upload_cc_state_pointers, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c new file mode 100644 index 0000000000..06f8145e32 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -0,0 +1,75 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_clip_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + uint32_t depth_clamp = 0; + uint32_t provoking; + + if (!ctx->Transform.DepthClamp) + depth_clamp = GEN6_CLIP_Z_TEST; + + if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { + provoking = 0; + } else { + provoking = + (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | + (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | + (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); + } + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); + OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); + OUT_BATCH(GEN6_CLIP_ENABLE | + GEN6_CLIP_API_OGL | + GEN6_CLIP_MODE_REJECT_ALL | /* XXX: debug: get VS working */ + GEN6_CLIP_XY_TEST | + depth_clamp | + provoking); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_clip_state = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_clip_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c new file mode 100644 index 0000000000..4924f0fd55 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -0,0 +1,165 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" + +struct brw_depth_stencil_state_key { + GLenum depth_func; + GLboolean depth_test, depth_write; + GLboolean stencil, stencil_two_side; + GLenum stencil_func[2], stencil_fail_op[2]; + GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; + GLubyte stencil_write_mask[2], stencil_test_mask[2]; +}; + +static void +depth_stencil_state_populate_key(struct brw_context *brw, + struct brw_depth_stencil_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + const unsigned back = ctx->Stencil._BackFace; + + memset(key, 0, sizeof(*key)); + + /* _NEW_STENCIL */ + key->stencil = ctx->Stencil._Enabled; + key->stencil_two_side = ctx->Stencil._TestTwoSide; + + if (key->stencil) { + key->stencil_func[0] = ctx->Stencil.Function[0]; + key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; + key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; + key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; + key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; + key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; + } + if (key->stencil_two_side) { + key->stencil_func[1] = ctx->Stencil.Function[back]; + key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; + key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; + key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; + key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; + key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; + } + + key->depth_test = ctx->Depth.Test; + if (key->depth_test) { + key->depth_func = ctx->Depth.Func; + key->depth_write = ctx->Depth.Mask; + } +} + +/** + * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key. + */ +static dri_bo * +depth_stencil_state_create_from_key(struct brw_context *brw, + struct brw_depth_stencil_state_key *key) +{ + struct gen6_depth_stencil_state ds; + dri_bo *bo; + + memset(&ds, 0, sizeof(ds)); + + /* _NEW_STENCIL */ + if (key->stencil) { + ds.ds0.stencil_enable = 1; + ds.ds0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + ds.ds0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + ds.ds0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + ds.ds0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + ds.ds1.stencil_write_mask = key->stencil_write_mask[0]; + ds.ds1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + ds.ds0.bf_stencil_enable = 1; + ds.ds0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + ds.ds0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + ds.ds0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + ds.ds0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1]; + ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1]; + } + + /* Not really sure about this: + */ + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) + ds.ds0.stencil_write_enable = 1; + } + + /* _NEW_DEPTH */ + if (key->depth_test) { + ds.ds2.depth_test_enable = 1; + ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func); + ds.ds2.depth_write_enable = key->depth_write; + } + + bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE, + key, sizeof(*key), + NULL, 0, + &ds, sizeof(ds)); + + return bo; +} + +static void +prepare_depth_stencil_state(struct brw_context *brw) +{ + struct brw_depth_stencil_state_key key; + + depth_stencil_state_populate_key(brw, &key); + + dri_bo_unreference(brw->cc.depth_stencil_state_bo); + brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache, + BRW_DEPTH_STENCIL_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.depth_stencil_state_bo == NULL) + brw->cc.depth_stencil_state_bo = + depth_stencil_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_depth_stencil_state = { + .dirty = { + .mesa = _NEW_DEPTH | _NEW_STENCIL, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_depth_stencil_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c new file mode 100644 index 0000000000..161e7b85c2 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -0,0 +1,91 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_gs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + /* Disable all the constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); + + if (brw->gs.prog_bo) { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(GEN6_GS_ENABLE); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_BATCH(0); /* prog_bo */ + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + +const struct brw_tracked_state gen6_gs_state = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE | + BRW_NEW_CONTEXT), + .cache = CACHE_NEW_GS_PROG + }, + .emit = upload_gs_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c new file mode 100644 index 0000000000..ab8e7516d2 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -0,0 +1,71 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_sampler_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 | + VS_SAMPLER_STATE_CHANGE | + GS_SAMPLER_STATE_CHANGE | + PS_SAMPLER_STATE_CHANGE | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + if (brw->wm.sampler_bo) + OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + else + OUT_BATCH(0); + + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void +prepare_sampler_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->wm.sampler_bo); +} + +const struct brw_tracked_state gen6_sampler_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SAMPLER + }, + .prepare = prepare_sampler_state_pointers, + .emit = upload_sampler_state_pointers, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c new file mode 100644 index 0000000000..2e21e5f733 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -0,0 +1,105 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +prepare_scissor_state(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + struct gen6_scissor_state scissor; + + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ + + /* The scissor only needs to handle the intersection of drawable and + * scissor rect. Clipping to the boundaries of static shared buffers + * for front/back/depth is covered by looping over cliprects in brw_draw.c. + * + * Note that the hardware's coordinates are inclusive, while Mesa's min is + * inclusive but max is exclusive. + */ + if (render_to_fbo) { + /* texmemory: Y=0=bottom */ + scissor.xmin = ctx->DrawBuffer->_Xmin; + scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + scissor.ymin = ctx->DrawBuffer->_Ymin; + scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + } + else { + /* memory: Y=0=top */ + scissor.xmin = ctx->DrawBuffer->_Xmin; + scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; + scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + } + + drm_intel_bo_unreference(brw->sf.state_bo); + brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT, + &scissor, sizeof(scissor), + NULL, 0); +} + +const struct brw_tracked_state gen6_scissor_state = { + .dirty = { + .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_scissor_state, +}; + +static void upload_scissor_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); + OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void prepare_scissor_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->sf.state_bo); +} + +const struct brw_tracked_state gen6_scissor_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SF_UNIT + }, + .prepare = prepare_scissor_state_pointers, + .emit = upload_scissor_state_pointers, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c new file mode 100644 index 0000000000..8d96b44f1d --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -0,0 +1,187 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "main/macros.h" +#include "intel_batchbuffer.h" + +static uint32_t +get_attr_override(struct brw_context *brw, int attr) +{ + uint32_t attr_override; + int attr_index = 0, i; + + /* Find the source index (0 = first attribute after the 4D position) + * for this output attribute. attr is currently a VERT_RESULT_* but should + * be FRAG_ATTRIB_*. + */ + for (i = 0; i < attr; i++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i)) + attr_index++; + } + attr_override = attr_index; + + return attr_index; +} + +static void +upload_sf_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + /* CACHE_NEW_VS_PROG */ + uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); + /* This should probably be FS inputs read */ + uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written); + uint32_t dw1, dw2, dw3, dw4; + int i; + /* _NEW_BUFFER */ + GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + int attr = 0; + + dw1 = + num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | + (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | + GEN6_SF_STATISTICS_ENABLE; + dw3 = 0; + dw4 = 0; + + /* _NEW_POLYGON */ + if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) + dw2 |= GEN6_SF_WINDING_CCW; + + /* _NEW_SCISSOR */ + if (ctx->Scissor.Enabled) + dw3 |= GEN6_SF_SCISSOR_ENABLE; + + /* _NEW_POLYGON */ + if (ctx->Polygon.CullFlag) { + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: + dw3 |= GEN6_SF_CULL_BOTH; + break; + case GL_BACK: + dw3 |= GEN6_SF_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + dw3 |= GEN6_SF_CULL_BOTH; + break; + default: + assert(0); + break; + } + } else { + dw3 |= GEN6_SF_CULL_NONE; + } + + /* _NEW_LINE */ + dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) << + GEN6_SF_LINE_WIDTH_SHIFT; + if (ctx->Line.SmoothFlag) { + dw3 |= GEN6_SF_LINE_AA_ENABLE; + dw3 |= GEN6_SF_LINE_AA_MODE_TRUE; + dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; + } + + /* _NEW_POINT */ + if (ctx->Point._Attenuated) + dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH; + + dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) << + GEN6_SF_POINT_WIDTH_SHIFT; + if (render_to_fbo) + dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { + dw4 |= + (2 << GEN6_SF_TRI_PROVOKE_SHIFT) | + (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) | + (1 << GEN6_SF_LINE_PROVOKE_SHIFT); + } else { + dw4 |= + (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); + } + + BEGIN_BATCH(20); + OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(dw2); + OUT_BATCH(dw3); + OUT_BATCH(dw4); + OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */ + OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ + OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */ + for (i = 0; i < 8; i++) { + uint32_t attr_overrides = 0; + + /* These should be generating FS inputs read instead of VS + * outputs written + */ + for (; attr < 64; attr++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + attr_overrides |= get_attr_override(brw, attr); + attr++; + break; + } + } + + for (; attr < 64; attr++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + attr_overrides |= get_attr_override(brw, attr) << 16; + attr++; + break; + } + } + OUT_BATCH(attr_overrides); + } + OUT_BATCH(0); /* point sprite texcoord bitmask */ + OUT_BATCH(0); /* constant interp bitmask */ + OUT_BATCH(0); /* wrapshortest enables 0-7 */ + OUT_BATCH(0); /* wrapshortest enables 8-15 */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_sf_state = { + .dirty = { + .mesa = (_NEW_LIGHT | + _NEW_POLYGON | + _NEW_LINE | + _NEW_SCISSOR | + _NEW_BUFFERS), + .brw = BRW_NEW_CONTEXT, + .cache = CACHE_NEW_VS_PROG + }, + .emit = upload_sf_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c new file mode 100644 index 0000000000..5445e4035a --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "main/macros.h" +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +static void +prepare_urb( struct brw_context *brw ) +{ + brw->urb.nr_vs_entries = 24; + if (brw->gs.prog_bo) + brw->urb.nr_gs_entries = 4; + else + brw->urb.nr_gs_entries = 0; + /* CACHE_NEW_VS_PROG */ + brw->urb.vs_size = MIN2(brw->vs.prog_data->urb_entry_size, 1); + + /* Check that the number of URB rows (8 floats each) allocated is less + * than the URB space. + */ + assert((brw->urb.nr_vs_entries + + brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024); +} + +static void +upload_urb(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + assert(brw->urb.nr_vs_entries % 4 == 0); + assert(brw->urb.nr_gs_entries % 4 == 0); + /* GS requirement */ + assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); + + intel_batchbuffer_emit_mi_flush(intel->batch); + + BEGIN_BATCH(3); + OUT_BATCH(CMD_URB << 16 | (3 - 2)); + OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | + ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); + OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | + ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT)); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_urb = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = CACHE_NEW_VS_PROG, + }, + .prepare = prepare_urb, + .emit = upload_urb, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c new file mode 100644 index 0000000000..0c2aa4206c --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -0,0 +1,173 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" +#include "main/macros.h" + +/* The clip VP defines the guardband region where expensive clipping is skipped + * and fragments are allowed to be generated and clipped out cheaply by the SF. + * + * By setting it to NDC bounds of [-1,1], we don't do GB clipping. It's + * supposed to cause seams to become visible in apps due to shared edges taking + * different clip/no clip paths depending on whether the rest of the prim ends + * up in the guardband or not. + */ +static void +prepare_clip_vp(struct brw_context *brw) +{ + struct brw_clipper_viewport vp; + + vp.xmin = -1.0; + vp.xmax = 1.0; + vp.ymin = -1.0; + vp.ymax = 1.0; + + drm_intel_bo_unreference(brw->clip.vp_bo); + brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP, + &vp, sizeof(vp), + NULL, 0); +} + +const struct brw_tracked_state gen6_clip_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */ + .brw = 0, + .cache = 0, + }, + .prepare = prepare_clip_vp, +}; + +static void +prepare_sf_vp(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + struct brw_sf_viewport sfv; + GLfloat y_scale, y_bias; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; + + memset(&sfv, 0, sizeof(sfv)); + + /* _NEW_BUFFERS */ + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = ctx->DrawBuffer->Height; + } + + /* _NEW_VIEWPORT */ + sfv.viewport.m00 = v[MAT_SX]; + sfv.viewport.m11 = v[MAT_SY] * y_scale; + sfv.viewport.m22 = v[MAT_SZ] * depth_scale; + sfv.viewport.m30 = v[MAT_TX]; + sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv.viewport.m32 = v[MAT_TZ] * depth_scale; + + drm_intel_bo_unreference(brw->sf.vp_bo); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, + &sfv, sizeof(sfv), + NULL, 0); +} + +const struct brw_tracked_state gen6_sf_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_BUFFERS, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_sf_vp, +}; + +static void +prepare_cc_vp(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_cc_viewport ccv; + + /* _NEW_TRANSOFORM */ + if (ctx->Transform.DepthClamp) { + /* _NEW_VIEWPORT */ + ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + } else { + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + } + + drm_intel_bo_unreference(brw->cc.vp_bo); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), + NULL, 0); +} + +const struct brw_tracked_state gen6_cc_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_cc_vp, +}; + +static void prepare_viewport_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->sf.state_bo); +} + +static void upload_viewport_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | + GEN6_CC_VIEWPORT_MODIFY | + GEN6_SF_VIEWPORT_MODIFY | + GEN6_CLIP_VIEWPORT_MODIFY); + OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_viewport_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = (CACHE_NEW_CLIP_VP | + CACHE_NEW_SF_VP | + CACHE_NEW_CC_VP) + }, + .prepare = prepare_viewport_state_pointers, + .emit = upload_viewport_state_pointers, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c new file mode 100644 index 0000000000..fe597dfb94 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -0,0 +1,119 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "intel_batchbuffer.h" + +static void +upload_vs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + unsigned int nr_params = vp->program.Base.Parameters->NumParameters; + drm_intel_bo *constant_bo; + int i; + + if (vp->use_const_buffer || nr_params == 0) { + /* Disable the push constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + + constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", + nr_params * 4 * sizeof(float), + 4096); + drm_intel_gem_bo_map_gtt(constant_bo); + for (i = 0; i < nr_params; i++) { + memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + drm_intel_gem_bo_unmap_gtt(constant_bo); + + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | + GEN6_CONSTANT_BUFFER_0_ENABLE | + (5 - 2)); + OUT_RELOC(constant_bo, + I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ + ALIGN(nr_params, 2) / 2 - 1); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + drm_intel_bo_unreference(constant_bo); + } + + intel_batchbuffer_emit_mi_flush(intel->batch); + + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); + OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | + (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | + (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) | + GEN6_VS_STATISTICS_ENABLE); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_vs_state = { + .dirty = { + .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | + BRW_NEW_URB_FENCE | + BRW_NEW_CONTEXT), + .cache = CACHE_NEW_VS_PROG + }, + .emit = upload_vs_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c new file mode 100644 index 0000000000..1eb17ca627 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "intel_batchbuffer.h" + +static void +upload_wm_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + unsigned int nr_params = fp->program.Base.Parameters->NumParameters; + drm_intel_bo *constant_bo; + int i; + uint32_t dw2, dw4, dw5, dw6; + + if (fp->use_const_buffer || nr_params == 0) { + /* Disable the push constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + + constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo", + nr_params * 4 * sizeof(float), + 4096); + drm_intel_gem_bo_map_gtt(constant_bo); + for (i = 0; i < nr_params; i++) { + memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), + fp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + drm_intel_gem_bo_unmap_gtt(constant_bo); + + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | + GEN6_CONSTANT_BUFFER_0_ENABLE | + (5 - 2)); + OUT_RELOC(constant_bo, + I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ + ALIGN(nr_params, 2) / 2 - 1); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + drm_intel_bo_unreference(constant_bo); + } + + intel_batchbuffer_emit_mi_flush(intel->batch); + + dw2 = dw4 = dw5 = dw6 = 0; + dw4 |= GEN6_WM_STATISTICS_ENABLE; + dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; + dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; + + /* BRW_NEW_NR_SURFACES */ + dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT; + + /* CACHE_NEW_SAMPLER */ + dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; + dw4 |= (1 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); + + dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; + dw5 |= GEN6_WM_DISPATCH_ENABLE; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (fp->isGLSL) + dw5 |= GEN6_WM_8_DISPATCH_ENABLE; + else + dw5 |= GEN6_WM_16_DISPATCH_ENABLE; + + /* _NEW_LINE */ + if (ctx->Line.StippleFlag) + dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; + + /* _NEW_POLYGONSTIPPLE */ + if (ctx->Polygon.StippleFlag) + dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) + dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W; + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + dw5 |= GEN6_WM_COMPUTED_DEPTH; + + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + dw5 |= GEN6_WM_KILL_ENABLE; + + /* This should probably be FS inputs read */ + dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) << + GEN6_WM_NUM_SF_OUTPUTS_SHIFT; + + BEGIN_BATCH(9); + OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2)); + OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(dw2); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH(dw4); + OUT_BATCH(dw5); + OUT_BATCH(dw6); + OUT_BATCH(0); /* kernel 1 pointer */ + OUT_BATCH(0); /* kernel 2 pointer */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_wm_state = { + .dirty = { + .mesa = _NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_NR_WM_SURFACES | + BRW_NEW_URB_FENCE | + BRW_NEW_BATCH), + .cache = CACHE_NEW_SAMPLER + }, + .emit = upload_wm_state, +}; diff --git a/src/mesa/drivers/dri/i965/server/intel_dri.c b/src/mesa/drivers/dri/i965/server/intel_dri.c deleted file mode 120000 index effdd26448..0000000000 --- a/src/mesa/drivers/dri/i965/server/intel_dri.c +++ /dev/null @@ -1 +0,0 @@ -../../intel/server/intel_dri.c
\ No newline at end of file |