diff options
author | Zack Rusin <zackr@vmware.com> | 2010-03-15 15:24:38 -0400 |
---|---|---|
committer | Zack Rusin <zackr@vmware.com> | 2010-03-15 15:24:38 -0400 |
commit | 275c4bd3643d773210780cb8d578ca84f2604684 (patch) | |
tree | 8266edc39d4253ac0f2a0ecd41f560f3d815bb5c /src/mesa/drivers/dri/i965 | |
parent | c5c5cd7132e18f4aad8e73d8ee879f8823c4c1e7 (diff) | |
parent | d0b35352ed27b1e66785c45ee95a352ed06b47ce (diff) |
Merge remote branch 'origin/master' into gallium_draw_llvm
Diffstat (limited to 'src/mesa/drivers/dri/i965')
39 files changed, 2800 insertions, 482 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 7758a792fd..842d4b7aa1 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -54,6 +54,7 @@ DRIVER_SOURCES = \ brw_gs_emit.c \ brw_gs_state.c \ brw_misc_state.c \ + brw_optimize.c \ brw_program.c \ brw_queryobj.c \ brw_sf.c \ @@ -84,11 +85,21 @@ DRIVER_SOURCES = \ brw_wm_pass2.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ - brw_wm_surface_state.c + brw_wm_surface_state.c \ + gen6_cc.c \ + gen6_clip_state.c \ + gen6_depthstencil.c \ + gen6_gs_state.c \ + gen6_sampler_state.c \ + gen6_scissor_state.c \ + gen6_sf_state.c \ + gen6_urb.c \ + gen6_viewport_state.c \ + gen6_vs_state.c \ + gen6_wm_state.c C_SOURCES = \ $(COMMON_SOURCES) \ - $(MINIGLX_SOURCES) \ $(DRIVER_SOURCES) ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 2ca29b7ae1..241193c357 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -150,12 +150,13 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, ctx->Const.FragmentProgram.MaxEnvParams); - if (intel->is_ironlake || intel->is_g4x) { + if (intel->is_ironlake || intel->is_g4x || intel->gen >= 6) { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; brw->has_surface_tile_offset = GL_TRUE; brw->has_compr4 = GL_TRUE; brw->has_aa_line_parameters = GL_TRUE; + brw->has_pln = GL_TRUE; } else { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; @@ -170,7 +171,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->urb.size = 384; brw->vs_max_threads = 32; brw->wm_max_threads = 10 * 5; - } else { + } else if (intel->gen < 6) { brw->urb.size = 256; brw->vs_max_threads = 16; brw->wm_max_threads = 8 * 4; diff --git 
a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 21c4cd38a7..2855c93ea6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -282,6 +282,9 @@ struct brw_vs_ouput_sizes { enum brw_cache_id { + BRW_BLEND_STATE, + BRW_DEPTH_STENCIL_STATE, + BRW_COLOR_CALC_STATE, BRW_CC_VP, BRW_CC_UNIT, BRW_WM_PROG, @@ -290,7 +293,7 @@ enum brw_cache_id { BRW_WM_UNIT, BRW_SF_PROG, BRW_SF_VP, - BRW_SF_UNIT, + BRW_SF_UNIT, /* scissor state on gen6 */ BRW_VS_UNIT, BRW_VS_PROG, BRW_GS_UNIT, @@ -354,6 +357,9 @@ struct brw_tracked_state { /* Flags for brw->state.cache. */ +#define CACHE_NEW_BLEND_STATE (1<<BRW_BLEND_STATE) +#define CACHE_NEW_DEPTH_STENCIL_STATE (1<<BRW_DEPTH_STENCIL_STATE) +#define CACHE_NEW_COLOR_CALC_STATE (1<<BRW_COLOR_CALC_STATE) #define CACHE_NEW_CC_VP (1<<BRW_CC_VP) #define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) #define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) @@ -440,6 +446,7 @@ struct brw_context GLboolean has_compr4; GLboolean has_negative_rhw_bug; GLboolean has_aa_line_parameters; + GLboolean has_pln; ; struct { struct brw_state_flags dirty; @@ -538,7 +545,8 @@ struct brw_context GLuint nr_sf_entries; GLuint nr_cs_entries; -/* GLuint vs_size; */ + /* gen6 */ + GLuint vs_size; /* GLuint gs_size; */ /* GLuint clip_size; */ /* GLuint sf_size; */ @@ -643,9 +651,16 @@ struct brw_context struct { + /* gen4 */ dri_bo *prog_bo; - dri_bo *state_bo; dri_bo *vp_bo; + + /* gen6 */ + dri_bo *blend_state_bo; + dri_bo *depth_stencil_state_bo; + dri_bo *color_calc_state_bo; + + dri_bo *state_bo; } cc; struct { diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 6f2ead793d..4e78b08cfe 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -179,7 +179,6 @@ static GLfloat fixed_plane[6][4] = { */ static void prepare_constant_buffer(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; GLcontext *ctx = 
&brw->intel.ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); @@ -307,7 +306,7 @@ static void prepare_constant_buffer(struct brw_context *brw) if (brw->curbe.curbe_bo != NULL && brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) { - intel_bo_unmap_gtt_preferred(intel, brw->curbe.curbe_bo); + drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); dri_bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } @@ -319,7 +318,7 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", 4096, 1 << 6); brw->curbe.curbe_next_offset = 0; - intel_bo_map_gtt_preferred(intel, brw->curbe.curbe_bo, GL_TRUE); + drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo); } brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index ea0d7e05d4..984e56d00c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -530,6 +530,7 @@ #define BRW_OPCODE_POP 47 #define BRW_OPCODE_WAIT 48 #define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_MATH 56 #define BRW_OPCODE_ADD 64 #define BRW_OPCODE_MUL 65 #define BRW_OPCODE_AVG 66 @@ -549,6 +550,7 @@ #define BRW_OPCODE_DP2 87 #define BRW_OPCODE_DPA2 88 #define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_PLN 90 #define BRW_OPCODE_NOP 126 #define BRW_PREDICATE_NONE 0 @@ -727,7 +729,8 @@ #define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ #define BRW_MATH_FUNCTION_COS 7 /* was 8 */ #define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ -#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */ +#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ #define BRW_MATH_FUNCTION_POW 10 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 @@ -778,17 +781,33 @@ #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 +# define 
GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) +# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10) + +#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +# define PS_SAMPLER_STATE_CHANGE (1 << 12) +# define GS_SAMPLER_STATE_CHANGE (1 << 9) +# define VS_SAMPLER_STATE_CHANGE (1 << 8) +/* DW1: VS */ +/* DW2: GS */ +/* DW3: PS */ #define CMD_VERTEX_BUFFER 0x7808 # define BRW_VB0_INDEX_SHIFT 27 +# define GEN6_VB0_INDEX_SHIFT 26 # define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) # define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) +# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) # define BRW_VB0_PITCH_SHIFT 0 #define CMD_VERTEX_ELEMENT 0x7809 # define BRW_VE0_INDEX_SHIFT 27 +# define GEN6_VE0_INDEX_SHIFT 26 # define BRW_VE0_FORMAT_SHIFT 16 # define BRW_VE0_VALID (1 << 26) +# define GEN6_VE0_VALID (1 << 25) # define BRW_VE0_SRC_OFFSET_SHIFT 0 # define BRW_VE1_COMPONENT_NOSTORE 0 # define BRW_VE1_COMPONENT_STORE_SRC 1 @@ -805,8 +824,219 @@ # define BRW_VE1_DST_OFFSET_SHIFT 0 #define CMD_INDEX_BUFFER 0x780a -#define CMD_VF_STATISTICS_965 0x780b +#define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b +#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ + +#define CMD_URB 0x7805 /* GEN6+ */ +# define GEN6_URB_VS_SIZE_SHIFT 16 +# define GEN6_URB_VS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_SIZE_SHIFT 8 +# define GEN6_URB_GS_ENTRIES_SHIFT 0 + +#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) +# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) +# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) + +#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ + +#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +/* DW2 */ +# define GEN6_VS_SPF_MODE (1 << 31) +# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 +# define 
GEN6_VS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW5 */ +# define GEN6_VS_MAX_THREADS_SHIFT 25 +# define GEN6_VS_STATISTICS_ENABLE (1 << 10) +# define GEN6_VS_CACHE_DISABLE (1 << 1) +# define GEN6_VS_ENABLE (1 << 0) + +#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +/* DW2 */ +# define GEN6_GS_SPF_MODE (1 << 31) +# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 +# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 +# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 +# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 +/* DW5 */ +# define GEN6_GS_MAX_THREADS_SHIFT 25 +# define GEN6_GS_STATISTICS_ENABLE (1 << 10) +# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) +# define GEN6_GS_RENDERING_ENABLE (1 << 8) +/* DW6 */ +# define GEN6_GS_ENABLE (1 << 15) + +#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +/* DW1 */ +# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/* DW2 */ +# define GEN6_CLIP_ENABLE (1 << 31) +# define GEN6_CLIP_API_OGL (0 << 30) +# define GEN6_CLIP_API_D3D (1 << 30) +# define GEN6_CLIP_XY_TEST (1 << 28) +# define GEN6_CLIP_Z_TEST (1 << 27) +# define GEN6_CLIP_GB_TEST (1 << 26) +# define GEN6_CLIP_MODE_NORMAL (0 << 13) +# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) +# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) +# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) +# define GEN6_CLIP_BARYCENTRIC_ENABLE (1 << 8) +# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 +# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 +# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 +/* DW3 */ +# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 +# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 + +#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +/* DW1 */ +# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) +# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +# define 
GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) +# define GEN6_SF_STATISTICS_ENABLE (1 << 10) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) +# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) +# define GEN6_SF_FRONT_SOLID (0 << 5) +# define GEN6_SF_FRONT_WIREFRAME (1 << 5) +# define GEN6_SF_FRONT_POINT (2 << 5) +# define GEN6_SF_BACK_SOLID (0 << 3) +# define GEN6_SF_BACK_WIREFRAME (1 << 3) +# define GEN6_SF_BACK_POINT (2 << 3) +# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) +# define GEN6_SF_WINDING_CCW (1 << 0) +/* DW3 */ +# define GEN6_SF_LINE_AA_ENABLE (1 << 31) +# define GEN6_SF_CULL_BOTH (0 << 29) +# define GEN6_SF_CULL_NONE (1 << 29) +# define GEN6_SF_CULL_FRONT (2 << 29) +# define GEN6_SF_CULL_BACK (3 << 29) +# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ +# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) +# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) +# define GEN6_SF_SCISSOR_ENABLE (1 << 11) +# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) +# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) +# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) +# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) +/* DW4 */ +# define GEN6_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 +# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) +# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) +# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) +# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) +# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) +# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ +/* DW5: depth offset constant */ +/* DW6: depth offset scale */ +/* DW7: depth offset clamp */ +/* DW8 */ +# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) +# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) +# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) +# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) +# define 
ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 +# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 +# define ATTRIBUTE_1_SOURCE_SHIFT 16 +# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) +# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) +# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) +# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) +# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 +# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 +# define ATTRIBUTE_0_SOURCE_SHIFT 0 +/* DW16: Point sprite texture coordinate enables */ +/* DW17: Constant interpolation enables */ +/* DW18: attr 0-7 wrap shortest enables */ +/* DW19: attr 8-16 wrap shortest enables */ + +#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN6_WM_SPF_MODE (1 << 31) +# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) +# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW3: scratch space */ +/* DW4 */ +# define GEN6_WM_STATISTICS_ENABLE (1 << 31) +# define GEN6_WM_DEPTH_CLEAR (1 << 30) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 +/* DW5 */ +# define GEN6_WM_MAX_THREADS_SHIFT 25 +# define GEN6_WM_KILL_ENABLE (1 << 22) +# define GEN6_WM_COMPUTED_DEPTH (1 << 21) +# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN6_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) +# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) +# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) +# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) +# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) +# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) +# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) +# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 12) +# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) +# define 
GEN6_WM_USES_SOURCE_W (1 << 8) +# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) +# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_WM_POSOFFSET_NONE (0 << 18) +# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) +# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) +# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) +# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) +# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) +# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) +# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) +# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) +# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) +# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) +# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) +# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) +/* DW7: kernel 1 pointer */ +/* DW8: kernel 2 pointer */ + +#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ +#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ +#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) +# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) +# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) +# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) + +#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -818,6 +1048,25 @@ #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 #define CMD_AA_LINE_PARAMETERS 0x790a +#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +/* DW1 */ +# define SVB_INDEX_SHIFT 29 +# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ +/* DW2: SVB index 
*/ +/* DW3: SVB maximum index */ + +#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +/* DW1 */ +# define MS_PIXEL_LOCATION_CENTER (0 << 4) +# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define MS_NUMSAMPLES_1 (0 << 1) +# define MS_NUMSAMPLES_4 (2 << 1) +# define MS_NUMSAMPLES_8 (3 << 1) + +#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +# define DEPTH_CLEAR_VALID (1 << 15) +/* DW1: depth clear value */ + #define CMD_PIPE_CONTROL 0x7a00 #define CMD_3D_PRIM 0x7b00 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index a8f6b993ac..ad61770212 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -50,6 +50,7 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, @@ -73,10 +74,10 @@ struct { [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, - [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, 
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 106454de4a..71a43577bf 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -276,7 +276,6 @@ copy_array_to_vbo_array( struct brw_context *brw, struct brw_vertex_element *element, GLuint dst_stride) { - struct intel_context *intel = &brw->intel; GLuint size = element->count * dst_stride; get_space(brw, size, &element->bo, &element->offset); @@ -289,52 +288,26 @@ copy_array_to_vbo_array( struct brw_context *brw, } if (dst_stride == element->glarray->StrideB) { - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - memcpy((char *)element->bo->virtual + element->offset, - element->glarray->Ptr, size); - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); - } + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char *)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); } else { char *dest; const unsigned char *src = element->glarray->Ptr; int i; - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - dest = element->bo->virtual; - dest += element->offset; - - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - void *data; - - data = malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; - dri_bo_subdata(element->bo, - element->offset, - size, - data); - - free(data); + for (i = 0; i < element->count; i++) { + memcpy(dest, src, 
dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; } + + drm_intel_gem_bo_unmap_gtt(element->bo); } } @@ -503,10 +476,17 @@ static void brw_emit_vertices(struct brw_context *brw) if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); - OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + if (IS_GEN6(intel->intelScreen->deviceID)) { + OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } else { + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -527,14 +507,22 @@ static void brw_emit_vertices(struct brw_context *brw) for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t dw0; + + if (intel->gen >= 6) { + dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (i << GEN6_VB0_INDEX_SHIFT); + } else { + dw0 = BRW_VB0_ACCESS_VERTEXDATA | + (i << BRW_VB0_INDEX_SHIFT); + } - OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | - BRW_VB0_ACCESS_VERTEXDATA | + OUT_BATCH(dw0 | (input->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->offset); - if (intel->is_ironlake) { + if (intel->is_ironlake || intel->gen >= 6) { OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->bo->size - 1); @@ -565,12 +553,19 @@ static void brw_emit_vertices(struct brw_context *brw) break; } - OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (format << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + if (IS_GEN6(intel->intelScreen->deviceID)) { + 
OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) | + GEN6_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } else { + OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + } - if (intel->is_ironlake) + if (intel->is_ironlake || intel->gen >= 6) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | @@ -624,13 +619,9 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); - } else { - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - } + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); } else { offset = (GLuint) (unsigned long) index_buffer->ptr; brw->ib.start_vertex_offset = 0; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 39eb88d7c2..4f55158e8f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -795,6 +795,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) +ALU2(PLN) #undef ALU1 #undef ALU2 @@ -965,4 +966,9 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); + + +/* brw_optimize.c */ +void brw_optimize(struct brw_compile *p); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index b832c7165d..d2395dec28 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -102,8 +102,6 @@ static void brw_set_dest( struct brw_instruction *insn, static void brw_set_src0( struct brw_instruction *insn, struct brw_reg reg ) { - assert(reg.file != 
BRW_MESSAGE_REGISTER_FILE); - if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -323,7 +321,7 @@ static void brw_set_urb_message( struct brw_context *brw, struct intel_context *intel = &brw->intel; brw_set_src1(insn, brw_imm_d(0)); - if (intel->is_ironlake) { + if (intel->is_ironlake || intel->gen >= 6) { insn->bits3.urb_igdng.opcode = 0; /* ? */ insn->bits3.urb_igdng.offset = offset; insn->bits3.urb_igdng.swizzle_control = swizzle_control; @@ -334,8 +332,16 @@ static void brw_set_urb_message( struct brw_context *brw, insn->bits3.urb_igdng.response_length = response_length; insn->bits3.urb_igdng.msg_length = msg_length; insn->bits3.urb_igdng.end_of_thread = end_of_thread; - insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; - insn->bits2.send_igdng.end_of_thread = end_of_thread; + if (intel->gen >= 6) { + /* For SNB, the SFID bits moved to the condmod bits, and + * EOT stayed in bits3 above. Does the EOT bit setting + * below on Ironlake even do anything? + */ + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; + } else { + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } } else { insn->bits3.urb.opcode = 0; /* ? */ insn->bits3.urb.offset = offset; @@ -567,7 +573,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) - +ALU2(PLN) @@ -917,26 +923,40 @@ void brw_math( struct brw_compile *p, GLuint data_type, GLuint precision ) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; - GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + struct intel_context *intel = &p->brw->intel; - /* Example code doesn't set predicate_control for send - * instructions. 
- */ - insn->header.predicate_control = 0; - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen >= 6) { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(p->brw, - insn, - msg_length, response_length, - function, - BRW_MATH_INTEGER_UNSIGNED, - precision, - saturate, - data_type); + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_src1(insn, brw_null_reg()); + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); + } } /** @@ -1270,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p, GLuint simd_mode) { GLboolean need_stall = 0; - + if (writemask == 0) { /*printf("%s: zero writemask??\n", __FUNCTION__); */ return; @@ -1307,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p, /* printf("need stall %x %x\n", newmask , writemask); */ } else { + GLboolean dispatch_16 = GL_FALSE; + struct brw_reg m1 = brw_message_reg(msg_reg_nr); - + + guess_execution_size(p->current, dest); + if (p->current->header.execution_size == BRW_EXECUTE_16) + dispatch_16 = GL_TRUE; + newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); @@ -1323,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p, src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = 
offset(dest, dst_offset); - response_length = len * 2; + + /* For 16-wide dispatch, masked channels are skipped in the + * response. For 8-wide, masked channels still take up slots, + * and are just not written to. + */ + if (dispatch_16) + response_length = len * 2; } } @@ -1377,7 +1409,18 @@ void brw_urb_WRITE(struct brw_compile *p, GLuint offset, GLuint swizzle) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); assert(msg_length < BRW_MAX_MRF); @@ -1385,7 +1428,8 @@ void brw_urb_WRITE(struct brw_compile *p, brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_urb_message(p->brw, insn, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index f708ee0063..d030ed41f4 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -136,6 +136,41 @@ const struct brw_tracked_state brw_binding_table_pointers = { .emit = upload_binding_table_pointers, }; +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is 0. 
+ */ +static void upload_gen6_binding_table_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | + GEN6_BINDING_TABLE_MODIFY_VS | + GEN6_BINDING_TABLE_MODIFY_GS | + GEN6_BINDING_TABLE_MODIFY_PS | + (4 - 2)); + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); + OUT_BATCH(0); /* gs */ + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + ADVANCE_BATCH(); +} + +const struct brw_tracked_state gen6_binding_table_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SURF_BIND, + }, + .prepare = prepare_binding_table_pointers, + .emit = upload_gen6_binding_table_pointers, +}; /** * Upload pointers to the per-stage state. @@ -209,7 +244,14 @@ static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - unsigned int len = (intel->is_g4x || intel->is_ironlake) ? 
6 : 5; + unsigned int len; + + if (intel->gen >= 6) + len = 7; + else if (intel->is_g4x || intel->is_ironlake) + len = 6; + else + len = 5; if (region == NULL) { BEGIN_BATCH(len); @@ -220,9 +262,12 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (intel->is_g4x || intel->is_ironlake) + if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6) OUT_BATCH(0); + if (intel->gen >= 6) + OUT_BATCH(0); + ADVANCE_BATCH(); } else { unsigned int format; @@ -243,6 +288,8 @@ static void emit_depthbuffer(struct brw_context *brw) } assert(region->tiling != I915_TILING_X); + if (IS_GEN6(intel->intelScreen->deviceID)) + assert(region->tiling != I915_TILING_NONE); BEGIN_BATCH(len); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); @@ -259,9 +306,20 @@ static void emit_depthbuffer(struct brw_context *brw) ((region->height - 1) << 19)); OUT_BATCH(0); - if (intel->is_g4x || intel->is_ironlake) + if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6) OUT_BATCH(0); + if (intel->gen >= 6) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } + + /* Initialize it for safety. 
*/ + if (intel->gen >= 6) { + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); ADVANCE_BATCH(); } } @@ -435,6 +493,8 @@ const struct brw_tracked_state brw_line_stipple = { static void upload_invarient_state( struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; + { /* 0x61040000 Pipeline Select */ /* PipelineSelect : 0 */ @@ -446,7 +506,7 @@ static void upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &ps); } - { + if (intel->gen < 6) { struct brw_global_depth_offset_clamp gdo; memset(&gdo, 0, sizeof(gdo)); @@ -459,6 +519,32 @@ static void upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &gdo); } + intel_batchbuffer_emit_mi_flush(intel->batch); + + if (intel->gen >= 6) { + int i; + + BEGIN_BATCH(3); + OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + MS_NUMSAMPLES_1); + OUT_BATCH(0); /* positions for 4/8-sample */ + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(1); + ADVANCE_BATCH(); + + for (i = 0; i < 4; i++) { + BEGIN_BATCH(4); + OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(i << SVB_INDEX_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0xffffffff); + ADVANCE_BATCH(); + } + } /* 0x61020000 State Instruction Pointer */ { @@ -509,7 +595,20 @@ static void upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. 
*/ - if (intel->is_ironlake) { + if (intel->gen >= 6) { + BEGIN_BATCH(10); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Dynamic state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Dynamic state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else if (intel->is_ironlake) { BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c new file mode 100644 index 0000000000..57df9ea115 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_optimize.c @@ -0,0 +1,115 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "main/macros.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + +static GLboolean +is_single_channel_dp4(struct brw_instruction *insn) +{ + if (insn->header.opcode != BRW_OPCODE_DP4 || + insn->header.execution_size != BRW_EXECUTE_8 || + insn->header.access_mode != BRW_ALIGN_16 || + insn->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + if (!is_power_of_two(insn->bits1.da16.dest_writemask)) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Sets the dependency control fields on DP4 instructions. + * + * The hardware only tracks dependencies on a register basis, so when + * you do: + * + * DP4 dst.x src1 src2 + * DP4 dst.y src1 src3 + * DP4 dst.z src1 src4 + * DP4 dst.w src1 src5 + * + * It will wait to do the DP4 dst.y until the dst.x is resolved, etc. + * We can examine our instruction stream and set the dependency + * control fields to tell the hardware when to do it. + * + * We may want to extend this to other instructions that are used to + * fill in a channel at a time of the destination register. + */ +static void +brw_set_dp4_dependency_control(struct brw_compile *p) +{ + int i; + + for (i = 1; i < p->nr_insn; i++) { + struct brw_instruction *insn = &p->store[i]; + struct brw_instruction *prev = &p->store[i - 1]; + + if (!is_single_channel_dp4(prev)) + continue; + + if (!is_single_channel_dp4(insn)) { + i++; + continue; + } + + /* Only avoid hw dep control if the write masks are different + * channels of one reg. 
+ */ + if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask) + continue; + if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr) + continue; + + /* Check if the second instruction depends on the previous one + * for a src. + */ + if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE && + (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || + insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr)) + continue; + if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE && + (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT || + insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr)) + continue; + + prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED; + insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED; + } +} + +void +brw_optimize(struct brw_compile *p) +{ + brw_set_dp4_dependency_control(p); +} diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index c78f7b38ae..1fd957b3ad 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); - dri_bo_unreference(brw_fprog->const_buffer); + struct gl_fragment_program *fp = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fp = brw_fragment_program(fp); + + dri_bo_unreference(brw_fp->const_buffer); + } + + if (prog->Target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program *vp = (struct gl_vertex_program *) prog; + struct brw_vertex_program *brw_vp = brw_vertex_program(vp); + + dri_bo_unreference(brw_vp->const_buffer); } _mesa_delete_program( ctx, prog ); diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 
8e6839b812..57d1c29ade 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -46,7 +46,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { - GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -69,20 +68,14 @@ static void compile_sf_prog( struct brw_context *brw, /* Construct map from attribute number to position in the vertex. */ - for (i = idx = 0; i < VERT_RESULT_MAX; i++) + for (i = idx = 0; i < VERT_RESULT_MAX; i++) { if (c.key.attrs & BITFIELD64_BIT(i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; - if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { - c.point_attrs[i].CoordReplace = - ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; - } - else { - c.point_attrs[i].CoordReplace = GL_FALSE; - } idx++; } - + } + /* Which primitive? Or all three? */ switch (key->primitive) { @@ -162,6 +155,14 @@ static void upload_sf_prog(struct brw_context *brw) } key.do_point_sprite = ctx->Point.PointSprite; + if (key.do_point_sprite) { + int i; + + for (i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key.point_sprite_coord_replace |= (1 << i); + } + } key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT); /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 0ba731fac9..a0680a56f2 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -46,6 +46,7 @@ struct brw_sf_prog_key { GLbitfield64 attrs; + uint8_t point_sprite_coord_replace; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; @@ -56,10 +57,6 @@ struct brw_sf_prog_key { GLuint pad:24; }; -struct brw_sf_point_tex { - GLboolean CoordReplace; -}; - struct brw_sf_compile { struct brw_compile func; struct brw_sf_prog_key key; @@ -100,7 +97,6 @@ struct brw_sf_compile { GLubyte 
attr_to_idx[VERT_RESULT_MAX]; GLubyte idx_to_attr[VERT_RESULT_MAX]; - struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index bb08055e3b..56f7c986e7 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -354,6 +354,33 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, return is_last_attr; } +/* Calculates the predicate control for which channels of a reg + * (containing 2 attrs) to do point sprite coordinate replacement on. + */ +static uint16_t +calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg) +{ + int attr1, attr2; + uint16_t pc = 0; + + attr1 = c->idx_to_attr[reg * 2]; + if (attr1 >= VERT_RESULT_TEX0 && attr1 <= VERT_RESULT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (attr1 - VERT_RESULT_TEX0))) + pc |= 0x0f; + } + + if (reg * 2 + 1 < c->nr_setup_attrs) { + attr2 = c->idx_to_attr[reg * 2 + 1]; + if (attr2 >= VERT_RESULT_TEX0 && attr2 <= VERT_RESULT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (attr2 - + VERT_RESULT_TEX0))) + pc |= 0xf0; + } + } + + return pc; +} + void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) @@ -529,22 +556,27 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { - struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; struct brw_reg a0 = offset(c->vert[0], i); - GLushort pc, pc_persp, pc_linear; + GLushort pc, pc_persp, pc_linear, pc_coord_replace; GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - if (!tex->CoordReplace) { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } + + pc_coord_replace = calculate_point_sprite_mask(c, i); + pc_persp &= ~pc_coord_replace; + + if (pc_persp) { + brw_set_predicate_control_flag_value(p, pc_persp); + 
brw_MUL(p, a0, a0, c->inv_w[0]); } - if (tex->CoordReplace) { - /* Caculate 1.0/PointWidth */ - brw_math(&c->func, + /* Point sprite coordinate replacement: A texcoord with this + * enabled gets replaced with the value (x, y, 0, 1) where x and + * y vary from 0 to 1 across the horizontal and vertical of the + * point. + */ + if (pc_coord_replace) { + brw_set_predicate_control_flag_value(p, pc_coord_replace); + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, @@ -553,50 +585,51 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } else { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } - } else { - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); - } + brw_set_access_mode(p, BRW_ALIGN_16); - { - brw_set_predicate_control_flag_value(p, pc); - if (tex->CoordReplace) { - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); - brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); - } - else - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + /* dA/dx, dA/dy */ + brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); + brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); + brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); } else { - brw_MOV(p, c->m3C0, a0); /* constant value */ + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); } - /* Copy m0..m3 to URB. 
- */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + /* attribute constant offset */ + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); + } else { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); + } + + brw_set_access_mode(p, BRW_ALIGN_1); } + + if (pc & ~pc_coord_replace) { + brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace); + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + + brw_set_predicate_control_flag_value(p, pc); + /* Copy m0..m3 to URB. */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); } } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 536fe8b249..f790cfabe2 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -90,6 +90,23 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; const struct brw_tracked_state brw_index_buffer; +const struct brw_tracked_state gen6_binding_table_pointers; +const struct brw_tracked_state gen6_blend_state; +const struct brw_tracked_state gen6_cc_state_pointers; +const struct brw_tracked_state gen6_cc_vp; +const struct brw_tracked_state gen6_clip_state; +const struct brw_tracked_state gen6_clip_vp; +const struct brw_tracked_state gen6_color_calc_state; +const struct brw_tracked_state gen6_depth_stencil_state; +const struct 
brw_tracked_state gen6_gs_state; +const struct brw_tracked_state gen6_sampler_state; +const struct brw_tracked_state gen6_scissor_state; +const struct brw_tracked_state gen6_sf_state; +const struct brw_tracked_state gen6_sf_vp; +const struct brw_tracked_state gen6_urb; +const struct brw_tracked_state gen6_viewport_state; +const struct brw_tracked_state gen6_vs_state; +const struct brw_tracked_state gen6_wm_state; /** * Use same key for WM and VS surfaces. diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 4bb98d8d5d..c08cb45b75 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -390,6 +390,7 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); + brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 4f477cfc6b..9e54f29f0f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -35,8 +35,15 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_buffers.h" +#include "intel_chipset.h" -static const struct brw_tracked_state *atoms[] = +/* This is used to initialize brw->state.atoms[]. We could use this + * list directly except for a single atom, brw_constant_buffer, which + * has a .dirty value which changes according to the parameters of the + * current fragment and vertex programs, and so cannot be a static + * value. 
+ */ +static const struct brw_tracked_state *gen4_atoms[] = { &brw_check_fallback, @@ -95,6 +102,63 @@ static const struct brw_tracked_state *atoms[] = &brw_constant_buffer }; +const struct brw_tracked_state *gen6_atoms[] = +{ + &brw_check_fallback, + + &brw_wm_input_sizes, + &brw_vs_prog, + &brw_gs_prog, + &brw_wm_prog, + + &gen6_clip_vp, + &gen6_sf_vp, + &gen6_cc_vp, + + /* Command packets: */ + &brw_invarient_state, + + &gen6_viewport_state, /* must do after *_vp stages */ + + &gen6_urb, + &gen6_blend_state, /* must do before cc unit */ + &gen6_color_calc_state, /* must do before cc unit */ + &gen6_depth_stencil_state, /* must do before cc unit */ + &gen6_cc_state_pointers, + + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ + + &brw_wm_samplers, + &gen6_sampler_state, + + &gen6_vs_state, + &gen6_gs_state, + &gen6_clip_state, + &gen6_sf_state, + &gen6_wm_state, + + &gen6_scissor_state, + + &brw_state_base_address, + + &gen6_binding_table_pointers, + + &brw_depthbuffer, + + &brw_polygon_stipple, + &brw_polygon_stipple_offset, + + &brw_line_stipple, + &brw_aa_line_parameters, + + &brw_drawing_rect, + + &brw_indices, + &brw_index_buffer, + &brw_vertices, +}; void brw_init_state( struct brw_context *brw ) { @@ -211,6 +275,7 @@ static struct dirty_bit_map brw_bits[] = { }; static struct dirty_bit_map cache_bits[] = { + DEFINE_BIT(CACHE_NEW_BLEND_STATE), DEFINE_BIT(CACHE_NEW_CC_VP), DEFINE_BIT(CACHE_NEW_CC_UNIT), DEFINE_BIT(CACHE_NEW_WM_PROG), @@ -270,6 +335,8 @@ void brw_validate_state( struct brw_context *brw ) struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; GLuint i; + const struct brw_tracked_state **atoms; + int num_atoms; brw_clear_validated_bos(brw); @@ -278,6 +345,14 @@ void brw_validate_state( struct brw_context *brw ) brw_add_validated_bo(brw, intel->batch->buf); + if 
(IS_GEN6(intel->intelScreen->deviceID)) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } + if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; @@ -305,7 +380,7 @@ void brw_validate_state( struct brw_context *brw ) brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */ /* do prepare stage for all atoms */ - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) @@ -337,9 +412,20 @@ void brw_validate_state( struct brw_context *brw ) void brw_upload_state(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; int i; static int dirty_count = 0; + const struct brw_tracked_state **atoms; + int num_atoms; + + if (IS_GEN6(intel->intelScreen->deviceID)) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } brw_clear_validated_bos(brw); @@ -352,7 +438,7 @@ void brw_upload_state(struct brw_context *brw) memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; @@ -381,7 +467,7 @@ void brw_upload_state(struct brw_context *brw) } } else { - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 66d4127271..3c2adfc87d 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -658,7 +658,105 @@ struct brw_clip_unit_state GLfloat viewport_ymax; }; +struct gen6_blend_state +{ + struct { + GLuint dest_blend_factor:5; + GLuint 
source_blend_factor:5; + GLuint pad3:1; + GLuint blend_func:3; + GLuint pad2:1; + GLuint ia_dest_blend_factor:5; + GLuint ia_source_blend_factor:5; + GLuint pad1:1; + GLuint ia_blend_func:3; + GLuint pad0:1; + GLuint ia_blend_enable:1; + GLuint blend_enable:1; + } blend0; + + struct { + GLuint post_blend_clamp_enable:1; + GLuint pre_blend_clamp_enable:1; + GLuint clamp_range:2; + GLuint pad0:4; + GLuint x_dither_offset:2; + GLuint y_dither_offset:2; + GLuint dither_enable:1; + GLuint alpha_test_func:3; + GLuint alpha_test_enable:1; + GLuint pad1:1; + GLuint logic_op_func:4; + GLuint logic_op_enable:1; + GLuint pad2:1; + GLuint write_disable_b:1; + GLuint write_disable_g:1; + GLuint write_disable_r:1; + GLuint write_disable_a:1; + GLuint pad3:1; + GLuint alpha_to_coverage_dither:1; + GLuint alpha_to_one:1; + GLuint alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + GLuint alpha_test_format:1; + GLuint pad0:14; + GLuint round_disable:1; + GLuint bf_stencil_ref:8; + GLuint stencil_ref:8; + } cc0; + union { + GLfloat alpha_ref_f; + struct { + GLuint ui:8; + GLuint pad0:24; + } alpha_ref_fi; + } cc1; + + GLfloat constant_r; + GLfloat constant_g; + GLfloat constant_b; + GLfloat constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } ds0; + + struct { + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + } ds1; + + struct { + GLuint pad0:25; + GLuint depth_write_enable:1; + GLuint depth_test_func:3; + GLuint pad1:1; + GLuint depth_test_enable:1; + } ds2; +}; 
struct brw_cc_unit_state { @@ -752,8 +850,6 @@ struct brw_cc_unit_state } cc7; }; - - struct brw_sf_unit_state { struct thread0 thread0; @@ -813,6 +909,11 @@ struct brw_sf_unit_state }; +struct gen6_scissor_state +{ + GLuint ymin, xmin; + GLuint ymax, xmax; +}; struct brw_gs_unit_state { @@ -1043,6 +1144,15 @@ struct brw_sf_viewport } scissor; }; +struct gen6_sf_viewport { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; +}; + /* Documented in the subsystem/shared-functions/sampler chapter... */ struct brw_surface_state diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 88327d9927..d16e916832 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -180,10 +180,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (intel->is_ironlake) - mrf = 8; + if (intel->gen >= 6) + mrf = 6; + else if (intel->is_ironlake) + mrf = 8; else - mrf = 4; + mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { @@ -279,10 +281,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (intel->is_ironlake) - c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + if (intel->gen >= 6) + c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8; + else if (intel->is_ironlake) + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; @@ -380,9 +384,8 @@ static void emit_sop( struct brw_vs_compile *c, { struct brw_compile *p = &c->func; - brw_MOV(p, dst, brw_imm_f(0.0f)); - brw_CMP(p, brw_null_reg(), cond, arg0, arg1); - brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_CMP(p, brw_null_reg(), cond, 
arg1, arg0); + brw_SEL(p, dst, brw_null_reg(), brw_imm_f(1.0f)); brw_set_predicate_control_flag_value(p, 0xff); } @@ -479,9 +482,11 @@ static void emit_math1( struct brw_vs_compile *c, * whether that turns out to be a simulator bug or not: */ struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; - GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); + GLboolean need_tmp = (intel->gen < 6 && + (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE)); if (need_tmp) tmp = get_tmp(c); @@ -510,9 +515,11 @@ static void emit_math2( struct brw_vs_compile *c, GLuint precision) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; - GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || - dst.file != BRW_GENERAL_REGISTER_FILE); + GLboolean need_tmp = (intel->gen < 6 && + (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE)); if (need_tmp) tmp = get_tmp(c); @@ -1191,7 +1198,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; int eot; - GLuint len_vertext_header = 2; + GLuint len_vertex_header = 2; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1199,12 +1206,14 @@ static void emit_vertex_write( struct brw_vs_compile *c) get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG)); } - /* Build ndc coords */ - ndc = get_tmp(c); - /* ndc = 1.0 / pos.w */ - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - /* ndc.xyz = pos * ndc */ - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + if (intel->gen < 6) { + /* Build ndc coords */ + ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + } /* 
Update the header for point size, user clipping flags, and -ve rhw * workaround. @@ -1267,21 +1276,41 @@ static void emit_vertex_write( struct brw_vs_compile *c) * of zeros followed by two sets of NDC coordinates: */ brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, offset(m0, 2), ndc); - - if (intel->is_ironlake) { - /* There are 20 DWs (D0-D19) in VUE vertex header on Ironlake */ - brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ - /* m4, m5 contain the distances from vertex to the user clip planeXXX. - * Seems it is useless for us. - * m6 is used for aligning, so that the remainder of vertex element is - * reg-aligned. - */ - brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ - len_vertext_header = 6; + + if (intel->gen >= 6) { + /* There are 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the 4D space position + * dword 8-15 (m3,m4) of the vertex header is the user clip distance. + * m5 is the first vertex data we fill, which is the vertex position. + */ + brw_MOV(p, offset(m0, 2), pos); + brw_MOV(p, offset(m0, 5), pos); + len_vertex_header = 4; + } else if (intel->is_ironlake) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill, which is the vertex position. + */ + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + brw_MOV(p, offset(m0, 7), pos); + len_vertex_header = 6; } else { - brw_MOV(p, offset(m0, 3), pos); - len_vertext_header = 2; + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. 
+ * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data, which we always have be the + * vertex position. + */ + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + len_vertex_header = 2; } eot = (c->first_overflow_output == 0); @@ -1292,7 +1321,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ + MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ eot, /* eot */ eot, /* writes complete */ @@ -1687,11 +1716,13 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } @@ -1793,6 +1824,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) post_vs_emit(c, end_inst, last_inst); + brw_optimize(p); + if (INTEL_DEBUG & DEBUG_VS) { int i; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index ead623fc0e..4007b5a15c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -67,13 +67,13 @@ brw_vs_update_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); - intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE); + drm_intel_gem_bo_map_gtt(const_buffer); for (i = 0; i < 
params->NumParameters; i++) { memcpy(const_buffer->virtual + i * 4 * sizeof(float), params->ParameterValues[i], 4 * sizeof(float)); } - intel_bo_unmap_gtt_preferred(intel, const_buffer); + drm_intel_gem_bo_unmap_gtt(const_buffer); return const_buffer; } @@ -104,7 +104,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (vp->const_buffer == 0) { + if (vp->const_buffer == NULL) { drm_intel_bo_unreference(brw->vs.surf_bo[surf]); brw->vs.surf_bo[surf] = NULL; return; @@ -132,7 +132,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, + &key.bo, 1, NULL); if (brw->vs.surf_bo[surf] == NULL) { brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 0b0be02dd2..96a44bfbec 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -102,6 +102,9 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->cc.prog_bo); dri_bo_release(&brw->cc.state_bo); dri_bo_release(&brw->cc.vp_bo); + dri_bo_release(&brw->cc.blend_state_bo); + dri_bo_release(&brw->cc.depth_stencil_state_bo); + dri_bo_release(&brw->cc.color_calc_state_bo); } @@ -141,7 +144,7 @@ static void brw_finish_batch(struct intel_context *intel) brw_emit_query_end(brw); if (brw->curbe.curbe_bo) { - intel_bo_unmap_gtt_preferred(intel, brw->curbe.curbe_bo); + drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); drm_intel_bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 88d84ee82f..47b764d24d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p, const 
struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0); +void emit_cmp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 9315bca315..05e464d4b6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,6 +34,23 @@ #include "brw_context.h" #include "brw_wm.h" +static GLboolean can_do_pln(struct intel_context *intel, + const struct brw_reg *deltas) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + if (!brw->has_pln) + return GL_FALSE; + + if (deltas[1].nr != deltas[0].nr + 1) + return GL_FALSE; + + if (intel->gen < 6 && ((deltas[0].nr & 1) != 0)) + return GL_FALSE; + + return GL_TRUE; +} + /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -45,7 +62,13 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) } -/* Payload R0: +/** + * Computes the screen-space x,y position of the pixels. + * + * This will be used by emit_delta_xy() or emit_wpos_xy() for + * interpolation of attributes.. + * + * Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, * corresponding to each of the 16 execution channels. @@ -60,7 +83,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.7 -- ? * R1.8 -- ? */ - void emit_pixel_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask) @@ -100,7 +122,14 @@ void emit_pixel_xy(struct brw_wm_compile *c, brw_pop_insn_state(p); } - +/** + * Computes the screen-space x,y distance of the pixels from the start + * vertex. 
+ * + * This will be used in linterp or pinterp with the start vertex value + * and the Cx, Cy, and C0 coefficients passed in from the setup engine + * to produce interpolated attribute values. + */ void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -108,25 +137,27 @@ void emit_delta_xy(struct brw_compile *p, { struct brw_reg r1 = brw_vec1_grf(1, 0); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW), - negate(r1)); - } + if (mask == 0) + return; - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); + assert(mask == WRITEMASK_XY); - } + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers produced by emit_pixel_xy(). + */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); } +/** + * Computes the pixel offset from the window origin for gl_FragCoord(). + */ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, @@ -134,9 +165,6 @@ void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ if (mask & WRITEMASK_X) { if (c->fp->program.PixelCenterInteger) { /* X' = X */ @@ -186,6 +214,7 @@ void emit_pixel_w(struct brw_wm_compile *c, const struct brw_reg *deltas) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; /* Don't need this if all you are doing is interpolating color, for * instance. @@ -196,8 +225,12 @@ void emit_pixel_w(struct brw_wm_compile *c, /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. 
*/ - brw_LINE(p, brw_null_reg(), interp3, deltas[0]); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, brw_message_reg(2), interp3, deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + } /* Calc w */ if (c->dispatch_width == 16) { @@ -224,6 +257,7 @@ void emit_linterp(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *deltas) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -235,8 +269,12 @@ void emit_linterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } } @@ -249,6 +287,7 @@ void emit_pinterp(struct brw_compile *p, const struct brw_reg *deltas, const struct brw_reg *w) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -260,8 +299,12 @@ void emit_pinterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } for (i = 0; i < 4; i++) { @@ -502,11 +545,8 @@ void emit_sop(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst[i], brw_imm_f(0)); - 
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst[i], brw_imm_f(1.0)); + brw_CMP(p, brw_null_reg(), cond, arg1[i], arg0[i]); + brw_SEL(p, dst[i], brw_null_reg(), brw_imm_f(1.0)); brw_pop_insn_state(p); } } @@ -566,12 +606,12 @@ static void emit_sne( struct brw_compile *p, emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); } -static void emit_cmp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_cmp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; @@ -601,14 +641,10 @@ void emit_max(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg0[i]); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); + brw_SEL(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -625,14 +661,10 @@ void emit_min(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); - brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg0[i]); + brw_SEL(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -1086,11 +1118,19 @@ static void emit_kil( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - GLuint i; - - /* XXX - usually won't need 4 compares! 
- */ + GLuint i, j; + for (i = 0; i < 4; i++) { + /* Check if we've already done the comparison for this reg + * -- common when someone does KIL TEMP.wwww. + */ + for (j = 0; j < i; j++) { + if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0) + break; + } + if (j != i) + continue; + brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 562608e2ec..0b66cc6c9f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -289,6 +289,7 @@ reclaim_temps(struct brw_wm_compile *c) */ static void prealloc_reg(struct brw_wm_compile *c) { + struct intel_context *intel = &c->func.brw->intel; int i, j; struct brw_reg reg; int urb_read_length = 0; @@ -413,6 +414,43 @@ static void prealloc_reg(struct brw_wm_compile *c) } } + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + + switch (inst->Opcode) { + case WM_DELTAXY: + /* Allocate WM_DELTAXY destination on G45/GM45 to an + * even-numbered GRF if possible so that we can use the PLN + * instruction. 
+ */ + if (inst->DstReg.WriteMask == WRITEMASK_XY && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && + (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { + int grf; + + for (grf = c->first_free_grf & ~1; + grf < BRW_WM_MAX_GRF; + grf += 2) + { + if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { + c->used_grf[grf] = GL_TRUE; + c->used_grf[grf + 1] = GL_TRUE; + c->first_free_grf = grf + 2; /* a guess */ + + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, + brw_vec8_grf(grf, 0)); + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, + brw_vec8_grf(grf + 1, 0)); + break; + } + } + } + default: + break; + } + } + /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! @@ -614,112 +652,6 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -/* Workaround for using brw_wm_emit.c's emit functions, which expect - * destination regs to be uniquely written. Moves arguments out to - * temporaries as necessary for instructions which use their destination as - * a temporary. 
- */ -static void -unalias3(struct brw_wm_compile *c, - void (*func)(struct brw_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4]; - int i, j; - int mark = mark_tmps(c); - - for (j = 0; j < 4; j++) { - tmp_arg0[j] = arg0[j]; - tmp_arg1[j] = arg1[j]; - tmp_arg2[j] = arg2[j]; - } - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - for (j = 0; j < 4; j++) { - if (arg0[j].file == dst[i].file && - dst[i].nr == arg0[j].nr) { - tmp_arg0[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg0[j], arg0[j]); - } - if (arg1[j].file == dst[i].file && - dst[i].nr == arg1[j].nr) { - tmp_arg1[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg1[j], arg1[j]); - } - if (arg2[j].file == dst[i].file && - dst[i].nr == arg2[j].nr) { - tmp_arg2[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg2[j], arg2[j]); - } - } - } - } - - func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2); - - release_tmps(c, mark); -} - -/* Workaround for using brw_wm_emit.c's emit functions, which expect - * destination regs to be uniquely written. Moves arguments out to - * temporaries as necessary for instructions which use their destination as - * a temporary. 
- */ -static void -unalias2(struct brw_wm_compile *c, - void (*func)(struct brw_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp_arg0[4], tmp_arg1[4]; - int i, j; - int mark = mark_tmps(c); - - for (j = 0; j < 4; j++) { - tmp_arg0[j] = arg0[j]; - tmp_arg1[j] = arg1[j]; - } - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - for (j = 0; j < 4; j++) { - if (arg0[j].file == dst[i].file && - dst[i].nr == arg0[j].nr) { - tmp_arg0[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg0[j], arg0[j]); - } - if (arg1[j].file == dst[i].file && - dst[i].nr == arg1[j].nr) { - tmp_arg1[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg1[j], arg1[j]); - } - } - } - } - - func(p, dst, mask, tmp_arg0, tmp_arg1); - - release_tmps(c, mark); -} - static void emit_arl(struct brw_wm_compile *c, const struct prog_instruction *inst) { @@ -1813,14 +1745,29 @@ static void get_argument_regs(struct brw_wm_compile *c, const struct prog_instruction *inst, int index, + struct brw_reg *dst, struct brw_reg *regs, int mask) { - int i; + struct brw_compile *p = &c->func; + int i, j; for (i = 0; i < 4; i++) { - if (mask & (1 << i)) + if (mask & (1 << i)) { regs[i] = get_src_reg(c, inst, index, i); + + /* Unalias destination registers from our sources. 
*/ + if (regs[i].file == BRW_GENERAL_REGISTER_FILE) { + for (j = 0; j < 4; j++) { + if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) { + struct brw_reg tmp = alloc_tmp(c); + brw_MOV(p, tmp, regs[i]); + regs[i] = tmp; + break; + } + } + } + } } } @@ -1845,6 +1792,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) int dst_flags; struct brw_reg args[3][4], dst[4]; int j; + int mark = mark_tmps( c ); c->cur_inst = i; @@ -1866,7 +1814,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } } for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) - get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW); + get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW); dst_flags = inst->DstReg.WriteMask; if (inst->SaturateMode == SATURATE_ZERO_ONE) @@ -1920,8 +1868,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_LRP: - unalias3(c, emit_lrp, - dst, dst_flags, args[0], args[1], args[2]); + emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_TRUNC: emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); @@ -1960,11 +1907,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; + case OPCODE_CMP: + emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); + break; case OPCODE_MIN: - unalias2(c, emit_min, dst, dst_flags, args[0], args[1]); + emit_min(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_MAX: - unalias2(c, emit_max, dst, dst_flags, args[0], args[1]); + emit_max(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DDX: case OPCODE_DDY: @@ -2103,11 +2053,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) /* patch all the BREAK/CONT instructions from last BGNLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if
(inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } @@ -2115,10 +2067,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - printf("unsupported IR in fragment shader %d\n", - inst->Opcode); + printf("unsupported opcode %d (%s) in fragment shader\n", + inst->Opcode, inst->Opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->Opcode) : "unknown"); } + /* Release temporaries containing any unaliased source regs. */ + release_tmps( c, mark ); + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index c232cd2791..d7650af3d9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -89,7 +89,6 @@ struct wm_sampler_key { float max_aniso; GLenum minfilter, magfilter; GLenum comparemode, comparefunc; - dri_bo *sdc_bo; /** If target is cubemap, take context setting. 
*/ @@ -230,7 +229,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, GLcontext *ctx = &brw->intel.ctx; int unit; - memset(key, 0, sizeof(*key)); + key->sampler_count = 0; for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { if (ctx->Texture.Unit[unit]._ReallyEnabled) { @@ -241,6 +240,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw, struct gl_texture_image *firstImage = texObj->Image[0][intelObj->firstLevel]; + memset(entry, 0, sizeof(*entry)); + entry->tex_target = texObj->Target; entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) @@ -289,7 +290,7 @@ static void upload_wm_samplers( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct wm_sampler_key key; - int i; + int i, sampler_key_size; brw_wm_sampler_populate_key(brw, &key); @@ -303,8 +304,11 @@ static void upload_wm_samplers( struct brw_context *brw ) if (brw->wm.sampler_count == 0) return; + /* Only include the populated portion of the key in the search. */ + sampler_key_size = offsetof(struct wm_sampler_key, + sampler[key.sampler_count]); brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), + &key, sampler_key_size, brw->wm.sdc_bo, key.sampler_count, NULL); @@ -324,7 +328,7 @@ static void upload_wm_samplers( struct brw_context *brw ) } brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), + &key, sampler_key_size, brw->wm.sdc_bo, key.sampler_count, &sampler, sizeof(sampler)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 1db438ae7b..ce0bf0b97d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -207,33 +207,14 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - if (key->bo) { - surf.ss0.surface_format = translate_tex_format(key->format, - 
key->internal_format, - key->depthmode); - } - else { - switch (key->depth) { - case 32: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - default: - case 24: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - break; - case 16: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - } - } + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); /* This is ok for all textures with channel width 8bit or less: */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ surf.ss2.mip_count = key->last_level - key->first_level; surf.ss2.width = key->width - 1; @@ -255,17 +236,14 @@ brw_create_texture_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 
1 : 0, + &key->bo, 1, &surf, sizeof(surf)); - if (key->bo) { - /* Emit relocation to surface contents */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } + /* Emit relocation to surface contents */ + drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), + key->bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); + return bo; } @@ -281,19 +259,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) memset(&key, 0, sizeof(key)); - if (intelObj->imageOverride) { - key.pitch = intelObj->pitchOverride / intelObj->mt->cpp; - key.depth = intelObj->depthOverride; - key.bo = NULL; - key.offset = intelObj->textureOffset; - } else { - key.format = firstImage->TexFormat; - key.internal_format = firstImage->InternalFormat; - key.pitch = intelObj->mt->pitch; - key.depth = firstImage->Depth; - key.bo = intelObj->mt->region->buffer; - key.offset = 0; - } + key.format = firstImage->TexFormat; + key.internal_format = firstImage->InternalFormat; + key.pitch = intelObj->mt->pitch; + key.depth = firstImage->Depth; + key.bo = intelObj->mt->region->buffer; + key.offset = 0; key.target = tObj->Target; key.depthmode = tObj->DepthMode; @@ -308,7 +279,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 
1 : 0, + &key.bo, 1, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); @@ -336,10 +307,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; assert(key->bo); - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ @@ -349,20 +317,16 @@ brw_create_constant_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, + &key->bo, 1, &surf, sizeof(surf)); - if (key->bo) { - /* Emit relocation to surface contents. Section 5.1.1 of the gen4 - * bspec ("Data Cache") says that the data cache does not exist as - * a separate cache and is just the sampler cache. - */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } + /* Emit relocation to surface contents. Section 5.1.1 of the gen4 + * bspec ("Data Cache") says that the data cache does not exist as + * a separate cache and is just the sampler cache. + */ + drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1), + key->bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); return bo; } @@ -420,7 +384,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, /* If there's no constant buffer, then no surface BO is needed to point at * it. */ - if (fp->const_buffer == 0) { + if (fp->const_buffer == NULL) { drm_intel_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; return; @@ -448,7 +412,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), - &key.bo, key.bo ? 
1 : 0, + &key.bo, 1, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); @@ -509,7 +473,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, unsigned int unit) { - struct intel_context *intel = &brw->intel;; + struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; dri_bo *region_bo = NULL; struct intel_renderbuffer *irb = intel_renderbuffer(rb); @@ -576,18 +540,21 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.draw_x = 0; key.draw_y = 0; } - /* _NEW_COLOR */ - memcpy(key.color_mask, ctx->Color.ColorMask[unit], - sizeof(key.color_mask)); - /* As mentioned above, disable writes to the alpha component when the - * renderbuffer is XRGB. - */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) - key.color_mask[3] = GL_FALSE; + if (intel->gen < 6) { + /* _NEW_COLOR */ + memcpy(key.color_mask, ctx->Color.ColorMask[unit], + sizeof(key.color_mask)); - key.color_blend = (!ctx->Color._LogicOpEnabled && - (ctx->Color.BlendEnabled & (1 << unit))); + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. 
+ */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) + key.color_mask[3] = GL_FALSE; + + key.color_blend = (!ctx->Color._LogicOpEnabled && + (ctx->Color.BlendEnabled & (1 << unit))); + } dri_bo_unreference(brw->wm.surf_bo[unit]); brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, @@ -639,12 +606,14 @@ brw_update_renderbuffer_surface(struct brw_context *brw, brw_set_surface_tiling(&surf, key.tiling); surf.ss3.pitch = (key.pitch * key.cpp) - 1; - /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; + if (intel->gen < 6) { + /* _NEW_COLOR */ + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; + } /* Key size will never match key size for textures, so we're safe. 
*/ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c new file mode 100644 index 0000000000..f7acad6912 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -0,0 +1,296 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "intel_batchbuffer.h" +#include "main/macros.h" + +struct gen6_blend_state_key { + GLboolean color_blend, alpha_enabled; + GLboolean dither; + + GLenum logic_op; + + GLenum blend_eq_rgb, blend_eq_a; + GLenum blend_src_rgb, blend_src_a; + GLenum blend_dst_rgb, blend_dst_a; + + GLenum alpha_func; +}; + +static void +blend_state_populate_key(struct brw_context *brw, + struct gen6_blend_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* _NEW_COLOR */ + if (ctx->Color._LogicOpEnabled) + key->logic_op = ctx->Color.LogicOp; + else + key->logic_op = GL_COPY; + + /* _NEW_COLOR */ + key->color_blend = ctx->Color.BlendEnabled; + if (key->color_blend) { + key->blend_eq_rgb = ctx->Color.BlendEquationRGB; + key->blend_eq_a = ctx->Color.BlendEquationA; + key->blend_src_rgb = ctx->Color.BlendSrcRGB; + key->blend_dst_rgb = ctx->Color.BlendDstRGB; + key->blend_src_a = ctx->Color.BlendSrcA; + key->blend_dst_a = ctx->Color.BlendDstA; + } + + /* _NEW_COLOR */ + key->alpha_enabled = ctx->Color.AlphaEnabled; + if (key->alpha_enabled) { + key->alpha_func = ctx->Color.AlphaFunc; + } + + /* _NEW_COLOR */ + key->dither = ctx->Color.DitherFlag; +} + +/** + * Creates the state cache entry for the given CC unit key. 
+ */ +static drm_intel_bo * +blend_state_create_from_key(struct brw_context *brw, + struct gen6_blend_state_key *key) +{ + struct gen6_blend_state blend; + drm_intel_bo *bo; + + memset(&blend, 0, sizeof(blend)); + + if (key->logic_op != GL_COPY) { + blend.blend1.logic_op_enable = 1; + blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); + blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); + blend.blend0.blend_func = brw_translate_blend_equation(eqRGB); + + blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); + blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA); + + blend.blend0.blend_enable = 1; + blend.blend0.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + if (key->alpha_enabled) { + blend.blend1.alpha_test_enable = 1; + blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + + } + + if (key->dither) { + blend.blend1.dither_enable = 1; + blend.blend1.y_dither_offset = 0; + blend.blend1.x_dither_offset = 0; + } + + bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, + key, sizeof(*key), + NULL, 0, + &blend, sizeof(blend)); + + return bo; +} + +static void +prepare_blend_state(struct brw_context *brw) +{ + struct gen6_blend_state_key key; + + blend_state_populate_key(brw, &key); + + drm_intel_bo_unreference(brw->cc.blend_state_bo); + brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE, + &key, 
sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.blend_state_bo == NULL) + brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_blend_state = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_blend_state, +}; + +struct gen6_color_calc_state_key { + GLubyte blend_constant_color[4]; + GLclampf alpha_ref; + GLubyte stencil_ref[2]; +}; + +static void +color_calc_state_populate_key(struct brw_context *brw, + struct gen6_color_calc_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* _NEW_STENCIL */ + if (ctx->Stencil._Enabled) { + const unsigned back = ctx->Stencil._BackFace; + + key->stencil_ref[0] = ctx->Stencil.Ref[0]; + if (ctx->Stencil._TestTwoSide) + key->stencil_ref[1] = ctx->Stencil.Ref[back]; + } + + /* _NEW_COLOR */ + if (ctx->Color.AlphaEnabled) + key->alpha_ref = ctx->Color.AlphaRef; + + key->blend_constant_color[0] = ctx->Color.BlendColor[0]; + key->blend_constant_color[1] = ctx->Color.BlendColor[1]; + key->blend_constant_color[2] = ctx->Color.BlendColor[2]; + key->blend_constant_color[3] = ctx->Color.BlendColor[3]; +} + +/** + * Creates the state cache entry for the given CC state key. 
+ */ +static drm_intel_bo * +color_calc_state_create_from_key(struct brw_context *brw, + struct gen6_color_calc_state_key *key) +{ + struct gen6_color_calc_state cc; + drm_intel_bo *bo; + + memset(&cc, 0, sizeof(cc)); + + cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref); + + cc.cc0.stencil_ref = key->stencil_ref[0]; + cc.cc0.bf_stencil_ref = key->stencil_ref[1]; + + cc.constant_r = key->blend_constant_color[0]; + cc.constant_g = key->blend_constant_color[1]; + cc.constant_b = key->blend_constant_color[2]; + cc.constant_a = key->blend_constant_color[3]; + + bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE, + key, sizeof(*key), + NULL, 0, + &cc, sizeof(cc)); + + return bo; +} + +static void +prepare_color_calc_state(struct brw_context *brw) +{ + struct gen6_color_calc_state_key key; + + color_calc_state_populate_key(brw, &key); + + drm_intel_bo_unreference(brw->cc.state_bo); + brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.state_bo == NULL) + brw->cc.state_bo = color_calc_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_color_calc_state = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_color_calc_state, +}; + +static void upload_cc_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void prepare_cc_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->cc.state_bo); + brw_add_validated_bo(brw, 
brw->cc.blend_state_bo); + brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo); +} + +const struct brw_tracked_state gen6_cc_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = (CACHE_NEW_BLEND_STATE | + CACHE_NEW_COLOR_CALC_STATE | + CACHE_NEW_DEPTH_STENCIL_STATE) + }, + .prepare = prepare_cc_state_pointers, + .emit = upload_cc_state_pointers, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c new file mode 100644 index 0000000000..06f8145e32 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -0,0 +1,75 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_clip_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + uint32_t depth_clamp = 0; + uint32_t provoking; + + /* _NEW_TRANSFORM */ + if (!ctx->Transform.DepthClamp) + depth_clamp = GEN6_CLIP_Z_TEST; + + /* _NEW_LIGHT */ + if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { + provoking = 0; + } else { + provoking = + (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | + (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | + (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); + } + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); + OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); + OUT_BATCH(GEN6_CLIP_ENABLE | + GEN6_CLIP_API_OGL | + GEN6_CLIP_MODE_REJECT_ALL | /* XXX: debug: get VS working */ + GEN6_CLIP_XY_TEST | + depth_clamp | + provoking); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_clip_state = { + .dirty = { + /* DepthClamp comes from transform state, ProvokingVertex from + * light state; both must trigger a re-emit. + */ + .mesa = _NEW_TRANSFORM | _NEW_LIGHT, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_clip_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c new file mode 100644 index 0000000000..4924f0fd55 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -0,0 +1,165 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission 
notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" + +struct brw_depth_stencil_state_key { + GLenum depth_func; + GLboolean depth_test, depth_write; + GLboolean stencil, stencil_two_side; + GLenum stencil_func[2], stencil_fail_op[2]; + GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; + GLubyte stencil_write_mask[2], stencil_test_mask[2]; +}; + +static void +depth_stencil_state_populate_key(struct brw_context *brw, + struct brw_depth_stencil_state_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + const unsigned back = ctx->Stencil._BackFace; + + memset(key, 0, sizeof(*key)); + + /* _NEW_STENCIL */ + key->stencil = ctx->Stencil._Enabled; + key->stencil_two_side = ctx->Stencil._TestTwoSide; + + if (key->stencil) { + key->stencil_func[0] = ctx->Stencil.Function[0]; + key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; + key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; + key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; + key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; + key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; + } + if (key->stencil_two_side) { + key->stencil_func[1] = ctx->Stencil.Function[back]; + key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; + key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; + 
key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; + key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; + key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; + } + + key->depth_test = ctx->Depth.Test; + if (key->depth_test) { + key->depth_func = ctx->Depth.Func; + key->depth_write = ctx->Depth.Mask; + } +} + +/** + * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key. + */ +static dri_bo * +depth_stencil_state_create_from_key(struct brw_context *brw, + struct brw_depth_stencil_state_key *key) +{ + struct gen6_depth_stencil_state ds; + dri_bo *bo; + + memset(&ds, 0, sizeof(ds)); + + /* _NEW_STENCIL */ + if (key->stencil) { + ds.ds0.stencil_enable = 1; + ds.ds0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + ds.ds0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + ds.ds0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + ds.ds0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + ds.ds1.stencil_write_mask = key->stencil_write_mask[0]; + ds.ds1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + ds.ds0.bf_stencil_enable = 1; + ds.ds0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + ds.ds0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + ds.ds0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + ds.ds0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1]; + ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1]; + } + + /* Not really sure about this: + */ + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) + ds.ds0.stencil_write_enable = 1; + } + + /* _NEW_DEPTH */ + if (key->depth_test) { + 
ds.ds2.depth_test_enable = 1; + ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func); + ds.ds2.depth_write_enable = key->depth_write; + } + + bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE, + key, sizeof(*key), + NULL, 0, + &ds, sizeof(ds)); + + return bo; +} + +static void +prepare_depth_stencil_state(struct brw_context *brw) +{ + struct brw_depth_stencil_state_key key; + + depth_stencil_state_populate_key(brw, &key); + + dri_bo_unreference(brw->cc.depth_stencil_state_bo); + brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache, + BRW_DEPTH_STENCIL_STATE, + &key, sizeof(key), + NULL, 0, + NULL); + + if (brw->cc.depth_stencil_state_bo == NULL) + brw->cc.depth_stencil_state_bo = + depth_stencil_state_create_from_key(brw, &key); +} + +const struct brw_tracked_state gen6_depth_stencil_state = { + .dirty = { + .mesa = _NEW_DEPTH | _NEW_STENCIL, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_depth_stencil_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c new file mode 100644 index 0000000000..161e7b85c2 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -0,0 +1,91 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_gs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + /* Disable all the constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); + + if (brw->gs.prog_bo) { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(GEN6_GS_ENABLE); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_BATCH(0); /* prog_bo */ + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | 
+ (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + +const struct brw_tracked_state gen6_gs_state = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE | + BRW_NEW_CONTEXT), + .cache = CACHE_NEW_GS_PROG + }, + .emit = upload_gs_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c new file mode 100644 index 0000000000..ab8e7516d2 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -0,0 +1,71 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +upload_sampler_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 | + VS_SAMPLER_STATE_CHANGE | + GS_SAMPLER_STATE_CHANGE | + PS_SAMPLER_STATE_CHANGE | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + if (brw->wm.sampler_bo) + OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + else + OUT_BATCH(0); + + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void +prepare_sampler_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->wm.sampler_bo); +} + +const struct brw_tracked_state gen6_sampler_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SAMPLER + }, + .prepare = prepare_sampler_state_pointers, + .emit = upload_sampler_state_pointers, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c new file mode 100644 index 0000000000..2e21e5f733 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -0,0 +1,105 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +prepare_scissor_state(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + struct gen6_scissor_state scissor; + + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */ + + /* The scissor only needs to handle the intersection of drawable and + * scissor rect. Clipping to the boundaries of static shared buffers + * for front/back/depth is covered by looping over cliprects in brw_draw.c. + * + * Note that the hardware's coordinates are inclusive, while Mesa's min is + * inclusive but max is exclusive. 
+ */ + if (render_to_fbo) { + /* texmemory: Y=0=bottom */ + scissor.xmin = ctx->DrawBuffer->_Xmin; + scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + scissor.ymin = ctx->DrawBuffer->_Ymin; + scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + } + else { + /* memory: Y=0=top */ + scissor.xmin = ctx->DrawBuffer->_Xmin; + scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; + scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + } + + drm_intel_bo_unreference(brw->sf.state_bo); + brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT, + &scissor, sizeof(scissor), + NULL, 0); +} + +const struct brw_tracked_state gen6_scissor_state = { + .dirty = { + .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_scissor_state, +}; + +static void upload_scissor_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(2); + OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); + OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + + +static void prepare_scissor_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->sf.state_bo); +} + +const struct brw_tracked_state gen6_scissor_state_pointers = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SF_UNIT + }, + .prepare = prepare_scissor_state_pointers, + .emit = upload_scissor_state_pointers, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c new file mode 100644 index 0000000000..8d96b44f1d --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -0,0 +1,187 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the 
Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "main/macros.h" +#include "intel_batchbuffer.h" + +static uint32_t +get_attr_override(struct brw_context *brw, int attr) +{ + uint32_t attr_override; + int attr_index = 0, i; + + /* Find the source index (0 = first attribute after the 4D position) + * for this output attribute. attr is currently a VERT_RESULT_* but should + * be FRAG_ATTRIB_*. 
+ */ + for (i = 0; i < attr; i++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i)) + attr_index++; + } + attr_override = attr_index; + + return attr_index; +} + +static void +upload_sf_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + /* CACHE_NEW_VS_PROG */ + uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); + /* This should probably be FS inputs read */ + uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written); + uint32_t dw1, dw2, dw3, dw4; + int i; + /* _NEW_BUFFERS */ + GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + int attr = 0; + + dw1 = + num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | + (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | + GEN6_SF_STATISTICS_ENABLE; + dw3 = 0; + dw4 = 0; + + /* _NEW_POLYGON */ + if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) + dw2 |= GEN6_SF_WINDING_CCW; + + /* _NEW_SCISSOR */ + if (ctx->Scissor.Enabled) + dw3 |= GEN6_SF_SCISSOR_ENABLE; + + /* _NEW_POLYGON */ + if (ctx->Polygon.CullFlag) { + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: + /* Cull only front faces; CULL_BOTH here would drop every polygon. */ + dw3 |= GEN6_SF_CULL_FRONT; + break; + case GL_BACK: + dw3 |= GEN6_SF_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + dw3 |= GEN6_SF_CULL_BOTH; + break; + default: + assert(0); + break; + } + } else { + dw3 |= GEN6_SF_CULL_NONE; + } + + /* _NEW_LINE */ + dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) << + GEN6_SF_LINE_WIDTH_SHIFT; + if (ctx->Line.SmoothFlag) { + dw3 |= GEN6_SF_LINE_AA_ENABLE; + dw3 |= GEN6_SF_LINE_AA_MODE_TRUE; + dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; + } + + /* _NEW_POINT */ + if (ctx->Point._Attenuated) + dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH; + + /* Clamp to the largest value an 8.3 fixed-point point width can hold. */ + dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 255.875), 3) << + GEN6_SF_POINT_WIDTH_SHIFT; + if (render_to_fbo) + dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; + + /* _NEW_LIGHT */ + if 
(ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { + dw4 |= + (2 << GEN6_SF_TRI_PROVOKE_SHIFT) | + (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) | + (1 << GEN6_SF_LINE_PROVOKE_SHIFT); + } else { + dw4 |= + (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); + } + + BEGIN_BATCH(20); + OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(dw2); + OUT_BATCH(dw3); + OUT_BATCH(dw4); + OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */ + OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ + OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */ + for (i = 0; i < 8; i++) { + uint32_t attr_overrides = 0; + + /* These should be generating FS inputs read instead of VS + * outputs written + */ + for (; attr < 64; attr++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + attr_overrides |= get_attr_override(brw, attr); + attr++; + break; + } + } + + for (; attr < 64; attr++) { + if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) { + attr_overrides |= get_attr_override(brw, attr) << 16; + attr++; + break; + } + } + OUT_BATCH(attr_overrides); + } + OUT_BATCH(0); /* point sprite texcoord bitmask */ + OUT_BATCH(0); /* constant interp bitmask */ + OUT_BATCH(0); /* wrapshortest enables 0-7 */ + OUT_BATCH(0); /* wrapshortest enables 8-15 */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_sf_state = { + .dirty = { + .mesa = (_NEW_LIGHT | + _NEW_POLYGON | + _NEW_LINE | + _NEW_POINT | + _NEW_SCISSOR | + _NEW_BUFFERS), + .brw = BRW_NEW_CONTEXT, + .cache = CACHE_NEW_VS_PROG + }, + .emit = upload_sf_state, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c new file mode 100644 index 0000000000..5445e4035a --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "main/macros.h" +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +static void +prepare_urb( struct brw_context *brw ) +{ + brw->urb.nr_vs_entries = 24; + if (brw->gs.prog_bo) + brw->urb.nr_gs_entries = 4; + else + brw->urb.nr_gs_entries = 0; + /* CACHE_NEW_VS_PROG: use the program's entry size but never less than + * one row, because the size is programmed as (vs_size - 1). + */ + brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); + + /* Check that the number of URB rows (8 floats each) allocated is less + * than the URB space.
+ */ + assert((brw->urb.nr_vs_entries + + brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024); +} + +static void +upload_urb(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + assert(brw->urb.nr_vs_entries % 4 == 0); + assert(brw->urb.nr_gs_entries % 4 == 0); + /* GS requirement */ + assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); + + intel_batchbuffer_emit_mi_flush(intel->batch); + + BEGIN_BATCH(3); + OUT_BATCH(CMD_URB << 16 | (3 - 2)); + OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | + ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); + OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | + ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT)); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_urb = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = CACHE_NEW_VS_PROG, + }, + .prepare = prepare_urb, + .emit = upload_urb, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c new file mode 100644 index 0000000000..0c2aa4206c --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -0,0 +1,173 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" +#include "main/macros.h" + +/* The clip VP defines the guardband region where expensive clipping is skipped + * and fragments are allowed to be generated and clipped out cheaply by the SF. + * + * By setting it to NDC bounds of [-1,1], we don't do GB clipping. It's + * supposed to cause seams to become visible in apps due to shared edges taking + * different clip/no clip paths depending on whether the rest of the prim ends + * up in the guardband or not. 
+ */ +static void +prepare_clip_vp(struct brw_context *brw) +{ + struct brw_clipper_viewport vp; + + vp.xmin = -1.0; + vp.xmax = 1.0; + vp.ymin = -1.0; + vp.ymax = 1.0; + + drm_intel_bo_unreference(brw->clip.vp_bo); + brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP, + &vp, sizeof(vp), + NULL, 0); +} + +const struct brw_tracked_state gen6_clip_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */ + .brw = 0, + .cache = 0, + }, + .prepare = prepare_clip_vp, +}; + +static void +prepare_sf_vp(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + struct brw_sf_viewport sfv; + GLfloat y_scale, y_bias; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; + + memset(&sfv, 0, sizeof(sfv)); + + /* _NEW_BUFFERS */ + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = ctx->DrawBuffer->Height; + } + + /* _NEW_VIEWPORT */ + sfv.viewport.m00 = v[MAT_SX]; + sfv.viewport.m11 = v[MAT_SY] * y_scale; + sfv.viewport.m22 = v[MAT_SZ] * depth_scale; + sfv.viewport.m30 = v[MAT_TX]; + sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv.viewport.m32 = v[MAT_TZ] * depth_scale; + + drm_intel_bo_unreference(brw->sf.vp_bo); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, + &sfv, sizeof(sfv), + NULL, 0); +} + +const struct brw_tracked_state gen6_sf_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_BUFFERS, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_sf_vp, +}; + +static void +prepare_cc_vp(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_cc_viewport ccv; + + /* _NEW_TRANSOFORM */ + if (ctx->Transform.DepthClamp) { + /* _NEW_VIEWPORT */ + ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + } else { + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + } + + 
drm_intel_bo_unreference(brw->cc.vp_bo); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), + NULL, 0); +} + +const struct brw_tracked_state gen6_cc_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, + .brw = 0, + .cache = 0, + }, + .prepare = prepare_cc_vp, +}; + +/* Validate exactly the buffers that upload_viewport_state_pointers() + * relocates against: the clip, SF and CC viewport BOs. + */ +static void prepare_viewport_state_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->clip.vp_bo); + brw_add_validated_bo(brw, brw->sf.vp_bo); + brw_add_validated_bo(brw, brw->cc.vp_bo); +} + +static void upload_viewport_state_pointers(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | + GEN6_CC_VIEWPORT_MODIFY | + GEN6_SF_VIEWPORT_MODIFY | + GEN6_CLIP_VIEWPORT_MODIFY); + OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel->batch); +} + +const struct brw_tracked_state gen6_viewport_state = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = (CACHE_NEW_CLIP_VP | + CACHE_NEW_SF_VP | + CACHE_NEW_CC_VP) + }, + .prepare = prepare_viewport_state_pointers, + .emit = upload_viewport_state_pointers, +}; diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c new file mode 100644 index 0000000000..fe597dfb94 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -0,0 +1,119 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+/* Emits the VS push-constant packet (CMD_3D_CONSTANT_VS_STATE) followed by
+ * the VS unit packet (CMD_3D_VS_STATE).  An MI flush is emitted after each
+ * of the two packets.
+ *
+ * When the program uses a constant buffer, or has no parameters, the
+ * constant packet is sent with all buffers disabled.  Otherwise every
+ * parameter is copied as four floats into a freshly allocated BO that the
+ * packet points at.
+ */
+static void
+upload_vs_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const struct brw_vertex_program *vp =
+      brw_vertex_program_const(brw->vertex_program);
+   const unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
+   const size_t vec4_bytes = 4 * sizeof(float);
+   uint32_t dw2, dw4, dw5;
+
+   if (!vp->use_const_buffer && nr_params != 0) {
+      drm_intel_bo *push_bo;
+      char *dst;
+      unsigned int p;
+
+      if (brw->vertex_program->IsNVProgram)
+         _mesa_load_tracked_matrices(ctx);
+
+      /* Updates the ParameterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+      push_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
+                                   nr_params * 4 * sizeof(float),
+                                   4096);
+
+      /* Copy one vec4 per parameter, packed back to back, through a GTT
+       * mapping of the new BO.
+       */
+      drm_intel_gem_bo_map_gtt(push_bo);
+      dst = push_bo->virtual;
+      for (p = 0; p < nr_params; p++) {
+         memcpy(dst, vp->program.Base.Parameters->ParameterValues[p],
+                vec4_bytes);
+         dst += vec4_bytes;
+      }
+      drm_intel_gem_bo_unmap_gtt(push_bo);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
+                GEN6_CONSTANT_BUFFER_0_ENABLE |
+                (5 - 2));
+      /* NOTE(review): the reloc delta presumably encodes the buffer length
+       * in units of two vec4s, minus one -- confirm against hardware docs.
+       */
+      OUT_RELOC(push_bo,
+                I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+                ALIGN(nr_params, 2) / 2 - 1);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      /* Drop our reference now that the reloc has been emitted. */
+      drm_intel_bo_unreference(push_bo);
+   } else {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   /* Assemble the variable dwords of the VS unit packet up front. */
+   dw2 = (0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+         (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+   dw4 = (1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+         (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+         (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT);
+   dw5 = (0 << GEN6_VS_MAX_THREADS_SHIFT) |
+         GEN6_VS_STATISTICS_ENABLE;
+
+   BEGIN_BATCH(6);
+   OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
+   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(dw2);
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH(dw4);
+   OUT_BATCH(dw5);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+/* Tracked-state atom that emits the gen6 VS constant and unit packets. */
+const struct brw_tracked_state gen6_vs_state = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_NR_VS_SURFACES |
+                BRW_NEW_URB_FENCE |
+                BRW_NEW_CONTEXT),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .emit = upload_vs_state,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
new file mode 100644
index 0000000000..1eb17ca627
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2009 Intel
Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+/* Emits the PS push-constant packet (CMD_3D_CONSTANT_PS_STATE) followed by
+ * the WM unit packet (CMD_3D_WM_STATE).  An MI flush is emitted after each
+ * of the two packets.
+ *
+ * When the program uses a constant buffer, or has no parameters, the
+ * constant packet is sent with all buffers disabled.  Otherwise every
+ * parameter is copied as four floats into a freshly allocated BO that the
+ * packet points at.
+ */
+static void
+upload_wm_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const struct brw_fragment_program *fp =
+      brw_fragment_program_const(brw->fragment_program);
+   unsigned int nr_params = fp->program.Base.Parameters->NumParameters;
+   drm_intel_bo *constant_bo;
+   unsigned int i;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   /* _NEW_PROGRAM_CONSTANTS */
+   if (fp->use_const_buffer || nr_params == 0) {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      /* Updates the ParameterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo",
+                                       nr_params * 4 * sizeof(float),
+                                       4096);
+
+      /* Copy one vec4 per parameter, packed back to back, through a GTT
+       * mapping of the new BO.
+       */
+      drm_intel_gem_bo_map_gtt(constant_bo);
+      for (i = 0; i < nr_params; i++) {
+         memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+                fp->program.Base.Parameters->ParameterValues[i],
+                4 * sizeof(float));
+      }
+      drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 |
+                GEN6_CONSTANT_BUFFER_0_ENABLE |
+                (5 - 2));
+      /* NOTE(review): the reloc delta presumably encodes the buffer length
+       * in units of two vec4s, minus one -- confirm against hardware docs.
+       */
+      OUT_RELOC(constant_bo,
+                I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+                ALIGN(nr_params, 2) / 2 - 1);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      /* Drop our reference now that the reloc has been emitted. */
+      drm_intel_bo_unreference(constant_bo);
+   }
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   dw2 = dw4 = dw5 = dw6 = 0;
+   dw4 |= GEN6_WM_STATISTICS_ENABLE;
+   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
+   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+
+   /* BRW_NEW_NR_WM_SURFACES */
+   dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
+
+   /* CACHE_NEW_SAMPLER */
+   dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
+   dw4 |= (1 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+
+   dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+   dw5 |= GEN6_WM_DISPATCH_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (fp->isGLSL)
+      dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
+   else
+      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
+
+   /* _NEW_LINE */
+   if (ctx->Line.StippleFlag)
+      dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
+
+   /* _NEW_POLYGONSTIPPLE */
+   if (ctx->Polygon.StippleFlag)
+      dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+      dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
+   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+      dw5 |= GEN6_WM_COMPUTED_DEPTH;
+
+   /* _NEW_COLOR */
+   if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
+      dw5 |= GEN6_WM_KILL_ENABLE;
+
+   /* CACHE_NEW_VS_PROG
+    *
+    * This should probably be FS inputs read
+    */
+   dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) <<
+      GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
+
+   BEGIN_BATCH(9);
+   OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2));
+   /* CACHE_NEW_WM_PROG */
+   OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(dw2);
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH(dw4);
+   OUT_BATCH(dw5);
+   OUT_BATCH(dw6);
+   OUT_BATCH(0); /* kernel 1 pointer */
+   OUT_BATCH(0); /* kernel 2 pointer */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+/* Tracked-state atom that emits the gen6 PS constant and WM unit packets.
+ *
+ * The dirty bits cover everything upload_wm_state() reads: the uploaded
+ * program parameters (_NEW_PROGRAM_CONSTANTS), the kernel BO it relocates
+ * against (CACHE_NEW_WM_PROG) and the VS program data used for the SF
+ * output count (CACHE_NEW_VS_PROG), in addition to the line, stipple,
+ * color, surface-count and sampler state.
+ */
+const struct brw_tracked_state gen6_wm_state = {
+   .dirty = {
+      .mesa  = (_NEW_LINE |
+                _NEW_POLYGONSTIPPLE |
+                _NEW_COLOR |
+                _NEW_PROGRAM_CONSTANTS),
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_NR_WM_SURFACES |
+                BRW_NEW_URB_FENCE |
+                BRW_NEW_BATCH),
+      .cache = (CACHE_NEW_SAMPLER |
+                CACHE_NEW_WM_PROG |
+                CACHE_NEW_VS_PROG)
+   },
+   .emit = upload_wm_state,
+};
diff --git a/src/mesa/drivers/dri/i965/server/intel_dri.c b/src/mesa/drivers/dri/i965/server/intel_dri.c
deleted file mode 120000
index effdd26448..0000000000
--- a/src/mesa/drivers/dri/i965/server/intel_dri.c
+++ /dev/null
@@ -1 +0,0 @@
-../../intel/server/intel_dri.c
\ No newline at end of file |