diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_disasm.c | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_optimize.c | 115 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_tex_layout.c | 23 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_emit.c | 43 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 171 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 81 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_clip_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_gs_state.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_vs_state.c | 3 | ||||
l--------- | src/mesa/drivers/dri/i965/server/intel_dri.c | 1 |
18 files changed, 358 insertions, 122 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index f98a1a27db..842d4b7aa1 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -54,6 +54,7 @@ DRIVER_SOURCES = \ brw_gs_emit.c \ brw_gs_state.c \ brw_misc_state.c \ + brw_optimize.c \ brw_program.c \ brw_queryobj.c \ brw_sf.c \ @@ -99,7 +100,6 @@ DRIVER_SOURCES = \ C_SOURCES = \ $(COMMON_SOURCES) \ - $(MINIGLX_SOURCES) \ $(DRIVER_SOURCES) ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index a512896f31..241193c357 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -156,6 +156,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->has_surface_tile_offset = GL_TRUE; brw->has_compr4 = GL_TRUE; brw->has_aa_line_parameters = GL_TRUE; + brw->has_pln = GL_TRUE; } else { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d6fc37e4d8..2855c93ea6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -446,6 +446,7 @@ struct brw_context GLboolean has_compr4; GLboolean has_negative_rhw_bug; GLboolean has_aa_line_parameters; + GLboolean has_pln; ; struct { struct brw_state_flags dirty; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index bb1b5f5ef0..6fe574b22e 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -550,6 +550,7 @@ #define BRW_OPCODE_DP2 87 #define BRW_OPCODE_DPA2 88 #define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_PLN 90 #define BRW_OPCODE_NOP 126 #define BRW_PREDICATE_NONE 0 @@ -830,8 +831,8 @@ #define CMD_URB 0x7805 /* GEN6+ */ # define GEN6_URB_VS_SIZE_SHIFT 16 # define GEN6_URB_VS_ENTRIES_SHIFT 0 -# define GEN6_URB_GS_SIZE_SHIFT 8 -# define GEN6_URB_GS_ENTRIES_SHIFT 0 +# define GEN6_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_URB_GS_SIZE_SHIFT 0 #define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ # define GEN6_CC_VIEWPORT_MODIFY (1 << 12) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index a8f6b993ac..db3fc50a63 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -50,12 +50,14 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, @@ -73,10 +75,10 @@ struct { [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, - [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, @@ -796,7 +798,11 @@ int brw_disasm (FILE *file, struct brw_instruction *inst) err |= control (file, "saturate", saturate, inst->header.saturate, NULL); err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); - if (inst->header.opcode != BRW_OPCODE_SEND) + if (inst->header.opcode == BRW_OPCODE_MATH) { + string (file, " "); + err |= control (file, "function", math_function, + inst->header.destreg__conditionalmod, NULL); + } else if (inst->header.opcode != BRW_OPCODE_SEND) err |= control (file, "conditional modifier", conditional_modifier, inst->header.destreg__conditionalmod, NULL); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 39eb88d7c2..4f55158e8f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -795,6 +795,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) +ALU2(PLN) #undef ALU1 #undef ALU2 @@ -965,4 +966,9 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); + + +/* brw_optimize.c */ +void brw_optimize(struct brw_compile *p); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 82f2fdab2f..c33c3def30 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -573,7 +573,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) - +ALU2(PLN) @@ -1416,7 +1416,10 @@ void brw_urb_WRITE(struct brw_compile *p, * and the first message register index comes from src0. */ if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_pop_insn_state(p); src0 = brw_message_reg(msg_reg_nr); } diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c new file mode 100644 index 0000000000..57df9ea115 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_optimize.c @@ -0,0 +1,115 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "main/macros.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + +static GLboolean +is_single_channel_dp4(struct brw_instruction *insn) +{ + if (insn->header.opcode != BRW_OPCODE_DP4 || + insn->header.execution_size != BRW_EXECUTE_8 || + insn->header.access_mode != BRW_ALIGN_16 || + insn->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + if (!is_power_of_two(insn->bits1.da16.dest_writemask)) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Sets the dependency control fields on DP4 instructions. + * + * The hardware only tracks dependencies on a register basis, so when + * you do: + * + * DP4 dst.x src1 src2 + * DP4 dst.y src1 src3 + * DP4 dst.z src1 src4 + * DP4 dst.w src1 src5 + * + * It will wait to do the DP4 dst.y until the dst.x is resolved, etc. + * We can examine our instruction stream and set the dependency + * control fields to tell the hardware when to do it. + * + * We may want to extend this to other instructions that are used to + * fill in a channel at a time of the destination register. + */ +static void +brw_set_dp4_dependency_control(struct brw_compile *p) +{ + int i; + + for (i = 1; i < p->nr_insn; i++) { + struct brw_instruction *insn = &p->store[i]; + struct brw_instruction *prev = &p->store[i - 1]; + + if (!is_single_channel_dp4(prev)) + continue; + + if (!is_single_channel_dp4(insn)) { + i++; + continue; + } + + /* Only avoid hw dep control if the write masks are different + * channels of one reg. + */ + if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask) + continue; + if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr) + continue; + + /* Check if the second instruction depends on the previous one + * for a src. + */ + if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE && + (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || + insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr)) + continue; + if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE && + (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT || + insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr)) + continue; + + prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED; + insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED; + } +} + +void +brw_optimize(struct brw_compile *p) +{ + brw_set_dp4_dependency_control(p); +} diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 09edfd81fd..b1f56a8e64 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -58,12 +58,12 @@ GLboolean brw_miptree_layout(struct intel_context *intel, GLuint qpitch = 0; GLuint y_pitch = 0; - mt->pitch = mt->width0; + mt->total_width = mt->width0; intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); y_pitch = ALIGN(height, align_h); if (mt->compressed) { - mt->pitch = ALIGN(mt->width0, align_w); + mt->total_width = ALIGN(mt->width0, align_w); } if (mt->first_level != mt->last_level) { @@ -77,13 +77,11 @@ GLboolean brw_miptree_layout(struct intel_context *intel, + minify(minify(mt->width0)); } - if (mip1_width > mt->pitch) { - mt->pitch = mip1_width; + if (mip1_width > mt->total_width) { + mt->total_width = mip1_width; } } - mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); - if (mt->compressed) { qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4; mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; @@ -137,10 +135,10 @@ GLboolean brw_miptree_layout(struct intel_context *intel, intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); if (mt->compressed) { - mt->pitch = ALIGN(width, align_w); + mt->total_width = ALIGN(width, align_w); pack_y_pitch = (height + 3) / 4; } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); + mt->total_width = mt->width0; pack_y_pitch = ALIGN(mt->height0, align_h); } @@ -184,7 +182,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, if (pack_x_pitch > 4) { pack_x_pitch >>= 1; pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); + assert(pack_x_pitch * pack_x_nr <= mt->total_width); } if (pack_y_pitch > 2) { @@ -211,11 +209,8 @@ GLboolean brw_miptree_layout(struct intel_context *intel, i945_miptree_layout_2d(intel, mt, tiling); break; } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, - mt->total_height, - mt->cpp, - mt->pitch * mt->total_height * mt->cpp ); + DBG("%s: %dx%dx%d\n", __FUNCTION__, + mt->total_width, mt->total_height, mt->cpp); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a48804a660..227261409c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1361,31 +1361,6 @@ static void emit_vertex_write( struct brw_vs_compile *c) } } - -/** - * Called after code generation to resolve subroutine calls and the - * END instruction. - * \param end_inst points to brw code for END instruction - * \param last_inst points to last instruction emitted before vertex write - */ -static void -post_vs_emit( struct brw_vs_compile *c, - struct brw_instruction *end_inst, - struct brw_instruction *last_inst ) -{ - GLint offset; - - brw_resolve_cals(&c->func); - - /* patch up the END code to jump past subroutines, etc */ - offset = last_inst - end_inst; - if (offset > 1) { - brw_set_src1(end_inst, brw_imm_d(offset * 16)); - } else { - end_inst->header.opcode = BRW_OPCODE_NOP; - } -} - static GLboolean accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) { @@ -1466,8 +1441,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct intel_context *intel = &brw->intel; const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_depth = 0, loop_depth = 0; - GLuint end_offset = 0; - struct brw_instruction *end_inst, *last_inst; struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 }; const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; @@ -1751,12 +1724,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); break; - case OPCODE_END: - end_offset = p->nr_insn; - /* this instruction will get patched later to jump past subroutine - * code, etc. - */ - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + case OPCODE_END: + emit_vertex_write(c); break; case OPCODE_PRINT: /* no-op */ @@ -1817,13 +1786,9 @@ void brw_vs_emit(struct brw_vs_compile *c ) release_tmps(c); } - end_inst = &p->store[end_offset]; - last_inst = &p->store[p->nr_insn]; - - /* The END instruction will be patched to jump to this code */ - emit_vertex_write(c); + brw_resolve_cals(p); - post_vs_emit(c, end_inst, last_inst); + brw_optimize(p); if (INTEL_DEBUG & DEBUG_VS) { int i; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 47b764d24d..5aade1c4e6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -286,7 +286,7 @@ void brw_wm_pass0( struct brw_wm_compile *c ); void brw_wm_pass1( struct brw_wm_compile *c ); void brw_wm_pass2( struct brw_wm_compile *c ); void brw_wm_emit( struct brw_wm_compile *c ); - +GLboolean brw_wm_arg_can_be_immediate(enum prog_opcode, int arg); void brw_wm_print_value( struct brw_wm_compile *c, struct brw_wm_value *value ); diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index c7d87b9d94..f79de5dbff 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,6 +34,23 @@ #include "brw_context.h" #include "brw_wm.h" +static GLboolean can_do_pln(struct intel_context *intel, + const struct brw_reg *deltas) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + if (!brw->has_pln) + return GL_FALSE; + + if (deltas[1].nr != deltas[0].nr + 1) + return GL_FALSE; + + if (intel->gen < 6 && ((deltas[0].nr & 1) != 0)) + return GL_FALSE; + + return GL_TRUE; +} + /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -44,8 +61,52 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) return reg; } +/* Return the SrcReg index of the channels that can be immediate float operands + * instead of usage of PROGRAM_CONSTANT values through push/pull. + */ +GLboolean +brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg) +{ + int opcode_array[] = { + [OPCODE_ADD] = 2, + [OPCODE_CMP] = 3, + [OPCODE_DP3] = 2, + [OPCODE_DP4] = 2, + [OPCODE_DPH] = 2, + [OPCODE_MAX] = 2, + [OPCODE_MIN] = 2, + [OPCODE_MOV] = 1, + [OPCODE_MUL] = 2, + [OPCODE_SEQ] = 2, + [OPCODE_SGE] = 2, + [OPCODE_SGT] = 2, + [OPCODE_SLE] = 2, + [OPCODE_SLT] = 2, + [OPCODE_SNE] = 2, + [OPCODE_XPD] = 2, + }; + + /* These opcodes get broken down in a way that allow two + * args to be immediates. + */ + if (opcode == OPCODE_MAD || opcode == OPCODE_LRP) { + if (arg == 1 || arg == 2) + return GL_TRUE; + } + + if (opcode > ARRAY_SIZE(opcode_array)) + return GL_FALSE; -/* Payload R0: + return arg == opcode_array[opcode] - 1; +} + +/** + * Computes the screen-space x,y position of the pixels. + * + * This will be used by emit_delta_xy() or emit_wpos_xy() for + * interpolation of attributes.. + * + * Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, * corresponding to each of the 16 execution channels. @@ -60,7 +121,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.7 -- ? * R1.8 -- ? */ - void emit_pixel_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask) @@ -100,7 +160,14 @@ void emit_pixel_xy(struct brw_wm_compile *c, brw_pop_insn_state(p); } - +/** + * Computes the screen-space x,y distance of the pixels from the start + * vertex. + * + * This will be used in linterp or pinterp with the start vertex value + * and the Cx, Cy, and C0 coefficients passed in from the setup engine + * to produce interpolated attribute values. + */ void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -108,25 +175,27 @@ void emit_delta_xy(struct brw_compile *p, { struct brw_reg r1 = brw_vec1_grf(1, 0); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW), - negate(r1)); - } + if (mask == 0) + return; - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); + assert(mask == WRITEMASK_XY); - } + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers produced by emit_pixel_xy(). + */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); } +/** + * Computes the pixel offset from the window origin for gl_FragCoord(). + */ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, @@ -134,9 +203,6 @@ void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ if (mask & WRITEMASK_X) { if (c->fp->program.PixelCenterInteger) { /* X' = X */ @@ -186,6 +252,7 @@ void emit_pixel_w(struct brw_wm_compile *c, const struct brw_reg *deltas) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; /* Don't need this if all you are doing is interpolating color, for * instance. @@ -196,8 +263,12 @@ void emit_pixel_w(struct brw_wm_compile *c, /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ - brw_LINE(p, brw_null_reg(), interp3, deltas[0]); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, brw_message_reg(2), interp3, deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + } /* Calc w */ if (c->dispatch_width == 16) { @@ -224,6 +295,7 @@ void emit_linterp(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *deltas) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -235,8 +307,12 @@ void emit_linterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } } @@ -249,6 +325,7 @@ void emit_pinterp(struct brw_compile *p, const struct brw_reg *deltas, const struct brw_reg *w) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -260,8 +337,12 @@ void emit_pinterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } for (i = 0; i < 4; i++) { @@ -577,14 +658,10 @@ void emit_cmp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg2[i]); - brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0)); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); + brw_SEL(p, dst[i], arg1[i], arg2[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -601,14 +678,10 @@ void emit_max(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg0[i]); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); + brw_SEL(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -625,14 +698,10 @@ void emit_min(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); - brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg0[i]); + brw_SEL(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -1086,11 +1155,19 @@ static void emit_kil( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - GLuint i; - - /* XXX - usually won't need 4 compares! - */ + GLuint i, j; + for (i = 0; i < 4; i++) { + /* Check if we've already done the comparison for this reg + * -- common when someone does KIL TEMP.wwww. + */ + for (j = 0; j < i; j++) { + if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0) + break; + } + if (j != i) + continue; + brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 315b030484..3b7e421b16 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -23,6 +23,9 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; + if (INTEL_DEBUG & DEBUG_GLSL_FORCE) + return GL_TRUE; + for (i = 0; i < fp->Base.NumInstructions; i++) { const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { @@ -289,6 +292,7 @@ reclaim_temps(struct brw_wm_compile *c) */ static void prealloc_reg(struct brw_wm_compile *c) { + struct intel_context *intel = &c->func.brw->intel; int i, j; struct brw_reg reg; int urb_read_length = 0; @@ -413,6 +417,43 @@ static void prealloc_reg(struct brw_wm_compile *c) } } + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + + switch (inst->Opcode) { + case WM_DELTAXY: + /* Allocate WM_DELTAXY destination on G45/GM45 to an + * even-numbered GRF if possible so that we can use the PLN + * instruction. + */ + if (inst->DstReg.WriteMask == WRITEMASK_XY && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && + (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { + int grf; + + for (grf = c->first_free_grf & ~1; + grf < BRW_WM_MAX_GRF; + grf += 2) + { + if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { + c->used_grf[grf] = GL_TRUE; + c->used_grf[grf + 1] = GL_TRUE; + c->first_free_grf = grf + 2; /* a guess */ + + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, + brw_vec8_grf(grf, 0)); + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, + brw_vec8_grf(grf + 1, 0)); + break; + } + } + } + default: + break; + } + } + /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! @@ -529,12 +570,35 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const GLuint nr = 1; const GLuint component = GET_SWZ(src->Swizzle, channel); - /* Extended swizzle terms */ - if (component == SWIZZLE_ZERO) { - return brw_imm_f(0.0F); - } - else if (component == SWIZZLE_ONE) { - return brw_imm_f(1.0F); + /* Only one immediate value can be used per native opcode, and it + * has be in the src1 slot, so not all Mesa instructions will get + * to take advantage of immediate constants. + */ + if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) { + const struct gl_program_parameter_list *params; + + params = c->fp->program.Base.Parameters; + + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } else if (component == SWIZZLE_ONE) { + if (src->Negate) + return brw_imm_f(-1.0F); + else + return brw_imm_f(1.0F); + } + + if (src->File == PROGRAM_CONSTANT) { + float f = params->ParameterValues[src->Index][component]; + + if (src->Abs) + f = fabs(f); + if (src->Negate) + f = -f; + + return brw_imm_f(f); + } } if (c->fp->use_const_buffer && @@ -2029,8 +2093,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - printf("unsupported IR in fragment shader %d\n", - inst->Opcode); + printf("unsupported opcode %d (%s) in fragment shader\n", + inst->Opcode, inst->Opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->Opcode) : "unknown"); } /* Release temporaries containing any unaliased source regs. */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index ce0bf0b97d..6b9e566886 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -261,7 +261,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.format = firstImage->TexFormat; key.internal_format = firstImage->InternalFormat; - key.pitch = intelObj->mt->pitch; + key.pitch = intelObj->mt->region->pitch; key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; key.offset = 0; diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 06f8145e32..acc4b7f101 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -55,7 +55,7 @@ upload_clip_state(struct brw_context *brw) OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); OUT_BATCH(GEN6_CLIP_ENABLE | GEN6_CLIP_API_OGL | - GEN6_CLIP_MODE_REJECT_ALL | /* XXX: debug: get VS working */ + GEN6_CLIP_MODE_NORMAL | GEN6_CLIP_XY_TEST | depth_clamp | provoking); diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index 161e7b85c2..cefc93ba48 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -50,7 +50,8 @@ upload_gs_state(struct brw_context *brw) BEGIN_BATCH(7); OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + OUT_BATCH(GEN6_GS_SPF_MODE | + (0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index fe597dfb94..5916a13994 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -100,7 +100,8 @@ upload_vs_state(struct brw_context *brw) (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) | - GEN6_VS_STATISTICS_ENABLE); + GEN6_VS_STATISTICS_ENABLE | + GEN6_VS_ENABLE); ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); diff --git a/src/mesa/drivers/dri/i965/server/intel_dri.c b/src/mesa/drivers/dri/i965/server/intel_dri.c deleted file mode 120000 index effdd26448..0000000000 --- a/src/mesa/drivers/dri/i965/server/intel_dri.c +++ /dev/null @@ -1 +0,0 @@ -../../intel/server/intel_dri.c
\ No newline at end of file |