#include "util/u_math.h"

#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"


static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
                                  const struct brw_fp_instruction *inst,
                                  GLuint component);

static void reclaim_temps(struct brw_wm_compile *c);


/** Mark GRF register as used. */
static void
prealloc_grf(struct brw_wm_compile *c, int r)
{
   c->used_grf[r] = GL_TRUE;
}


/**
 * Mark given GRF register as not in use.
 * Also lowers the first_free_grf search hint so the next allocation scan
 * can see the released register.
 */
static void
release_grf(struct brw_wm_compile *c, int r)
{
   /*assert(c->used_grf[r]);*/
   c->used_grf[r] = GL_FALSE;
   c->first_free_grf = MIN2(c->first_free_grf, r);
}


/**
 * Scan used_grf[] upwards from the first_free_grf hint for an unused
 * register.  On success the register is marked as used and the hint is
 * advanced just past it.
 *
 * \return index of the claimed register, or -1 if none is free at or
 *         above the hint.
 */
static int
scan_for_free_grf(struct brw_wm_compile *c)
{
   GLuint r;

   for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
      if (!c->used_grf[r]) {
         c->used_grf[r] = GL_TRUE;
         c->first_free_grf = r + 1;  /* a guess */
         return (int) r;
      }
   }
   return -1;
}


/**
 * Return index of a free GRF, mark it as used.
 *
 * If the first scan fails, reclaim_temps() is given a chance to free
 * registers and the scan is repeated from GRF 0.
 *
 * \return register index, or -1 if no register could be found.  In that
 *         case c->out_of_regs is set and a warning is printed the first
 *         time it happens.
 */
static int
alloc_grf(struct brw_wm_compile *c)
{
   int r = scan_for_free_grf(c);
   if (r >= 0)
      return r;

   /* no free temps, try to reclaim some */
   reclaim_temps(c);
   c->first_free_grf = 0;

   /* try alloc again; this pass starts at GRF 0, so a failure here means
    * every register really is in use.
    */
   r = scan_for_free_grf(c);
   if (r >= 0)
      return r;

   /* really, no free GRF regs found */
   if (!c->out_of_regs) {
      /* print warning once per compilation */
      debug_printf("%s: ran out of registers for fragment program\n",
                   __FUNCTION__);
      c->out_of_regs = GL_TRUE;
   }

   return -1;
}


/**
 * Return number of GRF registers used, i.e. one more than the highest
 * index marked in used_grf[], or 0 if nothing is marked.
 */
static int
num_grf_used(const struct brw_wm_compile *c)
{
   int r;

   for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) {
      if (c->used_grf[r])
         return r + 1;
   }
   return 0;
}



/**
 * Record the mapping of a Mesa register to a hardware register.
*/ static void set_reg(struct brw_wm_compile *c, int file, int index, int component, struct brw_reg reg) { c->wm_regs[file][index][component].reg = reg; c->wm_regs[file][index][component].inited = GL_TRUE; } static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; /* if we need to allocate another temp, grow the tmp_regs[] array */ if (c->tmp_index == c->tmp_max) { int r = alloc_grf(c); if (r < 0) { /*printf("Out of temps in %s\n", __FUNCTION__);*/ r = 50; /* XXX random register! */ } c->tmp_regs[ c->tmp_max++ ] = r; } /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); /*printf("alloc_temp %d\n", reg.nr);*/ assert(reg.nr < BRW_WM_MAX_GRF); return reg; } /** * Save current temp register info. * There must be a matching call to release_tmps(). */ static int mark_tmps(struct brw_wm_compile *c) { return c->tmp_index; } static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index ) { return brw_vec8_grf( c->tmp_regs[ index ], 0 ); } static void release_tmps(struct brw_wm_compile *c, int mark) { c->tmp_index = mark; } /** * Convert Mesa src register to brw register. * * Since we're running in SOA mode each Mesa register corresponds to four * hardware registers. We allocate the hardware registers as needed here. * * \param file register file, one of PROGRAM_x * \param index register number * \param component src component (X=0, Y=1, Z=2, W=3) * \param nr not used?!? * \param neg negate value? * \param abs take absolute value? 
*/
static struct brw_reg
get_reg(struct brw_wm_compile *c, int file, int index, int component,
        int nr, GLuint neg, GLuint abs)
{
   GLboolean mappable = GL_FALSE;
   struct brw_reg hw_reg;

   /* Classify the register file: only some files get a GRF mapping. */
   switch (file) {
   case TGSI_FILE_CONSTANT:
   case TGSI_FILE_TEMPORARY:
   case TGSI_FILE_INPUT:
   case TGSI_FILE_OUTPUT:
   case BRW_FILE_PAYLOAD:
      mappable = GL_TRUE;
      break;

   case TGSI_FILE_NULL:
      /* silently maps to the null register */
      break;

   default:
      debug_printf("%s: Unexpected file type\n", __FUNCTION__);
      break;
   }

   if (!mappable)
      return brw_null_reg();

   assert(index < 256);
   assert(component < 4);

   /* First reference to this Mesa register component?  Then back it with
    * a freshly allocated hardware register and remember the mapping.
    */
   if (!c->wm_regs[file][index][component].inited) {
      int grf = alloc_grf(c);
      /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
      if (grf < 0) {
         /* totally out of temps */
         grf = 51; /* XXX random register! */
      }
      /*printf("Alloc new grf %d for %d.%d\n", grf, index, component);*/
      set_reg(c, file, index, component, brw_vec8_grf(grf, 0));
   }

   hw_reg = c->wm_regs[file][index][component].reg;

   /* apply source modifiers: per-component negate first, then abs */
   if (neg & (1 << component))
      hw_reg = negate(hw_reg);

   if (abs)
      hw_reg = brw_abs(hw_reg);

   return hw_reg;
}



/**
 * Find first/last instruction that references each temporary register.
*/
/* On entry every intBegin[]/intEnd[] slot is reset to -1.  Each reference
 * to a temporary (as a source or as the destination) is reported to
 * update_interval(); references inside a loop are additionally reported
 * at the End instruction of the innermost enclosing loop.
 *
 * Returns GL_TRUE if the whole program was scanned.  Returns GL_FALSE,
 * leaving the arrays only partially filled, when the scan has to give up:
 * on OPCODE_CAL, on a relatively-addressed temporary, on loops nested
 * deeper than MAX_LOOP_NESTING, or on an ENDLOOP with no open loop.
 */
GLboolean
_mesa_find_temp_intervals(const struct prog_instruction *instructions,
                          GLuint numInstructions,
                          GLint intBegin[MAX_PROGRAM_TEMPS],
                          GLint intEnd[MAX_PROGRAM_TEMPS])
{
   struct loop_info
   {
      GLuint Start, End;  /**< Start, end instructions of loop */
   };
   struct loop_info loopStack[MAX_LOOP_NESTING];
   GLuint loopStackDepth = 0;
   GLuint i;

   for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
      intBegin[i] = intEnd[i] = -1;
   }

   /* Scan instructions looking for temporary registers */
   for (i = 0; i < numInstructions; i++) {
      const struct prog_instruction *inst = instructions + i;

      if (inst->Opcode == OPCODE_BGNLOOP) {
         /* refuse to write past the end of loopStack[] */
         if (loopStackDepth >= MAX_LOOP_NESTING)
            return GL_FALSE;
         loopStack[loopStackDepth].Start = i;
         loopStack[loopStackDepth].End = inst->BranchTarget;
         loopStackDepth++;
      }
      else if (inst->Opcode == OPCODE_ENDLOOP) {
         /* unbalanced ENDLOOP: the unsigned depth would wrap around */
         if (loopStackDepth == 0)
            return GL_FALSE;
         loopStackDepth--;
      }
      else if (inst->Opcode == OPCODE_CAL) {
         return GL_FALSE;
      }
      else {
         const GLuint numSrc = 3;
         GLuint j;

         for (j = 0; j < numSrc; j++) {
            if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
               const GLuint index = inst->SrcReg[j].Index;
               if (inst->SrcReg[j].RelAddr)
                  return GL_FALSE;
               update_interval(intBegin, intEnd, index, i);
               if (loopStackDepth > 0) {
                  /* extend temp register's interval to end of loop */
                  GLuint loopEnd = loopStack[loopStackDepth - 1].End;
                  update_interval(intBegin, intEnd, index, loopEnd);
               }
            }
         }

         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
            const GLuint index = inst->DstReg.Index;
            if (inst->DstReg.RelAddr)
               return GL_FALSE;
            update_interval(intBegin, intEnd, index, i);
            if (loopStackDepth > 0) {
               /* extend temp register's interval to end of loop */
               GLuint loopEnd = loopStack[loopStackDepth - 1].End;
               update_interval(intBegin, intEnd, index, loopEnd);
            }
         }
      }
   }

   return GL_TRUE;
}


/**
 * This is called if we run out of GRF registers.  Examine the live intervals
 * of temp regs in the program and free those which won't be used again.
*/ static void reclaim_temps(struct brw_wm_compile *c) { GLint intBegin[BRW_WM_MAX_TEMPS]; GLint intEnd[BRW_WM_MAX_TEMPS]; int index; /*printf("Reclaim temps:\n");*/ _mesa_find_temp_intervals(c->fp_instructions, c->nr_fp_insns, intBegin, intEnd); for (index = 0; index < BRW_WM_MAX_TEMPS; index++) { if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { /* program temp[i] can be freed */ int component; /*printf(" temp[%d] is dead\n", index);*/ for (component = 0; component < 4; component++) { if (c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited) { int r = c->wm_regs[TGSI_FILE_TEMPORARY][index][component].reg.nr; release_grf(c, r); /* printf(" Reclaim temp %d, reg %d at inst %d\n", index, r, c->cur_inst); */ c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited = GL_FALSE; } } } } } /** * Preallocate registers. This sets up the Mesa to hardware register * mapping for certain registers, such as constants (uniforms/state vars) * and shader inputs. */ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; struct brw_reg reg; int urb_read_length = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; GLuint reg_index = 0; memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); c->first_free_grf = 0; for (i = 0; i < 4; i++) { if (i < c->key.nr_depth_regs) reg = brw_vec8_grf(i * 2, 0); else reg = brw_vec8_grf(0, 0); set_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, i, reg); } reg_index += 2 * c->key.nr_depth_regs; /* constants */ { const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; const GLuint nr_temps = c->fp->program.Base.NumTemporaries; /* use a real constant buffer, or just use a section of the GRF? */ /* XXX this heuristic may need adjustment... 
*/ if ((nr_params + nr_temps) * 4 + reg_index > 80) c->fp->use_const_buffer = GL_TRUE; else c->fp->use_const_buffer = GL_FALSE; /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ if (c->fp->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from * it with a dataport read message. */ /* number of float constants in CURBE */ c->prog_data.nr_params = 0; } else { const struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; int index = 0; /* number of float constants in CURBE */ c->prog_data.nr_params = 4 * nr_params; /* loop over program constants (float[4]) */ for (i = 0; i < nr_params; i++) { /* loop over XYZW channels */ for (j = 0; j < 4; j++, index++) { reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* Save pointer to parameter/constant value. * Constants will be copied in prepare_constant_buffer() */ c->prog_data.param[index] = &plist->ParameterValues[i][j]; set_reg(c, TGSI_FILE_STATE_VAR, i, j, reg); } } /* number of constant regs used (each reg is float[8]) */ c->nr_creg = 2 * ((4 * nr_params + 15) / 16); reg_index += c->nr_creg; } } /* fragment shader inputs */ for (i = 0; i < VERT_RESULT_MAX; i++) { int fp_input; if (i >= VERT_RESULT_VAR0) fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; else if (i <= VERT_RESULT_TEX7) fp_input = i; else fp_input = -1; if (fp_input >= 0 && inputs & (1 << fp_input)) { urb_read_length = reg_index; reg = brw_vec8_grf(reg_index, 0); for (j = 0; j < 4; j++) set_reg(c, TGSI_FILE_PAYLOAD, fp_input, j, reg); } if (c->key.nr_vp_outputs > i) { reg_index += 2; } } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); reg_index++; c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); reg_index += 2; /* mark GRF regs [0..reg_index-1] as in-use */ for (i = 0; i < reg_index; i++) prealloc_grf(c, 
i); /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ prealloc_grf(c, 126); prealloc_grf(c, 127); for (i = 0; i < c->nr_fp_insns; i++) { const struct brw_fp_instruction *inst = &c->fp_instructions[i]; struct brw_reg dst[4]; switch (inst->Opcode) { case OPCODE_TEX: case OPCODE_TXB: /* Allocate the channels of texture results contiguously, * since they are written out that way by the sampler unit. */ for (j = 0; j < 4; j++) { dst[j] = get_dst_reg(c, inst, j); if (j != 0) assert(dst[j].nr == dst[j - 1].nr + 1); } break; default: break; } } /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! */ if (c->fp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } #if 0 printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); #endif } /** * Check if any of the instruction's src registers are constants, uniforms, * or statevars. If so, fetch any constants that we don't already have in * the three GRF slots. */ static void fetch_constants(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint i; /* loop over instruction src regs */ for (i = 0; i < 3; i++) { const struct prog_src_register *src = &inst->SrcReg[i]; if (src->File == TGSI_FILE_IMMEDIATE || src->File == TGSI_FILE_CONSTANT) { c->current_const[i].index = src->Index; #if 0 printf(" fetch const[%d] for arg %d into reg %d\n", src->Index, i, c->current_const[i].reg.nr); #endif /* need to fetch the constant now */ brw_dp_READ_4(p, c->current_const[i].reg, /* writeback dest */ src->RelAddr, /* relative indexing? */ 16 * src->Index, /* byte offset */ SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ ); } } } /** * Convert Mesa dst register to brw register. 
*/ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, const struct brw_fp_instruction *inst, GLuint component) { const int nr = 1; return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, 0, 0); } static struct brw_reg get_src_reg_const(struct brw_wm_compile *c, const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint component) { /* We should have already fetched the constant from the constant * buffer in fetch_constants(). Now we just have to return a * register description that extracts the needed component and * smears it across all eight vector components. */ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; struct brw_reg const_reg; assert(component < 4); assert(srcRegIndex < 3); assert(c->current_const[srcRegIndex].index != -1); const_reg = c->current_const[srcRegIndex].reg; /* extract desired float from the const_reg, and smear */ const_reg = stride(const_reg, 0, 1, 0); const_reg.subnr = component * 4; if (src->Negate) const_reg = negate(const_reg); if (src->Abs) const_reg = brw_abs(const_reg); #if 0 printf(" form const[%d].%d for arg %d, reg %d\n", c->current_const[srcRegIndex].index, component, srcRegIndex, const_reg.nr); #endif return const_reg; } /** * Convert Mesa src register to brw register. 
*/ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint channel) { const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; const GLuint nr = 1; const GLuint component = BRW_GET_SWZ(src->Swizzle, channel); /* Extended swizzle terms */ if (component == SWIZZLE_ZERO) { return brw_imm_f(0.0F); } else if (component == SWIZZLE_ONE) { return brw_imm_f(1.0F); } if (c->fp->use_const_buffer && (src->File == TGSI_FILE_STATE_VAR || src->File == TGSI_FILE_CONSTANT || src->File == TGSI_FILE_UNIFORM)) { return get_src_reg_const(c, inst, srcRegIndex, component); } else { /* other type of source register */ return get_reg(c, src->File, src->Index, component, nr, src->Negate, src->Abs); } } /** * Same as \sa get_src_reg() but if the register is a immediate, emit * a brw_reg encoding the immediate. * Note that a brw instruction only allows one src operand to be a immediate. * For instructions with more than one operand, only the second can be a * immediate. This means that we treat some immediates as constants * (which why TGSI_FILE_IMMEDIATE is checked in fetch_constants()). * */ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint channel) { const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; if (src->File == TGSI_FILE_IMMEDIATE) { /* an immediate */ const int component = BRW_GET_SWZ(src->Swizzle, channel); const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; if (src->Negate) value = -value; if (src->Abs) value = FABSF(value); #if 0 printf(" form immed value %f for chan %d\n", value, channel); #endif return brw_imm_f(value); } else { return get_src_reg(c, inst, srcRegIndex, channel); } } /** * Subroutines are minimal support for resusable instruction sequences. 
* They are implemented as simply as possible to minimise overhead: there * is no explicit support for communication between the caller and callee * other than saving the return address in a temporary register, nor is * there any automatic local storage. This implies that great care is * required before attempting reentrancy or any kind of nested * subroutine invocations. */ static void invoke_subroutine( struct brw_wm_compile *c, enum _subroutine subroutine, void (*emit)( struct brw_wm_compile * ) ) { struct brw_compile *p = &c->func; assert( subroutine < BRW_WM_MAX_SUBROUTINE ); if( c->subroutines[ subroutine ] ) { /* subroutine previously emitted: reuse existing instructions */ int mark = mark_tmps( c ); struct brw_reg return_address = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); int here = p->nr_insn; brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) ); brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( ( c->subroutines[ subroutine ] - here - 1 ) << 4 ) ); brw_pop_insn_state(p); release_tmps( c, mark ); } else { /* previously unused subroutine: emit, and mark for later reuse */ int mark = mark_tmps( c ); struct brw_reg return_address = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); struct brw_instruction *calc; int base = p->nr_insn; brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) ); brw_pop_insn_state(p); c->subroutines[ subroutine ] = p->nr_insn; emit( c ); brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV( p, brw_ip_reg(), return_address ); brw_pop_insn_state(p); brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); release_tmps( c, mark ); } } static void emit_trunc( struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; brw_set_saturate(p, inst->SaturateMode != 
SATURATE_OFF); for (i = 0; i < 4; i++) { if (mask & (1<func; GLuint mask = inst->DstReg.WriteMask; brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for (i = 0; i < 4; i++) { if (mask & (1<func; GLuint mask = inst->DstReg.WriteMask; dst0 = get_dst_reg(c, inst, 0); dst1 = get_dst_reg(c, inst, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } } static void emit_delta_xy(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; dst0 = get_dst_reg(c, inst, 0); dst1 = get_dst_reg(c, inst, 1); src0 = get_src_reg(c, inst, 0, 0); src1 = get_src_reg(c, inst, 0, 1); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ if (mask & WRITEMASK_X) { brw_ADD(p, dst0, retype(src0, BRW_REGISTER_TYPE_UW), negate(r1)); } if (mask & WRITEMASK_Y) { brw_ADD(p, dst1, retype(src1, BRW_REGISTER_TYPE_UW), negate(suboffset(r1,1))); } } static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, GLuint target, GLuint eot) { struct brw_compile *p = &c->func; /* Pass through control information: */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? 
*/ brw_MOV(p, brw_message_reg(base_reg + 1), brw_vec8_grf(1, 0)); brw_pop_insn_state(p); } /* Send framebuffer write message: */ brw_fb_WRITE(p, retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), target, nr, 0, eot); } static void emit_fb_write(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; int channel; GLuint target, eot; struct brw_reg src0; /* Reserve a space for AA - may not be needed: */ if (c->key.aa_dest_stencil_reg) nr += 1; brw_push_insn_state(p); for (channel = 0; channel < 4; channel++) { src0 = get_src_reg(c, inst, 0, channel); /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ brw_MOV(p, brw_message_reg(nr + channel), src0); } /* skip over the regs populated above: */ nr += 8; brw_pop_insn_state(p); if (c->key.source_depth_to_render_target) { if (c->key.computes_depth) { src0 = get_src_reg(c, inst, 2, 2); brw_MOV(p, brw_message_reg(nr), src0); } else { src0 = get_src_reg(c, inst, 1, 1); brw_MOV(p, brw_message_reg(nr), src0); } nr += 2; } if (c->key.dest_depth_reg) { const GLuint comp = c->key.dest_depth_reg / 2; const GLuint off = c->key.dest_depth_reg % 2; if (off != 0) { /* XXX this code needs review/testing */ struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); /* 2nd half? 
*/ brw_MOV(p, brw_message_reg(nr+1), arg1_1); brw_pop_insn_state(p); } else { struct brw_reg src = get_src_reg(c, inst, 1, 1); brw_MOV(p, brw_message_reg(nr), src); } nr += 2; } target = inst->Aux >> 1; eot = inst->Aux & 1; fire_fb_write(c, 0, nr, target, eot); } static void emit_pixel_w( struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; if (mask & WRITEMASK_W) { struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; dst = get_dst_reg(c, inst, 3); src0 = get_src_reg(c, inst, 0, 0); delta0 = get_src_reg(c, inst, 1, 0); delta1 = get_src_reg(c, inst, 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ brw_LINE(p, brw_null_reg(), interp3, delta0); brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); /* Calc w */ brw_math_16( p, dst, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_PRECISION_FULL); } } static void emit_linterp(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0; GLuint nr, i; src0 = get_src_reg(c, inst, 0, 0); delta0 = get_src_reg(c, inst, 1, 0); delta1 = get_src_reg(c, inst, 1, 1); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); interp[3] = brw_vec1_grf(nr+1, 4); for(i = 0; i < 4; i++ ) { if (mask & (1<func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, src0; GLuint nr, i; src0 = get_src_reg(c, inst, 0, 0); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); interp[3] = brw_vec1_grf(nr+1, 4); for(i = 0; i < 4; i++ ) { if (mask & (1<func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg 
interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0, w; GLuint nr, i; src0 = get_src_reg(c, inst, 0, 0); delta0 = get_src_reg(c, inst, 1, 0); delta1 = get_src_reg(c, inst, 1, 1); w = get_src_reg(c, inst, 2, 3); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); interp[3] = brw_vec1_grf(nr+1, 4); for(i = 0; i < 4; i++ ) { if (mask & (1<func; struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); struct brw_reg dst; GLuint mask = inst->DstReg.WriteMask; int i; for (i = 0; i < 4; i++) { if (mask & (1<func; GLuint mask = inst->DstReg.WriteMask; for (i = 0; i < 4; i++) { GLuint i2 = (i+2)%3; GLuint i1 = (i+1)%3; if (mask & (1<SaturateMode != SATURATE_OFF); brw_MAC(p, dst, src0, src1); brw_set_saturate(p, 0); } } brw_set_saturate(p, 0); } static void emit_dp3(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; assert(is_power_of_two(mask & WRITEMASK_XYZW)); for (i = 0; i < 3; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 
1 : 0); brw_MAC(p, dst, src0[2], src1[2]); brw_set_saturate(p, 0); } static void emit_dp4(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; assert(is_power_of_two(mask & WRITEMASK_XYZW)); for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MAC(p, dst, src0[3], src1[3]); brw_set_saturate(p, 0); } static void emit_dph(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; assert(is_power_of_two(mask & WRITEMASK_XYZW)); for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, inst, 0, i); src1[i] = get_src_reg_imm(c, inst, 1, i); } dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_ADD(p, dst, dst, src1[3]); brw_set_saturate(p, 0); } /** * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. * Note that the result of the function is smeared across the dest * register's X, Y, Z and W channels (subject to writemasking of course). 
*/ static void emit_math1(struct brw_wm_compile *c, const struct brw_fp_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; GLuint mask = inst->DstReg.WriteMask; int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; assert(is_power_of_two(mask & WRITEMASK_XYZW)); /* Get first component of source register */ dst = get_dst_reg(c, inst, dst_chan); src0 = get_src_reg(c, inst, 0, 0); brw_MOV(p, brw_message_reg(2), src0); brw_math(p, dst, func, (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } static void emit_rcp(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; GLuint mask = inst->DstReg.WriteMask; int i; brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<func; struct brw_reg src0, addr_reg; brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 
1 : 0); addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, 0); src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ brw_MOV(p, addr_reg, src0); brw_set_saturate(p, 0); } static void emit_mul(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; GLuint mask = inst->DstReg.WriteMask; int i; brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<func; struct brw_reg src0, dst; GLuint mask = inst->DstReg.WriteMask; int i; brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<SaturateMode != SATURATE_OFF) brw_set_saturate(p, 0); } static void emit_flr(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; GLuint mask = inst->DstReg.WriteMask; int i; brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<func; const GLuint mask = inst->DstReg.WriteMask; const int mark = mark_tmps(c); int i; brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); if (inst->Opcode == OPCODE_MIN) brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); else brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 
1 : 0); brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst, src1); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); if (use_temp) brw_MOV(p, real_dst, dst); } } brw_pop_insn_state(p); release_tmps(c, mark); } static void emit_pow(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; GLuint mask = inst->DstReg.WriteMask; int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; assert(is_power_of_two(mask & WRITEMASK_XYZW)); dst = get_dst_reg(c, inst, dst_chan); src0 = get_src_reg_imm(c, inst, 0, 0); src1 = get_src_reg_imm(c, inst, 1, 0); brw_MOV(p, brw_message_reg(2), src0); brw_MOV(p, brw_message_reg(3), src1); brw_math(p, dst, BRW_MATH_FUNCTION_POW, (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } static void emit_lrp(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg dst, tmp1, tmp2, src0, src1, src2; int i; int mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MAC(p, dst, src0, tmp1); brw_set_saturate(p, 0); } release_tmps(c, mark); } } /** * For GLSL shaders, this KIL will be unconditional. * It may be contained inside an IF/ENDIF structure of course. 
*/ static void emit_kil(struct brw_wm_compile *c) { struct brw_compile *p = &c->func; struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK brw_AND(p, depth, c->emit_mask_reg, depth); brw_pop_insn_state(p); } static void emit_mad(struct brw_wm_compile *c, const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg dst, src0, src1, src2; int i; for (i = 0; i < 4; i++) { if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); brw_ADD(p, dst, dst, src2); brw_set_saturate(p, 0); } } } static void emit_sop(struct brw_wm_compile *c, const struct brw_fp_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg dst, src0, src1; int i; for (i = 0; i < 4; i++) { if (mask & (1<func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg src0[2], dst[2]; dst[0] = get_dst_reg(c, inst, 0); dst[1] = get_dst_reg(c, inst, 1); src0[0] = get_src_reg(c, inst, 0, 0); src0[1] = get_src_reg(c, inst, 0, 1); /* Calculate the pixel offset from window bottom left into destination * X and Y channels. */ if (mask & WRITEMASK_X) { /* X' = X */ brw_MOV(p, dst[0], retype(src0[0], BRW_REGISTER_TYPE_W)); } if (mask & WRITEMASK_Y) { /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(src0[1], BRW_REGISTER_TYPE_W)), brw_imm_d(c->key.drawable_height - 1)); } } /* TODO BIAS on SIMD8 not working yet... 
*/
/**
 * Emit a texture sample with LOD bias (TXB).
 *
 * Loads the texture coordinates from source 0 into message registers
 * m2..m4 (unused coordinates are written as 0.0), the bias from
 * source 0's W component into m5 and a zero "ref" value into m6, then
 * issues a sampler message whose result lands at dst[0].
 *
 * Calls abort() if inst->tex_target is not one of the handled targets.
 */
static void emit_txb(struct brw_wm_compile *c,
                     const struct brw_fp_instruction *inst)
{
   struct brw_compile *p = &c->func;
   struct brw_reg dst[4], src[4], payload_reg;
   /* Note: tex_unit was already looked up through SamplerTextures[] */
   const GLuint unit = inst->tex_unit;
   GLuint i;
   GLuint msg_type;

   assert(unit < BRW_MAX_TEX_UNIT);

   /* Payload register passed to the sampler as src0 below. */
   payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);

   /* Look up all four destination and source-0 component registers. */
   for (i = 0; i < 4; i++)
      dst[i] = get_dst_reg(c, inst, i);
   for (i = 0; i < 4; i++)
      src[i] = get_src_reg(c, inst, 0, i);

   /* Coordinates: pad the ones the target does not use with 0.0. */
   switch (inst->tex_target) {
   case TEXTURE_1D_INDEX:
      brw_MOV(p, brw_message_reg(2), src[0]);         /* s coord */
      brw_MOV(p, brw_message_reg(3), brw_imm_f(0));   /* t coord */
      brw_MOV(p, brw_message_reg(4), brw_imm_f(0));   /* r coord */
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      brw_MOV(p, brw_message_reg(2), src[0]);
      brw_MOV(p, brw_message_reg(3), src[1]);
      brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_CUBE_INDEX:
      brw_MOV(p, brw_message_reg(2), src[0]);
      brw_MOV(p, brw_message_reg(3), src[1]);
      brw_MOV(p, brw_message_reg(4), src[2]);
      break;
   default:
      /* invalid target */
      abort();
   }
   brw_MOV(p, brw_message_reg(5), src[3]);          /* bias */
   brw_MOV(p, brw_message_reg(6), brw_imm_f(0));    /* ref (unused?) */

   if (p->brw->gen == 5) {
      msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
   } else {
      /* Does it work well on SIMD8? */
      /* NOTE(review): a SIMD16 message type is selected here although the
       * SIMD mode passed to brw_SAMPLE() below is SIMD8 -- confirm.
       */
      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
   }

   /* NOTE(review): five payload registers (m2..m6) are written above, but
    * the value labelled msg_length is 4 -- confirm this is intended.
    * The unlabelled "1" before the SIMD mode is presumably a
    * header-present flag -- TODO confirm against brw_SAMPLE()'s prototype.
    */
   brw_SAMPLE(p,
              retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),  /* dest */
              1,  /* msg_reg_nr */
              retype(payload_reg, BRW_REGISTER_TYPE_UW),  /* src0 */
              SURF_INDEX_TEXTURE(unit),
              unit,  /* sampler */
              inst->DstReg.WriteMask,  /* writemask */
              msg_type,  /* msg_type */
              4,  /* response_length */
              4,  /* msg_length */
              0,  /* eot */
              1,
              BRW_SAMPLER_SIMD_MODE_SIMD8);
}

/**
 * Emit a plain texture sample (TEX), with optional shadow comparison.
 *
 * NOTE(review): this definition is corrupted in two places in this copy
 * of the file; text between a '<' and a later '>' appears to have been
 * stripped:
 *   - after "(1<" in the initializer of "shadow", up to "tex_target)";
 *   - after "(1<" inside the coordinate loop, up to "brw->gen == 5)".
 * The code below therefore does not compile as written.  Restore the
 * missing text from a pristine copy; do not guess at it.
 *
 * What survives shows: the target selects how many coordinates are sent
 * (1, 2 or 3), the message type depends on the hardware generation and on
 * "shadow", the message length is 6 for shadow lookups and 4 otherwise,
 * and for shadow lookups dst[3] is set to 1.0 after sampling.
 */
static void emit_tex(struct brw_wm_compile *c,
                     const struct brw_fp_instruction *inst)
{
   struct brw_compile *p = &c->func;
   struct brw_reg dst[4], src[4], payload_reg;
   /* Note: tex_unit was already looked up through SamplerTextures[] */
   const GLuint unit = inst->tex_unit;
   GLuint msg_len;
   GLuint i, nr;
   GLuint emit;
   /* NOTE(review): corrupted span starts after "(1<" on the next line. */
   GLboolean shadow = (c->key.shadowtex_mask & (1<tex_target) {
   case TEXTURE_1D_INDEX:
      emit = WRITEMASK_X;
      nr = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      emit = WRITEMASK_XY;
      nr = 2;
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_CUBE_INDEX:
      emit = WRITEMASK_XYZ;
      nr = 3;
      break;
   default:
      /* invalid target */
      abort();
   }
   msg_len = 1;

   /* move/load S, T, R coords */
   for (i = 0; i < nr; i++) {
      static const GLuint swz[4] = {0,1,2,2};
      /* NOTE(review): corrupted span starts after "(1<" on the next line. */
      if (emit & (1<brw->gen == 5) {
      if (shadow)
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
      else
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
   } else {
      /* Does it work for shadow on SIMD8 ? */
      msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
   }

   brw_SAMPLE(p,
              retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),  /* dest */
              1,  /* msg_reg_nr */
              retype(payload_reg, BRW_REGISTER_TYPE_UW),  /* src0 */
              SURF_INDEX_TEXTURE(unit),
              unit,  /* sampler */
              inst->DstReg.WriteMask,  /* writemask */
              msg_type,  /* msg_type */
              4,  /* response_length */
              shadow ? 6 : 4,  /* msg_length */
              0,  /* eot */
              1,
              BRW_SAMPLER_SIMD_MODE_SIMD8);

   /* For shadow lookups, force the W result to 1.0. */
   if (shadow)
      brw_MOV(p, dst[3], brw_imm_f(1.0));
}

/**
 * Resolve subroutine calls after code emit is done.
*/ static void post_wm_emit( struct brw_wm_compile *c ) { brw_resolve_cals(&c->func); } static void get_argument_regs(struct brw_wm_compile *c, const struct brw_fp_instruction *inst, int index, struct brw_reg *regs, int mask) { int i; for (i = 0; i < 4; i++) { if (mask & (1 << i)) regs[i] = get_src_reg(c, inst, index, i); } } static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; GLuint i, if_depth = 0, loop_depth = 0; struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); c->out_of_regs = GL_FALSE; prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { const struct brw_fp_instruction *inst = &c->fp_instructions[i]; int dst_flags; struct brw_reg args[3][4], dst[4]; int j; c->cur_inst = i; #if 0 debug_printf("Inst %d: ", i); _mesa_print_instruction(inst); #endif /* fetch any constants that this instruction needs */ if (c->fp->use_const_buffer) fetch_constants(c, inst); if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); dst_flags = inst->DstReg.WriteMask; if (inst->SaturateMode == SATURATE_ZERO_ONE) dst_flags |= SATURATE; switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); break; case WM_DELTAXY: emit_delta_xy(c, inst); break; case WM_PIXELW: emit_pixel_w(c, inst); break; case WM_LINTERP: emit_linterp(c, inst); break; case WM_PINTERP: emit_pinterp(c, inst); break; case WM_CINTERP: emit_cinterp(c, inst); break; case WM_WPOSXY: emit_wpos_xy(c, inst); break; case WM_FB_WRITE: emit_fb_write(c, inst); break; case WM_FRONTFACING: emit_frontfacing(c, inst); break; case OPCODE_ADD: emit_add(c, inst); break; case OPCODE_ARL: emit_arl(c, inst); break; case OPCODE_FRC: emit_frc(c, 
inst); break; case OPCODE_FLR: emit_flr(c, inst); break; case OPCODE_LRP: emit_lrp(c, inst); break; case OPCODE_TRUNC: emit_trunc(c, inst); break; case OPCODE_MOV: emit_mov(c, inst); break; case OPCODE_DP3: emit_dp3(c, inst); break; case OPCODE_DP4: emit_dp4(c, inst); break; case OPCODE_XPD: emit_xpd(c, inst); break; case OPCODE_DPH: emit_dph(c, inst); break; case OPCODE_RCP: emit_rcp(c, inst); break; case OPCODE_RSQ: emit_rsq(c, inst); break; case OPCODE_SIN: emit_sin(c, inst); break; case OPCODE_COS: emit_cos(c, inst); break; case OPCODE_EX2: emit_ex2(c, inst); break; case OPCODE_LG2: emit_lg2(c, inst); break; case OPCODE_MIN: case OPCODE_MAX: emit_min_max(c, inst); break; case OPCODE_DDX: case OPCODE_DDY: for (j = 0; j < 4; j++) { if (inst->DstReg.WriteMask & (1 << j)) dst[j] = get_dst_reg(c, inst, j); else dst[j] = brw_null_reg(); } get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW); emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), args[0]); break; case OPCODE_SLT: emit_slt(c, inst); break; case OPCODE_SLE: emit_sle(c, inst); break; case OPCODE_SGT: emit_sgt(c, inst); break; case OPCODE_SGE: emit_sge(c, inst); break; case OPCODE_SEQ: emit_seq(c, inst); break; case OPCODE_SNE: emit_sne(c, inst); break; case OPCODE_MUL: emit_mul(c, inst); break; case OPCODE_POW: emit_pow(c, inst); break; case OPCODE_MAD: emit_mad(c, inst); break; case OPCODE_TEX: emit_tex(c, inst); break; case OPCODE_TXB: emit_txb(c, inst); break; case OPCODE_KIL_NV: emit_kil(c); break; case OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); break; case OPCODE_BGNSUB: brw_save_label(p, inst->Comment, p->nr_insn); break; case OPCODE_ENDSUB: /* no-op */ break; case OPCODE_CAL: brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_access_mode(p, BRW_ALIGN_1); 
brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); brw_save_call(&c->func, inst->label, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_pop_insn_state(p); break; case OPCODE_RET: brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(-4)); brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); break; case OPCODE_BGNLOOP: /* XXX may need to invalidate the current_constant regs */ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: brw_BREAK(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CONT: brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: { struct brw_instruction *inst0, *inst1; GLuint br = 1; if (brw->gen == 5) br = 2; loop_depth--; inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); /* patch all the BREAK/CONT instructions from last BGNLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; if (inst0->header.opcode == BRW_OPCODE_BREAK) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } } } break; default: debug_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); if (BRW_DEBUG & DEBUG_WM) { debug_printf("wm-native:\n"); brw_disasm(stderr, p->store, p->nr_insn); } } /** * Do GPU code generation for shaders that use GLSL features such as * flow control. 
Other shaders will be compiled with the */ void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c) { if (BRW_DEBUG & DEBUG_WM) { debug_printf("%s:\n", __FUNCTION__); } /* initial instruction translation/simplification */ brw_wm_pass_fp(c); /* actual code generation */ brw_wm_emit_branching_shader(brw, c); if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "brw_wm_branching_shader_emit done"); } c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; }