From 949e7383b597563b5603ba9b63fcfa4b1f45cf42 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 09:17:55 -0700 Subject: mesa: add Initialized field to gl_uniform struct, for debugging purposes only --- src/mesa/shader/prog_uniform.c | 1 + src/mesa/shader/prog_uniform.h | 1 + src/mesa/shader/shader_api.c | 21 ++++++++++++++------- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/mesa/shader/prog_uniform.c b/src/mesa/shader/prog_uniform.c index f57df3d86d..a0aa615c5f 100644 --- a/src/mesa/shader/prog_uniform.c +++ b/src/mesa/shader/prog_uniform.c @@ -87,6 +87,7 @@ _mesa_append_uniform(struct gl_uniform_list *list, list->Uniforms[oldNum].Name = _mesa_strdup(name); list->Uniforms[oldNum].VertPos = -1; list->Uniforms[oldNum].FragPos = -1; + list->Uniforms[oldNum].Initialized = GL_FALSE; index = oldNum; list->NumUniforms++; } diff --git a/src/mesa/shader/prog_uniform.h b/src/mesa/shader/prog_uniform.h index 735de28705..deea732991 100644 --- a/src/mesa/shader/prog_uniform.h +++ b/src/mesa/shader/prog_uniform.h @@ -50,6 +50,7 @@ struct gl_uniform const char *Name; /**< Null-terminated string */ GLint VertPos; GLint FragPos; + GLboolean Initialized; /**< For debug. Has this uniform been set? 
*/ #if 0 GLenum DataType; /**< GL_FLOAT, GL_FLOAT_VEC2, etc */ GLuint Size; /**< Number of components (1..4) */ diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 504d769323..4fc9f3daaa 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -1505,10 +1505,12 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, GLenum type, GLsizei count, GLint elems, const void *values) { + struct gl_program_parameter *param = + &program->Parameters->Parameters[index]; + assert(offset >= 0); - if (!compatible_types(type, - program->Parameters->Parameters[index].DataType)) { + if (!compatible_types(type, param->DataType)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(type mismatch)"); return; } @@ -1518,7 +1520,7 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, return; } - if (program->Parameters->Parameters[index].Type == PROGRAM_SAMPLER) { + if (param->Type == PROGRAM_SAMPLER) { /* This controls which texture unit which is used by a sampler */ GLuint texUnit, sampler; @@ -1549,9 +1551,9 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, else { /* ordinary uniform variable */ GLsizei k, i; - GLint slots = (program->Parameters->Parameters[index].Size + 3) / 4; + GLint slots = (param->Size + 3) / 4; - if (count * elems > (GLint) program->Parameters->Parameters[index].Size) { + if (count * elems > (GLint) param->Size) { _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(count too large)"); return; } @@ -1560,7 +1562,8 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, count = slots; for (k = 0; k < count; k++) { - GLfloat *uniformVal = program->Parameters->ParameterValues[index + offset + k]; + GLfloat *uniformVal = + program->Parameters->ParameterValues[index + offset + k]; if (is_integer_type(type)) { const GLint *iValues = ((const GLint *) values) + k * elems; for (i = 0; i < elems; i++) { @@ -1575,7 +1578,7 @@ set_program_uniform(GLcontext *ctx, struct 
gl_program *program, } /* if the uniform is bool-valued, convert to 1.0 or 0.0 */ - if (is_boolean_type(program->Parameters->Parameters[index].DataType)) { + if (is_boolean_type(param->DataType)) { for (i = 0; i < elems; i++) { uniformVal[i] = uniformVal[i] ? 1.0 : 0.0; } @@ -1659,6 +1662,8 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, index, offset, type, count, elems, values); } } + + shProg->Uniforms->Uniforms[location].Initialized = GL_TRUE; } @@ -1769,6 +1774,8 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows, count, rows, cols, transpose, values); } } + + shProg->Uniforms->Uniforms[location].Initialized = GL_TRUE; } -- cgit v1.2.3 From dea4826b8481d7328e52dbaa2eb43fd861d73e79 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 09:34:15 -0700 Subject: mesa: remove extra \n from printf string --- src/mesa/shader/slang/slang_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/shader/slang/slang_log.c b/src/mesa/shader/slang/slang_log.c index dc838c72ad..25f696f67e 100644 --- a/src/mesa/shader/slang/slang_log.c +++ b/src/mesa/shader/slang/slang_log.c @@ -89,7 +89,7 @@ slang_info_log_message(slang_info_log * log, const char *prefix, slang_string_concat(log->text, "\n"); if (MESA_VERBOSE & VERBOSE_GLSL) { - _mesa_printf("Mesa: GLSL %s\n", log->text); + _mesa_printf("Mesa: GLSL %s", log->text); } return 1; -- cgit v1.2.3 From 50beb4e6fd2e06d6007c69899111f6a22319a4d8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 14:03:15 -0700 Subject: mesa: fix a GLSL array indexing codegen bug Expressions like array[i] + array[j] didn't work properly before. 
--- src/mesa/shader/slang/slang_codegen.c | 2 +- src/mesa/shader/slang/slang_emit.c | 107 +++++++++++++++++++++++++++++++--- 2 files changed, 101 insertions(+), 8 deletions(-) diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index e7b2bad8c2..d83e3b01e6 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -3253,7 +3253,7 @@ _slang_gen_array_element(slang_assemble_ctx * A, slang_operation *oper) index = _slang_gen_operation(A, &oper->children[1]); if (array && index) { /* bounds check */ - GLint constIndex = 0; + GLint constIndex = -1; if (index->Opcode == IR_FLOAT) { constIndex = (int) index->Value[0]; if (constIndex < 0 || constIndex >= arrayLen) { diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index 010b55827f..672ec4bd60 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -330,6 +330,17 @@ constant_to_src_reg(struct prog_src_register *src, GLfloat val, } +static void +address_to_dst_reg(struct prog_dst_register *dst, GLuint index) +{ + assert(index == 0); /* only one address reg at this time */ + dst->File = PROGRAM_ADDRESS; + dst->Index = index; + dst->WriteMask = WRITEMASK_X; +} + + + /** * Add new instruction at end of given program. * \param prog the program to append instruction onto @@ -614,6 +625,7 @@ emit_arith(slang_emit_info *emitInfo, slang_ir_node *n) /* result storage */ alloc_node_storage(emitInfo, n, -1); + assert(n->Store->Index >= 0); if (n->Store->Size == 2) n->Writemask = WRITEMASK_XY; @@ -1545,6 +1557,60 @@ emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n) } +/** + * Move a block registers from src to dst (or move a single register). 
+ * \param size size of block, in floats (<=4 means one register) + */ +static struct prog_instruction * +move_block(slang_emit_info *emitInfo, + GLuint size, GLboolean relAddr, + const slang_ir_storage *dst, + const slang_ir_storage *src) +{ + struct prog_instruction *inst; + + if (size > 4) { + /* move matrix/struct etc (block of registers) */ + slang_ir_storage dstStore = *dst; + slang_ir_storage srcStore = *src; + //GLint size = srcStore.Size; + /*ASSERT(n->Children[0]->Writemask == WRITEMASK_XYZW); + ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP); + */ + dstStore.Size = 4; + srcStore.Size = 4; + while (size >= 4) { + inst = new_instruction(emitInfo, OPCODE_MOV); + inst->Comment = _mesa_strdup("IR_COPY block"); + storage_to_dst_reg(&inst->DstReg, &dstStore, WRITEMASK_XYZW); + storage_to_src_reg(&inst->SrcReg[0], &srcStore); + inst->SrcReg[0].RelAddr = relAddr; + srcStore.Index++; + dstStore.Index++; + size -= 4; + } + } + else { + /* single register move */ + GLuint writemask; + if (size == 1) { + GLuint comp = GET_SWZ(src->Swizzle, 0); + assert(comp < 4); + writemask = WRITEMASK_X << comp; + } + else { + writemask = WRITEMASK_XYZW; + } + inst = new_instruction(emitInfo, OPCODE_MOV); + storage_to_dst_reg(&inst->DstReg, dst, writemask); + storage_to_src_reg(&inst->SrcReg[0], src); + inst->SrcReg[0].RelAddr = relAddr; + } + return inst; +} + + + /** * Dereference array element. Just resolve storage for the array * element represented by this node. 
@@ -1591,16 +1657,43 @@ emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n) /* do codegen for array index expression */ emit(emitInfo, n->Children[1]); - inst = new_instruction(emitInfo, OPCODE_ARL); + /* allocate temp storage for the array element */ + assert(n->Store->Index < 0); + n->Store->File = PROGRAM_TEMPORARY; + n->Store->Parent = NULL; + alloc_node_storage(emitInfo, n, -1); - storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); - storage_to_src_reg(&inst->SrcReg[0], n->Children[1]->Store); + if (n->Store->Size > 4) { + /* need to multiply the index by the element size */ + GLint elemSize = (n->Store->Size + 3) / 4; + slang_ir_storage indexTemp; + + /* allocate 1 float indexTemp */ + alloc_local_temp(emitInfo, &indexTemp, 1); + + /* MUL temp, index, elemSize */ + inst = new_instruction(emitInfo, OPCODE_MUL); + storage_to_dst_reg(&inst->DstReg, &indexTemp, WRITEMASK_X); + storage_to_src_reg(&inst->SrcReg[0], n->Children[1]->Store); + constant_to_src_reg(&inst->SrcReg[1], elemSize, emitInfo); + + /* load ADDR[0].X = temp */ + inst = new_instruction(emitInfo, OPCODE_ARL); + storage_to_src_reg(&inst->SrcReg[0], &indexTemp); + address_to_dst_reg(&inst->DstReg, 0); - inst->DstReg.File = PROGRAM_ADDRESS; - inst->DstReg.Index = 0; /* always address register [0] */ - inst->Comment = _mesa_strdup("ARL ADDR"); + _slang_free_temp(emitInfo->vt, &indexTemp); + } + else { + /* simply load address reg w/ array index */ + inst = new_instruction(emitInfo, OPCODE_ARL); + storage_to_src_reg(&inst->SrcReg[0], n->Children[1]->Store); + address_to_dst_reg(&inst->DstReg, 0); + } - n->Store->RelAddr = GL_TRUE; + /* copy from array element to temp storage */ + move_block(emitInfo, n->Store->Size, GL_TRUE, + n->Store, n->Children[0]->Store); } /* if array element size is one, make sure we only access X */ -- cgit v1.2.3 From 1bfdab781b515f8eefc0b3c963295a1adb8f8146 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 16:04:40 -0700 Subject: mesa: Fix 
compiler warnings on Windows. cherry-picked subset of a77976d2ee578d0483c64f2aa41719bbae9c1c97 --- src/mesa/shader/slang/slang_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index 1398a5ec6c..5c8b626ea7 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -409,7 +409,7 @@ _slang_update_inputs_outputs(struct gl_program *prog) } } else if (inst->SrcReg[j].File == PROGRAM_ADDRESS) { - maxAddrReg = MAX2(maxAddrReg, inst->SrcReg[j].Index + 1); + maxAddrReg = MAX2(maxAddrReg, (GLuint) (inst->SrcReg[j].Index + 1)); } } if (inst->DstReg.File == PROGRAM_OUTPUT) { -- cgit v1.2.3 From 0060d4154999777bd3b17013c457ca073aa660dc Mon Sep 17 00:00:00 2001 From: Gary Wong Date: Wed, 5 Nov 2008 20:35:19 -0500 Subject: i965: Implement missing OPCODE_NOISE3 instruction in fragment shaders. OPCODE_NOISE4 coming later. --- src/mesa/drivers/dri/i965/brw_eu.h | 6 + src/mesa/drivers/dri/i965/brw_wm_glsl.c | 339 +++++++++++++++++++++++++++++++- 2 files changed, 335 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 8cbe4215fb..49b422ee2f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -432,6 +432,12 @@ static INLINE struct brw_reg brw_uw8_grf( GLuint nr, return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } +static INLINE struct brw_reg brw_uw16_grf( GLuint nr, + GLuint subnr ) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + static INLINE struct brw_reg brw_null_reg( void ) { return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 0ea8c3d50e..2da3bf3d09 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1052,13 +1052,24 @@ static __inline struct brw_reg low_words( struct 
brw_reg reg ) return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); } -/* One- and two-dimensional Perlin noise, similar to the description in - _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */ +static __inline struct brw_reg even_bytes( struct brw_reg reg ) +{ + return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); +} + +static __inline struct brw_reg odd_bytes( struct brw_reg reg ) +{ + return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), + 0, 16, 2 ); +} + +/* One-, two- and three-dimensional Perlin noise, similar to the description + in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */ static void noise1_sub( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg param, - x0, x1, /* gradients at each end */ + x0, x1, /* gradients at each end */ t, tmp[ 2 ], /* float temporaries */ itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ int i; @@ -1232,18 +1243,18 @@ static void noise2_sub( struct brw_wm_compile *c ) { for( i = 0; i < 4; i++ ) brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); for( i = 0; i < 4; i++ ) brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); for( i = 0; i < 4; i++ ) brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); /* Now we want to initialise the four gradients based on the hashes. 
Format conversion from signed integer to float leaves @@ -1350,6 +1361,312 @@ static void emit_noise2( struct brw_wm_compile *c, release_tmps( c, mark ); } + +/* The three-dimensional case is much like the one- and two- versions above, + but since the number of corners is rapidly growing we now pack 16 16-bit + hashes into each register to extract more parallelism from the EUs. */ +static void noise3_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param0, param1, param2, + x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ + xi, yi, zi, /* interpolation coefficients */ + t, tmp[ 8 ], /* float temporaries */ + itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ + wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ + int i; + int mark = mark_tmps( c ); + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + xi = alloc_tmp( c ); + yi = alloc_tmp( c ); + zi = alloc_tmp( c ); + t = alloc_tmp( c ); + for( i = 0; i < 8; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); + } + + param0 = lookup_tmp( c, mark - 4 ); + param1 = lookup_tmp( c, mark - 3 ); + param2 = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to + be hashed. Also compute the remainders (offsets within the unit + cube), interleaved to reduce register dependency penalties. 
*/ + brw_RNDD( p, itmp[ 0 ], param0 ); + brw_RNDD( p, itmp[ 1 ], param1 ); + brw_RNDD( p, itmp[ 2 ], param2 ); + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBC8F ) ); /* constant used later */ + brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0xD0BD ) ); /* constant used later */ + brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0x9B93 ) ); /* constant used later */ + brw_FRC( p, param0, param0 ); + brw_FRC( p, param1, param1 ); + brw_FRC( p, param2, param2 ); + /* Since we now have only 16 bits of precision in the hash, we must + be more careful about thorough mixing to maintain entropy as we + squash the input vector into a small scalar. */ + brw_MUL( p, brw_acc_reg(), itmp[ 4 ], itmp[ 0 ] ); + brw_MAC( p, brw_acc_reg(), itmp[ 5 ], itmp[ 1 ] ); + brw_MAC( p, itmp[ 0 ], itmp[ 6 ], itmp[ 2 ] ); + brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + + /* Temporarily disable the execution mask while we work with ExecSize=16 + channels (the mask is set for ExecSize=8 and is probably incorrect). + Although this might cause execution of unwanted channels, the code + writes only to temporary registers and has no side effects, so + disabling the mask is harmless. */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); + brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); + brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); + + /* We're now ready to perform the hashing. The eight hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 16x16 + bit multiplication, and 8-bit swizzles (which we get for + free). 
*/ + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + brw_pop_insn_state( p ); + + /* Now we want to initialise the four rear gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + /* x component */ + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); + + brw_ADD( p, x0y1, 
x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). */ + brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) ); + brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) ); + brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) ); + brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) ); + brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) ); + brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) ); + brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) ); + brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) ); + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */ + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */ + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + + /* Here we interpolate in the y dimension... */ + brw_MUL( p, x0y1, x0y1, yi ); + brw_MUL( p, x1y1, x1y1, yi ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. Leave the result in tmp[ 0 ] (see below)... 
*/ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, xi ); + brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); + + /* Now do the same thing for the front four gradients... */ + /* x component */ + brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); 
+ brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* The interpolation coefficients are still around from last time, so + again interpolate in the y dimension... */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, yi ); + brw_MUL( p, x1y1, x1y1, yi ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. The rear face is in tmp[ 0 ] (see above), so this + time put the front face in tmp[ 1 ] and we're nearly there... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, xi ); + brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); + + /* The final interpolation, in the z dimension: */ + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise3( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, src2, param0, param1, param2, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); + src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + param2 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + brw_MOV( p, param2, src2 ); + + invoke_subroutine( c, SUB_NOISE3, noise3_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param0 ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} static void emit_wpos_xy(struct 
brw_wm_compile *c, struct prog_instruction *inst) @@ -1676,7 +1993,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_NOISE2: emit_noise2(c, inst); break; - /* case OPCODE_NOISE3: */ + case OPCODE_NOISE3: + emit_noise3(c, inst); + break; /* case OPCODE_NOISE4: */ /* not yet implemented */ case OPCODE_TEX: -- cgit v1.2.3 From df94fd17647937975df031dcaa1ac24b2d79ce1b Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 6 Nov 2008 15:25:55 +0800 Subject: i965: Always check vertex program. Now i965 also uses the vertex program created by Mesa core, but this vertex program does not depend only on the Mesa state _NEW_PROGRAM, so always check whether the current vertex program has been updated. This fixes the broken cubemap demo. --- src/mesa/drivers/dri/i965/brw_state_upload.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 16b0496f47..7a642bd2a8 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -45,7 +45,6 @@ const struct brw_tracked_state *atoms[] = { &brw_check_fallback, - &brw_active_vertprog, &brw_wm_input_sizes, &brw_vs_prog, &brw_gs_prog, @@ -212,6 +211,10 @@ void brw_validate_state( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; } + if (brw->vertex_program != brw->attribs.VertexProgram->_Current) { + brw->vertex_program = brw->attribs.VertexProgram->_Current; + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + } if (state->mesa == 0 && state->cache == 0 && -- cgit v1.2.3 From 517401af07ea17a7e88659e6ba95a0628ff826b3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 6 Nov 2008 15:04:11 -0700 Subject: mesa: update the shader programs->TexturesUsed array at link time If an application never calls glUniform() to set sampler variable values they'll remain 0 (the default value/unit). 
Now call _mesa_update_shader_textures_used() at link time in case glUniform() is never called. program->TexturesUsed[] will then be correct for state validation. --- src/mesa/shader/shader_api.c | 20 ++++++++++++++++---- src/mesa/shader/shader_api.h | 4 ++++ src/mesa/shader/slang/slang_link.c | 6 ++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 4fc9f3daaa..e883f8b8be 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -1415,10 +1415,22 @@ _mesa_use_program(GLcontext *ctx, GLuint program) /** - * Update the vertex and fragment program's TexturesUsed arrays. + * Update the vertex/fragment program's TexturesUsed array. + * + * This needs to be called after glUniform(set sampler var) is called. + * A call to glUniform(samplerVar, value) causes a sampler to point to a + * particular texture unit. We know the sampler's texture target + * (1D/2D/3D/etc) from compile time but the sampler's texture unit is + * set by glUniform() calls. + * + * So, scan the program->SamplerUnits[] and program->SamplerTargets[] + * information to update the prog->TexturesUsed[] values. + * Each value of TexturesUsed[unit] is one of zero, TEXTURE_1D_INDEX, + * TEXTURE_2D_INDEX, TEXTURE_3D_INDEX, etc. + * We'll use that info for state validation before rendering. 
*/ -static void -update_textures_used(struct gl_program *prog) +void +_mesa_update_shader_textures_used(struct gl_program *prog) { GLuint s; @@ -1544,7 +1556,7 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, /* This maps a sampler to a texture unit: */ program->SamplerUnits[sampler] = texUnit; - update_textures_used(program); + _mesa_update_shader_textures_used(program); FLUSH_VERTICES(ctx, _NEW_TEXTURE); } diff --git a/src/mesa/shader/shader_api.h b/src/mesa/shader/shader_api.h index e7f1266915..ec1996ee98 100644 --- a/src/mesa/shader/shader_api.h +++ b/src/mesa/shader/shader_api.h @@ -79,6 +79,10 @@ extern struct gl_shader * _mesa_lookup_shader(GLcontext *ctx, GLuint name); +extern void +_mesa_update_shader_textures_used(struct gl_program *prog); + + extern void _mesa_use_program(GLcontext *ctx, GLuint program); diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index 5c8b626ea7..511e740615 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -562,6 +562,9 @@ _slang_link(GLcontext *ctx, if (fragProg && shProg->FragmentProgram) { + /* Compute initial program's TexturesUsed info */ + _mesa_update_shader_textures_used(&shProg->FragmentProgram->Base); + /* notify driver that a new fragment program has been compiled/linked */ ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, &shProg->FragmentProgram->Base); @@ -577,6 +580,9 @@ _slang_link(GLcontext *ctx, } if (vertProg && shProg->VertexProgram) { + /* Compute initial program's TexturesUsed info */ + _mesa_update_shader_textures_used(&shProg->VertexProgram->Base); + /* notify driver that a new vertex program has been compiled/linked */ ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, &shProg->VertexProgram->Base); -- cgit v1.2.3 From 035c0cf71a5fe3beee55654e1f7148adfe626cc0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 6 Nov 2008 17:14:33 -0700 Subject: mesa: rename OPCODE_INT -> OPCODE_TRUNC Trunc 
is a more accurate description; there's no type conversion involved. --- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 8 ++++---- src/mesa/shader/prog_execute.c | 22 +++++++++++----------- src/mesa/shader/prog_instruction.c | 2 +- src/mesa/shader/prog_instruction.h | 5 ++++- src/mesa/shader/slang/slang_ir.c | 2 +- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 2da3bf3d09..cb728190f5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -16,7 +16,7 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: - case OPCODE_INT: + case OPCODE_TRUNC: case OPCODE_ENDIF: case OPCODE_CAL: case OPCODE_BRK: @@ -255,7 +255,7 @@ static void emit_abs( struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static void emit_int( struct brw_wm_compile *c, +static void emit_trunc( struct brw_wm_compile *c, struct prog_instruction *inst) { int i; @@ -1912,8 +1912,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LRP: emit_lrp(c, inst); break; - case OPCODE_INT: - emit_int(c, inst); + case OPCODE_TRUNC: + emit_trunc(c, inst); break; case OPCODE_MOV: emit_mov(c, inst); diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c index 32b6ff4fd4..d843761723 100644 --- a/src/mesa/shader/prog_execute.c +++ b/src/mesa/shader/prog_execute.c @@ -789,17 +789,6 @@ _mesa_execute_program(GLcontext * ctx, case OPCODE_ENDIF: /* nothing */ break; - case OPCODE_INT: /* float to int */ - { - GLfloat a[4], result[4]; - fetch_vector4(&inst->SrcReg[0], machine, a); - result[0] = (GLfloat) (GLint) a[0]; - result[1] = (GLfloat) (GLint) a[1]; - result[2] = (GLfloat) (GLint) a[2]; - result[3] = (GLfloat) (GLint) a[3]; - store_vector4(inst, machine, result); - } - break; case OPCODE_KIL_NV: /* 
NV_f_p only (conditional) */ if (eval_condition(machine, inst)) { return GL_FALSE; @@ -1425,6 +1414,17 @@ _mesa_execute_program(GLcontext * ctx, store_vector4(inst, machine, color); } break; + case OPCODE_TRUNC: /* truncate toward zero */ + { + GLfloat a[4], result[4]; + fetch_vector4(&inst->SrcReg[0], machine, a); + result[0] = (GLfloat) (GLint) a[0]; + result[1] = (GLfloat) (GLint) a[1]; + result[2] = (GLfloat) (GLint) a[2]; + result[3] = (GLfloat) (GLint) a[3]; + store_vector4(inst, machine, result); + } + break; case OPCODE_UP2H: /* unpack two 16-bit floats */ { GLfloat a[4], result[4]; diff --git a/src/mesa/shader/prog_instruction.c b/src/mesa/shader/prog_instruction.c index 1033496d97..7e340ce454 100644 --- a/src/mesa/shader/prog_instruction.c +++ b/src/mesa/shader/prog_instruction.c @@ -182,7 +182,6 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = { { OPCODE_FLR, "FLR", 1, 1 }, { OPCODE_FRC, "FRC", 1, 1 }, { OPCODE_IF, "IF", 1, 0 }, - { OPCODE_INT, "INT", 1, 1 }, { OPCODE_KIL, "KIL", 1, 0 }, { OPCODE_KIL_NV, "KIL", 0, 0 }, { OPCODE_LG2, "LG2", 1, 1 }, @@ -230,6 +229,7 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = { { OPCODE_TXL, "TXL", 1, 1 }, { OPCODE_TXP, "TXP", 1, 1 }, { OPCODE_TXP_NV, "TXP", 1, 1 }, + { OPCODE_TRUNC, "TRUNC", 1, 1 }, { OPCODE_UP2H, "UP2H", 1, 1 }, { OPCODE_UP2US, "UP2US", 1, 1 }, { OPCODE_UP4B, "UP4B", 1, 1 }, diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h index aca768376a..16701e4ec9 100644 --- a/src/mesa/shader/prog_instruction.h +++ b/src/mesa/shader/prog_instruction.h @@ -173,7 +173,6 @@ typedef enum prog_opcode { OPCODE_FLR, /* X X 2 X X */ OPCODE_FRC, /* X X 2 X X */ OPCODE_IF, /* opt */ - OPCODE_INT, /* X */ OPCODE_KIL, /* X */ OPCODE_KIL_NV, /* X X */ OPCODE_LG2, /* X X 2 X X */ @@ -221,6 +220,7 @@ typedef enum prog_opcode { OPCODE_TXL, /* 3 2 X */ OPCODE_TXP, /* X X */ OPCODE_TXP_NV, /* 3 X */ + OPCODE_TRUNC, /* X */ OPCODE_UP2H, /* X */ OPCODE_UP2US, /* X */ 
OPCODE_UP4B, /* X */ @@ -231,6 +231,9 @@ typedef enum prog_opcode { } gl_inst_opcode; +/* temporary, just in case, remove soon */ +#define OPCODE_INT OPCODE_TRUNC + /** * Instruction source register. */ diff --git a/src/mesa/shader/slang/slang_ir.c b/src/mesa/shader/slang/slang_ir.c index 3a0b8bf3a0..20498e8c66 100644 --- a/src/mesa/shader/slang/slang_ir.c +++ b/src/mesa/shader/slang/slang_ir.c @@ -56,7 +56,7 @@ static const slang_ir_info IrInfo[] = { /* unary ops */ { IR_MOVE, "IR_MOVE", OPCODE_MOV, 4, 1 }, { IR_I_TO_F, "IR_I_TO_F", OPCODE_MOV, 4, 1 }, /* int[4] to float[4] */ - { IR_F_TO_I, "IR_F_TO_I", OPCODE_INT, 4, 1 }, /* 4 floats to 4 ints */ + { IR_F_TO_I, "IR_F_TO_I", OPCODE_TRUNC, 4, 1 }, { IR_EXP, "IR_EXP", OPCODE_EXP, 1, 1 }, { IR_EXP2, "IR_EXP2", OPCODE_EX2, 1, 1 }, { IR_LOG2, "IR_LOG2", OPCODE_LG2, 1, 1 }, -- cgit v1.2.3