diff options
| field | value | date |
|---|---|---|
| author | Nicolai Haehnle <nhaehnle@gmail.com> | 2008-07-05 22:21:24 +0200 |
| committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2008-07-06 09:59:43 +0200 |
| commit | 62bccd6df0c963a14e801bcac95dc8046b978a7f (patch) | |
| tree | bef7225f252c272272cc445c24c1935967554d37 /src | |
| parent | 77fdfaa23adeaaf6a217ef1ee751410c6a5b0d21 (diff) | |
r300: Allow adding parameters during fragprog transform, share LIT code
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.h | 24 |
| -rw-r--r-- | src/mesa/drivers/dri/r300/r300_fragprog.c | 18 |
| -rw-r--r-- | src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 143 |
| -rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.c | 41 |
| -rw-r--r-- | src/mesa/drivers/dri/r300/r500_fragprog.c | 12 |
| -rw-r--r-- | src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 112 |
| -rw-r--r-- | src/mesa/drivers/dri/r300/radeon_program_alu.c | 124 |
7 files changed, 216 insertions, 258 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index a24ab0cad7..a69beba9a7 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -716,14 +716,11 @@ struct r300_fragment_program_code {  	int tex_offset;  	int tex_end; -	/* Hardware constants. -	 * Contains a pointer to the value. The destination of the pointer -	 * is supposed to be updated when GL state changes. -	 * Typically, this is either a pointer into -	 * gl_program_parameter_list::ParameterValues, or a pointer to a -	 * global constant (e.g. for sin/cos-approximation) +	/** +	 * Remember which program register a given hardware constant +	 * belongs to.  	 */ -	const GLfloat *constant[PFS_NUM_CONST_REGS]; +	struct prog_src_register constant[PFS_NUM_CONST_REGS];  	int const_nr;  	int max_temp_idx; @@ -787,14 +784,11 @@ struct r500_fragment_program_code {  	int inst_offset;  	int inst_end; -	/* Hardware constants. -	* Contains a pointer to the value. The destination of the pointer -	* is supposed to be updated when GL state changes. -	* Typically, this is either a pointer into -	* gl_program_parameter_list::ParameterValues, or a pointer to a -	* global constant (e.g. for sin/cos-approximation) -	*/ -	const GLfloat *constant[PFS_NUM_CONST_REGS]; +	/** +	 * Remember which program register a given hardware constant +	 * belongs to. 
+	 */ +	struct prog_src_register constant[PFS_NUM_CONST_REGS];  	int const_nr;  	int max_temp_idx; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 6a8ef0ef5f..57987f5d0f 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -117,9 +117,7 @@ static GLboolean transform_TEX(  		int factor_index;  		tokens[2] = inst.TexSrcUnit; -		factor_index = -			_mesa_add_state_reference( -				compiler->fp->mesa_program.Base.Parameters, tokens); +		factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);  		tgt = radeonAppendInstructions(t->Program, 1); @@ -303,7 +301,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)  	i++;  	/* viewport transformation */ -	window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens); +	window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);  	fpi[i].Opcode = OPCODE_MAD; @@ -401,6 +399,11 @@ void r300TranslateFragmentShader(r300ContextPtr r300,  		compiler.code = &fp->code;  		compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); +		if (RADEON_DEBUG & DEBUG_PIXEL) { +			_mesa_printf("Fragment Program: Initial program:\n"); +			_mesa_print_program(compiler.program); +		} +  		insert_WPOS_trailer(&compiler);  		struct radeon_program_transformation transformations[] = { @@ -413,13 +416,18 @@ void r300TranslateFragmentShader(r300ContextPtr r300,  			2, transformations);  		if (RADEON_DEBUG & DEBUG_PIXEL) { -			_mesa_printf("Program after transformations:\n"); +			_mesa_printf("Fragment Program: After transformations:\n");  			_mesa_print_program(compiler.program);  		}  		if (!r300FragmentProgramEmit(&compiler))  			fp->error = GL_TRUE; +		/* Subtle: Rescue any parameters that have been added during transformations */ +		_mesa_free_parameter_list(fp->mesa_program.Base.Parameters); +		
fp->mesa_program.Base.Parameters = compiler.program->Parameters; +		compiler.program->Parameters = 0; +  		_mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL);  		if (!fp->error) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index 889631f705..d95008edc0 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -549,22 +549,17 @@ static void free_temp(struct r300_pfs_compile_state *cs, GLuint r)  /**   * Emit a hardware constant/parameter. - * - * \p cp Stable pointer to an array of 4 floats. - *  The pointer must be stable in the sense that it remains to be valid - *  and hold the contents of the constant/parameter throughout the lifetime - *  of the fragment program (actually, up until the next time the fragment - *  program is translated).   */  static GLuint emit_const4fv(struct r300_pfs_compile_state *cs, -			    const GLfloat * cp) +			    struct prog_src_register srcreg)  {  	COMPILE_STATE;  	GLuint reg = undef;  	int index;  	for (index = 0; index < code->const_nr; ++index) { -		if (code->constant[index] == cp) +		if (code->constant[index].File == srcreg.File && +		    code->constant[index].Index == srcreg.Index)  			break;  	} @@ -575,7 +570,7 @@ static GLuint emit_const4fv(struct r300_pfs_compile_state *cs,  		}  		code->const_nr++; -		code->constant[index] = cp; +		code->constant[index] = srcreg;  	}  	REG_SET_TYPE(reg, REG_TYPE_CONST); @@ -806,20 +801,11 @@ static GLuint t_src(struct r300_pfs_compile_state *cs,  		REG_SET_TYPE(r, REG_TYPE_INPUT);  		break;  	case PROGRAM_LOCAL_PARAM: -		r = emit_const4fv(cs, -				  fp->mesa_program.Base.LocalParams[fpsrc. 
-								    Index]); -		break;  	case PROGRAM_ENV_PARAM: -		r = emit_const4fv(cs, -			cs->compiler->r300->radeon.glCtx->FragmentProgram.Parameters[fpsrc.Index]); -		break;  	case PROGRAM_STATE_VAR:  	case PROGRAM_NAMED_PARAM:  	case PROGRAM_CONSTANT: -		r = emit_const4fv(cs, -				  fp->mesa_program.Base.Parameters-> -				  ParameterValues[fpsrc.Index]); +		r = emit_const4fv(cs, fpsrc);  		break;  	case PROGRAM_BUILTIN:  		switch(fpsrc.Swizzle) { @@ -1452,100 +1438,17 @@ static GLfloat SinCosConsts[2][4] = {  	 }  }; -/** - * Emit a LIT instruction. - * \p flags may be PFS_FLAG_SAT - * - * Definition of LIT (from ARB_fragment_program): - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots. So unless there's some special undocumented opcode, - * this implementation is potentially optimal. Unfortunately, - * emit_arith is a bit too conservative because it doesn't understand - * partial writes to the vector component. 
- */ -static const GLfloat LitConst[4] = -    { 127.999999, 127.999999, 127.999999, -127.999999 }; - -static void emit_lit(struct r300_pfs_compile_state *cs, -		     GLuint dest, int mask, GLuint src, int flags) +static GLuint emit_sincosconsts(struct r300_pfs_compile_state *cs, int i)  { -	COMPILE_STATE; -	GLuint cnst; -	int needTemporary; -	GLuint temp; +	struct prog_src_register srcreg; +	GLuint constant_swizzle; -	cnst = emit_const4fv(cs, LitConst); - -	needTemporary = 0; -	if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { -		needTemporary = 1; -	} else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { -		// LIT is typically followed by DP3/DP4, so there's no point -		// in creating special code for this case -		needTemporary = 1; -	} - -	if (needTemporary) { -		temp = keep(get_temp_reg(cs)); -	} else { -		temp = keep(dest); -	} +	srcreg.File = PROGRAM_CONSTANT; +	srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters, +		SinCosConsts[i], 4, &constant_swizzle); +	srcreg.Swizzle = constant_swizzle; -	// Note: The order of emit_arith inside the slots is relevant, -	// because emit_arith only looks at scalar vs. vector when resolving -	// dependencies, and it does not consider individual vector components, -	// so swizzling between the two parts can create fake dependencies. - -	// First slot -	emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_XY, -		   keep(src), pfs_zero, undef, 0); -	emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); - -	// Second slot -	emit_arith(cs, PFS_OP_MIN, temp, WRITEMASK_Z, -		   swizzle(temp, W, W, W, W), cnst, undef, 0); -	emit_arith(cs, PFS_OP_LG2, temp, WRITEMASK_W, -		   swizzle(temp, Y, Y, Y, Y), undef, undef, 0); - -	// Third slot -	// If desired, we saturate the y result here. 
-	// This does not affect the use as a condition variable in the CMP later -	emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, -		   temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); -	emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_Y, -		   swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); - -	// Fourth slot -	emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_X, -		   pfs_one, pfs_one, pfs_zero, 0); -	emit_arith(cs, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); - -	// Fifth slot -	emit_arith(cs, PFS_OP_CMP, temp, WRITEMASK_Z, -		   pfs_zero, swizzle(temp, W, W, W, W), -		   negate(swizzle(temp, Y, Y, Y, Y)), flags); -	emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, -		   pfs_zero, 0); - -	if (needTemporary) { -		emit_arith(cs, PFS_OP_MAD, dest, mask, -			   temp, pfs_one, pfs_zero, flags); -		free_temp(cs, temp); -	} else { -		// Decrease refcount of the destination -		t_hw_dst(cs, dest, GL_FALSE, cs->nrslots); -	} +	return emit_const4fv(cs, srcreg);  }  static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi) @@ -1577,8 +1480,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst  		src[1] = t_src(cs, fpi->SrcReg[1]);  		src[2] = t_src(cs, fpi->SrcReg[2]);  		/* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c -			*    r300 - if src2.c < 0.0 ? src1.c : src0.c -			*/ +		 *    r300 - if src2.c < 0.0 ? 
src1.c : src0.c +		 */  		emit_arith(cs, PFS_OP_CMP, dest, mask,  				src[2], src[1], src[0], flags);  		break; @@ -1592,8 +1495,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst  			*   result = sin(x)  			*/  		temp[0] = get_temp_reg(cs); -		const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); -		const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); +		const_sin[0] = emit_sincosconsts(cs, 0); +		const_sin[1] = emit_sincosconsts(cs, 1);  		src[0] = t_scalar_src(cs, fpi->SrcReg[0]);  		/* add 0.5*PI and do range reduction */ @@ -1687,10 +1590,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst  		emit_arith(cs, PFS_OP_LG2, dest, mask,  				src[0], undef, undef, flags);  		break; -	case OPCODE_LIT: -		src[0] = t_src(cs, fpi->SrcReg[0]); -		emit_lit(cs, dest, mask, src[0], flags); -		break;  	case OPCODE_LRP:  		src[0] = t_src(cs, fpi->SrcReg[0]);  		src[1] = t_src(cs, fpi->SrcReg[1]); @@ -1758,8 +1657,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst  			*/  		temp[0] = get_temp_reg(cs);  		temp[1] = get_temp_reg(cs); -		const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); -		const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); +		const_sin[0] = emit_sincosconsts(cs, 0); +		const_sin[1] = emit_sincosconsts(cs, 1);  		src[0] = t_scalar_src(cs, fpi->SrcReg[0]);  		/* x = -abs(x)+0.5*PI */ @@ -1825,8 +1724,8 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst  			*/  		temp[0] = get_temp_reg(cs); -		const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); -		const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); +		const_sin[0] = emit_sincosconsts(cs, 0); +		const_sin[1] = emit_sincosconsts(cs, 1);  		src[0] = t_scalar_src(cs, fpi->SrcReg[0]);  		/* do range reduction */ diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 0f7c179de8..d7a6962acc 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ 
b/src/mesa/drivers/dri/r300/r300_state.c @@ -2453,6 +2453,27 @@ void r300UpdateShaders(r300ContextPtr rmesa)  	r300UpdateStateParameters(ctx, _NEW_PROGRAM);  } +static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, +	struct gl_program *program, struct prog_src_register srcreg) +{ +	static const GLfloat dummy[4] = { 0, 0, 0, 0 }; + +	switch(srcreg.File) { +	case PROGRAM_LOCAL_PARAM: +		return program->LocalParams[srcreg.Index]; +	case PROGRAM_ENV_PARAM: +		return ctx->FragmentProgram.Parameters[srcreg.Index]; +	case PROGRAM_STATE_VAR: +	case PROGRAM_NAMED_PARAM: +	case PROGRAM_CONSTANT: +		return program->Parameters->ParameterValues[srcreg.Index]; +	default: +		_mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n"); +		return dummy; +	} +} + +  static void r300SetupPixelShader(r300ContextPtr rmesa)  {  	GLcontext *ctx = rmesa->radeon.glCtx; @@ -2523,10 +2544,12 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)  	R300_STATECHANGE(rmesa, fpp);  	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);  	for (i = 0; i < code->const_nr; i++) { -		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(code->constant[i][0]); -		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(code->constant[i][1]); -		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(code->constant[i][2]); -		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(code->constant[i][3]); +		const GLfloat *constant = get_fragmentprogram_constant(ctx, +			&fp->mesa_program.Base, code->constant[i]); +		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); +		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); +		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); +		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]);  	}  } @@ -2595,10 +2618,12 @@ static void 
r500SetupPixelShader(r300ContextPtr rmesa)  	R300_STATECHANGE(rmesa, r500fp_const);  	for (i = 0; i < code->const_nr; i++) { -		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(code->constant[i][0]); -		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(code->constant[i][1]); -		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(code->constant[i][2]); -		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(code->constant[i][3]); +		const GLfloat *constant = get_fragmentprogram_constant(ctx, +			&fp->mesa_program.Base, code->constant[i]); +		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); +		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); +		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); +		rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]);  	}  	bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 7ee8494722..1cdb065354 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -212,7 +212,7 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)  	i++;  	/* viewport transformation */ -	window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens); +	window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);  	fpi[i].Opcode = OPCODE_MAD; @@ -332,6 +332,11 @@ void r500TranslateFragmentShader(r300ContextPtr r300,  		fp->translated = r500FragmentProgramEmit(&compiler); +		/* Subtle: Rescue any parameters that have been added during transformations */ +		_mesa_free_parameter_list(fp->mesa_program.Base.Parameters); +		fp->mesa_program.Base.Parameters = 
compiler.program->Parameters; +		compiler.program->Parameters = 0; +  		_mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);  		r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); @@ -461,9 +466,8 @@ static void dump_program(struct r500_fragment_program_code *code)    if (code->const_nr) {      fprintf(stderr, "--------\nConstants:\n");      for (n = 0; n < code->const_nr; n++) { -      fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n, -        code->constant[n][0], code->constant[n][1], code->constant[n][2], -        code->constant[n][3]); +      fprintf(stderr, "Constant %d: %i[%i]\n", n, +        code->constant[n].File, code->constant[n].Index);      }      fprintf(stderr, "--------\n");    } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 0e95c81e48..c79bff96bd 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -266,7 +266,7 @@ static int get_temp(struct r500_pfs_compile_state *cs, int slot) {  /* Borrowed verbatim from r300_fragprog since it hasn't changed. 
*/  static GLuint emit_const4fv(struct r500_pfs_compile_state *cs, -			    const GLfloat * cp) +			    struct prog_src_register srcreg)  {  	PROG_CODE; @@ -274,7 +274,8 @@ static GLuint emit_const4fv(struct r500_pfs_compile_state *cs,  	int index;  	for (index = 0; index < code->const_nr; ++index) { -		if (code->constant[index] == cp) +		if (code->constant[index].File == srcreg.File && +		    code->constant[index].Index == srcreg.Index)  			break;  	} @@ -285,7 +286,7 @@ static GLuint emit_const4fv(struct r500_pfs_compile_state *cs,  		}  		code->const_nr++; -		code->constant[index] = cp; +		code->constant[index] = srcreg;  	}  	reg = index | REG_CONSTANT; @@ -303,18 +304,11 @@ static GLuint make_src(struct r500_pfs_compile_state *cs, struct prog_src_regist  		reg = cs->inputs[src.Index].reg;  		break;  	case PROGRAM_LOCAL_PARAM: -		reg = emit_const4fv(cs, -			cs->compiler->fp->mesa_program.Base.LocalParams[src.Index]); -		break;  	case PROGRAM_ENV_PARAM: -		reg = emit_const4fv(cs, -			cs->compiler->r300->radeon.glCtx->FragmentProgram.Parameters[src.Index]); -		break;  	case PROGRAM_STATE_VAR:  	case PROGRAM_NAMED_PARAM:  	case PROGRAM_CONSTANT: -		reg = emit_const4fv(cs, -			cs->compiler->fp->mesa_program.Base.Parameters->ParameterValues[src.Index]); +		reg = emit_const4fv(cs, src);  		break;  	case PROGRAM_BUILTIN:  		reg = 0x0; @@ -628,12 +622,20 @@ static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction  	temp.Index = get_temp(cs, 0);  	temp.WriteMask = WRITEMASK_W; +	struct prog_src_register srcreg; +	GLuint constant_swizzle; + +	srcreg.File = PROGRAM_CONSTANT; +	srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters, +		RCP_2PI, 4, &constant_swizzle); +	srcreg.Swizzle = constant_swizzle; +  	/* temp = Input*(1/2pi) */  	ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp);  	set_src0(cs, ip, fpi->SrcReg[0]); -	set_src1_direct(cs, ip, emit_const4fv(cs, RCP_2PI)); +	set_src1(cs, ip, srcreg);  	
set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0])); -	set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W); +	set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, make_alpha_swizzle(srcreg));  	set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);  	/* temp = frac(dst) */ @@ -660,87 +662,6 @@ static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction  	}  } -/** - * Emit a LIT instruction. - * - * Definition of LIT (from ARB_fragment_program): - *  tmp = VectorLoad(op0); - *  if (tmp.x < 0) tmp.x = 0; - *  if (tmp.y < 0) tmp.y = 0; - *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - *  result.x = 1.0; - *  result.y = tmp.x; - *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - *  result.w = 1.0; - */ -static void emit_lit(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) -{ -	GLuint cnst; -	int needTemporary; -	GLuint temp; -	int ip; - -	cnst = emit_const4fv(cs, LIT); - -	needTemporary = 0; -	if (fpi->DstReg.WriteMask != WRITEMASK_XYZW || fpi->DstReg.File == PROGRAM_OUTPUT) -		needTemporary = 1; - -	if (needTemporary) { -		temp = get_temp(cs, 0); -	} else { -		temp = fpi->DstReg.Index; -	} - -	// MAX tmp.xyw, op0, { 0, 0, 0, -128+eps } -	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAX, R500_ALPHA_OP_MAX, temp, WRITEMASK_XYW); -	set_src0(cs, ip, fpi->SrcReg[0]); -	set_src1_direct(cs, ip, cnst); -	set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); -	set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, SWIZZLE_W); - -	// MIN tmp.z, tmp.w, { 128-eps } -	// LG2 tmp.w, tmp.y -	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MIN, R500_ALPHA_OP_LN2, temp, WRITEMASK_ZW); -	set_src0_direct(cs, ip, temp); -	set_src1_direct(cs, ip, cnst); -	set_argA(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), SWIZZLE_Y); -	set_argB(cs, ip, 1, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_X); - -	// MOV tmp.y, tmp.x -	// MUL tmp.w, tmp.z, tmp.w -	ip = emit_alu_temp(cs, 
R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp, WRITEMASK_YW); -	set_src0_direct(cs, ip, temp); -	set_argA(cs, ip, 0, SWIZZLE_X | (SWIZZLE_X<<3) | (SWIZZLE_X<<6), SWIZZLE_Z); -	set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W); -	set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); - -	// MOV tmp.x, 1.0 -	// EX2 tmp.w, tmp.w -	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_EX2, temp, WRITEMASK_XW); -	set_src0_direct(cs, ip, temp); -	set_argA(cs, ip, 0, R500_SWIZ_RGB_ONE, SWIZZLE_W); -	set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ZERO); -	set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); - -	// tmp.z := (-tmp.x >= 0) ? tmp.y : 0.0 -	// MOV tmp.w, 1.0 -	ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, temp, WRITEMASK_ZW); -	set_src0_direct(cs, ip, temp); -	set_argA(cs, ip, 0, R500_SWIZZLE_ZERO, R500_SWIZZLE_ONE); -	set_argB(cs, ip, 0, SWIZZLE_W | (SWIZZLE_W<<3) | (SWIZZLE_W<<6), R500_SWIZZLE_ONE); -	set_argC(cs, ip, 0, SWIZZLE_Y | (SWIZZLE_Y<<3) | (SWIZZLE_Y<<6) | (R500_SWIZ_MOD_NEG<<9), R500_SWIZZLE_ZERO); - -	if (needTemporary) { -		ip = emit_alu(cs, R500_ALU_RGBA_OP_CMP, R500_ALPHA_OP_CMP, fpi->DstReg); -		set_src0_direct(cs, ip, temp); -		set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W); -		set_argB(cs, ip, 1, R500_SWIZ_RGB_RGB, SWIZZLE_W); -		set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO); -	} -} -  static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) {  	PROG_CODE;  	GLuint src[3], dest = 0; @@ -830,9 +751,6 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *  			src[0] = make_src(cs, fpi->SrcReg[0]);  			emit_sop(cs, R500_ALPHA_OP_LN2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));  			break; -		case OPCODE_LIT: -			emit_lit(cs, fpi); -			break;  		case OPCODE_LRP:  			/* result = src0*src1 + (1-src0)*src2  			 *        = src0*src1 + src2 + (-src0)*src2 diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c 
b/src/mesa/drivers/dri/r300/radeon_program_alu.c index d6d016d7c1..85ea810523 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -35,6 +35,8 @@  #include "radeon_program_alu.h" +#include "shader/prog_parameter.h" +  static struct prog_instruction *emit1(struct gl_program* p,  	gl_inst_opcode Opcode, struct prog_dst_register DstReg, @@ -101,6 +103,19 @@ static struct prog_dst_register dstreg(int file, int index)  	return dst;  } +static struct prog_dst_register dstregtmpmask(int index, int mask) +{ +	struct prog_dst_register dst; +	dst.File = PROGRAM_TEMPORARY; +	dst.Index = index; +	dst.WriteMask = mask; +	dst.CondMask = COND_TR; +	dst.CondSwizzle = SWIZZLE_NOOP; +	dst.CondSrc = 0; +	dst.pad = 0; +	return dst; +} +  static const struct prog_src_register builtin_zero = {  	.File = PROGRAM_BUILTIN,  	.Index = 0, @@ -125,6 +140,15 @@ static struct prog_src_register srcreg(int file, int index)  	return src;  } +static struct prog_src_register srcregswz(int file, int index, int swz) +{ +	struct prog_src_register src = srcreg_undefined; +	src.File = file; +	src.Index = index; +	src.Swizzle = swz; +	return src; +} +  static struct prog_src_register negate(struct prog_src_register reg)  {  	struct prog_src_register newreg = reg; @@ -136,10 +160,10 @@ static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x,  {  	struct prog_src_register swizzled = reg;  	swizzled.Swizzle = MAKE_SWIZZLE4( -		GET_SWZ(reg.Swizzle, x), -		GET_SWZ(reg.Swizzle, y), -		GET_SWZ(reg.Swizzle, z), -		GET_SWZ(reg.Swizzle, w)); +		x >= 4 ? x : GET_SWZ(reg.Swizzle, x), +		y >= 4 ? y : GET_SWZ(reg.Swizzle, y), +		z >= 4 ? z : GET_SWZ(reg.Swizzle, z), +		w >= 4 ? 
w : GET_SWZ(reg.Swizzle, w));  	return swizzled;  } @@ -185,6 +209,93 @@ static void transform_FLR(struct radeon_transform_context* t,  	emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));  } +/** + * Definition of LIT (from ARB_fragment_program): + * + *  tmp = VectorLoad(op0); + *  if (tmp.x < 0) tmp.x = 0; + *  if (tmp.y < 0) tmp.y = 0; + *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + *  result.x = 1.0; + *  result.y = tmp.x; + *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + *  result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_transform_context* t, +	struct prog_instruction* inst) +{ +	static const GLfloat LitConst[4] = { -127.999999 }; + +	GLuint constant; +	GLuint constant_swizzle; +	GLuint temp; +	int needTemporary = 0; +	struct prog_src_register srctemp; + +	constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + +	if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { +		needTemporary = 1; +	} else if (inst->DstReg.File != PROGRAM_TEMPORARY) { +		// LIT is typically followed by DP3/DP4, so there's no point +		// in creating special code for this case +		needTemporary = 1; +	} + +	if (needTemporary) { +		temp = radeonFindFreeTemporary(t); +	} else { +		temp = inst->DstReg.Index; +	} +	srctemp = srcreg(PROGRAM_TEMPORARY, temp); + +	// tmp.x = max(0.0, Src.x); +	// tmp.y = max(0.0, Src.y); +	// tmp.w = clamp(Src.z, -128+eps, 128-eps); +	emit2(t->Program, OPCODE_MAX, +		dstregtmpmask(temp, WRITEMASK_XYW), +		inst->SrcReg[0], +		swizzle(srcreg(PROGRAM_CONSTANT, constant), +			SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); 
+	emit2(t->Program, OPCODE_MIN, +		dstregtmpmask(temp, WRITEMASK_Z), +		swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), +		negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + +	// tmp.w = Pow(tmp.y, tmp.w) +	emit1(t->Program, OPCODE_LG2, +		dstregtmpmask(temp, WRITEMASK_W), +		swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); +	emit2(t->Program, OPCODE_MUL, +		dstregtmpmask(temp, WRITEMASK_W), +		swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), +		swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); +	emit1(t->Program, OPCODE_EX2, +		dstregtmpmask(temp, WRITEMASK_W), +		swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + +	// tmp.z = (tmp.x > 0) ? tmp.w : 0.0 +	emit3(t->Program, OPCODE_CMP, +		dstregtmpmask(temp, WRITEMASK_Z), +		negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), +		swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), +		builtin_zero); + +	// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 +	emit1(t->Program, OPCODE_MOV, +		dstregtmpmask(temp, WRITEMASK_XYW), +		swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + +	if (needTemporary) +		emit1(t->Program, OPCODE_MOV, inst->DstReg, srctemp); +} +  static void transform_POW(struct radeon_transform_context* t,  	struct prog_instruction* inst)  { @@ -249,13 +360,11 @@ static void transform_XPD(struct radeon_transform_context* t,   * no userData necessary.   *   * Eliminates the following ALU instructions: - *  ABS, DPH, FLR, POW, SGE, SLT, SUB, SWZ, XPD + *  ABS, DPH, FLR, LIT, POW, SGE, SLT, SUB, SWZ, XPD   * using:   *  MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP   *   * @note should be applicable to R300 and R500 fragment programs. - * - * @todo add LIT here as well?   
*/  GLboolean radeonTransformALU(struct radeon_transform_context* t,  	struct prog_instruction* inst, @@ -265,6 +374,7 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t,  	case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;  	case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;  	case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; +	case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;  	case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;  	case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;  	case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;  | 
