summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.c152
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.h14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c103
3 files changed, 169 insertions, 100 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 7b3932f522..aec3e297f4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -736,42 +736,70 @@ const double lp_build_exp2_polynomial[] = {
};
-LLVMValueRef
-lp_build_exp2(struct lp_build_context *bld,
- LLVMValueRef x)
+void
+lp_build_exp2_approx(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef *p_exp2_int_part,
+ LLVMValueRef *p_frac_part,
+ LLVMValueRef *p_exp2)
{
const union lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMValueRef ipart;
- LLVMValueRef fpart, expipart, expfpart;
+ LLVMValueRef fpart, expipart, expfpart, res;
- /* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ if(p_exp2_int_part || p_frac_part || p_exp2) {
+ /* TODO: optimize the constant case */
+ if(LLVMIsConstant(x))
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n");
- assert(type.floating && type.width == 32);
+ assert(type.floating && type.width == 32);
- x = lp_build_min(bld, x, lp_build_const_uni(type, 129.0));
- x = lp_build_max(bld, x, lp_build_const_uni(type, -126.99999));
+ x = lp_build_min(bld, x, lp_build_const_uni(type, 129.0));
+ x = lp_build_max(bld, x, lp_build_const_uni(type, -126.99999));
- /* ipart = int(x - 0.5) */
- ipart = LLVMBuildSub(bld->builder, x, lp_build_const_uni(type, 0.5f), "");
- ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
+ /* ipart = int(x - 0.5) */
+ ipart = LLVMBuildSub(bld->builder, x, lp_build_const_uni(type, 0.5f), "");
+ ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
- /* fpart = x - ipart */
- fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
- fpart = LLVMBuildSub(bld->builder, x, fpart, "");
+ /* fpart = x - ipart */
+ fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
+ fpart = LLVMBuildSub(bld->builder, x, fpart, "");
+ }
+
+ if(p_exp2_int_part || p_exp2) {
+ /* expipart = (float) (1 << ipart) */
+ expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_uni(type, 127), "");
+ expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_uni(type, 23), "");
+ expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
+ }
+
+ if(p_exp2) {
+ expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
+ Elements(lp_build_exp2_polynomial));
+
+ res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
+ }
- /* expipart = (float) (1 << ipart) */
- expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_uni(type, 127), "");
- expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_uni(type, 23), "");
- expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
+ if(p_exp2_int_part)
+ *p_exp2_int_part = expipart;
- expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
- Elements(lp_build_exp2_polynomial));
+ if(p_frac_part)
+ *p_frac_part = fpart;
- return LLVMBuildMul(bld->builder, expipart, expfpart, "");
+ if(p_exp2)
+ *p_exp2 = res;
+}
+
+
+LLVMValueRef
+lp_build_exp2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ LLVMValueRef res;
+ lp_build_exp2_approx(bld, x, NULL, NULL, &res);
+ return res;
}
@@ -798,9 +826,12 @@ const double lp_build_log2_polynomial[] = {
/**
* See http://www.devmaster.net/forums/showthread.php?p=43580
*/
-LLVMValueRef
-lp_build_log2(struct lp_build_context *bld,
- LLVMValueRef x)
+void
+lp_build_log2_approx(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef *p_exp,
+ LLVMValueRef *p_floor_log2,
+ LLVMValueRef *p_log2)
{
const union lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
@@ -810,34 +841,63 @@ lp_build_log2(struct lp_build_context *bld,
LLVMValueRef mantmask = lp_build_int_const_uni(type, 0x007fffff);
LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
- LLVMValueRef i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
-
+ LLVMValueRef i;
LLVMValueRef exp;
LLVMValueRef mant;
+ LLVMValueRef logexp;
LLVMValueRef logmant;
+ LLVMValueRef res;
- /* TODO: optimize the constant case */
- if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ if(p_exp || p_floor_log2 || p_log2) {
+ /* TODO: optimize the constant case */
+ if(LLVMIsConstant(x))
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n");
- assert(type.floating && type.width == 32);
+ assert(type.floating && type.width == 32);
- /* exp = (float) exponent(x) */
- exp = LLVMBuildAnd(bld->builder, i, expmask, "");
- exp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
- exp = LLVMBuildSub(bld->builder, exp, lp_build_int_const_uni(type, 127), "");
- exp = LLVMBuildSIToFP(bld->builder, exp, vec_type, "");
+ i = LLVMBuildBitCast(bld->builder, x, int_vec_type, "");
- /* mant = (float) mantissa(x) */
- mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
- mant = LLVMBuildOr(bld->builder, mant, one, "");
- mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
+ /* exp = (float) exponent(x) */
+ exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+ }
+
+ if(p_floor_log2 || p_log2) {
+ logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
+ logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_uni(type, 127), "");
+ logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
+ }
- logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
- Elements(lp_build_log2_polynomial));
+ if(p_log2) {
+ /* mant = (float) mantissa(x) */
+ mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+ mant = LLVMBuildOr(bld->builder, mant, one, "");
+ mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
- /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
- logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
+ logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+ Elements(lp_build_log2_polynomial));
+
+ /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+ logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
+
+ res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+ }
- return LLVMBuildAdd(bld->builder, logmant, exp, "");
+ if(p_exp)
+ *p_exp = exp;
+
+ if(p_floor_log2)
+ *p_floor_log2 = logexp;
+
+ if(p_log2)
+ *p_log2 = res;
+}
+
+
+LLVMValueRef
+lp_build_log2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ LLVMValueRef res;
+ lp_build_log2_approx(bld, x, NULL, NULL, &res);
+ return res;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 9f8fccb0d4..fc8cb25966 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -126,4 +126,18 @@ LLVMValueRef
lp_build_log2(struct lp_build_context *bld,
LLVMValueRef a);
+void
+lp_build_exp2_approx(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef *p_exp2_int_part,
+ LLVMValueRef *p_frac_part,
+ LLVMValueRef *p_exp2);
+
+void
+lp_build_log2_approx(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef *p_exp,
+ LLVMValueRef *p_floor_log2,
+ LLVMValueRef *p_log2);
+
#endif /* !LP_BLD_ARIT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 48eb771486..c9143ebfe4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -521,10 +521,7 @@ emit_instruction(
tmp2 = FETCH( bld, *inst, 0, CHAN_W );
tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
tmp0 = FETCH( bld, *inst, 0, CHAN_X );
- sse_xorps(
- bld,
- make_xmm( 2 ),
- make_xmm( 2 ) );
+ tmp2 = bld->base.zero;
sse_cmpps(
bld,
make_xmm( 2 ),
@@ -560,34 +557,31 @@ emit_instruction(
}
break;
-#if 0
case TGSI_OPCODE_EXP:
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- tmp0 = FETCH( bld, *inst, 0, CHAN_X );
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- tmp1 = tmp0;
- emit_flr( bld, 2, 1 );
- /* dst.x = ex2(floor(src.x)) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
- tmp2 = tmp1;
- tmp2 = lp_build_exp2( &bld->base, tmp2);
- STORE( bld, *inst, 0, CHAN_X, tmp2);
- }
- /* dst.y = src.x - floor(src.x) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- tmp2 = tmp0;
- tmp2 = lp_build_sub( &bld->base, tmp2, tmp1);
- STORE( bld, *inst, 0, CHAN_Y, tmp2);
- }
- }
- /* dst.z = ex2(src.x) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- tmp0 = lp_build_exp2( &bld->base, tmp0);
- STORE( bld, *inst, 0, CHAN_Z, tmp0);
- }
+ LLVMValueRef *p_exp2_int_part = NULL;
+ LLVMValueRef *p_frac_part = NULL;
+ LLVMValueRef *p_exp2 = NULL;
+
+ src0 = FETCH( bld, *inst, 0, CHAN_X );
+
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+ p_exp2_int_part = &tmp0;
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+ p_frac_part = &tmp1;
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+ p_exp2 = &tmp2;
+
+ lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
+
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+ STORE( bld, *inst, 0, CHAN_X, tmp0);
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+ STORE( bld, *inst, 0, CHAN_Y, tmp1);
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+ STORE( bld, *inst, 0, CHAN_Z, tmp2);
}
/* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
@@ -595,44 +589,45 @@ emit_instruction(
STORE( bld, *inst, 0, CHAN_W, tmp0);
}
break;
-#endif
-#if 0
case TGSI_OPCODE_LOG:
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- tmp0 = FETCH( bld, *inst, 0, CHAN_X );
- tmp0 = lp_build_abs( &bld->base, tmp0 );
- tmp1 = tmp0;
- tmp1 = lp_build_log2( &bld->base, tmp1);
- /* dst.z = lg2(abs(src.x)) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( bld, *inst, 0, CHAN_Z, tmp1);
- }
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- emit_flr( bld, 2, 1 );
- /* dst.x = floor(lg2(abs(src.x))) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( bld, *inst, 0, CHAN_X, tmp1);
- }
- /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */
- if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- tmp1 = lp_build_exp2( &bld->base, tmp1);
- emit_rcp( bld, 1, 1 );
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
- STORE( bld, *inst, 0, CHAN_Y, tmp0);
- }
+ LLVMValueRef *p_floor_log2;
+ LLVMValueRef *p_exp;
+ LLVMValueRef *p_log2;
+
+ src0 = FETCH( bld, *inst, 0, CHAN_X );
+ src0 = lp_build_abs( &bld->base, src0 );
+
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+ p_floor_log2 = &tmp0;
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ))
+ p_exp = &tmp1;
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+ p_log2 = &tmp2;
+
+ lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2);
+
+ /* dst.x = floor(lg2(abs(src.x))) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ))
+ STORE( bld, *inst, 0, CHAN_X, tmp0);
+ /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ tmp1 = lp_build_div( &bld->base, src0, tmp1);
+ STORE( bld, *inst, 0, CHAN_Y, tmp1);
}
+ /* dst.z = lg2(abs(src.x)) */
+ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ))
+ STORE( bld, *inst, 0, CHAN_Z, tmp2);
}
/* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
- emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C );
+ tmp0 = bld->base.one;
STORE( bld, *inst, 0, CHAN_W, tmp0);
}
break;
-#endif
case TGSI_OPCODE_MUL:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {