From d28e8528958b472c821e3b72a28c22f337aba66e Mon Sep 17 00:00:00 2001 From: Gary Wong Date: Sat, 13 Dec 2008 14:15:33 -0700 Subject: i965: Finish OPCODE_NOISEn instructions. Added missing OPCODE_NOISE4, and use BRW_REGISTER_TYPE_D (instead of _UD) in the initial RNDD instructions (which avoids saturating negative inputs to 0). --- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 451 +++++++++++++++++++++++++++++++- 1 file changed, 437 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index cb728190f5..baecfdcb79 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1095,7 +1095,7 @@ static void noise1_sub( struct brw_wm_compile *c ) { /* Arrange the two end coordinates into scalars (itmp0/itmp1) to be hashed. Also compute the remainder (offset within the unit length), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, itmp[ 0 ], param ); + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param ); brw_FRC( p, param, param ); brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ @@ -1220,8 +1220,8 @@ static void noise2_sub( struct brw_wm_compile *c ) { /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to be hashed. Also compute the remainders (offsets within the unit square), interleaved to reduce register dependency penalties. 
*/ - brw_RNDD( p, itmp[ 0 ], param0 ); - brw_RNDD( p, itmp[ 1 ], param1 ); + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); brw_FRC( p, param0, param0 ); brw_FRC( p, param1, param1 ); brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ @@ -1400,21 +1400,19 @@ static void noise3_sub( struct brw_wm_compile *c ) { /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to be hashed. Also compute the remainders (offsets within the unit cube), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, itmp[ 0 ], param0 ); - brw_RNDD( p, itmp[ 1 ], param1 ); - brw_RNDD( p, itmp[ 2 ], param2 ); - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBC8F ) ); /* constant used later */ - brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0xD0BD ) ); /* constant used later */ - brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0x9B93 ) ); /* constant used later */ + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); + brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 ); brw_FRC( p, param0, param0 ); brw_FRC( p, param1, param1 ); brw_FRC( p, param2, param2 ); /* Since we now have only 16 bits of precision in the hash, we must be more careful about thorough mixing to maintain entropy as we squash the input vector into a small scalar. 
*/ - brw_MUL( p, brw_acc_reg(), itmp[ 4 ], itmp[ 0 ] ); - brw_MAC( p, brw_acc_reg(), itmp[ 5 ], itmp[ 1 ] ); - brw_MAC( p, itmp[ 0 ], itmp[ 6 ], itmp[ 2 ] ); + brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); + brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) ); + brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ), + brw_imm_uw( 0x9B93 ) ); brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); @@ -1668,6 +1666,430 @@ static void emit_noise3( struct brw_wm_compile *c, release_tmps( c, mark ); } +/* For the four-dimensional case, the little micro-optimisation benefits + we obtain by unrolling all the loops aren't worth the massive bloat it + now causes. Instead, we loop twice around performing a similar operation + to noise3, once for the w=0 cube and once for the w=1, with a bit more + code to glue it all together. */ +static void noise4_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param[ 4 ], + x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ + w0, /* noise for the w=0 cube */ + floors[ 2 ], /* integer coordinates of base corner of hypercube */ + interp[ 4 ], /* interpolation coefficients */ + t, tmp[ 8 ], /* float temporaries */ + itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ + wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ + int i, j; + int mark = mark_tmps( c ); + GLuint loop, origin; + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + t = alloc_tmp( c ); + w0 = alloc_tmp( c ); + floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); + floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); + + for( i = 0; i < 4; i++ ) { + param[ i ] = lookup_tmp( c, mark - 5 + i ); + interp[ i ] = alloc_tmp( c ); + } + + for( i = 0; i < 8; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], 
BRW_REGISTER_TYPE_UD ); + wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); + } + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* We only want 16 bits of precision from the integral part of each + co-ordinate, but unfortunately the RNDD semantics would saturate + at 16 bits if we performed the operation directly to a 16-bit + destination. Therefore, we round to 32-bit temporaries where + appropriate, and then store only the lower 16 bits. */ + brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] ); + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] ); + brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] ); + brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) ); + brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) ); + + /* Modify the flag register here, because the side effect is useful + later (see below). We know for certain that all flags will be + cleared, since the FRC instruction cannot possibly generate + negative results. Even for exceptional inputs (infinities, denormals, + NaNs), the architecture guarantees that the L conditional is false. */ + brw_set_conditionalmod( p, BRW_CONDITIONAL_L ); + brw_FRC( p, param[ 0 ], param[ 0 ] ); + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + for( i = 1; i < 4; i++ ) + brw_FRC( p, param[ i ], param[ i ] ); + + /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first + of all. 
*/ + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) ); + for( i = 0; i < 4; i++ ) + brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); + for( i = 0; i < 4; i++ ) + brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) ); + for( j = 0; j < 3; j++ ) + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); + + /* Mark the current address, as it will be a jump destination. The + following code will be executed twice: first, with the flag + register clear indicating the w=0 case, and second with flags + set for w=1. */ + loop = p->nr_insn; + + /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to + be hashed. Since we have only 16 bits of precision in the hash, we + must be careful about thorough mixing to maintain entropy as we + squash the input vector into a small scalar. */ + brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ), + brw_imm_uw( 0xD0BD ) ); + brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ), + brw_imm_uw( 0x9B93 ) ); + brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ), + brw_imm_uw( 0xA359 ) ); + brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + + /* Temporarily disable the execution mask while we work with ExecSize=16 + channels (the mask is set for ExecSize=8 and is probably incorrect). + Although this might cause execution of unwanted channels, the code + writes only to temporary registers and has no side effects, so + disabling the mask is harmless. */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); + brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); + brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); + + /* We're now ready to perform the hashing. 
The eight hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 16x16 + bit multiplication, and 8-bit swizzles (which we get for + free). */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + brw_pop_insn_state( p ); + + /* Now we want to initialise the four rear gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + /* x component */ + brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param[ 0 ] ); + brw_MUL( p, x0y1, x0y1, param[ 0 ] ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, 
tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + /* prepare t for the w component (used below): w the first time through + the loop; w - 1 the second time) */ + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); + p->current->header.predicate_inverse = 1; + brw_MOV( p, t, param[ 3 ] ); + p->current->header.predicate_inverse = 0; + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* w component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + 
brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* Here we interpolate in the y dimension... */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); + brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); + brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); + + /* Now do the same thing for the front four gradients... */ + /* x component */ + brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param[ 0 ] ); + brw_MUL( p, x0y1, x0y1, param[ 0 ] ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 
] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + /* prepare t for the w component (used below): w the first time through + the loop; w - 1 the second time) */ + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); + p->current->header.predicate_inverse = 1; + brw_MOV( p, t, param[ 3 ] ); + p->current->header.predicate_inverse = 0; + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* w component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* Interpolate in the y dimension: */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); + brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* 
And now in x. The rear face is in tmp[ 0 ] (see above), so this + time put the front face in tmp[ 1 ] and we're nearly there... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); + brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); + + /* Another interpolation, in the z dimension: */ + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); + + /* Exit the loop if we've computed both cubes... */ + origin = p->nr_insn; + brw_push_insn_state( p ); + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) ); + brw_pop_insn_state( p ); + + /* Save the result for the w=0 case, and increment the w coordinate: */ + brw_MOV( p, w0, tmp[ 0 ] ); + brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ), + brw_imm_uw( 1 ) ); + + /* Loop around for the other cube. Explicitly set the flag register + (unfortunately we must spend an extra instruction to do this: we + can't rely on a side effect of the previous MOV or ADD because + conditional modifiers which are normally true might be false in + exceptional circumstances, e.g. given a NaN input; the add to + brw_ip_reg() is not suitable because the IP is not an 8-vector). */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) ); + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( loop - p->nr_insn ) << 4 ) ); + brw_pop_insn_state( p ); + + /* Patch the previous conditional branch now that we know the + destination address. */ + brw_set_src1( p->store + origin, + brw_imm_d( ( p->nr_insn - origin ) << 4 ) ); + + /* The very last interpolation. 
*/ + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise4( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); + src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + src3 = get_src_reg( c, inst->SrcReg, 3, 1 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + param2 = alloc_tmp( c ); + param3 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + brw_MOV( p, param2, src2 ); + brw_MOV( p, param3, src3 ); + + invoke_subroutine( c, SUB_NOISE4, noise4_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param0 ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + static void emit_wpos_xy(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -1996,8 +2418,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_NOISE3: emit_noise3(c, inst); break; - /* case OPCODE_NOISE4: */ - /* not yet implemented */ + case OPCODE_NOISE4: + emit_noise4(c, inst); + break; case OPCODE_TEX: emit_tex(c, inst); break; -- cgit v1.2.3 From 71e208bafe9ca5e1c189fae3f251cc84034e5959 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Mon, 24 Nov 2008 11:01:57 -0800 Subject: Add more package metadata to the pkg-config files The pkg-config files have been filled in more thoroughly to allow users to use mesa more 
effectively. By adding metadata to Requires.private, Libs.private and Cflags, we can ensure that all the libraries and headers will be found in all situations. However, the full substitutions are only done when using the configure script. This also fixes the glu pkg-config file to account for using GL or OSMesa. Fixes bug 18161. --- configs/autoconf.in | 17 +++++++++++++++++ configs/default | 16 ++++++++++++++++ configure.ac | 41 +++++++++++++++++++++++++++++++++++++++++ src/glu/Makefile | 7 ++++++- src/glu/glu.pc.in | 8 +++++--- src/glut/glx/Makefile | 5 ++++- src/glut/glx/glut.pc.in | 4 +++- src/glut/mini/Makefile | 5 ++++- src/glut/mini/glut.pc.in | 4 +++- src/glw/Makefile | 5 ++++- src/glw/glw.pc.in | 4 +++- src/mesa/Makefile | 5 ++++- src/mesa/drivers/dri/Makefile | 3 ++- src/mesa/drivers/dri/dri.pc.in | 1 + src/mesa/gl.pc.in | 5 +++-- 15 files changed, 116 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers') diff --git a/configs/autoconf.in b/configs/autoconf.in index a3eaed5c9d..afd9f6a0c7 100644 --- a/configs/autoconf.in +++ b/configs/autoconf.in @@ -105,3 +105,20 @@ DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ # Where libGL will look for DRI hardware drivers DRI_DRIVER_SEARCH_DIR = $(DRI_DRIVER_INSTALL_DIR) + +# pkg-config substitutions +GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ +GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ +GL_PC_CFLAGS = @GL_PC_CFLAGS@ +DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ +GLU_PC_REQ = @GLU_PC_REQ@ +GLU_PC_REQ_PRIV = @GLU_PC_REQ_PRIV@ +GLU_PC_LIB = @GLU_PC_LIB@ +GLU_PC_LIB_PRIV = @GLU_PC_LIB_PRIV@ +GLU_PC_CFLAGS = @GLU_PC_CFLAGS@ +GLUT_PC_REQ_PRIV = @GLUT_PC_REQ_PRIV@ +GLUT_PC_LIB_PRIV = @GLUT_PC_LIB_PRIV@ +GLUT_PC_CFLAGS = @GLUT_PC_CFLAGS@ +GLW_PC_REQ_PRIV = @GLW_PC_REQ_PRIV@ +GLW_PC_LIB_PRIV = @GLW_PC_LIB_PRIV@ +GLW_PC_CFLAGS = @GLW_PC_CFLAGS@ diff --git a/configs/default b/configs/default index 21fa0d7f9a..52ad61cb92 100644 --- a/configs/default +++ b/configs/default @@ -100,3 +100,19 @@ DRI_DRIVER_INSTALL_DIR = $(INSTALL_LIB_DIR)/dri # 
Where libGL will look for DRI hardware drivers DRI_DRIVER_SEARCH_DIR = $(DRI_DRIVER_INSTALL_DIR) +# pkg-config substitutions +GL_PC_REQ_PRIV = +GL_PC_LIB_PRIV = +GL_PC_CFLAGS = +DRI_PC_REQ_PRIV = +GLU_PC_REQ = gl +GLU_PC_REQ_PRIV = +GLU_PC_LIB = +GLU_PC_LIB_PRIV = +GLU_PC_CFLAGS = +GLUT_PC_REQ_PRIV = +GLUT_PC_LIB_PRIV = +GLUT_PC_CFLAGS = +GLW_PC_REQ_PRIV = +GLW_PC_LIB_PRIV = +GLW_PC_CFLAGS = diff --git a/configure.ac b/configure.ac index 66e080eb8e..8026d9dca1 100644 --- a/configure.ac +++ b/configure.ac @@ -507,14 +507,18 @@ case "$mesa_driver" in xlib) if test "$x11_pkgconfig" = yes; then PKG_CHECK_MODULES([XLIBGL], [x11 xext]) + GL_PC_REQ_PRIV="x11 xext" X11_INCLUDES="$X11_INCLUDES $XLIBGL_CFLAGS" GL_LIB_DEPS="$XLIBGL_LIBS" else # should check these... X11_INCLUDES="$X11_INCLUDES $X_CFLAGS" GL_LIB_DEPS="$X_LIBS -lX11 -lXext" + GL_PC_LIB_PRIV="$GL_LIB_DEPS" + GL_PC_CFLAGS="$X11_INCLUDES" fi GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread $OS_LIBS" + GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $SELINUX_LIBS -lm -lpthread $OS_LIBS" # if static, move the external libraries to the programs # and empty the libraries for libGL @@ -532,6 +536,8 @@ dri) # Check for libdrm PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED]) PKG_CHECK_MODULES([DRI2PROTO], [dri2proto >= $DRI2PROTO_REQUIRED]) + GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED" + DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED" # find the DRI deps for libGL if test "$x11_pkgconfig" = yes; then @@ -542,16 +548,20 @@ dri) fi PKG_CHECK_MODULES([DRIGL], [$dri_modules]) + GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules" X11_INCLUDES="$X11_INCLUDES $DRIGL_CFLAGS" GL_LIB_DEPS="$DRIGL_LIBS" else # should check these... 
X11_INCLUDES="$X11_INCLUDES $X_CFLAGS" GL_LIB_DEPS="$X_LIBS -lX11 -lXext -lXxf86vm -lXdamage -lXfixes" + GL_PC_LIB_PRIV="$GL_LIB_DEPS" + GL_PC_CFLAGS="$X11_INCLUDES" # XCB can only be used from pkg-config if test "$enable_xcb" = yes; then PKG_CHECK_MODULES([XCB],[x11-xcb xcb-glx]) + GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV x11-xcb xcb-glx" X11_INCLUDES="$X11_INCLUDES $XCB_CFLAGS" GL_LIB_DEPS="$GL_LIB_DEPS $XCB_LIBS" fi @@ -559,6 +569,7 @@ dri) # need DRM libs, -lpthread, etc. GL_LIB_DEPS="$GL_LIB_DEPS $LIBDRM_LIBS -lm -lpthread $DLOPEN_LIBS $OS_LIBS" + GL_PC_LIB_PRIV="-lm -lpthread $DLOPEN_LIBS $OS_LIBS" ;; osmesa) # No libGL for osmesa @@ -566,6 +577,10 @@ osmesa) ;; esac AC_SUBST([GL_LIB_DEPS]) +AC_SUBST([GL_PC_REQ_PRIV]) +AC_SUBST([GL_PC_LIB_PRIV]) +AC_SUBST([GL_PC_CFLAGS]) +AC_SUBST([DRI_PC_REQ_PRIV]) dnl dnl More X11 setup @@ -825,6 +840,8 @@ if test "x$enable_glu" = xyes; then # Link libGLU to libOSMesa instead of libGL GLU_LIB_DEPS="" + GLU_PC_LIB='-L$(INSTALL_LIB_DIR) -l$(OSMESA_LIB)' + GLU_PC_CFLAGS='-I$(INSTALL_INC_DIR)' if test "$enable_static" = no; then GLU_MESA_DEPS='-l$(OSMESA_LIB)' else @@ -833,6 +850,8 @@ if test "x$enable_glu" = xyes; then ;; *) # If static, empty GLU_LIB_DEPS and add libs for programs to link + GLU_PC_REQ="gl" + GLU_PC_LIB_PRIV="-lm" if test "$enable_static" = no; then GLU_LIB_DEPS="-lm" GLU_MESA_DEPS='-l$(GL_LIB)' @@ -847,8 +866,14 @@ fi if test "$enable_static" = no; then GLU_LIB_DEPS="$GLU_LIB_DEPS $OS_CPLUSPLUS_LIBS" fi +GLU_PC_LIB_PRIV="$GLU_PC_LIB_PRIV $OS_CPLUSPLUS_LIBS" AC_SUBST([GLU_LIB_DEPS]) AC_SUBST([GLU_MESA_DEPS]) +AC_SUBST([GLU_PC_REQ]) +AC_SUBST([GLU_PC_REQ_PRIV]) +AC_SUBST([GLU_PC_LIB]) +AC_SUBST([GLU_PC_LIB_PRIV]) +AC_SUBST([GLU_PC_CFLAGS]) dnl dnl GLw configuration @@ -873,10 +898,13 @@ if test "x$enable_glw" = xyes; then SRC_DIRS="$SRC_DIRS glw" if test "$x11_pkgconfig" = yes; then PKG_CHECK_MODULES([GLW],[x11 xt]) + GLW_PC_REQ_PRIV="x11 xt" GLW_LIB_DEPS="$GLW_LIBS" else # should check these... 
GLW_LIB_DEPS="$X_LIBS -lXt -lX11" + GLW_PC_LIB_PRIV="$GLW_LIB_DEPS" + GLW_PC_CFLAGS="$X11_INCLUDES" fi GLW_SOURCES="GLwDrawA.c" @@ -895,9 +923,12 @@ if test "x$enable_glw" = xyes; then fi # MOTIF_LIBS is prepended to GLW_LIB_DEPS since Xm needs Xt/X11 GLW_LIB_DEPS="$MOTIF_LIBS $GLW_LIB_DEPS" + GLW_PC_LIB_PRIV="$MOTIF_LIBS $GLW_PC_LIB_PRIV" + GLW_PC_CFLAGS="$MOTIF_CFLAGS $GLW_PC_CFLAGS" fi # If static, empty GLW_LIB_DEPS and add libs for programs to link + GLW_PC_LIB_PRIV="$GLW_PC_LIB_PRIV $OS_LIBS" if test "$enable_static" = no; then GLW_MESA_DEPS='-l$(GL_LIB)' GLW_LIB_DEPS="$GLW_LIB_DEPS $OS_LIBS" @@ -911,6 +942,9 @@ AC_SUBST([GLW_LIB_DEPS]) AC_SUBST([GLW_MESA_DEPS]) AC_SUBST([GLW_SOURCES]) AC_SUBST([MOTIF_CFLAGS]) +AC_SUBST([GLW_PC_REQ_PRIV]) +AC_SUBST([GLW_PC_LIB_PRIV]) +AC_SUBST([GLW_PC_CFLAGS]) dnl dnl GLUT configuration @@ -945,12 +979,16 @@ if test "x$enable_glut" = xyes; then fi if test "$x11_pkgconfig" = yes; then PKG_CHECK_MODULES([GLUT],[x11 xmu xi]) + GLUT_PC_REQ_PRIV="x11 xmu xi" GLUT_LIB_DEPS="$GLUT_LIBS" else # should check these... 
GLUT_LIB_DEPS="$X_LIBS -lX11 -lXmu -lXi" + GLUT_PC_LIB_PRIV="$GLUT_LIB_DEPS" + GLUT_PC_CFLAGS="$X11_INCLUDES" fi GLUT_LIB_DEPS="$GLUT_LIB_DEPS -lm $OS_LIBS" + GLUT_PC_LIB_PRIV="$GLUT_PC_LIB_PRIV -lm $OS_LIBS" # If glut is available, we can build most programs if test "$with_demos" = yes; then @@ -969,6 +1007,9 @@ fi AC_SUBST([GLUT_LIB_DEPS]) AC_SUBST([GLUT_MESA_DEPS]) AC_SUBST([GLUT_CFLAGS]) +AC_SUBST([GLUT_PC_REQ_PRIV]) +AC_SUBST([GLUT_PC_LIB_PRIV]) +AC_SUBST([GLUT_PC_CFLAGS]) dnl dnl Program library dependencies diff --git a/src/glu/Makefile b/src/glu/Makefile index b025a90b67..4351d23599 100644 --- a/src/glu/Makefile +++ b/src/glu/Makefile @@ -18,7 +18,12 @@ pcedit = sed \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' + -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \ + -e 's,@GLU_PC_REQ@,$(GLU_PC_REQ),' \ + -e 's,@GLU_PC_REQ_PRIV@,$(GLU_PC_REQ_PRIV),' \ + -e 's,@GLU_PC_LIB@,$(GLU_PC_LIB),' \ + -e 's,@GLU_PC_LIB_PRIV@,$(GLU_PC_LIB_PRIV),' \ + -e 's,@GLU_PC_CFLAGS@,$(GLU_PC_CFLAGS),' glu.pc: glu.pc.in $(pcedit) $< > $@ diff --git a/src/glu/glu.pc.in b/src/glu/glu.pc.in index 8606b9b222..2c9bd955b8 100644 --- a/src/glu/glu.pc.in +++ b/src/glu/glu.pc.in @@ -5,7 +5,9 @@ includedir=@INSTALL_INC_DIR@ Name: glu Description: Mesa OpenGL Utility library -Requires: gl +Requires: @GLU_PC_REQ@ +Requires.private: @GLU_PC_REQ_PRIV@ Version: @VERSION@ -Libs: -L${libdir} -lGLU -Cflags: -I${includedir} +Libs: -L${libdir} -lGLU @GLU_PC_LIB@ +Libs.private: @GLU_PC_LIB_PRIV@ +Cflags: -I${includedir} @GLU_PC_CFLAGS@ diff --git a/src/glut/glx/Makefile b/src/glut/glx/Makefile index 7f886f775a..c8f0097ebe 100644 --- a/src/glut/glx/Makefile +++ b/src/glut/glx/Makefile @@ -103,7 +103,10 @@ pcedit = sed \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ -e 
's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(GLUT_MAJOR).$(GLUT_MINOR).$(GLUT_TINY),' + -e 's,@VERSION@,$(GLUT_MAJOR).$(GLUT_MINOR).$(GLUT_TINY),' \ + -e 's,@GLUT_PC_REQ_PRIV@,$(GLUT_PC_REQ_PRIV),' \ + -e 's,@GLUT_PC_LIB_PRIV@,$(GLUT_PC_LIB_PRIV),' \ + -e 's,@GLUT_PC_CFLAGS@,$(GLUT_PC_CFLAGS),' glut.pc: glut.pc.in $(pcedit) $< > $@ diff --git a/src/glut/glx/glut.pc.in b/src/glut/glx/glut.pc.in index e8638fe148..ae0689d7e8 100644 --- a/src/glut/glx/glut.pc.in +++ b/src/glut/glx/glut.pc.in @@ -6,6 +6,8 @@ includedir=@INSTALL_INC_DIR@ Name: glut Description: Mesa OpenGL Utility Toolkit library Requires: gl glu +Requires.private: @GLUT_PC_REQ_PRIV@ Version: @VERSION@ Libs: -L${libdir} -lglut -Cflags: -I${includedir} +Libs.private: @GLUT_PC_LIB_PRIV@ +Cflags: -I${includedir} @GLUT_PC_CFLAGS@ diff --git a/src/glut/mini/Makefile b/src/glut/mini/Makefile index e47d09edb7..112a250bbd 100644 --- a/src/glut/mini/Makefile +++ b/src/glut/mini/Makefile @@ -77,7 +77,10 @@ pcedit = sed \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(GLUT_MAJOR).$(GLUT_MINOR).$(GLUT_TINY),' + -e 's,@VERSION@,$(GLUT_MAJOR).$(GLUT_MINOR).$(GLUT_TINY),' \ + -e 's,@GLUT_PC_REQ_PRIV@,$(GLUT_PC_REQ_PRIV),' \ + -e 's,@GLUT_PC_LIB_PRIV@,$(GLUT_PC_LIB_PRIV),' \ + -e 's,@GLUT_PC_CFLAGS@,$(GLUT_PC_CFLAGS),' glut.pc: glut.pc.in $(pcedit) $< > $@ diff --git a/src/glut/mini/glut.pc.in b/src/glut/mini/glut.pc.in index e8638fe148..ae0689d7e8 100644 --- a/src/glut/mini/glut.pc.in +++ b/src/glut/mini/glut.pc.in @@ -6,6 +6,8 @@ includedir=@INSTALL_INC_DIR@ Name: glut Description: Mesa OpenGL Utility Toolkit library Requires: gl glu +Requires.private: @GLUT_PC_REQ_PRIV@ Version: @VERSION@ Libs: -L${libdir} -lglut -Cflags: -I${includedir} +Libs.private: @GLUT_PC_LIB_PRIV@ +Cflags: -I${includedir} @GLUT_PC_CFLAGS@ diff --git a/src/glw/Makefile b/src/glw/Makefile index 753c4b74d4..cf412b225e 
100644 --- a/src/glw/Makefile +++ b/src/glw/Makefile @@ -30,7 +30,10 @@ pcedit = sed \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(MAJOR).$(MINOR).$(TINY),' + -e 's,@VERSION@,$(MAJOR).$(MINOR).$(TINY),' \ + -e 's,@GLW_PC_REQ_PRIV@,$(GLW_PC_REQ_PRIV),' \ + -e 's,@GLW_PC_LIB_PRIV@,$(GLW_PC_LIB_PRIV),' \ + -e 's,@GLW_PC_CFLAGS@,$(GLW_PC_CFLAGS),' glw.pc: glw.pc.in $(pcedit) $< > $@ diff --git a/src/glw/glw.pc.in b/src/glw/glw.pc.in index 25f3f73b8c..5493093be1 100644 --- a/src/glw/glw.pc.in +++ b/src/glw/glw.pc.in @@ -6,6 +6,8 @@ includedir=@INSTALL_INC_DIR@ Name: glw Description: Mesa OpenGL widget library Requires: gl +Requires.private: @GLW_PC_REQ_PRIV@ Version: @VERSION@ Libs: -L${libdir} -lGLw -Cflags: -I${includedir} +Libs.private: @GLW_PC_LIB_PRIV@ +Cflags: -I${includedir} @GLW_PC_CFLAGS@ diff --git a/src/mesa/Makefile b/src/mesa/Makefile index ba65ce695f..42a8673f4d 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -92,7 +92,10 @@ pcedit = sed \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' + -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \ + -e 's,@GL_PC_REQ_PRIV@,$(GL_PC_REQ_PRIV),' \ + -e 's,@GL_PC_LIB_PRIV@,$(GL_PC_LIB_PRIV),' \ + -e 's,@GL_PC_CFLAGS@,$(GL_PC_CFLAGS),' gl.pc: gl.pc.in $(pcedit) $< > $@ diff --git a/src/mesa/drivers/dri/Makefile b/src/mesa/drivers/dri/Makefile index eef68825bc..9e49fb16f5 100644 --- a/src/mesa/drivers/dri/Makefile +++ b/src/mesa/drivers/dri/Makefile @@ -25,7 +25,8 @@ pcedit = sed \ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \ - -e 's,@DRI_DRIVER_DIR@,$(DRI_DRIVER_SEARCH_DIR),' + -e 's,@DRI_DRIVER_DIR@,$(DRI_DRIVER_SEARCH_DIR),' \ + -e 
's,@DRI_PC_REQ_PRIV@,$(DRI_PC_REQ_PRIV),' dri.pc: dri.pc.in $(pcedit) $< > $@ diff --git a/src/mesa/drivers/dri/dri.pc.in b/src/mesa/drivers/dri/dri.pc.in index c47ee9c7e7..695aa6cfd6 100644 --- a/src/mesa/drivers/dri/dri.pc.in +++ b/src/mesa/drivers/dri/dri.pc.in @@ -7,4 +7,5 @@ dridriverdir=@DRI_DRIVER_DIR@ Name: dri Description: Direct Rendering Infrastructure Version: @VERSION@ +Requires.private: @DRI_PC_REQ_PRIV@ Cflags: -I${includedir} diff --git a/src/mesa/gl.pc.in b/src/mesa/gl.pc.in index 1927880d5f..0462b9fca2 100644 --- a/src/mesa/gl.pc.in +++ b/src/mesa/gl.pc.in @@ -5,7 +5,8 @@ includedir=@INSTALL_INC_DIR@ Name: gl Description: Mesa OpenGL library -Requires: +Requires.private: @GL_PC_REQ_PRIV@ Version: @VERSION@ Libs: -L${libdir} -lGL -Cflags: -I${includedir} +Libs.private: @GL_PC_LIB_PRIV@ +Cflags: -I${includedir} @GL_PC_CFLAGS@ -- cgit v1.2.3 From bc3b8a39a7090d95942faf378e776e89c490e250 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 9 Dec 2008 23:02:48 -0800 Subject: intel: Add batchbuffer assertions to hopefully catch future mistakes. 
--- src/mesa/drivers/dri/intel/intel_batchbuffer.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 8129996979..51579df09e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -55,6 +55,12 @@ struct intel_batchbuffer GLuint size; + /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ + struct { + GLuint total; + GLubyte *start_ptr; + } emit; + GLuint dirty_state; }; @@ -143,9 +149,12 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define BEGIN_BATCH(n, cliprect_mode) do { \ intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ + assert(intel->batch->emit.start_ptr == NULL); \ + intel->batch->emit.total = (n) * 4; \ + intel->batch->emit.start_ptr = intel->batch->ptr; \ } while (0) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ assert((delta) >= 0); \ @@ -153,7 +162,16 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, read_domains, write_domain, delta); \ } while (0) -#define ADVANCE_BATCH() do { } while(0) +#define ADVANCE_BATCH() do { \ + unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ + assert(intel->batch->emit.start_ptr != NULL); \ + if (_n != intel->batch->emit.total) { \ + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ + _n, intel->batch->emit.total); \ + abort(); \ + } \ + intel->batch->emit.start_ptr = NULL; \ +} while(0) static INLINE void -- cgit v1.2.3 From 0dfec4ab615d45e298bf47737a348c32eebe2acf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Dec 2008 12:43:58 -0800 Subject: i965: Add decode of index/vertex buffer and primitive emit. 
--- src/mesa/drivers/dri/intel/intel_decode.c | 146 +++++++++++++++++++++++++++++- 1 file changed, 142 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index 0e72ca08b2..0b8a287f6f 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -836,10 +836,71 @@ get_965_depthformat(unsigned int depthformat) } } +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + static int decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) { unsigned int opcode, 
len; + int i; struct { uint32_t opcode; @@ -860,8 +921,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, - /* 0x7808: 3DSTATE_VERTEX_BUFFERS */ - /* 0x7809: 3DSTATE_VERTEX_ELEMENTS */ + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, @@ -947,6 +1007,64 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; + case 0x7808: + len = (data[0] & 0xff) + 2; + if ((len - 1) % 4 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS"); + instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 26) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + instr_out(data, hw_offset, i++, "buffer address\n"); + instr_out(data, hw_offset, i++, "max index\n"); + instr_out(data, hw_offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + len = (data[0] & 0xff) + 2; + if ((len + 1) % 2 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS"); + instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04xd bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + instr_out(data, hw_offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780a: + len = (data[0] & 0xff) + 2; + if (len != 3) + fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER"); + instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n"); + instr_out(data, hw_offset, 1, "beginning buffer address\n"); + instr_out(data, hw_offset, 2, "ending buffer address\n"); + return len; + case 0x7900: if (len != 4) fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n"); @@ -968,9 +1086,9 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; case 0x7905: - if (len != 5) + if (len != 5 && len != 6) fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); - if (count < 5) + if (count < len) BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); instr_out(data, hw_offset, 0, @@ -985,7 +1103,27 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) ((data[3] & 0x0007ffc0) >> 6) + 1, ((data[3] & 0xfff80000) >> 19) + 1); instr_out(data, hw_offset, 4, "volume depth\n"); + if (len == 6) + instr_out(data, hw_offset, 5, "\n"); + + return len; + case 0x7b00: + len = (data[0] & 0xff) + 2; + if (len != 6) + fprintf(out, "Bad count in 3DPRIMITIVE\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DPRIMITIVE"); + + instr_out(data, hw_offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? 
"random" : "sequential"); + instr_out(data, hw_offset, 1, "primitive count\n"); + instr_out(data, hw_offset, 2, "start vertex\n"); + instr_out(data, hw_offset, 3, "instance count\n"); + instr_out(data, hw_offset, 4, "start instance\n"); + instr_out(data, hw_offset, 5, "index bias\n"); return len; } -- cgit v1.2.3 From e72a44215312ae1f3c812ba28e47b4aec3589de9 Mon Sep 17 00:00:00 2001 From: Pierre Willenbrock Date: Fri, 12 Dec 2008 21:18:23 +0100 Subject: intel: Don't steal renderbuffer from caller in intel_miptree_create_for_region Fixes double-frees of some regions, once from the renderbuffer code and once from the miptree itself. Bug #19062 --- src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index c677ddd63c..bf1c3f03f0 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -163,7 +163,7 @@ intel_miptree_create_for_region(struct intel_context *intel, mt->pitch = region->pitch; #endif - mt->region = region; + intel_region_reference(&mt->region, region); return mt; } -- cgit v1.2.3