diff options
Diffstat (limited to 'src/mesa')
26 files changed, 742 insertions, 44 deletions
diff --git a/src/mesa/Makefile b/src/mesa/Makefile index ba65ce695f..6b4057030e 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -88,14 +88,29 @@ install: default done -pcedit = sed \ +gl_pcedit = sed \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ - -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' + -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \ + -e 's,@GL_PC_REQ_PRIV@,$(GL_PC_REQ_PRIV),' \ + -e 's,@GL_PC_LIB_PRIV@,$(GL_PC_LIB_PRIV),' \ + -e 's,@GL_PC_CFLAGS@,$(GL_PC_CFLAGS),' gl.pc: gl.pc.in - $(pcedit) $< > $@ + $(gl_pcedit) $< > $@ + +osmesa_pcedit = sed \ + -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ + -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ + -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ + -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \ + -e 's,@OSMESA_LIB@,$(OSMESA_LIB),' \ + -e 's,@OSMESA_PC_REQ@,$(OSMESA_PC_REQ),' \ + -e 's,@OSMESA_PC_LIB_PRIV@,$(OSMESA_PC_LIB_PRIV),' + +osmesa.pc: osmesa.pc.in + $(osmesa_pcedit) $< > $@ install-headers: $(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL @@ -109,10 +124,12 @@ install-libgl: default gl.pc install-headers $(DESTDIR)$(INSTALL_LIB_DIR) $(INSTALL) -m 644 gl.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig -install-osmesa: default +install-osmesa: default osmesa.pc $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR) + $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig $(INSTALL) $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_GLOB) \ $(DESTDIR)$(INSTALL_LIB_DIR) + $(INSTALL) -m 644 osmesa.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig install-dri: default cd drivers/dri && $(MAKE) install diff --git a/src/mesa/drivers/dri/Makefile b/src/mesa/drivers/dri/Makefile index eef68825bc..9e49fb16f5 100644 --- a/src/mesa/drivers/dri/Makefile +++ b/src/mesa/drivers/dri/Makefile @@ -25,7 +25,8 @@ pcedit = sed \ -e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \ -e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \ -e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \ - -e 's,@DRI_DRIVER_DIR@,$(DRI_DRIVER_SEARCH_DIR),' + -e 's,@DRI_DRIVER_DIR@,$(DRI_DRIVER_SEARCH_DIR),' \ + -e 's,@DRI_PC_REQ_PRIV@,$(DRI_PC_REQ_PRIV),' dri.pc: dri.pc.in $(pcedit) $< > $@ diff --git a/src/mesa/drivers/dri/dri.pc.in b/src/mesa/drivers/dri/dri.pc.in index c47ee9c7e7..695aa6cfd6 100644 --- a/src/mesa/drivers/dri/dri.pc.in +++ b/src/mesa/drivers/dri/dri.pc.in @@ -7,4 +7,5 @@ dridriverdir=@DRI_DRIVER_DIR@ Name: dri Description: Direct Rendering Infrastructure Version: @VERSION@ +Requires.private: @DRI_PC_REQ_PRIV@ Cflags: -I${includedir} diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index d1b0dcdf31..d53e2cbd5a 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -295,6 +295,13 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER)) return GL_FALSE; + /* Only support TEXCOORDMODE_CLAMP_EDGE and TEXCOORDMODE_CUBE (not + * used) when using cube map texture coordinates + */ + if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB && + (((ws != GL_CLAMP) && (ws != GL_CLAMP_TO_EDGE)) || + ((wr != GL_CLAMP) && (wr != GL_CLAMP_TO_EDGE)))) + return GL_FALSE; state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */ diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index c3a26fc82e..785fb784ca 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -309,12 +309,12 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) { first_time = GL_FALSE; + brw_validate_state(brw); + /* Various fallback checks: */ if (brw->intel.Fallback) goto out; - brw_validate_state(brw); - /* Check that we can fit our state in with our existing batchbuffer, or * flush otherwise. */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index cb728190f5..baecfdcb79 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1095,7 +1095,7 @@ static void noise1_sub( struct brw_wm_compile *c ) { /* Arrange the two end coordinates into scalars (itmp0/itmp1) to be hashed. Also compute the remainder (offset within the unit length), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, itmp[ 0 ], param ); + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param ); brw_FRC( p, param, param ); brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ @@ -1220,8 +1220,8 @@ static void noise2_sub( struct brw_wm_compile *c ) { /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to be hashed. Also compute the remainders (offsets within the unit square), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, itmp[ 0 ], param0 ); - brw_RNDD( p, itmp[ 1 ], param1 ); + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); brw_FRC( p, param0, param0 ); brw_FRC( p, param1, param1 ); brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ @@ -1400,21 +1400,19 @@ static void noise3_sub( struct brw_wm_compile *c ) { /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to be hashed. Also compute the remainders (offsets within the unit cube), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, itmp[ 0 ], param0 ); - brw_RNDD( p, itmp[ 1 ], param1 ); - brw_RNDD( p, itmp[ 2 ], param2 ); - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBC8F ) ); /* constant used later */ - brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0xD0BD ) ); /* constant used later */ - brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0x9B93 ) ); /* constant used later */ + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); + brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 ); brw_FRC( p, param0, param0 ); brw_FRC( p, param1, param1 ); brw_FRC( p, param2, param2 ); /* Since we now have only 16 bits of precision in the hash, we must be more careful about thorough mixing to maintain entropy as we squash the input vector into a small scalar. */ - brw_MUL( p, brw_acc_reg(), itmp[ 4 ], itmp[ 0 ] ); - brw_MAC( p, brw_acc_reg(), itmp[ 5 ], itmp[ 1 ] ); - brw_MAC( p, itmp[ 0 ], itmp[ 6 ], itmp[ 2 ] ); + brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); + brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) ); + brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ), + brw_imm_uw( 0x9B93 ) ); brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); @@ -1668,6 +1666,430 @@ static void emit_noise3( struct brw_wm_compile *c, release_tmps( c, mark ); } +/* For the four-dimensional case, the little micro-optimisation benefits + we obtain by unrolling all the loops aren't worth the massive bloat it + now causes. Instead, we loop twice around performing a similar operation + to noise3, once for the w=0 cube and once for the w=1, with a bit more + code to glue it all together. */ +static void noise4_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param[ 4 ], + x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ + w0, /* noise for the w=0 cube */ + floors[ 2 ], /* integer coordinates of base corner of hypercube */ + interp[ 4 ], /* interpolation coefficients */ + t, tmp[ 8 ], /* float temporaries */ + itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ + wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ + int i, j; + int mark = mark_tmps( c ); + GLuint loop, origin; + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + t = alloc_tmp( c ); + w0 = alloc_tmp( c ); + floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); + floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); + + for( i = 0; i < 4; i++ ) { + param[ i ] = lookup_tmp( c, mark - 5 + i ); + interp[ i ] = alloc_tmp( c ); + } + + for( i = 0; i < 8; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); + } + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* We only want 16 bits of precision from the integral part of each + co-ordinate, but unfortunately the RNDD semantics would saturate + at 16 bits if we performed the operation directly to a 16-bit + destination. Therefore, we round to 32-bit temporaries where + appropriate, and then store only the lower 16 bits. */ + brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] ); + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] ); + brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] ); + brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) ); + brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) ); + + /* Modify the flag register here, because the side effect is useful + later (see below). We know for certain that all flags will be + cleared, since the FRC instruction cannot possibly generate + negative results. Even for exceptional inputs (infinities, denormals, + NaNs), the architecture guarantees that the L conditional is false. */ + brw_set_conditionalmod( p, BRW_CONDITIONAL_L ); + brw_FRC( p, param[ 0 ], param[ 0 ] ); + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + for( i = 1; i < 4; i++ ) + brw_FRC( p, param[ i ], param[ i ] ); + + /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first + of all. */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) ); + for( i = 0; i < 4; i++ ) + brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); + for( i = 0; i < 4; i++ ) + brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) ); + for( j = 0; j < 3; j++ ) + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); + + /* Mark the current address, as it will be a jump destination. The + following code will be executed twice: first, with the flag + register clear indicating the w=0 case, and second with flags + set for w=1. */ + loop = p->nr_insn; + + /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to + be hashed. Since we have only 16 bits of precision in the hash, we + must be careful about thorough mixing to maintain entropy as we + squash the input vector into a small scalar. */ + brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ), + brw_imm_uw( 0xD0BD ) ); + brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ), + brw_imm_uw( 0x9B93 ) ); + brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ), + brw_imm_uw( 0xA359 ) ); + brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + + /* Temporarily disable the execution mask while we work with ExecSize=16 + channels (the mask is set for ExecSize=8 and is probably incorrect). + Although this might cause execution of unwanted channels, the code + writes only to temporary registers and has no side effects, so + disabling the mask is harmless. */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); + brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); + brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); + + /* We're now ready to perform the hashing. The eight hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 16x16 + bit multiplication, and 8-bit swizzles (which we get for + free). */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + brw_pop_insn_state( p ); + + /* Now we want to initialise the four rear gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + /* x component */ + brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param[ 0 ] ); + brw_MUL( p, x0y1, x0y1, param[ 0 ] ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + /* prepare t for the w component (used below): w the first time through + the loop; w - 1 the second time) */ + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); + p->current->header.predicate_inverse = 1; + brw_MOV( p, t, param[ 3 ] ); + p->current->header.predicate_inverse = 0; + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* w component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* Here we interpolate in the y dimension... */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); + brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); + brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); + + /* Now do the same thing for the front four gradients... */ + /* x component */ + brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param[ 0 ] ); + brw_MUL( p, x0y1, x0y1, param[ 0 ] ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + /* prepare t for the w component (used below): w the first time through + the loop; w - 1 the second time) */ + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); + p->current->header.predicate_inverse = 1; + brw_MOV( p, t, param[ 3 ] ); + p->current->header.predicate_inverse = 0; + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* w component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* Interpolate in the y dimension: */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); + brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. The rear face is in tmp[ 0 ] (see above), so this + time put the front face in tmp[ 1 ] and we're nearly there... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); + brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); + + /* Another interpolation, in the z dimension: */ + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); + + /* Exit the loop if we've computed both cubes... */ + origin = p->nr_insn; + brw_push_insn_state( p ); + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) ); + brw_pop_insn_state( p ); + + /* Save the result for the w=0 case, and increment the w coordinate: */ + brw_MOV( p, w0, tmp[ 0 ] ); + brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ), + brw_imm_uw( 1 ) ); + + /* Loop around for the other cube. Explicitly set the flag register + (unfortunately we must spend an extra instruction to do this: we + can't rely on a side effect of the previous MOV or ADD because + conditional modifiers which are normally true might be false in + exceptional circumstances, e.g. given a NaN input; the add to + brw_ip_reg() is not suitable because the IP is not an 8-vector). */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) ); + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( loop - p->nr_insn ) << 4 ) ); + brw_pop_insn_state( p ); + + /* Patch the previous conditional branch now that we know the + destination address. */ + brw_set_src1( p->store + origin, + brw_imm_d( ( p->nr_insn - origin ) << 4 ) ); + + /* The very last interpolation. */ + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise4( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); + src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + src3 = get_src_reg( c, inst->SrcReg, 3, 1 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + param2 = alloc_tmp( c ); + param3 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + brw_MOV( p, param2, src2 ); + brw_MOV( p, param3, src3 ); + + invoke_subroutine( c, SUB_NOISE4, noise4_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param0 ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + static void emit_wpos_xy(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -1996,8 +2418,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_NOISE3: emit_noise3(c, inst); break; - /* case OPCODE_NOISE4: */ - /* not yet implemented */ + case OPCODE_NOISE4: + emit_noise4(c, inst); + break; case OPCODE_TEX: emit_tex(c, inst); break; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 8129996979..51579df09e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -55,6 +55,12 @@ struct intel_batchbuffer GLuint size; + /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ + struct { + GLuint total; + GLubyte *start_ptr; + } emit; + GLuint dirty_state; }; @@ -143,9 +149,12 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, #define BEGIN_BATCH(n, cliprect_mode) do { \ intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ + assert(intel->batch->emit.start_ptr == NULL); \ + intel->batch->emit.total = (n) * 4; \ + intel->batch->emit.start_ptr = intel->batch->ptr; \ } while (0) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ assert((delta) >= 0); \ @@ -153,7 +162,16 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, read_domains, write_domain, delta); \ } while (0) -#define ADVANCE_BATCH() do { } while(0) +#define ADVANCE_BATCH() do { \ + unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ + assert(intel->batch->emit.start_ptr != NULL); \ + if (_n != intel->batch->emit.total) { \ + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ + _n, intel->batch->emit.total); \ + abort(); \ + } \ + intel->batch->emit.start_ptr = NULL; \ +} while(0) static INLINE void diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index 0e72ca08b2..0b8a287f6f 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -836,10 +836,71 @@ get_965_depthformat(unsigned int depthformat) } } +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + static int decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) { unsigned int opcode, len; + int i; struct { uint32_t opcode; @@ -860,8 +921,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, - /* 0x7808: 3DSTATE_VERTEX_BUFFERS */ - /* 0x7809: 3DSTATE_VERTEX_ELEMENTS */ + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, @@ -947,6 +1007,64 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; + case 0x7808: + len = (data[0] & 0xff) + 2; + if ((len - 1) % 4 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS"); + instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 26) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + instr_out(data, hw_offset, i++, "buffer address\n"); + instr_out(data, hw_offset, i++, "max index\n"); + instr_out(data, hw_offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + len = (data[0] & 0xff) + 2; + if ((len + 1) % 2 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS"); + instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04xd bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? "" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + instr_out(data, hw_offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780a: + len = (data[0] & 0xff) + 2; + if (len != 3) + fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER"); + instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n"); + instr_out(data, hw_offset, 1, "beginning buffer address\n"); + instr_out(data, hw_offset, 2, "ending buffer address\n"); + return len; + case 0x7900: if (len != 4) fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n"); @@ -968,9 +1086,9 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) return len; case 0x7905: - if (len != 5) + if (len != 5 && len != 6) fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); - if (count < 5) + if (count < len) BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); instr_out(data, hw_offset, 0, @@ -985,7 +1103,27 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) ((data[3] & 0x0007ffc0) >> 6) + 1, ((data[3] & 0xfff80000) >> 19) + 1); instr_out(data, hw_offset, 4, "volume depth\n"); + if (len == 6) + instr_out(data, hw_offset, 5, "\n"); + + return len; + case 0x7b00: + len = (data[0] & 0xff) + 2; + if (len != 6) + fprintf(out, "Bad count in 3DPRIMITIVE\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DPRIMITIVE"); + + instr_out(data, hw_offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? "random" : "sequential"); + instr_out(data, hw_offset, 1, "primitive count\n"); + instr_out(data, hw_offset, 2, "start vertex\n"); + instr_out(data, hw_offset, 3, "instance count\n"); + instr_out(data, hw_offset, 4, "start instance\n"); + instr_out(data, hw_offset, 5, "index bias\n"); return len; } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index b96ba72853..bf1c3f03f0 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -111,9 +111,9 @@ intel_miptree_create(struct intel_context *intel, first_level, last_level, width0, height0, depth0, cpp, compress_byte); /* - * pitch == 0 indicates the null texture + * pitch == 0 || height == 0 indicates the null texture */ - if (!mt || !mt->pitch) + if (!mt || !mt->pitch || !mt->total_height) return NULL; mt->region = intel_region_alloc(intel, @@ -163,7 +163,7 @@ intel_miptree_create_for_region(struct intel_context *intel, mt->pitch = region->pitch; #endif - mt->region = region; + intel_region_reference(&mt->region, region); return mt; } diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 82f8b87009..e64d8a1556 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -231,6 +231,7 @@ intelInitTextureFuncs(struct dd_function_table *functions) /* compressed texture functions */ functions->CompressedTexImage2D = intelCompressedTexImage2D; + functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; functions->GetCompressedTexImage = intelGetCompressedTexImage; functions->NewTextureObject = intelNewTextureObject; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 6219c1c953..742ccc043a 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -130,6 +130,16 @@ void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage ); +void intelCompressedTexSubImage2D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLsizei imageSize, + const GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, GLvoid *pixels, struct gl_texture_object *texObj, diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index b752361886..f86de56897 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -184,3 +184,18 @@ intelTexSubImage1D(GLcontext * ctx, format, type, pixels, packing, texObj, texImage); } + +void +intelCompressedTexSubImage2D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLsizei imageSize, + const GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n", + width, height, xoffset, yoffset); +} diff --git a/src/mesa/gl.pc.in b/src/mesa/gl.pc.in index 1927880d5f..0462b9fca2 100644 --- a/src/mesa/gl.pc.in +++ b/src/mesa/gl.pc.in @@ -5,7 +5,8 @@ includedir=@INSTALL_INC_DIR@ Name: gl Description: Mesa OpenGL library -Requires: +Requires.private: @GL_PC_REQ_PRIV@ Version: @VERSION@ Libs: -L${libdir} -lGL -Cflags: -I${includedir} +Libs.private: @GL_PC_LIB_PRIV@ +Cflags: -I${includedir} @GL_PC_CFLAGS@ diff --git a/src/mesa/glapi/extension_helper.py b/src/mesa/glapi/extension_helper.py index 375e3ea59e..64f64a2fd8 100644 --- a/src/mesa/glapi/extension_helper.py +++ b/src/mesa/glapi/extension_helper.py @@ -174,6 +174,9 @@ class PrintGlExtensionGlue(gl_XML.gl_print_base): parameter_signature = '' for p in f.parameterIterator(): + if p.is_padding: + continue + # FIXME: This is a *really* ugly hack. :( tn = p.type_expr.get_base_type_node() diff --git a/src/mesa/glapi/glX_proto_recv.py b/src/mesa/glapi/glX_proto_recv.py index 20f75575cf..923c1958f0 100644 --- a/src/mesa/glapi/glX_proto_recv.py +++ b/src/mesa/glapi/glX_proto_recv.py @@ -89,8 +89,8 @@ class PrintGlxDispatchFunctions(glX_proto_common.glx_print_proto): print '#include "glxbyteorder.h"' print '#include "indirect_util.h"' print '#include "singlesize.h"' - print '#include "glapitable.h"' print '#include "glapi.h"' + print '#include "glapitable.h"' print '#include "glthread.h"' print '#include "dispatch.h"' print '' @@ -225,6 +225,8 @@ class PrintGlxDispatchFunctions(glX_proto_common.glx_print_proto): list = [] for param in f.parameterIterator(): + if param.is_padding: + continue if param.is_counter or param.is_image() or param.is_output or param.name in f.count_parameter_list or len(param.count_parameter_list): location = param.name diff --git a/src/mesa/glapi/glX_proto_send.py b/src/mesa/glapi/glX_proto_send.py index b00b8a1ba6..501706acc7 100644 --- a/src/mesa/glapi/glX_proto_send.py +++ b/src/mesa/glapi/glX_proto_send.py @@ -333,7 +333,7 @@ const GLuint __glXDefaultPixelStore[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 1 }; if image.img_pad_dimensions: do_it = 1 break - + if do_it: [h, n] = hash_pixel_function(func) @@ -422,7 +422,10 @@ generic_%u_byte( GLint rop, const void * ptr ) else: src_ptr = "&" + p.name - if not extra_offset: + if p.is_padding: + print '(void) memset((void *)(%s + %u), 0, %s);' \ + % (pc, p.offset + adjust, p.size_string() ) + elif not extra_offset: print '(void) memcpy((void *)(%s + %u), (void *)(%s), %s);' \ % (pc, p.offset + adjust, src_ptr, p.size_string() ) else: @@ -472,6 +475,10 @@ generic_%u_byte( GLint rop, const void * ptr ) else: dim_str = str(dim) + if param.is_padding: + print '(void) memset((void *)(%s + %u), 0, %s);' \ + % (pc, (param.offset - 4) + adjust, param.size_string() ) + if param.img_null_flag: if large: print '(void) memcpy((void *)(%s + %u), zero, 4);' % (pc, (param.offset - 4) + adjust) @@ -739,6 +746,9 @@ generic_%u_byte( GLint rop, const void * ptr ) p_string = "" for param in f.parameterIterateGlxSend(): + if param.is_padding: + continue + p_string += ", " + param.name if param.is_image(): diff --git a/src/mesa/glapi/glX_proto_size.py b/src/mesa/glapi/glX_proto_size.py index 2b9a643362..95cb5110cc 100644 --- a/src/mesa/glapi/glX_proto_size.py +++ b/src/mesa/glapi/glX_proto_size.py @@ -581,6 +581,11 @@ class PrintGlxReqSize_c(PrintGlxReqSize_common): self.common_emit_fixups(fixup) + if img.img_null_flag: + print '' + print ' if (*(CARD32 *) (pc + %s))' % (img.offset - 4) + print ' return 0;' + print '' print ' return __glXImageSize(%s, %s, %s, %s, %s, %s,' % (img.img_format, img.img_type, img.img_target, w, h, d ) print ' image_height, row_length, skip_images,' diff --git a/src/mesa/glapi/gl_API.dtd b/src/mesa/glapi/gl_API.dtd index f89d381866..30c646c924 100644 --- a/src/mesa/glapi/gl_API.dtd +++ b/src/mesa/glapi/gl_API.dtd @@ -45,6 +45,7 @@ counter (true | false) "false" count_scale NMTOKEN "1" output (true | false) "false" + padding (true | false) "false" img_width NMTOKEN #IMPLIED img_height NMTOKEN #IMPLIED img_depth NMTOKEN #IMPLIED diff --git a/src/mesa/glapi/gl_API.xml b/src/mesa/glapi/gl_API.xml index 6c0367aad7..951fd95799 100644 --- a/src/mesa/glapi/gl_API.xml +++ b/src/mesa/glapi/gl_API.xml @@ -3267,7 +3267,8 @@ <param name="width" type="GLsizei"/> <param name="format" type="GLenum"/> <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_xoff="xoffset" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/> + <param name="UNUSED" type="GLuint" padding="true"/> + <param name="pixels" type="const GLvoid *" img_width="width" img_xoff="xoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> <glx rop="4099" large="true"/> </function> @@ -3280,7 +3281,8 @@ <param name="height" type="GLsizei"/> <param name="format" type="GLenum"/> <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_xoff="xoffset" img_yoff="yoffset" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/> + <param name="UNUSED" type="GLuint" padding="true"/> + <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_xoff="xoffset" img_yoff="yoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> <glx rop="4100" large="true"/> </function> @@ -3994,7 +3996,8 @@ <param name="depth" type="GLsizei"/> <param name="format" type="GLenum"/> <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/> + <param name="UNUSED" type="GLuint" padding="true"/> + <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> <glx rop="4115" large="true"/> </function> @@ -8061,6 +8064,7 @@ <param name="depth" type="GLsizei"/> <param name="format" type="GLenum"/> <param name="type" type="GLenum"/> + <param name="UNUSED" type="GLuint" padding="true"/> <param name="pixels" type="const GLvoid *"/> </function> </category> @@ -8092,6 +8096,7 @@ <param name="width" type="GLsizei"/> <param name="format" type="GLenum"/> <param name="type" type="GLenum"/> + <param name="UNUSED" type="GLuint" padding="true"/> <param name="pixels" type="const GLvoid *"/> </function> @@ -8104,6 +8109,7 @@ <param name="height" type="GLsizei"/> <param name="format" type="GLenum"/> <param name="type" type="GLenum"/> + <param name="UNUSED" type="GLuint" padding="true"/> <param name="pixels" type="const GLvoid *"/> </function> </category> @@ -8627,7 +8633,8 @@ <param name="size4d" type="GLsizei"/> <param name="format" type="GLenum"/> <param name="type" type="GLenum"/> - <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_extent="size4d" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_woff="woffset" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/> + <param name="UNUSED" type="GLuint" padding="true"/> + <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_extent="size4d" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_woff="woffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/> <glx rop="2058" ignore="true"/> </function> </category> diff --git a/src/mesa/glapi/gl_XML.py b/src/mesa/glapi/gl_XML.py index b7a7388400..b98919134f 100644 --- a/src/mesa/glapi/gl_XML.py +++ b/src/mesa/glapi/gl_XML.py @@ -309,6 +309,9 @@ def create_parameter_string(parameters, include_names): list = [] for p in parameters: + if p.is_padding: + continue + if include_names: list.append( p.string() ) else: @@ -463,6 +466,7 @@ class gl_parameter: self.img_null_flag = is_attr_true( element, 'img_null_flag' ) self.img_send_null = is_attr_true( element, 'img_send_null' ) + self.is_padding = is_attr_true( element, 'padding' ) return diff --git a/src/mesa/glapi/gl_apitemp.py b/src/mesa/glapi/gl_apitemp.py index 6e35571e14..a37c08d6ce 100644 --- a/src/mesa/glapi/gl_apitemp.py +++ b/src/mesa/glapi/gl_apitemp.py @@ -63,6 +63,9 @@ class PrintGlOffsets(gl_XML.gl_print_base): n = f.static_name(name) for p in f.parameterIterator(): + if p.is_padding: + continue + if p.is_pointer(): cast = "(const void *) " else: diff --git a/src/mesa/glapi/gl_x86_asm.py b/src/mesa/glapi/gl_x86_asm.py index 651cb03f14..0dbf3ebe0a 100644 --- a/src/mesa/glapi/gl_x86_asm.py +++ b/src/mesa/glapi/gl_x86_asm.py @@ -44,6 +44,9 @@ class PrintGenericStubs(gl_XML.gl_print_base): def get_stack_size(self, f): size = 0 for p in f.parameterIterator(): + if p.is_padding: + continue + size += p.get_stack_size() return size diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 2a54ff7ff9..d8e8b559f5 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -383,6 +383,18 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, t->_Complete = GL_TRUE; /* be optimistic */ + /* Detect cases where the application set the base level to an invalid + * value. + */ + if ((baseLevel < 0) || (baseLevel > MAX_TEXTURE_LEVELS)) { + char s[100]; + _mesa_sprintf(s, "obj %p (%d) base level = %d is invalid", + (void *) t, t->Name, baseLevel); + incomplete(t, s); + t->_Complete = GL_FALSE; + return; + } + /* Always need the base level image */ if (!t->Image[0][baseLevel]) { char s[100]; diff --git a/src/mesa/osmesa.pc.in b/src/mesa/osmesa.pc.in new file mode 100644 index 0000000000..05327f40aa --- /dev/null +++ b/src/mesa/osmesa.pc.in @@ -0,0 +1,12 @@ +prefix=@INSTALL_DIR@ +exec_prefix=${prefix} +libdir=@INSTALL_LIB_DIR@ +includedir=@INSTALL_INC_DIR@ + +Name: osmesa +Description: Mesa Off-screen Rendering library +Requires: @OSMESA_PC_REQ@ +Version: @VERSION@ +Libs: -L${libdir} -l@OSMESA_LIB@ +Libs.private: @OSMESA_PC_LIB_PRIV@ +Cflags: -I${includedir} diff --git a/src/mesa/tnl/t_vertex_generic.c b/src/mesa/tnl/t_vertex_generic.c index f763522f91..9812f8c808 100644 --- a/src/mesa/tnl/t_vertex_generic.c +++ b/src/mesa/tnl/t_vertex_generic.c @@ -113,7 +113,7 @@ static INLINE void insert_3f_viewport_2( const struct tnl_clipspace_attr *a, GLu DEBUG_INSERT; out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[5] * in[1] + vp[13]; - out[2] = vp[10] * in[2] + vp[14]; + out[2] = vp[14]; } static INLINE void insert_3f_viewport_1( const struct tnl_clipspace_attr *a, GLubyte *v, diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index 76043bd1b5..7a255d680a 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -146,7 +146,8 @@ static void emit_load3f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_1(p, dest, arg0); + /* Loading from memory erases the upper bits. */ + sse_movss(&p->func, dest, arg0); } static void emit_load2f_2( struct x86_program *p, @@ -160,7 +161,8 @@ static void emit_load2f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_1(p, dest, arg0); + /* Loading from memory erases the upper bits. */ + sse_movss(&p->func, dest, arg0); } static void emit_load1f_1( struct x86_program *p, @@ -352,6 +354,7 @@ static GLboolean build_vertex_emit( struct x86_program *p ) struct x86_reg temp = x86_make_reg(file_XMM, 0); struct x86_reg vp0 = x86_make_reg(file_XMM, 1); struct x86_reg vp1 = x86_make_reg(file_XMM, 2); + struct x86_reg temp2 = x86_make_reg(file_XMM, 3); GLubyte *fixup, *label; /* Push a few regs? @@ -524,7 +527,8 @@ static GLboolean build_vertex_emit( struct x86_program *p ) sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); get_src_ptr(p, srcECX, vtxESI, &a[1]); - emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); + emit_load(p, temp2, 1, x86_deref(srcECX), a[1].inputsize); + sse_movss(&p->func, temp, temp2); update_src_ptr(p, srcECX, vtxESI, &a[1]); /* Rearrange and possibly do BGR conversion: @@ -539,8 +543,8 @@ static GLboolean build_vertex_emit( struct x86_program *p ) } else { _mesa_printf("Can't emit 3ub\n"); + return GL_FALSE; /* add this later */ } - return GL_FALSE; /* add this later */ break; case EMIT_4UB_4F_RGBA: |