diff options
Diffstat (limited to 'src/mesa/drivers')
61 files changed, 1365 insertions, 850 deletions
diff --git a/src/mesa/drivers/dri/common/dri_bufmgr.c b/src/mesa/drivers/dri/common/dri_bufmgr.c index 370b56c3a3..65d6545965 100644 --- a/src/mesa/drivers/dri/common/dri_bufmgr.c +++ b/src/mesa/drivers/dri/common/dri_bufmgr.c @@ -431,7 +431,7 @@ void driBOCreateList(int target, drmBOList * list) { _glthread_LOCK_MUTEX(bmMutex); - BM_CKFATAL(drmBOCreateList(20, list)); + BM_CKFATAL(drmBOCreateList(target, list)); _glthread_UNLOCK_MUTEX(bmMutex); } diff --git a/src/mesa/drivers/dri/i810/i810context.c b/src/mesa/drivers/dri/i810/i810context.c index db8f7a19a2..f0332d9ae8 100644 --- a/src/mesa/drivers/dri/i810/i810context.c +++ b/src/mesa/drivers/dri/i810/i810context.c @@ -65,6 +65,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #include "extension_helper.h" #ifndef I810_DEBUG @@ -129,6 +130,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_texture_env_combine", NULL }, { "GL_ARB_texture_env_crossbar", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, { "GL_EXT_texture_env_combine", NULL }, diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 3ba792a234..694cd4c8c3 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -138,7 +138,7 @@ /* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ -#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<19) | 0x2) +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 5692289336..d126208ce0 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -813,7 +813,7 @@ static void i915_init_packets( i915ContextPtr i915 ) I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | - (4)); + (3)); i915->state.Ctx[I915_CTXREG_LIS2] = 0; i915->state.Ctx[I915_CTXREG_LIS4] = 0; i915->state.Ctx[I915_CTXREG_LIS5] = 0; diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 82a421b961..c920fc47ea 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -180,7 +180,7 @@ static void i915_emit_invarient_state( intelContextPtr intel ) */ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | - (1)); + (0)); OUT_BATCH(0); /* XXX: Use this */ diff --git a/src/mesa/drivers/dri/i915tex/i915_reg.h b/src/mesa/drivers/dri/i915tex/i915_reg.h index 04b199905c..34c6821405 100644 --- a/src/mesa/drivers/dri/i915tex/i915_reg.h +++ b/src/mesa/drivers/dri/i915tex/i915_reg.h @@ -138,7 +138,7 @@ /* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ -#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<19) | 0x2) +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) diff --git a/src/mesa/drivers/dri/i915tex/i915_state.c b/src/mesa/drivers/dri/i915tex/i915_state.c index 4816afc934..c2c2ff366f 100644 --- a/src/mesa/drivers/dri/i915tex/i915_state.c +++ b/src/mesa/drivers/dri/i915tex/i915_state.c @@ -859,7 +859,7 @@ i915_init_packets(struct i915_context *i915) i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(4) | - I1_LOAD_S(5) | I1_LOAD_S(6) | (4)); + I1_LOAD_S(5) | I1_LOAD_S(6) | (3)); i915->state.Ctx[I915_CTXREG_LIS2] = 0; i915->state.Ctx[I915_CTXREG_LIS4] = 0; i915->state.Ctx[I915_CTXREG_LIS5] = 0; diff --git a/src/mesa/drivers/dri/i915tex/intel_buffers.c b/src/mesa/drivers/dri/i915tex/intel_buffers.c index 45fd2fa8de..62ff54b007 100644 --- a/src/mesa/drivers/dri/i915tex/intel_buffers.c +++ b/src/mesa/drivers/dri/i915tex/intel_buffers.c @@ -327,6 +327,9 @@ intelWindowMoved(struct intel_context *intel) } for (i = 0; i < intel_fb->pf_num_pages; i++) { + if (!intel_fb->color_rb[i]) + continue; + vbl.request.sequence = intel_fb->color_rb[i]->vbl_pending; drmWaitVBlank(intel->driFd, &vbl); } @@ -336,7 +339,8 @@ intelWindowMoved(struct intel_context *intel) intel_fb->vbl_waited = intel_fb->vbl_seq; for (i = 0; i < intel_fb->pf_num_pages; i++) { - intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited; + if (intel_fb->color_rb[i]) + intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited; } } } else { diff --git a/src/mesa/drivers/dri/i915tex/intel_context.c b/src/mesa/drivers/dri/i915tex/intel_context.c index 5c2cdf0c7d..acda7b1c16 100644 --- a/src/mesa/drivers/dri/i915tex/intel_context.c +++ b/src/mesa/drivers/dri/i915tex/intel_context.c @@ -581,11 +581,7 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, } /* set GLframebuffer size to match window, if needed */ - if (intel_fb->Base.Width != driDrawPriv->w) { - _mesa_resize_framebuffer(&intel->ctx, &intel_fb->Base, - driDrawPriv->w, driDrawPriv->h); - } - if (readFb->Width != driReadPriv->w) { + if (driReadPriv != driDrawPriv && readFb->Width != driReadPriv->w) { _mesa_resize_framebuffer(&intel->ctx, readFb, driReadPriv->w, driReadPriv->h); } diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index c3ffa9e657..9d4b9867d2 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -148,8 +148,9 @@ brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat, return &_mesa_texformat_ycbcr_rev; case GL_COMPRESSED_RGB_FXT1_3DFX: + return &_mesa_texformat_rgb_fxt1; case GL_COMPRESSED_RGBA_FXT1_3DFX: - return &_mesa_texformat_rgb_fxt1; + return &_mesa_texformat_rgba_fxt1; case GL_RGB_S3TC: case GL_RGB4_S3TC: diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index 21e961ce9c..35adc4846a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -849,14 +849,13 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, /* Calculate spot attenuation: */ if (!p->state->unit[i].light_spotcutoff_is_180) { - struct ureg spot_dir = register_param3(p, STATE_LIGHT, i, - STATE_SPOT_DIRECTION); + struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, + STATE_SPOT_DIR_NORMALIZED, i); struct ureg spot = get_temp(p); struct ureg slt = get_temp(p); - - emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! */ - emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot); - emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot); + + emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot_dir_norm); + emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); emit_op2(p, OPCODE_MUL, att, 0, slt, spot); @@ -894,7 +893,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, /* Need to add some addtional parameters to allow lighting in object - * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye + * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye * space lighting. */ static void build_lighting( struct tnl_program *p ) @@ -987,7 +986,14 @@ static void build_lighting( struct tnl_program *p ) */ VPpli = register_param3(p, STATE_LIGHT, i, STATE_POSITION_NORMALIZED); - half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR); + if (p->state->light_local_viewer) { + struct ureg eye_hat = get_eye_position_normalized(p); + half = get_temp(p); + emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); + emit_normalize_vec3(p, half, half); + } else { + half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR); + } } else { struct ureg Ppli = register_param3(p, STATE_LIGHT, i, @@ -1158,7 +1164,8 @@ static void build_fog( struct tnl_program *p ) if (p->state->fog_option && p->state->tnl_do_vertex_fog) { - struct ureg params = register_param1(p, STATE_FOG_PARAMS); + struct ureg params = register_param2(p, STATE_INTERNAL, + STATE_FOG_PARAMS_OPTIMIZED); struct ureg tmp = get_temp(p); struct ureg id = get_identity_param(p); @@ -1166,24 +1173,21 @@ static void build_fog( struct tnl_program *p ) switch (p->state->fog_option) { case FOG_LINEAR: { - emit_op1(p, OPCODE_ABS, tmp, 0, input); - emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), tmp); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); + emit_op1(p, OPCODE_ABS, tmp, 0, input); + emit_op3(p, OPCODE_MAD, tmp, 0, tmp, swizzle1(params,X), swizzle1(params,Y)); emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); break; } case FOG_EXP: emit_op1(p, OPCODE_ABS, tmp, 0, input); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); - emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, - register_const1f(p, M_E), ureg_negate(tmp)); + emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z)); + emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp)); break; case FOG_EXP2: - emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); + emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W)); emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); - emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, - register_const1f(p, M_E), ureg_negate(tmp)); + emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp)); break; } diff --git a/src/mesa/drivers/dri/mach64/mach64_context.c b/src/mesa/drivers/dri/mach64/mach64_context.c index 5a6c301da2..ad661e198c 100644 --- a/src/mesa/drivers/dri/mach64/mach64_context.c +++ b/src/mesa/drivers/dri/mach64/mach64_context.c @@ -58,6 +58,7 @@ #include "vblank.h" #define need_GL_ARB_multisample +#define need_GL_ARB_vertex_buffer_object #include "extension_helper.h" #ifndef MACH64_DEBUG @@ -83,6 +84,7 @@ const struct dri_extension card_extensions[] = { { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_multitexture", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_texture_edge_clamp", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_SGIS_generate_mipmap", NULL }, diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c index 67a6f8bdf0..ca2c8faa18 100644 --- a/src/mesa/drivers/dri/mga/mga_xmesa.c +++ b/src/mesa/drivers/dri/mga/mga_xmesa.c @@ -72,6 +72,7 @@ #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_ARB_vertex_program #define need_GL_EXT_fog_coord #define need_GL_EXT_multi_draw_arrays @@ -401,6 +402,7 @@ static const struct dri_extension card_extensions[] = { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_rectangle", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_blend_logic_op", NULL }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index 627679a147..8e11eb6134 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -352,6 +352,7 @@ static void nouveauDoSwapBuffers(nouveauContextPtr nmesa, OUT_RING (((box->y2 - box->y1) << 16) | (box->x2 - box->x1)); } + FIRE_RING(); UNLOCK_HARDWARE(nmesa); #endif diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h index f79a8675f4..87e4479da3 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h @@ -45,6 +45,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "xmlconfig.h" typedef struct nouveau_fifo_t{ + int channel; u_int32_t* buffer; u_int32_t* mmio; u_int32_t put_base; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fifo.c b/src/mesa/drivers/dri/nouveau/nouveau_fifo.c index 67b5aa4f8a..bd2b2eddd0 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fifo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fifo.c @@ -124,6 +124,7 @@ GLboolean nouveauFifoInit(nouveauContextPtr nmesa) } /* Setup our initial FIFO tracking params */ + nmesa->fifo.channel = fifo_init.channel; nmesa->fifo.put_base = fifo_init.put_base; nmesa->fifo.current = 0; nmesa->fifo.put = 0; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index 468b18e6d9..b71acff430 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -10,8 +10,9 @@ GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, drm_nouveau_object_init_t cto; int ret; - cto.handle = handle; - cto.class = class; + cto.channel = nmesa->fifo.channel; + cto.handle = handle; + cto.class = class; ret = drmCommandWrite(nmesa->driFd, DRM_NOUVEAU_OBJECT_INIT, &cto, sizeof(cto)); return ret == 0; @@ -28,12 +29,13 @@ GLboolean nouveauCreateDmaObject(nouveauContextPtr nmesa, drm_nouveau_dma_object_init_t dma; int ret; - dma.class = class; - dma.handle = handle; - dma.target = target; - dma.access = access; - dma.offset = offset; - dma.size = size; + dma.channel = nmesa->fifo.channel; + dma.class = class; + dma.handle = handle; + dma.target = target; + dma.access = access; + dma.offset = offset; + dma.size = size; ret = drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_DMA_OBJECT_INIT, &dma, sizeof(dma)); return ret == 0; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.c b/src/mesa/drivers/dri/nouveau/nouveau_screen.c index e00080fce1..65bde99671 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_screen.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.c @@ -328,7 +328,7 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIsc static const __DRIversion ddx_expected = { 1, 2, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 4 +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 5 #error nouveau_drm.h version doesn't match expected version #endif dri_interface = interface; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c index bee8d5a935..b6837c5de1 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c @@ -49,6 +49,7 @@ static void nouveauBindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) { + NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog); } static struct gl_program * @@ -56,7 +57,10 @@ nouveauNewProgram(GLcontext *ctx, GLenum target, GLuint id) { nouveauShader *nvs; + NVSDBG("target=%s, id=%d\n", _mesa_lookup_enum_by_nr(target), id); + nvs = CALLOC_STRUCT(_nouveauShader); + NVSDBG("prog=%p\n", nvs); switch (target) { case GL_VERTEX_PROGRAM_ARB: return _mesa_init_vertex_program(ctx, &nvs->mesa.vp, target, id); @@ -76,6 +80,8 @@ nouveauDeleteProgram(GLcontext *ctx, struct gl_program *prog) { nouveauShader *nvs = (nouveauShader *)prog; + NVSDBG("prog=%p\n", prog); + if (nvs->translated) FREE(nvs->program); _mesa_delete_program(ctx, prog); @@ -87,9 +93,13 @@ nouveauProgramStringNotify(GLcontext *ctx, GLenum target, { nouveauShader *nvs = (nouveauShader *)prog; + NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog); + if (nvs->translated) FREE(nvs->program); - nvs->translated = 0; + + nvs->error = GL_FALSE; + nvs->translated = GL_FALSE; _tnl_program_string(ctx, target, prog); } @@ -99,6 +109,8 @@ nouveauIsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { nouveauShader *nvs = (nouveauShader *)prog; + NVSDBG("target=%s, prog=%p\n", _mesa_lookup_enum_by_nr(target), prog); + return nvs->translated; } @@ -109,6 +121,8 @@ nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs) struct gl_program_parameter_list *plist; int i; + NVSDBG("prog=%p\n", nvs); + /* Translate to HW format now if necessary */ if (!nvs->translated) { /* Mesa ASM shader -> nouveauShader */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index b2df3546f6..7125a2ae82 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -4,6 +4,12 @@ #include "mtypes.h" #include "bufferobj.h" +#define NVSDBG(fmt, args...) do { \ + if (NOUVEAU_DEBUG & DEBUG_SHADERS) { \ + fprintf(stderr, "%s: "fmt, __func__, ##args); \ + } \ +} while(0) + typedef struct _nvsFunc nvsFunc; #define NVS_MAX_TEMPS 32 @@ -45,6 +51,7 @@ typedef struct _nouveauShader { nvsFunc *func; /* State of the final program */ + GLboolean error; GLboolean translated; GLboolean on_hardware; unsigned int *program; @@ -418,6 +425,12 @@ nvsSwizzle(nvsRegister reg, nvsSwzComp x, nvsSwzComp y, return reg; } +#define nvsProgramError(nvs,fmt,args...) do { \ + fprintf(stderr, "nvsProgramError (%s): "fmt, __func__, ##args); \ + (nvs)->error = GL_TRUE; \ + (nvs)->translated = GL_FALSE; \ +} while(0) + extern GLboolean nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs); extern void nvsDisasmHWShader(nvsPtr); extern void nvsDumpFragmentList(nvsFragmentHeader *f, int lvl); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index abba59dc0c..8c203cc664 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -264,6 +264,23 @@ pass0_make_mask(GLuint mesa_mask) return mask; } +static GLboolean +pass0_opcode_is_tex(enum prog_opcode op) +{ + switch (op) { + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXD: + case OPCODE_TXL: + case OPCODE_TXP: + return GL_TRUE; + default: + break; + } + + return GL_FALSE; +} + static nvsTexTarget pass0_make_tex_target(GLuint mesa) { @@ -724,7 +741,11 @@ pass0_translate_arith(nouveauShader *nvs, struct gl_program *prog, (inst->SaturateMode != SATURATE_OFF), src[0], src[1], src[2]); nvsinst->tex_unit = inst->TexSrcUnit; - nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget); + if (pass0_opcode_is_tex(inst->Opcode)) + nvsinst->tex_target = + pass0_make_tex_target(inst->TexSrcTarget); + else + nvsinst->tex_target = NVS_TEX_TARGET_UNKNOWN; ret = GL_TRUE; } else @@ -907,7 +928,7 @@ pass0_rebase_mesa_consts(nouveauShader *nvs) } } -static void +static GLboolean pass0_resolve_mesa_consts(nouveauShader *nvs) { struct pass0_rec *rec = nvs->pass_rec; @@ -928,6 +949,11 @@ pass0_resolve_mesa_consts(nouveauShader *nvs) for (i=0; i<plist->NumParameters; i++) { int hw = rec->mesa_const_base + i; + if (hw > NVS_MAX_CONSTS) { + nvsProgramError(nvs, "hw = %d > NVS_MAX_CONSTS!\n", hw); + return GL_FALSE; + } + switch (plist->Parameters[i].Type) { case PROGRAM_NAMED_PARAM: case PROGRAM_STATE_VAR: @@ -941,10 +967,13 @@ pass0_resolve_mesa_consts(nouveauShader *nvs) COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]); break; default: - assert(0); - break; + nvsProgramError(nvs, "hit bad type=%d on param %d\n", + plist->Parameters[i].Type, i); + return GL_FALSE; } } + + return GL_TRUE; } GLboolean @@ -957,6 +986,16 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) struct pass0_rec *rec; int ret = GL_FALSE; + NVSDBG("start: nvs=%p\n", nvs); + + /* Previously detected an error, and haven't recieved new program + * string, so fail immediately. + */ + if (nvs->error) { + NVSDBG("failed previous compile attempt, not retrying\n"); + return GL_FALSE; + } + rec = CALLOC_STRUCT(pass0_rec); if (!rec) return GL_FALSE; @@ -1001,7 +1040,8 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) ret = pass0_translate_instructions(nvs, 0, 0, nvs->program_tree); if (ret) - pass0_resolve_mesa_consts(nvs); + ret = pass0_resolve_mesa_consts(nvs); + /*XXX: if (!ret) DESTROY TREE!!! */ FREE(rec); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c index 90c57d3807..78c1401f7d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c @@ -2,11 +2,13 @@ #include "macros.h" #include "enums.h" +#include "nouveau_context.h" #include "nouveau_shader.h" GLboolean nouveau_shader_pass1(nvsPtr nvs) { + NVSDBG("start: nvs=%p\n", nvs); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c index 6eb9de4776..cd27daca88 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -210,6 +210,8 @@ nouveau_shader_pass2(nvsPtr nvs) struct pass2_rec *rec; int i; + NVSDBG("start: nvs=%p\n", nvs); + rec = calloc(1, sizeof(struct pass2_rec)); for (i=0; i<NVS_MAX_TEMPS; i++) rec->temps[i] = -1; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_sync.c b/src/mesa/drivers/dri/nouveau/nouveau_sync.c index 428b19b46e..30e6696269 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_sync.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_sync.c @@ -124,7 +124,7 @@ nouveau_notifier_wait_status(nouveau_notifier *notifier, GLuint id, while (time <= timeout) { if (n[NV_NOTIFY_STATE/4] & NV_NOTIFY_STATE_ERROR_CODE_MASK) { MESSAGE("Notifier returned error: 0x%04x\n", - n[NV_NOTIFY_STATE] & + n[NV_NOTIFY_STATE/4] & NV_NOTIFY_STATE_ERROR_CODE_MASK); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/r128/r128_context.c b/src/mesa/drivers/dri/r128/r128_context.c index 89ddafa02a..95e54a6af5 100644 --- a/src/mesa/drivers/dri/r128/r128_context.c +++ b/src/mesa/drivers/dri/r128/r128_context.c @@ -68,6 +68,7 @@ int R128_DEBUG = 0; #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color @@ -80,6 +81,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_env_add", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_texture_edge_clamp", NULL }, diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index fc6eb93daa..3abcdf9e18 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -673,11 +673,13 @@ r200MakeCurrent( __DRIcontextPrivate *driContextPriv, &newCtx->vbl_seq ); } + newCtx->dri.readable = driReadPriv; + if ( newCtx->dri.drawable != driDrawPriv || - newCtx->dri.readable != driReadPriv ) { + newCtx->lastStamp != driDrawPriv->lastStamp ) { newCtx->dri.drawable = driDrawPriv; - newCtx->dri.readable = driReadPriv; + r200SetCliprects(newCtx); r200UpdateWindow( newCtx->glCtx ); r200UpdateViewportOffset( newCtx->glCtx ); } diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c index bcc0c91639..9ffdb2b212 100644 --- a/src/mesa/drivers/dri/r200/r200_lock.c +++ b/src/mesa/drivers/dri/r200/r200_lock.c @@ -92,13 +92,9 @@ void r200GetLock( r200ContextPtr rmesa, GLuint flags ) if ( rmesa->lastStamp != drawable->lastStamp ) { r200UpdatePageFlipping( rmesa ); - if (rmesa->glCtx->DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) - r200SetCliprects( rmesa, GL_BACK_LEFT ); - else - r200SetCliprects( rmesa, GL_FRONT_LEFT ); + r200SetCliprects( rmesa ); r200UpdateViewportOffset( rmesa->glCtx ); driUpdateFramebufferSize(rmesa->glCtx, drawable); - rmesa->lastStamp = drawable->lastStamp; } R200_STATECHANGE( rmesa, ctx ); diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index bdb487f2b9..16726d7d55 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -1691,6 +1691,11 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 + +/** + * Called when window size or position changes or viewport or depth range + * state is changed. We update the hardware viewport state here. + */ void r200UpdateWindow( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); @@ -1843,19 +1848,18 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode ) } -void r200SetCliprects( r200ContextPtr rmesa, GLenum mode ) +/* + * Set up the cliprects for either front or back-buffer drawing. + */ +void r200SetCliprects( r200ContextPtr rmesa ) { __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; __DRIdrawablePrivate *const readable = rmesa->dri.readable; GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; - switch ( mode ) { - case GL_FRONT_LEFT: - rmesa->numClipRects = drawable->numClipRects; - rmesa->pClipRects = drawable->pClipRects; - break; - case GL_BACK_LEFT: + if (draw_fb->_ColorDrawBufferMask[0] + == BUFFER_BIT_BACK_LEFT) { /* Can't ignore 2d windows if we are page flipping. */ if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { @@ -1866,11 +1870,12 @@ void r200SetCliprects( r200ContextPtr rmesa, GLenum mode ) rmesa->numClipRects = drawable->numBackClipRects; rmesa->pClipRects = drawable->pBackClipRects; } - break; - default: - fprintf(stderr, "bad mode in r200SetCliprects\n"); - return; } + else { + /* front buffer (or none, or multiple buffers) */ + rmesa->numClipRects = drawable->numClipRects; + rmesa->pClipRects = drawable->pClipRects; + } if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) { _mesa_resize_framebuffer(rmesa->glCtx, draw_fb, @@ -1889,6 +1894,8 @@ void r200SetCliprects( r200ContextPtr rmesa, GLenum mode ) if (rmesa->state.scissor.enabled) r200RecalcScissorRects( rmesa ); + + rmesa->lastStamp = drawable->lastStamp; } @@ -1908,19 +1915,17 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode ) */ switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { case BUFFER_BIT_FRONT_LEFT: - FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE ); - r200SetCliprects( rmesa, GL_FRONT_LEFT ); - break; case BUFFER_BIT_BACK_LEFT: FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE ); - r200SetCliprects( rmesa, GL_BACK_LEFT ); break; default: - /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */ + /* 0 (GL_NONE) buffers or multiple color drawing buffers */ FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE ); return; } + r200SetCliprects( rmesa ); + /* We'll set the drawing engine's offset/pitch parameters later * when we update other state. */ diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h index 98c6fbe40b..f34090b619 100644 --- a/src/mesa/drivers/dri/r200/r200_state.h +++ b/src/mesa/drivers/dri/r200/r200_state.h @@ -44,7 +44,7 @@ extern void r200InitTnlFuncs( GLcontext *ctx ); extern void r200UpdateMaterial( GLcontext *ctx ); -extern void r200SetCliprects( r200ContextPtr rmesa, GLenum mode ); +extern void r200SetCliprects( r200ContextPtr rmesa ); extern void r200RecalcScissorRects( r200ContextPtr rmesa ); extern void r200UpdateViewportOffset( GLcontext *ctx ); extern void r200UpdateWindow( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 2ee2328934..d4bf0ae892 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -39,7 +39,7 @@ DRIVER_SOURCES = \ r300_texmem.c \ r300_tex.c \ r300_texstate.c \ - r300_vertexprog.c \ + r300_vertprog.c \ r300_fragprog.c \ r300_shader.c \ r300_maos.c \ diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 2c7b5aa011..0fb2e5a2e0 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -174,11 +174,12 @@ static __inline__ void r300DoEmitState(r300ContextPtr r300, GLboolean dirty) dest ++; r300->cmdbuf.count_used ++; + /* Emit cache flush */ *dest = cmdpacket0(R300_TX_CNTL, 1); dest ++; r300->cmdbuf.count_used ++; - *dest = 0x0; + *dest = R300_TX_FLUSH; dest ++; r300->cmdbuf.count_used ++; @@ -291,14 +292,14 @@ void r300InitCmdBuf(r300ContextPtr r300) /* Initialize state atoms */ ALLOC_STATE( vpt, always, R300_VPT_CMDSIZE, "vpt", 0 ); r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); - ALLOC_STATE( unk2080, always, 2, "unk2080", 0 ); - r300->hw.unk2080.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); + ALLOC_STATE( vap_cntl, always, 2, "vap_cntl", 0 ); + r300->hw.vap_cntl.cmd[0] = cmdpacket0(R300_VAP_CNTL, 1); ALLOC_STATE( vte, always, 3, "vte", 0 ); r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2); ALLOC_STATE( unk2134, always, 3, "unk2134", 0 ); r300->hw.unk2134.cmd[0] = cmdpacket0(0x2134, 2); - ALLOC_STATE( unk2140, always, 2, "unk2140", 0 ); - r300->hw.unk2140.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); + ALLOC_STATE( vap_cntl_status, always, 2, "vap_cntl_status", 0 ); + r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); ALLOC_STATE( vir[0], variable, R300_VIR_CMDSIZE, "vir/0", 0 ); r300->hw.vir[0].cmd[R300_VIR_CMD_0] = cmdpacket0(R300_VAP_INPUT_ROUTE_0_0, 1); ALLOC_STATE( vir[1], variable, R300_VIR_CMDSIZE, "vir/1", 1 ); @@ -335,18 +336,18 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.lcntl.cmd[0] = cmdpacket0(R300_RE_LINE_CNT, 1); ALLOC_STATE( unk4260, always, 4, "unk4260", 0 ); r300->hw.unk4260.cmd[0] = cmdpacket0(0x4260, 3); - ALLOC_STATE( unk4274, always, 5, "unk4274", 0 ); - r300->hw.unk4274.cmd[0] = cmdpacket0(R300_RE_SHADE, 4); - ALLOC_STATE( unk4288, always, 4, "unk4288", 0 ); - r300->hw.unk4288.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3); + ALLOC_STATE( shade, always, 5, "shade", 0 ); + r300->hw.shade.cmd[0] = cmdpacket0(R300_RE_SHADE, 4); + ALLOC_STATE( polygon_mode, always, 4, "polygon_mode", 0 ); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_RE_POLYGON_MODE, 3); ALLOC_STATE( fogp, always, 3, "fogp", 0 ); r300->hw.fogp.cmd[0] = cmdpacket0(R300_RE_FOG_SCALE, 2); - ALLOC_STATE( unk42A0, always, 2, "unk42A0", 0 ); - r300->hw.unk42A0.cmd[0] = cmdpacket0(0x42A0, 1); + ALLOC_STATE( zbias_cntl, always, 2, "zbias_cntl", 0 ); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_RE_ZBIAS_CNTL, 1); ALLOC_STATE( zbs, always, R300_ZBS_CMDSIZE, "zbs", 0 ); r300->hw.zbs.cmd[R300_ZBS_CMD_0] = cmdpacket0(R300_RE_ZBIAS_T_FACTOR, 4); - ALLOC_STATE( unk42B4, always, 2, "unk42B4", 0 ); - r300->hw.unk42B4.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1); + ALLOC_STATE( occlusion_cntl, always, 2, "occlusion_cntl", 0 ); + r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_RE_OCCLUSION_CNTL, 1); ALLOC_STATE( cul, always, R300_CUL_CMDSIZE, "cul", 0 ); r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_RE_CULL_CNTL, 1); ALLOC_STATE( unk42C0, always, 3, "unk42C0", 0 ); @@ -392,8 +393,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2); ALLOC_STATE( cmk, always, R300_CMK_CMDSIZE, "cmk", 0 ); r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(R300_RB3D_COLORMASK, 1); - ALLOC_STATE( unk4E10, always, 4, "unk4E10", 0 ); - r300->hw.unk4E10.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3); + ALLOC_STATE( blend_color, always, 4, "blend_color", 0 ); + r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 3); ALLOC_STATE( cb, always, R300_CB_CMDSIZE, "cb", 0 ); r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); @@ -405,8 +406,8 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.unk4EA0.cmd[0] = cmdpacket0(0x4EA0, 2); ALLOC_STATE( zs, always, R300_ZS_CMDSIZE, "zstencil", 0 ); r300->hw.zs.cmd[R300_ZS_CMD_0] = cmdpacket0(R300_RB3D_ZSTENCIL_CNTL_0, 3); - ALLOC_STATE( unk4F10, always, 5, "unk4F10", 0 ); - r300->hw.unk4F10.cmd[0] = cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4); + ALLOC_STATE( zstencil_format, always, 5, "zstencil_format", 0 ); + r300->hw.zstencil_format.cmd[0] = cmdpacket0(R300_RB3D_ZSTENCIL_FORMAT, 4); ALLOC_STATE( zb, always, R300_ZB_CMDSIZE, "zb", 0 ); r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_RB3D_DEPTHOFFSET, 2); ALLOC_STATE( unk4F28, always, 2, "unk4F28", 0 ); @@ -429,8 +430,8 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE( tex.filter, variable, mtu+1, "tex_filter", 0 ); r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER_0, 0); - ALLOC_STATE( tex.unknown1, variable, mtu+1, "tex_unknown1", 0 ); - r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, 0); + ALLOC_STATE( tex.filter_1, variable, mtu+1, "tex_filter_1", 0 ); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, 0); ALLOC_STATE( tex.size, variable, mtu+1, "tex_size", 0 ); r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0); @@ -444,8 +445,8 @@ void r300InitCmdBuf(r300ContextPtr r300) ALLOC_STATE( tex.offset, variable, mtu+1, "tex_offset", 0 ); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, 0); - ALLOC_STATE( tex.unknown4, variable, mtu+1, "tex_unknown4", 0 ); - r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, 0); + ALLOC_STATE( tex.chroma_key, variable, mtu+1, "tex_chroma_key", 0 ); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, 0); ALLOC_STATE( tex.border_color, variable, mtu+1, "tex_border_color", 0 ); r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_BORDER_COLOR_0, 0); @@ -456,10 +457,10 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.atomlist.name = "atom-list"; insert_at_tail(&r300->hw.atomlist, &r300->hw.vpt); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2080); + insert_at_tail(&r300->hw.atomlist, &r300->hw.vap_cntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.vte); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2134); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk2140); + insert_at_tail(&r300->hw.atomlist, &r300->hw.vap_cntl_status); insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[0]); insert_at_tail(&r300->hw.atomlist, &r300->hw.vir[1]); insert_at_tail(&r300->hw.atomlist, &r300->hw.vic); @@ -478,12 +479,12 @@ void r300InitCmdBuf(r300ContextPtr r300) insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4230); insert_at_tail(&r300->hw.atomlist, &r300->hw.lcntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4260); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4274); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4288); + insert_at_tail(&r300->hw.atomlist, &r300->hw.shade); + insert_at_tail(&r300->hw.atomlist, &r300->hw.polygon_mode); insert_at_tail(&r300->hw.atomlist, &r300->hw.fogp); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42A0); + insert_at_tail(&r300->hw.atomlist, &r300->hw.zbias_cntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.zbs); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42B4); + insert_at_tail(&r300->hw.atomlist, &r300->hw.occlusion_cntl); insert_at_tail(&r300->hw.atomlist, &r300->hw.cul); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk42C0); insert_at_tail(&r300->hw.atomlist, &r300->hw.rc); @@ -506,13 +507,13 @@ void r300InitCmdBuf(r300ContextPtr r300) insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E00); insert_at_tail(&r300->hw.atomlist, &r300->hw.bld); insert_at_tail(&r300->hw.atomlist, &r300->hw.cmk); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E10); + insert_at_tail(&r300->hw.atomlist, &r300->hw.blend_color); insert_at_tail(&r300->hw.atomlist, &r300->hw.cb); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E50); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4E88); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4EA0); insert_at_tail(&r300->hw.atomlist, &r300->hw.zs); - insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F10); + insert_at_tail(&r300->hw.atomlist, &r300->hw.zstencil_format); insert_at_tail(&r300->hw.atomlist, &r300->hw.zb); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F28); insert_at_tail(&r300->hw.atomlist, &r300->hw.unk4F30); @@ -524,12 +525,12 @@ void r300InitCmdBuf(r300ContextPtr r300) insert_at_tail(&r300->hw.atomlist, &r300->hw.vps); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.filter); - insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown1); + insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.filter_1); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.size); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.format); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.pitch); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.offset); - insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.unknown4); + insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.chroma_key); insert_at_tail(&r300->hw.atomlist, &r300->hw.tex.border_color); r300->hw.is_dirty = GL_TRUE; @@ -544,9 +545,9 @@ void r300InitCmdBuf(r300ContextPtr r300) size = 64*256; if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) { - fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%u\n", + fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n", sizeof(drm_r300_cmd_header_t)); - fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%u\n", + fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", sizeof(drm_radeon_cmd_buffer_t)); fprintf(stderr, "Allocating %d bytes command buffer (max state is %d bytes)\n", diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index a339b2f9c1..fe261dbbc6 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -71,7 +71,7 @@ typedef struct r300_context *r300ContextPtr; /* Checkpoint.. for convenience */ #define CPT { fprintf(stderr, "%s:%s line %d\n", __FILE__, __FUNCTION__, __LINE__); } /* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . - I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble + I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble with other compilers ... GLUE! */ #if 1 @@ -180,6 +180,7 @@ struct r300_tex_obj { /* hardware register values */ /* Note that R200 has 8 registers per texture and R300 only 7 */ GLuint filter; + GLuint filter_1; GLuint pitch_reg; GLuint size; /* npot only */ GLuint format; @@ -432,11 +433,11 @@ struct r300_hw_state { int max_state_size; /* in dwords */ struct r300_state_atom vpt; /* viewport (1D98) */ - struct r300_state_atom unk2080; /* (2080) */ + struct r300_state_atom vap_cntl; struct r300_state_atom vof; /* VAP output format register 0x2090 */ struct r300_state_atom vte; /* (20B0) */ struct r300_state_atom unk2134; /* (2134) */ - struct r300_state_atom unk2140; /* (2140) */ + struct r300_state_atom vap_cntl_status; struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */ struct r300_state_atom vic; /* vap input control (2180) */ struct r300_state_atom unk21DC; /* (21DC) */ @@ -452,13 +453,13 @@ struct r300_hw_state { struct r300_state_atom unk4230; /* (4230) */ struct r300_state_atom lcntl; /* line control */ struct r300_state_atom unk4260; /* (4260) */ - struct r300_state_atom unk4274; /* (4274) */ - struct r300_state_atom unk4288; /* (4288) */ + struct r300_state_atom shade; + struct r300_state_atom polygon_mode; struct r300_state_atom fogp; /* fog parameters (4294) */ struct r300_state_atom unk429C; /* (429C) */ - struct r300_state_atom unk42A0; /* (42A0) */ + struct r300_state_atom zbias_cntl; struct r300_state_atom zbs; /* zbias (42A4) */ - struct r300_state_atom unk42B4; /* (42B4) */ + struct r300_state_atom occlusion_cntl; struct r300_state_atom cul; /* cull cntl (42B8) */ struct r300_state_atom unk42C0; /* (42C0) */ struct r300_state_atom rc; /* rs control (4300) */ @@ -478,13 +479,13 @@ struct r300_hw_state { struct r300_state_atom unk4E00; /* (4E00) */ struct r300_state_atom bld; /* blending (4E04) */ struct r300_state_atom cmk; /* colormask (4E0C) */ - struct r300_state_atom unk4E10; /* constant blend color + ??? (4E10) */ + struct r300_state_atom blend_color; /* constant blend color */ struct r300_state_atom cb; /* colorbuffer (4E28) */ struct r300_state_atom unk4E50; /* (4E50) */ struct r300_state_atom unk4E88; /* (4E88) */ struct r300_state_atom unk4EA0; /* (4E88) I saw it only written on RV350 hardware.. */ struct r300_state_atom zs; /* zstencil control (4F00) */ - struct r300_state_atom unk4F10; /* (4F10) */ + struct r300_state_atom zstencil_format; struct r300_state_atom zb; /* z buffer (4F20) */ struct r300_state_atom unk4F28; /* (4F28) */ struct r300_state_atom unk4F30; /* (4F30) */ @@ -501,12 +502,12 @@ struct r300_hw_state { updating the whole thing at once */ struct { struct r300_state_atom filter; - struct r300_state_atom unknown1; + struct r300_state_atom filter_1; struct r300_state_atom size; struct r300_state_atom format; struct r300_state_atom pitch; struct r300_state_atom offset; - struct r300_state_atom unknown4; + struct r300_state_atom chroma_key; struct r300_state_atom border_color; } tex; struct r300_state_atom txe; /* tex enable (4104) */ @@ -547,7 +548,7 @@ struct r300_stencilbuffer_state { /* Perhaps more if we store programs in vmem? */ /* drm_r300_cmd_header_t->vpu->count is unsigned char */ #define VSF_MAX_FRAGMENT_LENGTH (255*4) - + /* Can be tested with colormat currently. */ #define VSF_MAX_FRAGMENT_TEMPS (14) @@ -592,7 +593,7 @@ struct r300_vertex_shader_state { int unknown_ptr2; /* pointer within program space */ int unknown_ptr3; /* pointer within program space */ }; - + extern int hw_tcl_on; //#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) @@ -601,16 +602,16 @@ extern int hw_tcl_on; /* Should but doesnt work */ //#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp) -//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && (OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit)))) : +//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && (OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit)))) : // (r300->state.render_inputs & (_TNL_BIT_TEX0<<(unit)))) -//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit))) : +//#define TMU_ENABLED(ctx, unit) (hw_tcl_on ? ctx->Texture.Unit[unit]._ReallyEnabled && OutputsWritten & (1<<(VERT_RESULT_TEX0+(unit))) : // ctx->Texture.Unit[unit]._ReallyEnabled && r300->state.render_inputs & (_TNL_BIT_TEX0<<(unit))) #define TMU_ENABLED(ctx, unit) (ctx->Texture.Unit[unit]._ReallyEnabled) /* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday. * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly. - */ + */ struct r300_vertex_program_key { GLuint InputsRead; @@ -621,9 +622,9 @@ struct r300_vertex_program { struct r300_vertex_program *next; struct r300_vertex_program_key key; int translated; - + struct r300_vertex_shader_fragment program; - + int pos_end; int num_temporaries; /* Number of temp vars used by program */ int wpos_idx; @@ -646,38 +647,89 @@ struct r300_vertex_program_cont { #define PFS_NUM_TEMP_REGS 32 #define PFS_NUM_CONST_REGS 16 -/* Tracking data for Mesa registers */ +/* Mapping Mesa registers to R300 temporaries */ struct reg_acc { int reg; /* Assigned hw temp */ unsigned int refcount; /* Number of uses by mesa program */ }; +/** + * Describe the current lifetime information for an R300 temporary + */ +struct reg_lifetime { + /* Index of the first slot where this register is free in the sense + that it can be used as a new destination register. + This is -1 if the register has been assigned to a Mesa register + and the last access to the register has not yet been emitted */ + int free; + + /* Index of the first slot where this register is currently reserved. + This is used to stop e.g. a scalar operation from being moved + before the allocation time of a register that was first allocated + for a vector operation. */ + int reserved; + + /* Index of the first slot in which the register can be used as a + source without losing the value that is written by the last + emitted instruction that writes to the register */ + int vector_valid; + int scalar_valid; + + /* Index to the slot where the register was last read. + This is also the first slot in which the register may be written again */ + int vector_lastread; + int scalar_lastread; +}; + + +/** + * Store usage information about an ALU instruction slot during the + * compilation of a fragment program. + */ +#define SLOT_SRC_VECTOR (1<<0) +#define SLOT_SRC_SCALAR (1<<3) +#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) +#define SLOT_OP_VECTOR (1<<16) +#define SLOT_OP_SCALAR (1<<17) +#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) + +struct r300_pfs_compile_slot { + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + defined above */ + unsigned int used; + + /* Selected sources */ + int vsrc[3]; + int ssrc[3]; +}; + +/** + * Store information during compilation of fragment programs. + */ struct r300_pfs_compile_state { - int v_pos, s_pos; /* highest ALU slots used */ - - /* Track some information gathered during opcode - * construction. - * - * NOTE: Data is only set by the code, and isn't used yet. - */ - struct { - int vsrc[3]; - int ssrc[3]; - int umask; - } slot[PFS_MAX_ALU_INST]; - - /* Used to map Mesa's inputs/temps onto hardware temps */ - int temp_in_use; - struct reg_acc temps[PFS_NUM_TEMP_REGS]; - struct reg_acc inputs[32]; /* don't actually need 32... */ - - /* Track usage of hardware temps, for register allocation, - * indirection detection, etc. */ - int hwreg_in_use; - GLuint used_in_node; - GLuint dest_in_node; + int nrslots; /* number of ALU slots used so far */ + + /* Track which (parts of) slots are already filled with instructions */ + struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; + + /* Track the validity of R300 temporaries */ + struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; + + /* Used to map Mesa's inputs/temps onto hardware temps */ + int temp_in_use; + struct reg_acc temps[PFS_NUM_TEMP_REGS]; + struct reg_acc inputs[32]; /* don't actually need 32... */ + + /* Track usage of hardware temps, for register allocation, + * indirection detection, etc. */ + GLuint used_in_node; + GLuint dest_in_node; }; +/** + * Store everything about a fragment program that is needed + * to render with that program. + */ struct r300_fragment_program { struct gl_fragment_program mesa_program; @@ -715,23 +767,18 @@ struct r300_fragment_program { int tex_offset; int tex_end; - /* Hardware constants */ - GLfloat constant[PFS_NUM_CONST_REGS][4]; + /* Hardware constants. + * Contains a pointer to the value. The destination of the pointer + * is supposed to be updated when GL state changes. + * Typically, this is either a pointer into + * gl_program_parameter_list::ParameterValues, or a pointer to a + * global constant (e.g. for sin/cos-approximation) + */ + const GLfloat* constant[PFS_NUM_CONST_REGS]; int const_nr; - /* Tracked parameters */ - struct { - int idx; /* hardware index */ - GLfloat *values; /* pointer to values */ - } param[PFS_NUM_CONST_REGS]; - int param_nr; - GLboolean params_uptodate; - int max_temp_idx; - /* the index of the sin constant is stored here */ - GLint const_sin[2]; - GLuint optimization; }; @@ -758,10 +805,10 @@ struct radeon_vertex_buffer { void *Elts; int elt_size; int elt_min, elt_max; /* debug */ - + struct dt AttribPtr[VERT_ATTRIB_MAX]; - - const struct _mesa_prim *Primitive; + + const struct _mesa_prim *Primitive; GLuint PrimitiveCount; GLint LockFirst; GLsizei LockCount; @@ -793,16 +840,16 @@ struct r300_state { GLuint *Elts; struct r300_dma_region elt_dma; - - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. - They are the same as tnl->render_inputs for fixed pipeline */ - + + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + They are the same as tnl->render_inputs for fixed pipeline */ + struct { int transform_offset; /* Transform matrix offset, -1 if none */ } vap_param; /* vertex processor parameter allocation - tells where to write parameters */ - + struct r300_stencilbuffer_state stencil; - + }; #define R300_FALLBACK_NONE 0 @@ -854,7 +901,7 @@ struct r300_buffer_object { struct gl_buffer_object mesa_obj; int id; }; - + #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) static __inline GLuint r300PackColor( GLuint cpp, diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index f18a982deb..c407dfb5b0 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -50,6 +50,7 @@ #include "r300_context.h" #include "r300_fragprog.h" #include "r300_reg.h" +#include "r300_state.h" /* * Usefull macros and values @@ -94,8 +95,9 @@ #define REG_NEGV_SHIFT 18 #define REG_NEGS_SHIFT 19 #define REG_ABS_SHIFT 20 -#define REG_NO_USE_SHIFT 21 -#define REG_VALID_SHIFT 22 +#define REG_NO_USE_SHIFT 21 // Hack for refcounting +#define REG_VALID_SHIFT 22 // Does the register contain a defined value? +#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? #define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) #define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) @@ -106,12 +108,14 @@ #define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) #define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) #define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) +#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) -#define REG(type, index, vswz, sswz, nouse, valid) \ +#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) #define REG_GET_TYPE(reg) \ @@ -126,6 +130,8 @@ ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) #define REG_GET_VALID(reg) \ ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) +#define REG_GET_BUILTIN(reg) \ + ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) #define REG_SET_TYPE(reg, type) \ reg = ((reg & ~REG_TYPE_MASK) | \ ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) @@ -144,6 +150,9 @@ #define REG_SET_VALID(reg, valid) \ reg = ((reg & ~REG_VALID_MASK) | \ ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) +#define REG_SET_BUILTIN(reg, builtin) \ + reg = ((reg & ~REG_BUILTIN_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) #define REG_ABS(reg) \ reg = (reg | REG_ABS_MASK) #define REG_NEGV(reg) \ @@ -184,9 +193,6 @@ static const struct { * * REG_VSWZ/REG_SSWZ is an index into this table */ -#define SLOT_VECTOR (1<<0) -#define SLOT_SCALAR (1<<3) -#define SLOT_BOTH (SLOT_VECTOR | SLOT_SCALAR) /* mapping from SWIZZLE_* to r300 native values for scalar insns */ #define SWIZZLE_HALF 6 @@ -202,14 +208,14 @@ static const struct r300_pfs_swizzle { GLuint flags; } v_swiz[] = { /* native swizzles */ - { MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_VECTOR }, - { MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_VECTOR }, - { MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_VECTOR }, - { MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_VECTOR }, - { MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SCALAR }, - { MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_VECTOR }, - { MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_VECTOR }, - { MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_BOTH }, + { MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SRC_SCALAR }, + { MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR }, + { MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH }, { MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0}, { MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0}, { MAKE_SWZ3(HALF, HALF, HALF), R300_FPI0_ARGC_HALF, 0, 0}, @@ -241,10 +247,10 @@ static const struct { int stride; /* difference between SRC0/1/2 */ GLuint flags; } s_swiz[] = { - { R300_FPI2_ARGA_SRC0C_X, 3, SLOT_VECTOR }, - { R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_VECTOR }, - { R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_VECTOR }, - { R300_FPI2_ARGA_SRC0A , 1, SLOT_SCALAR }, + { R300_FPI2_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR }, + { R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR }, + { R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR }, + { R300_FPI2_ARGA_SRC0A , 1, SLOT_SRC_SCALAR }, { R300_FPI2_ARGA_ZERO , 0, 0 }, { R300_FPI2_ARGA_ONE , 0, 0 }, { R300_FPI2_ARGA_HALF , 0, 0 } @@ -256,6 +262,7 @@ static const GLuint undef = REG(REG_TYPE_TEMP, SWIZZLE_XYZ, SWIZZLE_W, GL_FALSE, + GL_FALSE, GL_FALSE); /* constant one source */ @@ -264,6 +271,7 @@ static const GLuint pfs_one = REG(REG_TYPE_CONST, SWIZZLE_111, SWIZZLE_ONE, GL_FALSE, + GL_TRUE, GL_TRUE); /* constant half source */ @@ -272,6 +280,7 @@ static const GLuint pfs_half = REG(REG_TYPE_CONST, SWIZZLE_HHH, SWIZZLE_HALF, GL_FALSE, + GL_TRUE, GL_TRUE); /* constant zero source */ @@ -280,6 +289,7 @@ static const GLuint pfs_zero = REG(REG_TYPE_CONST, SWIZZLE_000, SWIZZLE_ZERO, GL_FALSE, + GL_TRUE, GL_TRUE); /* @@ -291,47 +301,105 @@ static void emit_arith(struct r300_fragment_program *rp, int op, GLuint src0, GLuint src1, GLuint src2, int flags); -/* - * Helper functions prototypes +/** + * Get an R300 temporary that can be written to in the given slot. */ -static int get_hw_temp(struct r300_fragment_program *rp) +static int get_hw_temp(struct r300_fragment_program *rp, int slot) { COMPILE_STATE; - int r = ffs(~cs->hwreg_in_use); - if (!r) { + int r; + + for(r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) { ERROR("Out of hardware temps\n"); return 0; } - cs->hwreg_in_use |= (1 << --r); + // Reserved is used to avoid the following scenario: + // R300 temporary X is first assigned to Mesa temporary Y during vector ops + // R300 temporary X is then assigned to Mesa temporary Z for further vector ops + // Then scalar ops on Mesa temporary Z are emitted and move back in time + // to overwrite the value of temporary Y. + // End scenario. + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = 0; + cs->hwtemps[r].scalar_valid = 0; + if (r > rp->max_temp_idx) rp->max_temp_idx = r; return r; } +/** + * Get an R300 temporary that will act as a TEX destination register. + */ static int get_hw_temp_tex(struct r300_fragment_program *rp) { COMPILE_STATE; int r; - r = ffs(~(cs->hwreg_in_use | cs->used_in_node)); - if (!r) - return get_hw_temp(rp); /* Will cause an indirection */ + for(r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->used_in_node & (1 << r)) + continue; + + // Note: Be very careful here + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) + return get_hw_temp(rp, 0); /* Will cause an indirection */ + + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = cs->nrslots; + cs->hwtemps[r].scalar_valid = cs->nrslots; - cs->hwreg_in_use |= (1 << --r); if (r > rp->max_temp_idx) rp->max_temp_idx = r; return r; } +/** + * Mark the given hardware register as free. + */ static void free_hw_temp(struct r300_fragment_program *rp, int idx) { COMPILE_STATE; - cs->hwreg_in_use &= ~(1<<idx); + + // Be very careful here. Consider sequences like + // MAD r0, r1,r2,r3 + // TEX r4, ... + // The TEX instruction may be moved in front of the MAD instruction + // due to the way nodes work. We don't want to alias r1 and r4 in + // this case. + // I'm certain the register allocation could be further sanitized, + // but it's tricky because of stuff that can happen inside emit_tex + // and emit_arith. + cs->hwtemps[idx].free = cs->nrslots+1; } + +/** + * Create a new Mesa temporary register. + */ static GLuint get_temp_reg(struct r300_fragment_program *rp) { COMPILE_STATE; @@ -354,6 +422,10 @@ static GLuint get_temp_reg(struct r300_fragment_program *rp) return r; } +/** + * Create a new Mesa temporary register that will act as the destination + * register for a texture read. + */ static GLuint get_temp_reg_tex(struct r300_fragment_program *rp) { COMPILE_STATE; @@ -376,6 +448,9 @@ static GLuint get_temp_reg_tex(struct r300_fragment_program *rp) return r; } +/** + * Free a Mesa temporary and the associated R300 temporary. + */ static void free_temp(struct r300_fragment_program *rp, GLuint r) { COMPILE_STATE; @@ -383,7 +458,7 @@ static void free_temp(struct r300_fragment_program *rp, GLuint r) if (!(cs->temp_in_use & (1 << index))) return; - + if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { free_hw_temp(rp, cs->temps[index].reg); cs->temps[index].reg = -1; @@ -394,47 +469,39 @@ static void free_temp(struct r300_fragment_program *rp, GLuint r) } } -static GLuint emit_param4fv(struct r300_fragment_program *rp, - GLfloat *values) +/** + * Emit a hardware constant/parameter. + * + * \p cp Stable pointer to an array of 4 floats. + * The pointer must be stable in the sense that it remains to be valid + * and hold the contents of the constant/parameter throughout the lifetime + * of the fragment program (actually, up until the next time the fragment + * program is translated). + */ +static GLuint emit_const4fv(struct r300_fragment_program *rp, const GLfloat* cp) { - GLuint r = undef; - GLuint index; - int pidx; + GLuint reg = undef; + int index; - pidx = rp->param_nr++; - index = rp->const_nr++; - if (pidx >= PFS_NUM_CONST_REGS || index >= PFS_NUM_CONST_REGS) { - ERROR("Out of const/param slots!\n"); - return r; + for(index = 0; index < rp->const_nr; ++index) { + if (rp->constant[index] == cp) + break; } - rp->param[pidx].idx = index; - rp->param[pidx].values = values; - rp->params_uptodate = GL_FALSE; - - REG_SET_TYPE(r, REG_TYPE_CONST); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; -} - -static GLuint emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp) -{ - GLuint r = undef; - GLuint index; + if (index >= rp->const_nr) { + if (index >= PFS_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } - index = rp->const_nr++; - if (index >= PFS_NUM_CONST_REGS) { - ERROR("Out of hw constants!\n"); - return r; + rp->const_nr++; + rp->constant[index] = cp; } - COPY_4V(rp->constant[index], cp); - - REG_SET_TYPE(r, REG_TYPE_CONST); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; + REG_SET_TYPE(reg, REG_TYPE_CONST); + REG_SET_INDEX(reg, index); + REG_SET_VALID(reg, GL_TRUE); + return reg; } static inline GLuint negate(GLuint r) @@ -617,7 +684,7 @@ static GLuint do_swizzle(struct r300_fragment_program *rp, GLuint offset; for(i=0; i < 4; ++i){ offset = GET_SWZ(arbswz, i); - + newswz |= (offset <= 3)?GET_SWZ(vsrcswz, offset) << i*3:offset << i*3; } @@ -688,16 +755,16 @@ static GLuint t_src(struct r300_fragment_program *rp, REG_SET_TYPE(r, REG_TYPE_INPUT); break; case PROGRAM_LOCAL_PARAM: - r = emit_param4fv(rp, + r = emit_const4fv(rp, rp->mesa_program.Base.LocalParams[fpsrc.Index]); break; case PROGRAM_ENV_PARAM: - r = emit_param4fv(rp, + r = emit_const4fv(rp, rp->ctx->FragmentProgram.Parameters[fpsrc.Index]); break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: - r = emit_param4fv(rp, + r = emit_const4fv(rp, rp->mesa_program.Base.Parameters->ParameterValues[fpsrc.Index]); break; default: @@ -726,7 +793,7 @@ static GLuint t_dst(struct r300_fragment_program *rp, struct prog_dst_register dest) { GLuint r = undef; - + switch (dest.File) { case PROGRAM_TEMPORARY: REG_SET_INDEX(r, dest.Index); @@ -762,10 +829,10 @@ static int t_hw_src(struct r300_fragment_program *rp, switch(REG_GET_TYPE(src)) { case REG_TYPE_TEMP: /* NOTE: if reg==-1 here, a source is being read that - * hasn't been written to. Undefined results + * hasn't been written to. Undefined results. */ if (cs->temps[index].reg == -1) - cs->temps[index].reg = get_hw_temp(rp); + cs->temps[index].reg = get_hw_temp(rp, cs->nrslots); idx = cs->temps[index].reg; @@ -795,7 +862,8 @@ static int t_hw_src(struct r300_fragment_program *rp, static int t_hw_dst(struct r300_fragment_program *rp, GLuint dest, - GLboolean tex) + GLboolean tex, + int slot) { COMPILE_STATE; int idx; @@ -806,7 +874,7 @@ static int t_hw_dst(struct r300_fragment_program *rp, case REG_TYPE_TEMP: if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { if (!tex) { - cs->temps[index].reg = get_hw_temp(rp); + cs->temps[index].reg = get_hw_temp(rp, slot); } else { cs->temps[index].reg = get_hw_temp_tex(rp); } @@ -835,30 +903,24 @@ static int t_hw_dst(struct r300_fragment_program *rp, ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); return 0; } - + return idx; } -static void emit_nop(struct r300_fragment_program *rp, - GLuint mask, - GLboolean sync) +static void emit_nop(struct r300_fragment_program *rp) { COMPILE_STATE; - - if (sync) - cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); - if (mask & WRITEMASK_XYZ) { - rp->alu.inst[cs->v_pos].inst0 = NOP_INST0; - rp->alu.inst[cs->v_pos].inst1 = NOP_INST1; - cs->v_pos++; + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return; } - if (mask & WRITEMASK_W) { - rp->alu.inst[cs->s_pos].inst2 = NOP_INST2; - rp->alu.inst[cs->s_pos].inst3 = NOP_INST3; - cs->s_pos++; - } + rp->alu.inst[cs->nrslots].inst0 = NOP_INST0; + rp->alu.inst[cs->nrslots].inst1 = NOP_INST1; + rp->alu.inst[cs->nrslots].inst2 = NOP_INST2; + rp->alu.inst[cs->nrslots].inst3 = NOP_INST3; + cs->nrslots++; } static void emit_tex(struct r300_fragment_program *rp, @@ -871,7 +933,7 @@ static void emit_tex(struct r300_fragment_program *rp, GLuint din = cs->dest_in_node, uin = cs->used_in_node; int unit = fpi->TexSrcUnit; int hwsrc, hwdest; - + /* Resolve source/dest to hardware registers */ hwsrc = t_hw_src(rp, coord, GL_TRUE); if (opcode != R300_FPITX_OP_KIL) { @@ -882,8 +944,8 @@ static void emit_tex(struct r300_fragment_program *rp, rdest = dest; dest = get_temp_reg_tex(rp); } - hwdest = t_hw_dst(rp, dest, GL_TRUE); - + hwdest = t_hw_dst(rp, dest, GL_TRUE, rp->node[rp->cur_node].alu_offset); + /* Use a temp that hasn't been used in this node, rather * than causing an indirection */ @@ -896,23 +958,19 @@ static void emit_tex(struct r300_fragment_program *rp, hwdest = 0; unit = 0; } - + /* Indirection if source has been written in this node, or if the * dest has been read/written in this node */ if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && (din & (1<<hwsrc))) || (uin & (1<<hwdest))) { - + /* Finish off current node */ - cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); - if (rp->node[rp->cur_node].alu_offset == cs->v_pos) { - /* No alu instructions in the node? Emit a NOP. */ - emit_nop(rp, WRITEMASK_XYZW, GL_TRUE); - cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); - } - + if (rp->node[rp->cur_node].alu_offset == cs->nrslots) + emit_nop(rp); + rp->node[rp->cur_node].alu_end = - cs->v_pos - rp->node[rp->cur_node].alu_offset - 1; + cs->nrslots - rp->node[rp->cur_node].alu_offset - 1; assert(rp->node[rp->cur_node].alu_end >= 0); if (++rp->cur_node >= PFS_MAX_TEX_INDIRECT) { @@ -922,14 +980,14 @@ static void emit_tex(struct r300_fragment_program *rp, /* Start new node */ rp->node[rp->cur_node].tex_offset = rp->tex.length; - rp->node[rp->cur_node].alu_offset = cs->v_pos; + rp->node[rp->cur_node].alu_offset = cs->nrslots; rp->node[rp->cur_node].tex_end = -1; - rp->node[rp->cur_node].alu_end = -1; + rp->node[rp->cur_node].alu_end = -1; rp->node[rp->cur_node].flags = 0; cs->used_in_node = 0; cs->dest_in_node = 0; } - + if (rp->cur_node == 0) rp->first_node_has_tex = 1; @@ -940,7 +998,7 @@ static void emit_tex(struct r300_fragment_program *rp, /* not entirely sure about this */ | (opcode << R300_FPITX_OPCODE_SHIFT); - cs->dest_in_node |= (1 << hwdest); + cs->dest_in_node |= (1 << hwdest); if (REG_GET_TYPE(coord) != REG_TYPE_CONST) cs->used_in_node |= (1 << hwsrc); @@ -954,84 +1012,272 @@ static void emit_tex(struct r300_fragment_program *rp, } } -/* Add sources to FPI1/FPI3 lists. If source is already on list, - * reuse the index instead of wasting a source. + +/** + * Returns the first slot where we could possibly allow writing to dest, + * according to register allocation. */ -static int add_src(struct r300_fragment_program *rp, - int reg, - int pos, - int srcmask) +static int get_earliest_allowed_write( + struct r300_fragment_program* rp, + GLuint dest, int mask) { COMPILE_STATE; - int csm, i; - - /* Look for matches */ - for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) { - /* If sources have been allocated in this position(s)... */ - if ((cs->slot[pos].umask & csm) == csm) { - /* ... and the register number(s) match, re-use the - source */ - if (srcmask == SLOT_VECTOR && - cs->slot[pos].vsrc[i] == reg) - return i; - if (srcmask == SLOT_SCALAR && - cs->slot[pos].ssrc[i] == reg) - return i; - if (srcmask == SLOT_BOTH && - cs->slot[pos].vsrc[i] == reg && - cs->slot[pos].ssrc[i] == reg) - return i; - } + int idx; + int pos; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch(REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[index].reg == -1) + return 0; + + idx = cs->temps[index].reg; + break; + case REG_TYPE_OUTPUT: + return 0; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; } - /* Look for free spaces */ - for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) { - /* If the position(s) haven't been allocated */ - if ((cs->slot[pos].umask & csm) == 0) { - cs->slot[pos].umask |= csm; - - if (srcmask & SLOT_VECTOR) - cs->slot[pos].vsrc[i] = reg; - if (srcmask & SLOT_SCALAR) - cs->slot[pos].ssrc[i] = reg; - return i; - } + pos = cs->hwtemps[idx].reserved; + if (mask & WRITEMASK_XYZ) { + if (pos < cs->hwtemps[idx].vector_lastread) + pos = cs->hwtemps[idx].vector_lastread; } - - //ERROR("Failed to allocate sources in FPI1/FPI3!\n"); - return 0; + if (mask & WRITEMASK_W) { + if (pos < cs->hwtemps[idx].scalar_lastread) + pos = cs->hwtemps[idx].scalar_lastread; + } + + return pos; } -/* Determine whether or not to position opcode in the same ALU slot for both - * vector and scalar portions of an instruction. + +/** + * Allocates a slot for an ALU instruction that can consist of + * a vertex part or a scalar part or both. + * + * Sources from src (src[0] to src[argc-1]) are added to the slot in the + * appropriate position (vector and/or scalar), and their positions are + * recorded in the srcpos array. * - * It's not necessary to force the first case, but it makes disassembled - * shaders easier to read. + * This function emits instruction code for the source fetch and the + * argument selection. It does not emit instruction code for the + * opcode or the destination selection. + * + * @return the index of the slot */ -static GLboolean force_same_slot(int vop, - int sop, - GLboolean emit_vop, - GLboolean emit_sop, - int argc, - GLuint *src) +static int find_and_prepare_slot(struct r300_fragment_program* rp, + GLboolean emit_vop, + GLboolean emit_sop, + int argc, + GLuint* src, + GLuint dest, + int mask) { - int i; + COMPILE_STATE; + int hwsrc[3]; + int srcpos[3]; + unsigned int used; + int tempused; + int tempvsrc[3]; + int tempssrc[3]; + int pos; + int regnr; + int i,j; + + // Determine instruction slots, whether sources are required on + // vector or scalar side, and the smallest slot number where + // all source registers are available + used = 0; + if (emit_vop) + used |= SLOT_OP_VECTOR; + if (emit_sop) + used |= SLOT_OP_SCALAR; + + pos = get_earliest_allowed_write(rp, dest, mask); + + if (rp->node[rp->cur_node].alu_offset > pos) + pos = rp->node[rp->cur_node].alu_offset; + for(i = 0; i < argc; ++i) { + if (!REG_GET_BUILTIN(src[i])) { + if (emit_vop) + used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; + if (emit_sop) + used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; + } + + hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ + regnr = hwsrc[i] & 31; + + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_valid > pos) + pos = cs->hwtemps[regnr].vector_valid; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_valid > pos) + pos = cs->hwtemps[regnr].scalar_valid; + } + } + } + + // Find a slot that fits + for(; ; ++pos) { + if (cs->slot[pos].used & used & SLOT_OP_BOTH) + continue; + + if (pos >= cs->nrslots) { + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return -1; + } - if (emit_vop && emit_sop) - return GL_TRUE; + rp->alu.inst[pos].inst0 = NOP_INST0; + rp->alu.inst[pos].inst1 = NOP_INST1; + rp->alu.inst[pos].inst2 = NOP_INST2; + rp->alu.inst[pos].inst3 = NOP_INST3; + + cs->nrslots++; + } + + // Note: When we need both parts (vector and scalar) of a source, + // we always try to put them into the same position. This makes the + // code easier to read, and it is optimal (i.e. one doesn't gain + // anything by splitting the parts). + // It also avoids headaches with swizzles that access both parts (i.e WXY) + tempused = cs->slot[pos].used; + for(i = 0; i < 3; ++i) { + tempvsrc[i] = cs->slot[pos].vsrc[i]; + tempssrc[i] = cs->slot[pos].ssrc[i]; + } - if (emit_vop && vop == R300_FPI0_OUTC_REPL_ALPHA) - return GL_TRUE; + for(i = 0; i < argc; ++i) { + int flags = (used >> i) & SLOT_SRC_BOTH; + if (!flags) { + srcpos[i] = 0; + continue; + } + + for(j = 0; j < 3; ++j) { + if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { + if (tempvsrc[j] != hwsrc[i]) + continue; + } + + if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { + if (tempssrc[j] != hwsrc[i]) + continue; + } + + break; + } + + if (j == 3) + break; + + srcpos[i] = j; + tempused |= flags << j; + if (flags & SLOT_SRC_VECTOR) + tempvsrc[j] = hwsrc[i]; + if (flags & SLOT_SRC_SCALAR) + tempssrc[j] = hwsrc[i]; + } + + if (i == argc) + break; + } + + // Found a slot, reserve it + cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); + for(i = 0; i < 3; ++i) { + cs->slot[pos].vsrc[i] = tempvsrc[i]; + cs->slot[pos].ssrc[i] = tempssrc[i]; + } + + for(i = 0; i < argc; ++i) { + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + int regnr = hwsrc[i] & 31; + + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_lastread < pos) + cs->hwtemps[regnr].vector_lastread = pos; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_lastread < pos) + cs->hwtemps[regnr].scalar_lastread = pos; + } + } + } + + // Emit the source fetch code + rp->alu.inst[pos].inst1 &= ~R300_FPI1_SRC_MASK; + rp->alu.inst[pos].inst1 |= + ((cs->slot[pos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); + + rp->alu.inst[pos].inst3 &= ~R300_FPI3_SRC_MASK; + rp->alu.inst[pos].inst3 |= + ((cs->slot[pos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | + (cs->slot[pos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); + + // Emit the argument selection code if (emit_vop) { - for (i=0;i<argc;i++) - if (REG_GET_VSWZ(src[i]) == SWIZZLE_WZY) - return GL_TRUE; + int swz[3]; + + for(i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + + (srcpos[i] * v_swiz[REG_GET_VSWZ(src[i])].stride)) | + ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) | + ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); + } else { + swz[i] = R300_FPI0_ARGC_ZERO; + } + } + + rp->alu.inst[pos].inst0 &= + ~(R300_FPI0_ARG0C_MASK|R300_FPI0_ARG1C_MASK|R300_FPI0_ARG2C_MASK); + rp->alu.inst[pos].inst0 |= + (swz[0] << R300_FPI0_ARG0C_SHIFT) | + (swz[1] << R300_FPI0_ARG1C_SHIFT) | + (swz[2] << R300_FPI0_ARG2C_SHIFT); + } + + if (emit_sop) { + int swz[3]; + + for(i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + + (srcpos[i] * s_swiz[REG_GET_SSWZ(src[i])].stride)) | + ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) | + ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); + } else { + swz[i] = R300_FPI2_ARGA_ZERO; + } + } + + rp->alu.inst[pos].inst2 &= + ~(R300_FPI2_ARG0A_MASK|R300_FPI2_ARG1A_MASK|R300_FPI2_ARG2A_MASK); + rp->alu.inst[pos].inst2 |= + (swz[0] << R300_FPI2_ARG0A_SHIFT) | + (swz[1] << R300_FPI2_ARG1A_SHIFT) | + (swz[2] << R300_FPI2_ARG2A_SHIFT); } - return GL_FALSE; + return pos; } + +/** + * Append an ALU instruction to the instruction list. + */ static void emit_arith(struct r300_fragment_program *rp, int op, GLuint dest, @@ -1043,145 +1289,81 @@ static void emit_arith(struct r300_fragment_program *rp, { COMPILE_STATE; GLuint src[3] = { src0, src1, src2 }; - int hwsrc[3], sswz[3], vswz[3]; int hwdest; - GLboolean emit_vop = GL_FALSE, emit_sop = GL_FALSE; + GLboolean emit_vop, emit_sop; int vop, sop, argc; - int vpos, spos; - int i; + int pos; vop = r300_fpop[op].v_op; sop = r300_fpop[op].s_op; argc = r300_fpop[op].argc; + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && + REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { + if (mask & WRITEMASK_Z) { + mask = WRITEMASK_W; + } else { + return; + } + } + + emit_vop = GL_FALSE; + emit_sop = GL_FALSE; if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) emit_vop = GL_TRUE; if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) emit_sop = GL_TRUE; - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && - REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) - emit_vop = GL_FALSE; - - if (force_same_slot(vop, sop, emit_vop, emit_sop, argc, src)) { - vpos = spos = MAX2(cs->v_pos, cs->s_pos); - } else { - vpos = cs->v_pos; - spos = cs->s_pos; - /* Here is where we'd decide on where a safe place is to - * combine this instruction with a previous one. - * - * This is extremely simple for now.. if a source depends - * on the opposite stream, force the same instruction. - */ - for (i=0;i<3;i++) { - if (emit_vop && - (v_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_SCALAR)) { - vpos = spos = MAX2(vpos, spos); - break; - } - if (emit_sop && - (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) { - vpos = spos = MAX2(vpos, spos); - break; - } - } - } - - /* - Convert src->hwsrc, record for FPI1/FPI3 - * - Determine ARG parts of FPI0/FPI2, unused args are filled - * with ARG_ZERO. - */ - for (i=0;i<3;i++) { - int srcpos; - - if (i >= argc) { - vswz[i] = R300_FPI0_ARGC_ZERO; - sswz[i] = R300_FPI2_ARGA_ZERO; - continue; - } - - hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE); - - if (emit_vop && vop != R300_FPI0_OUTC_REPL_ALPHA) { - srcpos = add_src(rp, hwsrc[i], vpos, - v_swiz[REG_GET_VSWZ(src[i])].flags); - vswz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + - (srcpos * - v_swiz[REG_GET_VSWZ(src[i])].stride)) | - ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) | - ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); - } else vswz[i] = R300_FPI0_ARGC_ZERO; - - if (emit_sop) { - srcpos = add_src(rp, hwsrc[i], spos, - s_swiz[REG_GET_SSWZ(src[i])].flags); - sswz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + - (srcpos * - s_swiz[REG_GET_SSWZ(src[i])].stride)) | - ((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) | - ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0); - } else sswz[i] = R300_FPI2_ARGA_ZERO; - } - hwdest = t_hw_dst(rp, dest, GL_FALSE); - + pos = find_and_prepare_slot(rp, emit_vop, emit_sop, argc, src, dest, mask); + if (pos < 0) + return; + + hwdest = t_hw_dst(rp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ + if (flags & PFS_FLAG_SAT) { vop |= R300_FPI0_OUTC_SAT; sop |= R300_FPI2_OUTA_SAT; } /* Throw the pieces together and get FPI0/1 */ - rp->alu.inst[vpos].inst1 = - ((cs->slot[vpos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | - (cs->slot[vpos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | - (cs->slot[vpos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); if (emit_vop) { - rp->alu.inst[vpos].inst0 = vop | - (vswz[0] << R300_FPI0_ARG0C_SHIFT) | - (vswz[1] << R300_FPI0_ARG1C_SHIFT) | - (vswz[2] << R300_FPI0_ARG2C_SHIFT); + rp->alu.inst[pos].inst0 |= vop; + + rp->alu.inst[pos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; - rp->alu.inst[vpos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - rp->alu.inst[vpos].inst1 |= + rp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; } else assert(0); } else { - rp->alu.inst[vpos].inst1 |= + rp->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_REG_MASK_SHIFT; + + cs->hwtemps[hwdest].vector_valid = pos+1; } - cs->v_pos = vpos+1; - } else if (spos >= vpos) - rp->alu.inst[spos].inst0 = NOP_INST0; + } /* And now FPI2/3 */ - rp->alu.inst[spos].inst3 = - ((cs->slot[spos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | - (cs->slot[spos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | - (cs->slot[spos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); if (emit_sop) { - rp->alu.inst[spos].inst2 = sop | - sswz[0] << R300_FPI2_ARG0A_SHIFT | - sswz[1] << R300_FPI2_ARG1A_SHIFT | - sswz[2] << R300_FPI2_ARG2A_SHIFT; + rp->alu.inst[pos].inst2 |= sop; if (mask & WRITEMASK_W) { if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - rp->alu.inst[spos].inst3 |= + rp->alu.inst[pos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT; } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { - rp->alu.inst[spos].inst3 |= R300_FPI3_DSTA_DEPTH; + rp->alu.inst[pos].inst3 |= R300_FPI3_DSTA_DEPTH; } else assert(0); } else { - rp->alu.inst[spos].inst3 |= + rp->alu.inst[pos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_REG; + + cs->hwtemps[hwdest].scalar_valid = pos+1; } } - cs->s_pos = spos+1; - } else if (vpos >= spos) - rp->alu.inst[vpos].inst2 = NOP_INST2; + } return; } @@ -1204,34 +1386,130 @@ static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr) } #endif -static void make_sin_const(struct r300_fragment_program *rp) +static GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.0, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } +}; + + +/** + * Emit a LIT instruction. + * \p flags may be PFS_FLAG_SAT + * + * Definition of LIT (from ARB_fragment_program): + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots. So unless there's some special undocumented opcode, + * this implementation is potentially optimal. Unfortunately, + * emit_arith is a bit too conservative because it doesn't understand + * partial writes to the vector component. + */ +static const GLfloat LitConst[4] = { 127.999999, 127.999999, 127.999999, -127.999999 }; + +static void emit_lit(struct r300_fragment_program *rp, + GLuint dest, + int mask, + GLuint src, + int flags) { - if(rp->const_sin[0] == -1){ - GLfloat cnstv[4]; - - cnstv[0] = 1.273239545; // 4/PI - cnstv[1] =-0.405284735; // -4/(PI*PI) - cnstv[2] = 3.141592654; // PI - cnstv[3] = 0.2225; // weight - rp->const_sin[0] = emit_const4fv(rp, cnstv); - - cnstv[0] = 0.75; - cnstv[1] = 0.0; - cnstv[2] = 0.159154943; // 1/(2*PI) - cnstv[3] = 6.283185307; // 2*PI - rp->const_sin[1] = emit_const4fv(rp, cnstv); + COMPILE_STATE; + GLuint cnst; + int needTemporary; + GLuint temp; + + cnst = emit_const4fv(rp, LitConst); + + needTemporary = 0; + if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = keep(get_temp_reg(rp)); + } else { + temp = keep(dest); + } + + // Note: The order of emit_arith inside the slots is relevant, + // because emit_arith only looks at scalar vs. vector when resolving + // dependencies, and it does not consider individual vector components, + // so swizzling between the two parts can create fake dependencies. + + // First slot + emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_XY, + keep(src), pfs_zero, undef, 0); + emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_W, + src, cnst, undef, 0); + + // Second slot + emit_arith(rp, PFS_OP_MIN, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), cnst, undef, 0); + emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W, + swizzle(temp, Y, Y, Y, Y), undef, undef, 0); + + // Third slot + // If desired, we saturate the y result here. + // This does not affect the use as a condition variable in the CMP later + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); + + // Fourth slot + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X, + pfs_one, pfs_one, pfs_zero, 0); + emit_arith(rp, PFS_OP_EX2, temp, WRITEMASK_W, + temp, undef, undef, 0); + + // Fifth slot + emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z, + pfs_zero, swizzle(temp, W, W, W, W), negate(swizzle(temp, Y, Y, Y, Y)), flags); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + pfs_one, pfs_one, pfs_zero, 0); + + if (needTemporary) { + emit_arith(rp, PFS_OP_MAD, dest, mask, + temp, pfs_one, pfs_zero, flags); + free_temp(rp, temp); + } else { + // Decrease refcount of the destination + t_hw_dst(rp, dest, GL_FALSE, cs->nrslots); } } + static GLboolean parse_program(struct r300_fragment_program *rp) -{ +{ struct gl_fragment_program *mp = &rp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; GLuint src[3], dest, temp[2]; - GLuint cnst; int flags, mask = 0; - GLfloat cnstv[4] = {0.0, 0.0, 0.0, 0.0}; + int const_sin[2]; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("empty program?\n"); @@ -1284,15 +1562,16 @@ static GLboolean parse_program(struct r300_fragment_program *rp) * result = sin(x) */ temp[0] = get_temp_reg(rp); - make_sin_const(rp); + const_sin[0] = emit_const4fv(rp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(rp, SinCosConsts[1]); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); /* add 0.5*PI and do range reduction */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(src[0], X, X, X, X), - swizzle(rp->const_sin[1], Z, Z, Z, Z), - swizzle(rp->const_sin[1], X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], X, X, X, X), 0); emit_arith(rp, PFS_OP_FRC, temp[0], WRITEMASK_X, @@ -1303,15 +1582,15 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), - swizzle(rp->const_sin[1], W, W, W, W), //2*PI - negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI + swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI 0); /* SIN */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), - rp->const_sin[0], + const_sin[0], pfs_zero, 0); @@ -1320,7 +1599,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); - + emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Y, swizzle(temp[0], X, X, X, X), absolute(swizzle(temp[0], X, X, X, X)), @@ -1330,7 +1609,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, dest, mask, swizzle(temp[0], Y, Y, Y, Y), - swizzle(rp->const_sin[0], W, W, W, W), + swizzle(const_sin[0], W, W, W, W), swizzle(temp[0], X, X, X, X), flags); @@ -1364,12 +1643,12 @@ static GLboolean parse_program(struct r300_fragment_program *rp) 0); emit_arith(rp, PFS_OP_DP4, dest, mask, temp[0], src[1], undef, - flags); + flags); free_temp(rp, temp[0]); #else emit_arith(rp, PFS_OP_DP4, dest, mask, swizzle(src[0], X, Y, Z, ONE), src[1], - undef, flags); + undef, flags); #endif break; case OPCODE_DST: @@ -1400,7 +1679,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) src[0], undef, undef, flags); break; - case OPCODE_FLR: + case OPCODE_FLR: src[0] = t_src(rp, fpi->SrcReg[0]); temp[0] = get_temp_reg(rp); /* FRC temp, src0 @@ -1430,66 +1709,8 @@ static GLboolean parse_program(struct r300_fragment_program *rp) flags); break; case OPCODE_LIT: - /* LIT - * if (s.x < 0) t.x = 0; else t.x = s.x; - * if (s.y < 0) t.y = 0; else t.y = s.y; - * if (s.w > 128.0) t.w = 128.0; else t.w = s.w; - * if (s.w < -128.0) t.w = -128.0; else t.w = s.w; - * r.x = 1.0 - * if (t.x > 0) r.y = pow(t.y, t.w); else r.y = 0; - * Also r.y = 0 if t.y < 0 - * For the t.x > 0 FGLRX use the CMPH opcode which - * change the compare to (t.x + 0.5) > 0.5 we may - * save one instruction by doing CMP -t.x - */ - cnstv[0] = cnstv[1] = cnstv[2] = cnstv[3] = 0.50001; src[0] = t_src(rp, fpi->SrcReg[0]); - temp[0] = get_temp_reg(rp); - cnst = emit_const4fv(rp, cnstv); - emit_arith(rp, PFS_OP_CMP, temp[0], - WRITEMASK_X | WRITEMASK_Y, - src[0], pfs_zero, src[0], flags); - emit_arith(rp, PFS_OP_MIN, temp[0], WRITEMASK_Z, - swizzle(keep(src[0]), W, W, W, W), - cnst, undef, flags); - emit_arith(rp, PFS_OP_LG2, temp[0], WRITEMASK_W, - swizzle(temp[0], Y, Y, Y, Y), - undef, undef, flags); - emit_arith(rp, PFS_OP_MAX, temp[0], WRITEMASK_Z, - temp[0], negate(cnst), undef, flags); - emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_W, - temp[0], swizzle(temp[0], Z, Z, Z, Z), - pfs_zero, flags); - emit_arith(rp, PFS_OP_EX2, temp[0], WRITEMASK_W, - temp[0], undef, undef, flags); - emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y, - swizzle(keep(temp[0]), X, X, X, X), - pfs_one, pfs_zero, flags); -#if 0 - emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X, - temp[0], pfs_one, pfs_half, flags); - emit_arith(rp, PFS_OP_CMPH, temp[0], WRITEMASK_Z, - swizzle(keep(temp[0]), W, W, W, W), - pfs_zero, swizzle(keep(temp[0]), X, X, X, X), - flags); -#else - emit_arith(rp, PFS_OP_CMP, temp[0], WRITEMASK_Z, - pfs_zero, - swizzle(keep(temp[0]), W, W, W, W), - negate(swizzle(keep(temp[0]), X, X, X, X)), - flags); -#endif - emit_arith(rp, PFS_OP_CMP, dest, WRITEMASK_Z, - pfs_zero, temp[0], - negate(swizzle(keep(temp[0]), Y, Y, Y, Y)), - flags); - emit_arith(rp, PFS_OP_MAD, dest, - WRITEMASK_X | WRITEMASK_W, - pfs_one, - pfs_one, - pfs_zero, - flags); - free_temp(rp, temp[0]); + emit_lit(rp, dest, mask, src[0], flags); break; case OPCODE_LRP: src[0] = t_src(rp, fpi->SrcReg[0]); @@ -1508,7 +1729,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) src[0], src[1], temp[0], flags); free_temp(rp, temp[0]); - break; + break; case OPCODE_MAD: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); @@ -1535,7 +1756,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) case OPCODE_SWZ: src[0] = t_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_MAD, dest, mask, - src[0], pfs_one, pfs_zero, + src[0], pfs_one, pfs_zero, flags); break; case OPCODE_MUL: @@ -1548,7 +1769,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) case OPCODE_POW: src[0] = t_scalar_src(rp, fpi->SrcReg[0]); src[1] = t_scalar_src(rp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(rp); + temp[0] = get_temp_reg(rp); emit_arith(rp, PFS_OP_LG2, temp[0], WRITEMASK_W, src[0], undef, undef, 0); @@ -1582,19 +1803,20 @@ static GLboolean parse_program(struct r300_fragment_program *rp) */ temp[0] = get_temp_reg(rp); temp[1] = get_temp_reg(rp); - make_sin_const(rp); + const_sin[0] = emit_const4fv(rp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(rp, SinCosConsts[1]); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); /* x = -abs(x)+0.5*PI */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z, - swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI + swizzle(const_sin[0], Z, Z, Z, Z), //PI pfs_half, negate(abs(swizzle(keep(src[0]), X, X, X, X))), 0); /* C*x (sin) */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_W, - swizzle(rp->const_sin[0], Y, Y, Y, Y), + swizzle(const_sin[0], Y, Y, Y, Y), swizzle(keep(src[0]), X, X, X, X), pfs_zero, 0); @@ -1602,13 +1824,13 @@ static GLboolean parse_program(struct r300_fragment_program *rp) /* B*x, C*x (cos) */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), - rp->const_sin[0], + const_sin[0], pfs_zero, 0); /* B*x (sin) */ emit_arith(rp, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(rp->const_sin[0], X, X, X, X), + swizzle(const_sin[0], X, X, X, X), keep(src[0]), pfs_zero, 0); @@ -1638,7 +1860,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) /* dest.xy = mad(temp.xy, P, temp2.wz) */ emit_arith(rp, PFS_OP_MAD, dest, mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], - swizzle(rp->const_sin[0], W, W, W, W), + swizzle(const_sin[0], W, W, W, W), swizzle(temp[1], W, Z, Y, X), flags); @@ -1669,7 +1891,8 @@ static GLboolean parse_program(struct r300_fragment_program *rp) */ temp[0] = get_temp_reg(rp); - make_sin_const(rp); + const_sin[0] = emit_const4fv(rp, SinCosConsts[0]); + const_sin[1] = emit_const4fv(rp, SinCosConsts[1]); src[0] = t_scalar_src(rp, fpi->SrcReg[0]); @@ -1677,7 +1900,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(keep(src[0]), X, X, X, X), - swizzle(rp->const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], Z, Z, Z, Z), pfs_half, 0); @@ -1689,15 +1912,15 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), - swizzle(rp->const_sin[1], W, W, W, W), //2*PI - negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI + swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI 0); /* SIN */ emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), - rp->const_sin[0], + const_sin[0], pfs_zero, 0); @@ -1706,7 +1929,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); - + emit_arith(rp, PFS_OP_MAD, temp[0], WRITEMASK_Y, swizzle(temp[0], X, X, X, X), absolute(swizzle(temp[0], X, X, X, X)), @@ -1716,7 +1939,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) emit_arith(rp, PFS_OP_MAD, dest, mask, swizzle(temp[0], Y, Y, Y, Y), - swizzle(rp->const_sin[0], W, W, W, W), + swizzle(const_sin[0], W, W, W, W), swizzle(temp[0], X, X, X, X), flags); @@ -1763,7 +1986,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) swizzle(keep(src[1]), Y, Z, X, W), pfs_zero, 0); - /* dest.xyz = src0.yzx * src1.zxy - temp + /* dest.xyz = src0.yzx * src1.zxy - temp * dest.w = undefined * */ emit_arith(rp, PFS_OP_MAD, dest, mask & WRITEMASK_XYZ, @@ -1788,13 +2011,99 @@ static GLboolean parse_program(struct r300_fragment_program *rp) return GL_TRUE; } +static void insert_wpos(struct gl_program *prog) +{ + GLint tokens[6] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0, 0 }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... */ + prog->NumTemporaries++; + + fpi = _mesa_alloc_instructions (prog->NumInstructions + 3); + _mesa_init_instructions (fpi, prog->NumInstructions + 3); + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(prog->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + _mesa_copy_instructions (&fpi[i], prog->Instructions, prog->NumInstructions); + + free(prog->Instructions); + + prog->Instructions = fpi; + + prog->NumInstructions += i; + fpi = &prog->Instructions[prog->NumInstructions-1]; + + assert(fpi->Opcode == OPCODE_END); + + for(fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++){ + for(i=0; i<3; i++) + if( fpi->SrcReg[i].File == PROGRAM_INPUT && + fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS ){ + fpi->SrcReg[i].File = PROGRAM_TEMPORARY; + fpi->SrcReg[i].Index = tempregi; + } + } +} + /* - Init structures * - Determine what hwregs each input corresponds to */ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) { struct r300_pfs_compile_state *cs = NULL; - struct gl_fragment_program *mp = &rp->mesa_program; + struct gl_fragment_program *mp = &rp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; GLuint temps_used = 0; /* for rp->temps[] */ @@ -1809,13 +2118,10 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) rp->cur_node = 0; rp->first_node_has_tex = 0; rp->const_nr = 0; - rp->param_nr = 0; - rp->params_uptodate = GL_FALSE; rp->max_temp_idx = 0; rp->node[0].alu_end = -1; rp->node[0].tex_end = -1; - rp->const_sin[0] = -1; - + _mesa_memset(cs, 0, sizeof(*rp->cs)); for (i=0;i<PFS_MAX_ALU_INST;i++) { for (j=0;j<3;j++) { @@ -1823,7 +2129,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) cs->slot[i].ssrc[j] = SRC_CONST; } } - + /* Work out what temps the Mesa inputs correspond to, this must match * what setup_rs_unit does, which shouldn't be a problem as rs_unit * configures itself based on the fragprog's InputsRead @@ -1836,7 +2142,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) for (i=0;i<rp->ctx->Const.MaxTextureUnits;i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { cs->inputs[FRAG_ATTRIB_TEX0+i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0+i].reg = get_hw_temp(rp); + cs->inputs[FRAG_ATTRIB_TEX0+i].reg = get_hw_temp(rp, 0); } } InputsRead &= ~FRAG_BITS_TEX_ANY; @@ -1844,21 +2150,22 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) /* fragment position treated as a texcoord */ if (InputsRead & FRAG_BIT_WPOS) { cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(rp); + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(rp, 0); + insert_wpos(&mp->Base); } InputsRead &= ~FRAG_BIT_WPOS; /* Then primary colour */ if (InputsRead & FRAG_BIT_COL0) { cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp); + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp, 0); } InputsRead &= ~FRAG_BIT_COL0; - + /* Secondary color */ if (InputsRead & FRAG_BIT_COL1) { cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(rp); + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(rp, 0); } InputsRead &= ~FRAG_BIT_COL1; @@ -1881,7 +2188,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) for (fpi=mp->Base.Instructions;fpi->Opcode != OPCODE_END; fpi++) { int idx; - + for (i=0;i<3;i++) { idx = fpi->SrcReg[i].Index; switch (fpi->SrcReg[i].File) { @@ -1916,16 +2223,10 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp) static void update_params(struct r300_fragment_program *rp) { struct gl_fragment_program *mp = &rp->mesa_program; - int i; /* Ask Mesa nicely to fill in ParameterValues for us */ - if (rp->param_nr) + if (mp->Base.Parameters) _mesa_load_state_parameters(rp->ctx, mp->Base.Parameters); - - for (i=0;i<rp->param_nr;i++) - COPY_4V(rp->constant[rp->param[i].idx], rp->param[i].values); - - rp->params_uptodate = GL_TRUE; } void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_program *rp) @@ -1933,7 +2234,7 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr struct r300_pfs_compile_state *cs = NULL; if (!rp->translated) { - + init_program(r300, rp); cs = rp->cs; @@ -1941,22 +2242,22 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr dump_program(rp); return; } - + /* Finish off */ - cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); rp->node[rp->cur_node].alu_end = - cs->v_pos - rp->node[rp->cur_node].alu_offset - 1; + cs->nrslots - rp->node[rp->cur_node].alu_offset - 1; if (rp->node[rp->cur_node].tex_end < 0) rp->node[rp->cur_node].tex_end = 0; rp->alu_offset = 0; - rp->alu_end = cs->v_pos - 1; + rp->alu_end = cs->nrslots - 1; rp->tex_offset = 0; rp->tex_end = rp->tex.length ? rp->tex.length - 1 : 0; assert(rp->node[rp->cur_node].alu_end >= 0); assert(rp->alu_end >= 0); - + rp->translated = GL_TRUE; - if (0) dump_program(rp); + if (RADEON_DEBUG & DEBUG_PIXEL) dump_program(rp); + r300UpdateStateParameters(rp->ctx, _NEW_PROGRAM); } update_params(rp); @@ -1965,11 +2266,11 @@ void r300_translate_fragment_shader(r300ContextPtr r300, struct r300_fragment_pr /* just some random things... */ static void dump_program(struct r300_fragment_program *rp) { - int i; + int n, i, j; static int pc = 0; fprintf(stderr, "pc=%d*************************************\n", pc++); - + fprintf(stderr, "Mesa program:\n"); fprintf(stderr, "-------------\n"); _mesa_print_program(&rp->mesa_program.Base); @@ -1977,47 +2278,180 @@ static void dump_program(struct r300_fragment_program *rp) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); - - fprintf(stderr, "tex:\n"); - - for(i=0;i<rp->tex.length;i++) { - fprintf(stderr, "%08x\n", rp->tex.inst[i]); - } - - for (i=0;i<(rp->cur_node+1);i++) { + + for (n = 0; n < (rp->cur_node+1); n++) { fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "\ - "alu_end: %d, tex_end: %d\n", i, - rp->node[i].alu_offset, - rp->node[i].tex_offset, - rp->node[i].alu_end, - rp->node[i].tex_end); + "alu_end: %d, tex_end: %d\n", n, + rp->node[n].alu_offset, + rp->node[n].tex_offset, + rp->node[n].alu_end, + rp->node[n].tex_end); + + if (rp->tex.length) { + fprintf(stderr, " TEX:\n"); + for(i = rp->node[n].tex_offset; i <= rp->node[n].tex_offset+rp->node[n].tex_end; ++i) { + const char* instr; + + switch((rp->tex.inst[i] >> R300_FPITX_OPCODE_SHIFT) & 15) { + case R300_FPITX_OP_TEX: + instr = "TEX"; + break; + case R300_FPITX_OP_KIL: + instr = "KIL"; + break; + case R300_FPITX_OP_TXP: + instr = "TXP"; + break; + case R300_FPITX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (rp->tex.inst[i] >> R300_FPITX_DST_SHIFT) & 31, + (rp->tex.inst[i] & R300_FPITX_SRC_CONST) ? 'c': 't', + (rp->tex.inst[i] >> R300_FPITX_SRC_SHIFT) & 31, + (rp->tex.inst[i] & R300_FPITX_IMAGE_MASK) >> R300_FPITX_IMAGE_SHIFT, + rp->tex.inst[i]); + } + } + + for(i = rp->node[n].alu_offset; i <= rp->node[n].alu_offset+rp->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for(j = 0; j < 3; ++j) { + int regc = rp->alu.inst[i].inst1 >> (j*6); + int rega = rp->alu.inst[i].inst3 >> (j*6); + + sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_X) ? "x" : "", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Y) ? "y" : "", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_X) ? "x" : "", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Y) ? "y" : "", + (rp->alu.inst[i].inst1 & R300_FPI1_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (rp->alu.inst[i].inst1 >> R300_FPI1_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_REG) { + sprintf(dsta, "t%i.w ", (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + } + if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", (rp->alu.inst[i].inst3 >> R300_FPI3_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (rp->alu.inst[i].inst3 & R300_FPI3_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", + i, + srcc[0], srcc[1], srcc[2], dstc, rp->alu.inst[i].inst1, + srca[0], srca[1], srca[2], dsta, rp->alu.inst[i].inst3); + + for(j = 0; j < 3; ++j) { + int regc = rp->alu.inst[i].inst0 >> (j*7); + int rega = rp->alu.inst[i].inst2 >> (j*7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch(d % 4) { + case R300_FPI0_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", srcc[d / 4]); + break; + case R300_FPI0_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d-12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch(d/3) { + case 0: + sprintf(buf, "%s.yzx", srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, + (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], 'x' + (char)(d%3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d-9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, + (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], rp->alu.inst[i].inst0, + arga[0], arga[1], arga[2], rp->alu.inst[i].inst2); + } } - - fprintf(stderr, "%08x\n", - ((rp->tex_end << 16) | (R300_PFS_TEXI_0 >> 2))); - for (i=0;i<=rp->tex_end;i++) - fprintf(stderr, "%08x\n", rp->tex.inst[i]); - - /* dump program in pretty_print_command_stream.tcl-readable format */ - fprintf(stderr, "%08x\n", - ((rp->alu_end << 16) | (R300_PFS_INSTR0_0 >> 2))); - for (i=0;i<=rp->alu_end;i++) - fprintf(stderr, "%08x\n", rp->alu.inst[i].inst0); - - fprintf(stderr, "%08x\n", - ((rp->alu_end << 16) | (R300_PFS_INSTR1_0 >> 2))); - for (i=0;i<=rp->alu_end;i++) - fprintf(stderr, "%08x\n", rp->alu.inst[i].inst1); - - fprintf(stderr, "%08x\n", - ((rp->alu_end << 16) | (R300_PFS_INSTR2_0 >> 2))); - for (i=0;i<=rp->alu_end;i++) - fprintf(stderr, "%08x\n", rp->alu.inst[i].inst2); - - fprintf(stderr, "%08x\n", - ((rp->alu_end << 16) | (R300_PFS_INSTR3_0 >> 2))); - for (i=0;i<=rp->alu_end;i++) - fprintf(stderr, "%08x\n", rp->alu.inst[i].inst3); - - fprintf(stderr, "00000000\n"); } diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 6fa34ee482..11e2d42e49 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -162,11 +162,11 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - reg_start(0x4f18,0); - e32(0x00000003); + reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); } @@ -389,7 +389,7 @@ void r300Flush(GLcontext * ctx) #ifdef USER_BUFFERS #include "radeon_mm.h" -void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) +static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) { struct r300_dma_buffer *dmabuf; size = MAX2(size, RADEON_BUFFER_SIZE*16); @@ -503,7 +503,7 @@ void r300AllocDmaRegion(r300ContextPtr rmesa, } #else -void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) +static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) { struct r300_dma_buffer *dmabuf; int fd = rmesa->radeon.dri.fd; diff --git a/src/mesa/drivers/dri/r300/r300_maos.c b/src/mesa/drivers/dri/r300/r300_maos.c index fcb87cbbb5..b0d96f7601 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.c +++ b/src/mesa/drivers/dri/r300/r300_maos.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_maos_arrays.c,v 1.3 2003/02/23 23:59:01 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_maos.h b/src/mesa/drivers/dri/r300/r300_maos.h index 679f1c2558..ab28317894 100644 --- a/src/mesa/drivers/dri/r300/r300_maos.h +++ b/src/mesa/drivers/dri/r300/r300_maos.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_maos.h,v 1.1 2002/10/30 12:51:52 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 69bc994cf6..1f4a2d2e64 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -497,6 +497,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Zero to flush caches. */ #define R300_TX_CNTL 0x4100 +#define R300_TX_FLUSH 0x0 /* The upper enable bits are guessed, based on fglrx reported limits. */ #define R300_TX_ENABLE 0x4104 @@ -571,6 +572,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Some of the tests indicate that fgl has a fallback implementation of zbias * via pixel shaders. */ +#define R300_RE_ZBIAS_CNTL 0x42A0 /* GUESS */ #define R300_RE_ZBIAS_T_FACTOR 0x42A4 #define R300_RE_ZBIAS_T_CONSTANT 0x42A8 #define R300_RE_ZBIAS_W_FACTOR 0x42AC @@ -1045,7 +1047,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * WRT swizzling. If, for example, you want to load an R component into an * Alpha operand, this R component is taken from a *color* source, not from * an alpha source. The corresponding register doesn't even have to appear in - * the alpha sources list. (I hope this alll makes sense to you) + * the alpha sources list. (I hope this all makes sense to you) * * Destination selection * The destination register index is in FPI1 (color) and FPI3 (alpha) @@ -1072,6 +1074,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_FPI1_SRC2C_SHIFT 12 # define R300_FPI1_SRC2C_MASK (31 << 12) # define R300_FPI1_SRC2C_CONST (1 << 17) +# define R300_FPI1_SRC_MASK 0x0003ffff # define R300_FPI1_DSTC_SHIFT 18 # define R300_FPI1_DSTC_MASK (31 << 18) # define R300_FPI1_DSTC_REG_MASK_SHIFT 23 @@ -1093,6 +1096,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_FPI3_SRC2A_SHIFT 12 # define R300_FPI3_SRC2A_MASK (31 << 12) # define R300_FPI3_SRC2A_CONST (1 << 17) +# define R300_FPI3_SRC_MASK 0x0003ffff # define R300_FPI3_DSTA_SHIFT 18 # define R300_FPI3_DSTA_MASK (31 << 18) # define R300_FPI3_DSTA_REG (1 << 23) @@ -1322,8 +1326,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * Set to 0A before 3D operations, set to 02 afterwards. */ #define R300_RB3D_DSTCACHE_CTLSTAT 0x4E4C -# define R300_RB3D_DSTCACHE_02 0x00000002 -# define R300_RB3D_DSTCACHE_0A 0x0000000A +# define R300_RB3D_DSTCACHE_UNKNOWN_02 0x00000002 +# define R300_RB3D_DSTCACHE_UNKNOWN_0A 0x0000000A /* gap */ /* There seems to be no "write only" setting, so use Z-test = ALWAYS @@ -1394,6 +1398,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* gap */ +#define R300_RB3D_ZCACHE_CTLSTAT 0x4F18 /* GUESS */ +# define R300_RB3D_ZCACHE_UNKNOWN_01 0x1 +# define R300_RB3D_ZCACHE_UNKNOWN_03 0x3 + +/* gap */ + #define R300_RB3D_DEPTHOFFSET 0x4F20 #define R300_RB3D_DEPTHPITCH 0x4F24 # define R300_DEPTHPITCH_MASK 0x00001FF8 /* GUESS */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 211c451f66..0864558e8d 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -344,10 +344,10 @@ GLboolean r300_run_vb_render(GLcontext *ctx, r300UpdateShaderStates(rmesa); reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - reg_start(0x4f18,0); - e32(0x00000003); + reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); r300EmitState(rmesa); @@ -360,10 +360,10 @@ GLboolean r300_run_vb_render(GLcontext *ctx, } reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a/*0x2*/); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A /*R300_RB3D_DSTCACHE_UNKNOWN_02*/); - reg_start(0x4f18,0); - e32(0x00000003/*0x1*/); + reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); + e32(R300_RB3D_ZCACHE_UNKNOWN_03 /*R300_RB3D_ZCACHE_UNKNOWN_01*/); #ifdef USER_BUFFERS r300UseArrays(ctx); diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index e8bce93458..41c07a3188 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -12,15 +12,6 @@ r300BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) { switch(target){ case GL_VERTEX_PROGRAM_ARB: - //rmesa->curr_vp = (struct gl_vertex_program *)vp; - //vp->ref_count++; -#if 0 - if((vp->ref_count % 1500) == 0) { - fprintf(stderr, "id %p, ref_count %d\n", vp, vp->ref_count); - _mesa_print_program(&vp->mesa_program.Base); - } -#endif - case GL_FRAGMENT_PROGRAM_ARB: break; default: @@ -54,18 +45,9 @@ r300NewProgram(GLcontext *ctx, GLenum target, GLuint id) return NULL; } - static void r300DeleteProgram(GLcontext *ctx, struct gl_program *prog) { -#if 0 - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_vertex_program *vp=(void *)prog; - - if(rmesa->curr_vp == vp) - rmesa->curr_vp = NULL; -#endif - _mesa_delete_program(ctx, prog); } @@ -93,9 +75,6 @@ r300ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog) static GLboolean r300IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog) { - //struct r300_vertex_program *vp=(void *)prog; - //r300ContextPtr rmesa = R300_CONTEXT(ctx); - return 1; } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 50dcee4755..fff11653de 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -71,14 +71,14 @@ static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) GLubyte color[4]; r300ContextPtr rmesa = R300_CONTEXT(ctx); - R300_STATECHANGE(rmesa, unk4E10); + R300_STATECHANGE(rmesa, blend_color); CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); - rmesa->hw.unk4E10.cmd[1] = r300PackColor(4, color[3], color[0], + rmesa->hw.blend_color.cmd[1] = r300PackColor(4, color[3], color[0], color[1], color[2]); } @@ -337,17 +337,17 @@ static void update_early_z(GLcontext *ctx) */ r300ContextPtr r300 = R300_CONTEXT(ctx); - R300_STATECHANGE(r300, unk4F10); + R300_STATECHANGE(r300, zstencil_format); if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) /* disable early Z */ - r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_DISABLE; + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; else { if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) /* enable early Z */ - r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_ENABLE; + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_ENABLE; else /* disable early Z */ - r300->hw.unk4F10.cmd[2] = R300_EARLY_Z_DISABLE; + r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; } } @@ -533,11 +533,11 @@ static void r300Enable(GLcontext* ctx, GLenum cap, GLboolean state) break; case GL_POLYGON_OFFSET_FILL: - R300_STATECHANGE(r300, unk42B4); + R300_STATECHANGE(r300, occlusion_cntl); if(state){ - r300->hw.unk42B4.cmd[1] |= (3<<0); + r300->hw.occlusion_cntl.cmd[1] |= (3<<0); } else { - r300->hw.unk42B4.cmd[1] &= ~(3<<0); + r300->hw.occlusion_cntl.cmd[1] &= ~(3<<0); } break; default: @@ -591,9 +591,9 @@ static void r300UpdatePolygonMode(GLcontext *ctx) } } - if (r300->hw.unk4288.cmd[1] != hw_mode) { - R300_STATECHANGE(r300, unk4288); - r300->hw.unk4288.cmd[1] = hw_mode; + if (r300->hw.polygon_mode.cmd[1] != hw_mode) { + R300_STATECHANGE(r300, polygon_mode); + r300->hw.polygon_mode.cmd[1] = hw_mode; } } @@ -832,13 +832,13 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - R300_STATECHANGE(rmesa, unk4274); + R300_STATECHANGE(rmesa, shade); switch (mode) { case GL_FLAT: - rmesa->hw.unk4274.cmd[2] = R300_RE_SHADE_MODEL_FLAT; + rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; break; case GL_SMOOTH: - rmesa->hw.unk4274.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH; + rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH; break; default: return; @@ -1070,8 +1070,8 @@ r300FetchStateParameter(GLcontext *ctx, switch(state[1]) { case STATE_R300_WINDOW_DIMENSION: - value[0] = r300->radeon.dri.drawable->w; /* width */ - value[1] = r300->radeon.dri.drawable->h; /* height */ + value[0] = r300->radeon.dri.drawable->w*0.5f;/* width*0.5 */ + value[1] = r300->radeon.dri.drawable->h*0.5f;/* height*0.5 */ value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ value[3] = 1.0F; /* not used */ break; @@ -1085,20 +1085,20 @@ r300FetchStateParameter(GLcontext *ctx, * Update R300's own internal state parameters. * For now just STATE_R300_WINDOW_DIMENSION */ -static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) +void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) { - struct r300_vertex_program_cont *vpc; + struct r300_fragment_program *fp; struct gl_program_parameter_list *paramList; GLuint i; if(!(new_state & (_NEW_BUFFERS|_NEW_PROGRAM))) return; - vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; - if (!vpc) + fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; + if (!fp) return; - paramList = vpc->mesa_program.Base.Parameters; + paramList = fp->mesa_program.Base.Parameters; if (!paramList) return; @@ -1221,12 +1221,12 @@ void r300_setup_textures(GLcontext *ctx) R300_STATECHANGE(r300, txe); R300_STATECHANGE(r300, tex.filter); - R300_STATECHANGE(r300, tex.unknown1); + R300_STATECHANGE(r300, tex.filter_1); R300_STATECHANGE(r300, tex.size); R300_STATECHANGE(r300, tex.format); R300_STATECHANGE(r300, tex.pitch); R300_STATECHANGE(r300, tex.offset); - R300_STATECHANGE(r300, tex.unknown4); + R300_STATECHANGE(r300, tex.chroma_key); R300_STATECHANGE(r300, tex.border_color); r300->hw.txe.cmd[R300_TXE_ENABLE]=0x0; @@ -1263,7 +1263,7 @@ void r300_setup_textures(GLcontext *ctx) r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = gen_fixed_filter(t->filter) | (hw_tmu << 28); /* Currently disabled! */ - r300->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80; r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->size; r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->format; r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pitch_reg; @@ -1277,7 +1277,7 @@ void r300_setup_textures(GLcontext *ctx) WARN_ONCE("micro tiling enabled!\n"); } - r300->hw.tex.unknown4.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->pp_border_color; last_hw_tmu = hw_tmu; @@ -1287,12 +1287,12 @@ void r300_setup_textures(GLcontext *ctx) } r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER_0, last_hw_tmu + 1); - r300->hw.tex.unknown1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1); r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1); r300->hw.tex.format.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1); r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_PITCH_0, last_hw_tmu + 1); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1); - r300->hw.tex.unknown4.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); @@ -1977,7 +1977,7 @@ void r300ResetHwState(r300ContextPtr r300) /* Initialize magic registers TODO : learn what they really do, or get rid of those we don't have to touch */ - r300->hw.unk2080.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ + r300->hw.vap_cntl.cmd[1] = 0x0030045A; //0x0030065a /* Dangerous */ r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA | R300_VPORT_X_OFFSET_ENA @@ -1991,9 +1991,9 @@ void r300ResetHwState(r300ContextPtr r300) r300->hw.unk2134.cmd[1] = 0x00FFFFFF; r300->hw.unk2134.cmd[2] = 0x00000000; if (_mesa_little_endian()) - r300->hw.unk2140.cmd[1] = 0x00000000; + r300->hw.vap_cntl_status.cmd[1] = 0x00000000; else - r300->hw.unk2140.cmd[1] = 0x00000002; + r300->hw.vap_cntl_status.cmd[1] = 0x00000002; #if 0 /* Done in setup routing */ ((drm_r300_cmd_header_t*)r300->hw.vir[0].cmd)->packet0.count = 1; @@ -2084,16 +2084,16 @@ void r300ResetHwState(r300ContextPtr r300) r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0); r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0); - r300->hw.unk4274.cmd[1] = 0x00000002; + r300->hw.shade.cmd[1] = 0x00000002; r300ShadeModel(ctx, ctx->Light.ShadeModel); - r300->hw.unk4274.cmd[3] = 0x00000000; - r300->hw.unk4274.cmd[4] = 0x00000000; + r300->hw.shade.cmd[3] = 0x00000000; + r300->hw.shade.cmd[4] = 0x00000000; r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); - r300->hw.unk4288.cmd[2] = 0x00000001; - r300->hw.unk4288.cmd[3] = 0x00000000; - r300->hw.unk42A0.cmd[1] = 0x00000000; + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; + r300->hw.zbias_cntl.cmd[1] = 0x00000000; r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits); r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill); @@ -2151,8 +2151,8 @@ void r300ResetHwState(r300ContextPtr r300) #endif r300BlendColor(ctx, ctx->Color.BlendColor); - r300->hw.unk4E10.cmd[2] = 0; - r300->hw.unk4E10.cmd[3] = 0; + r300->hw.blend_color.cmd[2] = 0; + r300->hw.blend_color.cmd[3] = 0; /* Again, r300ClearBuffer uses this */ r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + @@ -2184,10 +2184,10 @@ void r300ResetHwState(r300ContextPtr r300) switch (ctx->Visual.depthBits) { case 16: - r300->hw.unk4F10.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; break; case 24: - r300->hw.unk4F10.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; break; default: fprintf(stderr, "Error: Unsupported depth %d... exiting\n", @@ -2196,10 +2196,10 @@ void r300ResetHwState(r300ContextPtr r300) } /* z compress? */ - //r300->hw.unk4F10.cmd[1] |= R300_DEPTH_FORMAT_UNK32; + //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; - r300->hw.unk4F10.cmd[3] = 0x00000003; - r300->hw.unk4F10.cmd[4] = 0x00000000; + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; r300->hw.zb.cmd[R300_ZB_OFFSET] = r300->radeon.radeonScreen->depthOffset + diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index f6a50655d1..52e606f241 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -61,6 +61,7 @@ do { \ extern void r300ResetHwState(r300ContextPtr r300); +extern void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state); extern void r300InitState(r300ContextPtr r300); extern void r300InitStateFuncs(struct dd_function_table* functions); extern void r300UpdateViewportOffset( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 6348ba1982..eb72802f8b 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_tex.c,v 1.2 2002/11/05 17:46:08 tsi Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h index a18ff0e2ce..10aabc8b4b 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.h +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_tex.h,v 1.1 2002/10/30 12:51:53 alanh Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index 96973c0098..f531b54d11 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_texmem.c,v 1.5 2002/12/17 00:32:56 dawes Exp $ */ /************************************************************************** Copyright (C) Tungsten Graphics 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index cc356afb23..4bc0ea14f8 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/r300/r300_texstate.c,v 1.3 2003/02/15 22:18:47 dawes Exp $ */ /* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index cc8e0775f8..aa2f20127b 100644 --- a/src/mesa/drivers/dri/r300/r300_vertexprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -891,8 +891,8 @@ static void position_invariant(struct gl_program *prog) #endif paramList = prog->Parameters; - vpi = malloc((prog->NumInstructions + 4) * sizeof(struct prog_instruction)); - memset(vpi, 0, 4 * sizeof(struct prog_instruction)); + vpi = _mesa_alloc_instructions (prog->NumInstructions + 4); + _mesa_init_instructions (vpi, prog->NumInstructions + 4); for (i=0; i < 4; i++) { GLint idx; @@ -910,11 +910,11 @@ static void position_invariant(struct gl_program *prog) vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; vpi[i].SrcReg[1].File = PROGRAM_INPUT; vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; - vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; #else if (i == 0) vpi[i].Opcode = OPCODE_MUL; @@ -934,7 +934,7 @@ static void position_invariant(struct gl_program *prog) vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; vpi[i].SrcReg[0].Index = idx; - vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; vpi[i].SrcReg[1].File = PROGRAM_INPUT; vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; @@ -943,12 +943,12 @@ static void position_invariant(struct gl_program *prog) if (i > 0) { vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; vpi[i].SrcReg[2].Index = 0; - vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; } #endif } - memcpy(&vpi[i], prog->Instructions, prog->NumInstructions * sizeof(struct prog_instruction)); + _mesa_copy_instructions (&vpi[i], prog->Instructions, prog->NumInstructions); free(prog->Instructions); @@ -964,23 +964,19 @@ static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, GLuint temp_index) { - - gl_state_index tokens[STATE_LENGTH] - = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 }; struct prog_instruction *vpi; struct prog_instruction *vpi_insert; - GLuint window_index; int i = 0; - vpi = malloc((prog->NumInstructions + 5) * sizeof(struct prog_instruction)); + vpi = _mesa_alloc_instructions (prog->NumInstructions + 2); + _mesa_init_instructions (vpi, prog->NumInstructions + 2); /* all but END */ - memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction)); + _mesa_copy_instructions (vpi, prog->Instructions, prog->NumInstructions - 1); /* END */ - memcpy(&vpi[prog->NumInstructions + 4], &prog->Instructions[prog->NumInstructions - 1], - sizeof(struct prog_instruction)); - + _mesa_copy_instructions (&vpi[prog->NumInstructions + 1], + &prog->Instructions[prog->NumInstructions - 1], + 1); vpi_insert = &vpi[prog->NumInstructions - 1]; - memset(vpi_insert, 0, 5 * sizeof(struct prog_instruction)); vpi_insert[i].Opcode = OPCODE_MOV; @@ -991,62 +987,10 @@ static void insert_wpos(struct r300_vertex_program *vp, vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; i++; - /* perspective divide */ - vpi_insert[i].Opcode = OPCODE_RCP; - - vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY; - vpi_insert[i].DstReg.Index = temp_index; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_W; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); - i++; - - vpi_insert[i].Opcode = OPCODE_MUL; - - vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY; - vpi_insert[i].DstReg.Index = temp_index; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZ; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - vpi_insert[i].SrcReg[1].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[1].Index = temp_index; - vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_ZERO); - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(prog->Parameters, tokens); - - vpi_insert[i].Opcode = OPCODE_MAD; - - vpi_insert[i].DstReg.File = PROGRAM_TEMPORARY; - vpi_insert[i].DstReg.Index = temp_index; - vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZ; - vpi_insert[i].DstReg.CondMask = COND_TR; - - vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; - vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - vpi_insert[i].SrcReg[1].File = PROGRAM_STATE_VAR; - vpi_insert[i].SrcReg[1].Index = window_index; - vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_ZERO); - - vpi_insert[i].SrcReg[2].File = PROGRAM_STATE_VAR; - vpi_insert[i].SrcReg[2].Index = window_index; - vpi_insert[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - vpi_insert[i].Opcode = OPCODE_MUL; + vpi_insert[i].Opcode = OPCODE_MOV; vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx; @@ -1055,11 +999,7 @@ static void insert_wpos(struct r300_vertex_program *vp, vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; vpi_insert[i].SrcReg[0].Index = temp_index; - vpi_insert[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); - - vpi_insert[i].SrcReg[1].File = PROGRAM_STATE_VAR; - vpi_insert[i].SrcReg[1].Index = window_index; - vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_ONE); + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; i++; free(prog->Instructions); diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c index 66d1b153b3..22f943ebf3 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.c +++ b/src/mesa/drivers/dri/r300/radeon_context.c @@ -70,7 +70,10 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) switch (name) { case GL_VENDOR: - return (GLubyte *) "Tungsten Graphics, Inc."; + if (IS_R300_CLASS(radeon->radeonScreen)) + return (GLubyte *) "DRI R300 Project"; + else + return (GLubyte *) "Tungsten Graphics, Inc."; case GL_RENDERER: { diff --git a/src/mesa/drivers/dri/r300/radeon_mm.c b/src/mesa/drivers/dri/r300/radeon_mm.c index 32ed1f4393..1502dac8fc 100644 --- a/src/mesa/drivers/dri/r300/radeon_mm.c +++ b/src/mesa/drivers/dri/r300/radeon_mm.c @@ -284,7 +284,7 @@ static void emit_lin_cp(r300ContextPtr rmesa, unsigned long dst, unsigned long s } reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); reg_start(0x342c,0); e32(0x00000005); diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c index 1b74f6779b..cc779d684f 100644 --- a/src/mesa/drivers/dri/r300/radeon_span.c +++ b/src/mesa/drivers/dri/r300/radeon_span.c @@ -1,4 +1,3 @@ -/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.c,v 1.6 2002/10/30 12:51:56 alanh Exp $ */ /************************************************************************** Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 6bc2c4aa5c..d7c2d1407d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -64,6 +64,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color @@ -122,6 +123,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_texture_env_crossbar", NULL }, { "GL_ARB_texture_env_dot3", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_blend_logic_op", NULL }, { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, @@ -594,12 +596,14 @@ radeonMakeCurrent( __DRIcontextPrivate *driContextPriv, driDrawableInitVBlank( driDrawPriv, newCtx->vblank_flags, &newCtx->vbl_seq ); } - - if ( (newCtx->dri.drawable != driDrawPriv) - || (newCtx->dri.readable != driReadPriv) ) { + + newCtx->dri.readable = driReadPriv; + + if ( (newCtx->dri.drawable != driDrawPriv) || + newCtx->lastStamp != driDrawPriv->lastStamp ) { newCtx->dri.drawable = driDrawPriv; - newCtx->dri.readable = driReadPriv; + radeonSetCliprects(newCtx); radeonUpdateWindow( newCtx->glCtx ); radeonUpdateViewportOffset( newCtx->glCtx ); } diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index e6ab6af456..cdf8a19fb1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -96,7 +96,6 @@ void radeonGetLock( radeonContextPtr rmesa, GLuint flags ) radeonSetCliprects( rmesa ); radeonUpdateViewportOffset( rmesa->glCtx ); driUpdateFramebufferSize(rmesa->glCtx, drawable); - rmesa->lastStamp = drawable->lastStamp; } RADEON_STATECHANGE( rmesa, ctx ); diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index e19202fa44..4de05c7697 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -1675,6 +1675,8 @@ void radeonSetCliprects( radeonContextPtr rmesa ) if (rmesa->state.scissor.enabled) radeonRecalcScissorRects( rmesa ); + + rmesa->lastStamp = drawable->lastStamp; } diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c index f859217069..43422db9a8 100644 --- a/src/mesa/drivers/dri/savage/savage_xmesa.c +++ b/src/mesa/drivers/dri/savage/savage_xmesa.c @@ -61,6 +61,7 @@ #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_secondary_color #include "extension_helper.h" @@ -135,6 +136,7 @@ static const struct dri_extension card_extensions[] = { "GL_ARB_multisample", GL_ARB_multisample_functions }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_lod_bias", NULL }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, diff --git a/src/mesa/drivers/dri/sis/sis_context.c b/src/mesa/drivers/dri/sis/sis_context.c index 89b81da347..b21df0a61e 100644 --- a/src/mesa/drivers/dri/sis/sis_context.c +++ b/src/mesa/drivers/dri/sis/sis_context.c @@ -60,6 +60,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_ARB_multisample #define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color #include "extension_helper.h" @@ -79,6 +80,7 @@ struct dri_extension card_extensions[] = { "GL_ARB_texture_border_clamp", NULL }, { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, /*{ "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },*/ { "GL_EXT_texture_lod_bias", NULL }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, diff --git a/src/mesa/drivers/dri/unichrome/via_context.c b/src/mesa/drivers/dri/unichrome/via_context.c index bc5a414df6..4d25d328e3 100644 --- a/src/mesa/drivers/dri/unichrome/via_context.c +++ b/src/mesa/drivers/dri/unichrome/via_context.c @@ -64,6 +64,7 @@ #define need_GL_ARB_multisample #define need_GL_ARB_point_parameters +#define need_GL_ARB_vertex_buffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color #include "extension_helper.h" @@ -372,6 +373,7 @@ const struct dri_extension card_extensions[] = { "GL_ARB_texture_env_combine", NULL }, /* { "GL_ARB_texture_env_dot3", NULL }, */ { "GL_ARB_texture_mirrored_repeat", NULL }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, @@ -766,9 +768,7 @@ void viaXMesaWindowMoved(struct via_context *vmesa) drawable); } - draw_buffer->drawXoff = (GLuint)(((drawable->x * bytePerPixel) & 0x1f) / - bytePerPixel); - draw_buffer->drawX = drawable->x - draw_buffer->drawXoff; + draw_buffer->drawX = drawable->x; draw_buffer->drawY = drawable->y; draw_buffer->drawW = drawable->w; draw_buffer->drawH = drawable->h; @@ -780,9 +780,7 @@ void viaXMesaWindowMoved(struct via_context *vmesa) readable); } - read_buffer->drawXoff = (GLuint)(((readable->x * bytePerPixel) & 0x1f) / - bytePerPixel); - read_buffer->drawX = readable->x - read_buffer->drawXoff; + read_buffer->drawX = readable->x; read_buffer->drawY = readable->y; read_buffer->drawW = readable->w; read_buffer->drawH = readable->h; @@ -793,13 +791,24 @@ void viaXMesaWindowMoved(struct via_context *vmesa) draw_buffer->drawX * bytePerPixel); vmesa->front.origMap = (vmesa->front.map + - draw_buffer->drawY * vmesa->front.pitch + - draw_buffer->drawX * bytePerPixel); + draw_buffer->drawY * vmesa->front.pitch + + draw_buffer->drawX * bytePerPixel); + + vmesa->back.orig = (vmesa->back.offset + + draw_buffer->drawY * vmesa->back.pitch + + draw_buffer->drawX * bytePerPixel); - vmesa->back.orig = vmesa->back.offset; - vmesa->depth.orig = vmesa->depth.offset; - vmesa->back.origMap = vmesa->back.map; - vmesa->depth.origMap = vmesa->depth.map; + vmesa->back.origMap = (vmesa->back.map + + draw_buffer->drawY * vmesa->back.pitch + + draw_buffer->drawX * bytePerPixel); + + vmesa->depth.orig = (vmesa->depth.offset + + draw_buffer->drawY * vmesa->depth.pitch + + draw_buffer->drawX * bytePerPixel); + + vmesa->depth.origMap = (vmesa->depth.map + + draw_buffer->drawY * vmesa->depth.pitch + + draw_buffer->drawX * bytePerPixel); viaCalcViewport(vmesa->glCtx); } diff --git a/src/mesa/drivers/dri/unichrome/via_context.h b/src/mesa/drivers/dri/unichrome/via_context.h index 77161a8d5d..fecd2782fb 100644 --- a/src/mesa/drivers/dri/unichrome/via_context.h +++ b/src/mesa/drivers/dri/unichrome/via_context.h @@ -104,11 +104,6 @@ struct via_renderbuffer { int drawW; int drawH; - int drawXoff; /* drawX is 32byte aligned - this is - * the delta to the real origin, in - * pixel units. - */ - __DRIdrawablePrivate *dPriv; }; diff --git a/src/mesa/drivers/dri/unichrome/via_ioctl.c b/src/mesa/drivers/dri/unichrome/via_ioctl.c index 5d102de93e..4a733fb00c 100644 --- a/src/mesa/drivers/dri/unichrome/via_ioctl.c +++ b/src/mesa/drivers/dri/unichrome/via_ioctl.c @@ -187,7 +187,7 @@ static void viaFillBuffer(struct via_context *vmesa, int w = pbox[i].x2 - pbox[i].x1; int h = pbox[i].y2 - pbox[i].y1; - int offset = (buffer->orig + + int offset = (buffer->offset + y * buffer->pitch + x * bytePerPixel); @@ -276,7 +276,7 @@ static void viaClear(GLcontext *ctx, GLbitfield mask) /* flip top to bottom */ cy = dPriv->h - cy - ch; - cx += vrb->drawX + vrb->drawXoff; + cx += vrb->drawX; cy += vrb->drawY; if (!all) { @@ -359,8 +359,8 @@ static void viaDoSwapBuffers(struct via_context *vmesa, GLint w = b->x2 - b->x1; GLint h = b->y2 - b->y1; - GLuint src = back->orig + y * back->pitch + x * bytePerPixel; - GLuint dest = front->orig + y * front->pitch + x * bytePerPixel; + GLuint src = back->offset + y * back->pitch + x * bytePerPixel; + GLuint dest = front->offset + y * front->pitch + x * bytePerPixel; viaBlit(vmesa, bytePerPixel << 3, @@ -747,7 +747,7 @@ static void via_emit_cliprect(struct via_context *vmesa, : HC_HDBFM_RGB565); GLuint pitch = buffer->pitch; - GLuint offset = buffer->orig; + GLuint offset = buffer->offset; if (0) fprintf(stderr, "emit cliprect for box %d,%d %d,%d\n", @@ -768,7 +768,7 @@ static void via_emit_cliprect(struct via_context *vmesa, vb[4] = (HC_SubA_HDBBasL << 24) | (offset & 0xFFFFFF); vb[5] = (HC_SubA_HDBBasH << 24) | ((offset & 0xFF000000) >> 24); - vb[6] = (HC_SubA_HSPXYOS << 24) | ((31 - buffer->drawXoff) << HC_HSPXOS_SHIFT); + vb[6] = (HC_SubA_HSPXYOS << 24); vb[7] = (HC_SubA_HDBFM << 24) | HC_HDBLoc_Local | format | pitch; } @@ -887,22 +887,18 @@ void viaFlushDmaLocked(struct via_context *vmesa, GLuint flags) struct via_renderbuffer *const vrb = (struct via_renderbuffer *) dPriv->driverPrivate; - for (i = 0; i < vmesa->numClipRects; i++) { drm_clip_rect_t b; - b.x1 = pbox[i].x1 - (vrb->drawX + vrb->drawXoff); - b.x2 = pbox[i].x2 - (vrb->drawX + vrb->drawXoff); - b.y1 = pbox[i].y1 - vrb->drawY; - b.y2 = pbox[i].y2 - vrb->drawY; + b.x1 = pbox[i].x1; + b.x2 = pbox[i].x2; + b.y1 = pbox[i].y1; + b.y2 = pbox[i].y2; if (vmesa->scissor && !intersect_rect(&b, &b, &vmesa->scissorRect)) continue; - b.x1 += vrb->drawXoff; - b.x2 += vrb->drawXoff; - via_emit_cliprect(vmesa, &b); if (fire_buffer(vmesa) != 0) { diff --git a/src/mesa/drivers/dri/unichrome/via_span.c b/src/mesa/drivers/dri/unichrome/via_span.c index f1ed98036b..3a16dadd23 100644 --- a/src/mesa/drivers/dri/unichrome/via_span.c +++ b/src/mesa/drivers/dri/unichrome/via_span.c @@ -46,7 +46,7 @@ GLuint pitch = vrb->pitch; \ GLuint height = dPriv->h; \ GLint p = 0; \ - char *buf = (char *)(vrb->origMap + vrb->drawXoff * vrb->bpp); \ + char *buf = (char *)(vrb->origMap); \ (void) p; /* ================================================================ @@ -82,7 +82,7 @@ __DRIdrawablePrivate *dPriv = vrb->dPriv; \ GLuint depth_pitch = vrb->pitch; \ GLuint height = dPriv->h; \ - char *buf = (char *)(vrb->map + (vrb->drawXoff * vrb->bpp/8)) + char *buf = (char *)(vrb->map) #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS diff --git a/src/mesa/drivers/dri/unichrome/via_state.c b/src/mesa/drivers/dri/unichrome/via_state.c index 30b9dc289a..bccdbe9326 100644 --- a/src/mesa/drivers/dri/unichrome/via_state.c +++ b/src/mesa/drivers/dri/unichrome/via_state.c @@ -500,10 +500,8 @@ void viaEmitState(struct via_context *vmesa) OUT_RING( HC_HEADER2 ); OUT_RING( (HC_ParaType_NotTex << 16) ); - OUT_RING( (HC_SubA_HSPXYOS << 24) | - (((32- vrb->drawXoff) & 0x1f) << HC_HSPXOS_SHIFT)); - OUT_RING( (HC_SubA_HSPXYOS << 24) | - (((32 - vrb->drawXoff) & 0x1f) << HC_HSPXOS_SHIFT)); + OUT_RING( (HC_SubA_HSPXYOS << 24) ); + OUT_RING( (HC_SubA_HSPXYOS << 24) ); ADVANCE_RING(); } @@ -712,12 +710,8 @@ static void viaColorMask(GLcontext *ctx, } -/* ============================================================= - */ - -/* Using drawXoff like this is incorrect outside of locked regions. - * This hardware just isn't capable of private back buffers without +/* This hardware just isn't capable of private back buffers without * glitches and/or a hefty locking scheme. */ void viaCalcViewport(GLcontext *ctx) @@ -729,12 +723,10 @@ void viaCalcViewport(GLcontext *ctx) const GLfloat *v = ctx->Viewport._WindowMap.m; GLfloat *m = vmesa->ViewportMatrix.m; - /* See also via_translate_vertex. - */ m[MAT_SX] = v[MAT_SX]; - m[MAT_TX] = v[MAT_TX] + SUBPIXEL_X + vrb->drawXoff; + m[MAT_TX] = v[MAT_TX] + vrb->drawX + SUBPIXEL_X; m[MAT_SY] = - v[MAT_SY]; - m[MAT_TY] = - v[MAT_TY] + dPriv->h + SUBPIXEL_Y; + m[MAT_TY] = - v[MAT_TY] + vrb->drawY + SUBPIXEL_Y + vrb->drawH; m[MAT_SZ] = v[MAT_SZ] * (1.0 / vmesa->depth_max); m[MAT_TZ] = v[MAT_TZ] * (1.0 / vmesa->depth_max); } diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 776928dec4..b513dc8d40 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -483,6 +483,12 @@ xmesa_free_buffer(XMesaBuffer buffer) /* mark as delete pending */ fb->DeletePending = GL_TRUE; + + /* Since the X window for the XMesaBuffer is going away, we don't + * want to dereference this pointer in the future. + */ + b->frontxrb->drawable = 0; + /* Unreference. If count = zero we'll really delete the buffer */ _mesa_unreference_framebuffer(&fb); @@ -1836,16 +1842,18 @@ XMesaDestroyBuffer(XMesaBuffer b) * 1. the first time a buffer is bound to a context. * 2. from glViewport to poll for window size changes * 3. from the XMesaResizeBuffers() API function. + * Note: it's possible (and legal) for xmctx to be NULL. That can happen + * when resizing a buffer when no rendering context is bound. */ void xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer) { GLuint width, height; - xmesa_get_window_size(xmctx->display, drawBuffer, &width, &height); + xmesa_get_window_size(drawBuffer->display, drawBuffer, &width, &height); if (drawBuffer->mesa_buffer.Width != width || drawBuffer->mesa_buffer.Height != height) { - _mesa_resize_framebuffer(&(xmctx->mesa), - &(drawBuffer->mesa_buffer), width, height); + GLcontext *ctx = xmctx ? &xmctx->mesa : NULL; + _mesa_resize_framebuffer(ctx, &(drawBuffer->mesa_buffer), width, height); } drawBuffer->mesa_buffer.Initialized = GL_TRUE; /* XXX TEMPORARY? */ } @@ -2169,7 +2177,7 @@ void XMesaSwapBuffers( XMesaBuffer b ) } #endif if (b->backxrb->ximage) { - /* Copy Ximage from host's memory to server's window */ + /* Copy Ximage (back buf) from client memory to server window */ #if defined(USE_XSHM) && !defined(XFree86Server) if (b->shm) { /*_glthread_LOCK_MUTEX(_xmesa_lock);*/ @@ -2191,8 +2199,8 @@ void XMesaSwapBuffers( XMesaBuffer b ) /*_glthread_UNLOCK_MUTEX(_xmesa_lock);*/ } } - else { - /* Copy pixmap to window on server */ + else if (b->backxrb->pixmap) { + /* Copy pixmap (back buf) to window (front buf) on server */ /*_glthread_LOCK_MUTEX(_xmesa_lock);*/ XMesaCopyArea( b->xm_visual->display, b->backxrb->pixmap, /* source drawable */ @@ -2493,6 +2501,8 @@ XMesaResizeBuffers( XMesaBuffer b ) { GET_CURRENT_CONTEXT(ctx); XMesaContext xmctx = XMESA_CONTEXT(ctx); + if (!xmctx) + return; xmesa_check_and_update_buffer_size(xmctx, b); } diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c index 747971a6c3..c1fa23328f 100644 --- a/src/mesa/drivers/x11/xm_buffer.c +++ b/src/mesa/drivers/x11/xm_buffer.c @@ -168,9 +168,6 @@ alloc_back_shm_ximage(XMesaBuffer b, GLuint width, GLuint height) static void alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) { - if (width == 0 || height == 0) - return; - if (b->db_mode == BACK_XIMAGE) { /* Deallocate the old backxrb->ximage, if any */ if (b->backxrb->ximage) { @@ -186,6 +183,9 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) b->backxrb->ximage = NULL; } + if (width == 0 || height == 0) + return; + /* Allocate new back buffer */ #ifdef XFree86Server /* Allocate a regular XImage for the back buffer. */ @@ -218,20 +218,20 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) b->backxrb->pixmap = None; } else if (b->db_mode == BACK_PIXMAP) { - if (!width) - width = 1; - if (!height) - height = 1; - /* Free the old back pixmap */ if (b->backxrb->pixmap) { - XMesaFreePixmap(b->xm_visual->display, b->backxrb->pixmap); + XMesaFreePixmap(b->xm_visual->display, b->backxrb->pixmap); + b->backxrb->pixmap = 0; + } + + if (width > 0 && height > 0) { + /* Allocate new back pixmap */ + b->backxrb->pixmap = XMesaCreatePixmap(b->xm_visual->display, + b->frontxrb->drawable, + width, height, + GET_VISUAL_DEPTH(b->xm_visual)); } - /* Allocate new back pixmap */ - b->backxrb->pixmap = XMesaCreatePixmap(b->xm_visual->display, - b->frontxrb->drawable, - width, height, - GET_VISUAL_DEPTH(b->xm_visual)); + b->backxrb->ximage = NULL; } } @@ -250,6 +250,7 @@ xmesa_delete_renderbuffer(struct gl_renderbuffer *rb) /** * Reallocate renderbuffer storage for front color buffer. + * Called via gl_renderbuffer::AllocStorage() */ static GLboolean xmesa_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb, @@ -260,6 +261,7 @@ xmesa_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb, /* just clear these to be sure we don't accidentally use them */ xrb->origin1 = NULL; xrb->origin2 = NULL; + xrb->origin3 = NULL; xrb->origin4 = NULL; /* for the FLIP macro: */ @@ -275,6 +277,7 @@ xmesa_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb, /** * Reallocate renderbuffer storage for back color buffer. + * Called via gl_renderbuffer::AllocStorage() */ static GLboolean xmesa_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb, @@ -309,8 +312,12 @@ xmesa_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb, xrb->origin4 = (GLuint *) xrb->ximage->data + xrb->width4 * (height - 1); } else { - /* this assertion will fail if we happend to run out of memory */ - /*assert(xrb->pixmap);*/ + /* out of memory or buffer size is 0 x 0 */ + xrb->width1 = xrb->width2 = xrb->width3 = xrb->width4 = 0; + xrb->origin1 = NULL; + xrb->origin2 = NULL; + xrb->origin3 = NULL; + xrb->origin4 = NULL; } return GL_TRUE; @@ -362,16 +369,13 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb) { XMesaBuffer b = XMESA_BUFFER(fb); -#ifdef XFree86Server - int client = 0; - if (b->frontxrb->drawable) - client = CLIENT_ID(b->frontxrb->drawable->id); -#endif - if (b->num_alloced > 0) { /* If no other buffer uses this X colormap then free the colors. */ if (!xmesa_find_buffer(b->display, b->cmap, b)) { #ifdef XFree86Server + int client = 0; + if (b->frontxrb->drawable) + client = CLIENT_ID(b->frontxrb->drawable->id); (void)FreeColors(b->cmap, client, b->num_alloced, b->alloced_colors, 0); #else |