summaryrefslogtreecommitdiff
path: root/src/mesa
diff options
context:
space:
mode:
authorDave Airlie <airlied@linux.ie>2009-03-22 12:01:21 +1000
committerDave Airlie <airlied@linux.ie>2009-03-22 12:01:21 +1000
commit2d26d4ac66e8c3e48b73d3e172d0e0d2a2ad31a5 (patch)
tree6b0d899db605b28e10b9b953d181b09bf40bb472 /src/mesa
parent407e8ae5b167b0193e1e5b1266a5d61ed836dfb5 (diff)
parent699897e81c623e53be51fba0488f535b0a8d7761 (diff)
Merge remote branch 'origin/master' into HEAD
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i915/i830_texstate.c10
-rw-r--r--src/mesa/drivers/dri/i915/i915_texstate.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c24
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex.h2
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_image.c18
-rw-r--r--src/mesa/shader/prog_optimize.c428
-rw-r--r--src/mesa/shader/prog_statevars.c13
-rw-r--r--src/mesa/shader/prog_statevars.h1
-rw-r--r--src/mesa/shader/slang/slang_codegen.c2
-rw-r--r--src/mesa/state_tracker/st_atom_rasterizer.c2
-rw-r--r--src/mesa/state_tracker/st_atom_stipple.c38
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c4
-rw-r--r--src/mesa/state_tracker/st_context.h3
-rw-r--r--src/mesa/state_tracker/st_mesa_to_tgsi.c179
15 files changed, 671 insertions, 65 deletions
diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c
index c718bb0055..df43b779a7 100644
--- a/src/mesa/drivers/dri/i915/i830_texstate.c
+++ b/src/mesa/drivers/dri/i915/i830_texstate.c
@@ -38,7 +38,7 @@
static GLuint
-translate_texture_format(GLuint mesa_format)
+translate_texture_format(GLuint mesa_format, GLuint internal_format)
{
switch (mesa_format) {
case MESA_FORMAT_L8:
@@ -56,7 +56,10 @@ translate_texture_format(GLuint mesa_format)
case MESA_FORMAT_ARGB4444:
return MAPSURF_16BIT | MT_16BIT_ARGB4444;
case MESA_FORMAT_ARGB8888:
- return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ if (internal_format == GL_RGB)
+ return MAPSURF_32BIT | MT_32BIT_XRGB8888;
+ else
+ return MAPSURF_32BIT | MT_32BIT_ARGB8888;
case MESA_FORMAT_YCBCR_REV:
return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
case MESA_FORMAT_YCBCR:
@@ -162,7 +165,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
0, intelObj->
firstLevel);
- format = translate_texture_format(firstImage->TexFormat->MesaFormat);
+ format = translate_texture_format(firstImage->TexFormat->MesaFormat,
+ firstImage->InternalFormat);
pitch = intelObj->mt->pitch * intelObj->mt->cpp;
}
diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
index adbb52a3a3..6d25f8dd8e 100644
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -37,7 +37,8 @@
static GLuint
-translate_texture_format(GLuint mesa_format, GLenum DepthMode)
+translate_texture_format(GLuint mesa_format, GLuint internal_format,
+ GLenum DepthMode)
{
switch (mesa_format) {
case MESA_FORMAT_L8:
@@ -55,7 +56,10 @@ translate_texture_format(GLuint mesa_format, GLenum DepthMode)
case MESA_FORMAT_ARGB4444:
return MAPSURF_16BIT | MT_16BIT_ARGB4444;
case MESA_FORMAT_ARGB8888:
- return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ if (internal_format == GL_RGB)
+ return MAPSURF_32BIT | MT_32BIT_XRGB8888;
+ else
+ return MAPSURF_32BIT | MT_32BIT_ARGB8888;
case MESA_FORMAT_YCBCR_REV:
return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
case MESA_FORMAT_YCBCR:
@@ -173,7 +177,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
firstLevel);
format = translate_texture_format(firstImage->TexFormat->MesaFormat,
- tObj->DepthMode);
+ firstImage->InternalFormat,
+ tObj->DepthMode);
pitch = intelObj->mt->pitch * intelObj->mt->cpp;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 9b320480b6..e6113eff87 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -69,7 +69,8 @@ static GLuint translate_tex_target( GLenum target )
}
-static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
+static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
+ GLenum depth_mode )
{
switch( mesa_format ) {
case MESA_FORMAT_L8:
@@ -89,10 +90,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
return BRW_SURFACEFORMAT_R8G8B8_UNORM;
case MESA_FORMAT_ARGB8888:
- return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ if (internal_format == GL_RGB)
+ return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
case MESA_FORMAT_RGBA8888_REV:
- return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+ if (internal_format == GL_RGB)
+ return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
+ else
+ return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
case MESA_FORMAT_RGB565:
return BRW_SURFACEFORMAT_B5G6R5_UNORM;
@@ -161,7 +168,7 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
struct brw_wm_surface_key {
GLenum target, depthmode;
dri_bo *bo;
- GLint format;
+ GLint format, internal_format;
GLint first_level, last_level;
GLint width, height, depth;
GLint pitch, cpp;
@@ -199,9 +206,11 @@ brw_create_texture_surface( struct brw_context *brw,
surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
surf.ss0.surface_type = translate_tex_target(key->target);
-
- if (key->bo)
- surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode);
+ if (key->bo) {
+ surf.ss0.surface_format = translate_tex_format(key->format,
+ key->internal_format,
+ key->depthmode);
+ }
else {
switch (key->depth) {
case 32:
@@ -278,6 +287,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.offset = intelObj->textureOffset;
} else {
key.format = firstImage->TexFormat->MesaFormat;
+ key.internal_format = firstImage->InternalFormat;
key.pitch = intelObj->mt->pitch;
key.depth = firstImage->Depth;
key.bo = intelObj->mt->region->buffer;
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index e8c074712c..d20ea15187 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -211,6 +211,7 @@ static const __DRItexOffsetExtension intelTexOffsetExtension = {
static const __DRItexBufferExtension intelTexBufferExtension = {
{ __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
intelSetTexBuffer,
+ intelSetTexBuffer2,
};
static const __DRIextension *intelScreenExtensions[] = {
diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h
index 742ccc043a..f5372d82fb 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.h
+++ b/src/mesa/drivers/dri/intel/intel_tex.h
@@ -149,6 +149,8 @@ void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch);
void intelSetTexBuffer(__DRIcontext *pDRICtx,
GLint target, __DRIdrawable *pDraw);
+void intelSetTexBuffer2(__DRIcontext *pDRICtx,
+ GLint target, GLint format, __DRIdrawable *pDraw);
GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 943636c37b..e902187637 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -714,7 +714,9 @@ intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
}
void
-intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+ GLint glx_texture_format,
+ __DRIdrawable *dPriv)
{
struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
struct intel_context *intel = pDRICtx->driverPrivate;
@@ -745,7 +747,10 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
type = GL_BGRA;
format = GL_UNSIGNED_BYTE;
- internalFormat = (rb->region->cpp == 3 ? 3 : 4);
+ if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
+ internalFormat = GL_RGB;
+ else
+ internalFormat = GL_RGBA;
mt = intel_miptree_create_for_region(intel, target,
internalFormat,
@@ -785,3 +790,12 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
_mesa_unlock_texture(&intel->ctx, texObj);
}
+
+void
+intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+ /* The old interface didn't have the format argument, so copy our
+ * implementation's behavior at the time.
+ */
+ intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
+}
diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c
index ec06da141d..458a69f70b 100644
--- a/src/mesa/shader/prog_optimize.c
+++ b/src/mesa/shader/prog_optimize.c
@@ -33,6 +33,9 @@
#include "prog_print.h"
+#define MAX_LOOP_NESTING 50
+
+
static GLboolean dbg = GL_FALSE;
@@ -76,6 +79,37 @@ remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
/**
+ * Remap register indexes according to map.
+ * \param prog the program to search/replace
+ * \param file the type of register file to search/replace
+ * \param map maps old register indexes to new indexes
+ */
+static void
+replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
+{
+ GLuint i;
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ struct prog_instruction *inst = prog->Instructions + i;
+ const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint j;
+ for (j = 0; j < numSrc; j++) {
+ if (inst->SrcReg[j].File == file) {
+ GLuint index = inst->SrcReg[j].Index;
+ ASSERT(map[index] >= 0);
+ inst->SrcReg[j].Index = map[index];
+ }
+ }
+ if (inst->DstReg.File == file) {
+ const GLuint index = inst->DstReg.Index;
+ ASSERT(map[index] >= 0);
+ inst->DstReg.Index = map[index];
+ }
+ }
+}
+
+
+/**
* Consolidate temporary registers to use low numbers. For example, if the
* shader only uses temps 4, 5, 8, replace them with 0, 1, 2.
*/
@@ -83,7 +117,7 @@ static void
_mesa_consolidate_registers(struct gl_program *prog)
{
GLboolean tempUsed[MAX_PROGRAM_TEMPS];
- GLuint tempMap[MAX_PROGRAM_TEMPS];
+ GLint tempMap[MAX_PROGRAM_TEMPS];
GLuint tempMax = 0, i;
if (dbg) {
@@ -92,6 +126,10 @@ _mesa_consolidate_registers(struct gl_program *prog)
memset(tempUsed, 0, sizeof(tempUsed));
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ tempMap[i] = -1;
+ }
+
/* set tempUsed[i] if temporary [i] is referenced */
for (i = 0; i < prog->NumInstructions; i++) {
const struct prog_instruction *inst = prog->Instructions + i;
@@ -132,26 +170,8 @@ _mesa_consolidate_registers(struct gl_program *prog)
}
}
- /* now replace occurances of old temp indexes with new indexes */
- for (i = 0; i < prog->NumInstructions; i++) {
- struct prog_instruction *inst = prog->Instructions + i;
- const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint j;
- for (j = 0; j < numSrc; j++) {
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
- GLuint index = inst->SrcReg[j].Index;
- assert(index <= tempMax);
- assert(tempUsed[index]);
- inst->SrcReg[j].Index = tempMap[index];
- }
- }
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- const GLuint index = inst->DstReg.Index;
- assert(tempUsed[index]);
- assert(index <= tempMax);
- inst->DstReg.Index = tempMap[index];
- }
- }
+ replace_regs(prog, PROGRAM_TEMPORARY, tempMap);
+
if (dbg) {
_mesa_printf("Optimize: End register consolidation\n");
}
@@ -409,6 +429,370 @@ _mesa_remove_extra_moves(struct gl_program *prog)
}
+/** A live register interval */
+struct interval
+{
+ GLuint Reg; /** The temporary register index */
+ GLuint Start, End; /** Start/end instruction numbers */
+};
+
+
+/** A list of register intervals */
+struct interval_list
+{
+ GLuint Num;
+ struct interval Intervals[MAX_PROGRAM_TEMPS];
+};
+
+
+static void
+append_interval(struct interval_list *list, const struct interval *inv)
+{
+ list->Intervals[list->Num++] = *inv;
+}
+
+
+/** Insert interval inv into list, sorted by interval end */
+static void
+insert_interval_by_end(struct interval_list *list, const struct interval *inv)
+{
+ /* XXX we could do a binary search insertion here since list is sorted */
+ GLint i = list->Num - 1;
+ while (i >= 0 && list->Intervals[i].End > inv->End) {
+ list->Intervals[i + 1] = list->Intervals[i];
+ i--;
+ }
+ list->Intervals[i + 1] = *inv;
+ list->Num++;
+
+#ifdef DEBUG
+ {
+ GLuint i;
+ for (i = 0; i + 1 < list->Num; i++) {
+ ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
+ }
+ }
+#endif
+}
+
+
+/** Remove the given interval from the interval list */
+static void
+remove_interval(struct interval_list *list, const struct interval *inv)
+{
+ /* XXX we could binary search since list is sorted */
+ GLuint k;
+ for (k = 0; k < list->Num; k++) {
+ if (list->Intervals[k].Reg == inv->Reg) {
+ /* found, remove it */
+ ASSERT(list->Intervals[k].Start == inv->Start);
+ ASSERT(list->Intervals[k].End == inv->End);
+ while (k < list->Num - 1) {
+ list->Intervals[k] = list->Intervals[k + 1];
+ k++;
+ }
+ list->Num--;
+ return;
+ }
+ }
+}
+
+
+/** called by qsort() */
+static int
+compare_start(const void *a, const void *b)
+{
+ const struct interval *ia = (const struct interval *) a;
+ const struct interval *ib = (const struct interval *) b;
+ if (ia->Start < ib->Start)
+ return -1;
+ else if (ia->Start > ib->Start)
+ return +1;
+ else
+ return 0;
+}
+
+/** sort the interval list according to interval starts */
+static void
+sort_interval_list_by_start(struct interval_list *list)
+{
+ qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
+#ifdef DEBUG
+ {
+ GLuint i;
+ for (i = 0; i + 1 < list->Num; i++) {
+ ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
+ }
+ }
+#endif
+}
+
+
+/**
+ * Update the intermediate interval info for register 'index' and
+ * instruction 'ic'.
+ */
+static void
+update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)
+{
+ ASSERT(index < MAX_PROGRAM_TEMPS);
+ if (intBegin[index] == -1) {
+ ASSERT(intEnd[index] == -1);
+ intBegin[index] = intEnd[index] = ic;
+ }
+ else {
+ intEnd[index] = ic;
+ }
+}
+
+
+/**
+ * Find the live intervals for each temporary register in the program.
+ * For register R, the interval [A,B] indicates that R is referenced
+ * from instruction A through instruction B.
+ * Special consideration is needed for loops and subroutines.
+ * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
+ */
+static GLboolean
+find_live_intervals(struct gl_program *prog,
+ struct interval_list *liveIntervals)
+{
+ struct loop_info
+ {
+ GLuint Start, End; /**< Start, end instructions of loop */
+ };
+ struct loop_info loopStack[MAX_LOOP_NESTING];
+ GLuint loopStackDepth = 0;
+ GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
+ GLuint i;
+
+ /*
+ * Note: we'll return GL_FALSE below if we find relative indexing
+ * into the TEMP register file. We can't handle that yet.
+ * We also give up on subroutines for now.
+ */
+
+ if (dbg) {
+ _mesa_printf("Optimize: Begin find intervals\n");
+ }
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+ intBegin[i] = intEnd[i] = -1;
+ }
+
+ /* Scan instructions looking for temporary registers */
+ for (i = 0; i < prog->NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->Instructions + i;
+ if (inst->Opcode == OPCODE_BGNLOOP) {
+ loopStack[loopStackDepth].Start = i;
+ loopStack[loopStackDepth].End = inst->BranchTarget;
+ loopStackDepth++;
+ }
+ else if (inst->Opcode == OPCODE_ENDLOOP) {
+ loopStackDepth--;
+ }
+ else if (inst->Opcode == OPCODE_CAL) {
+ return GL_FALSE;
+ }
+ else {
+ const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
+ GLuint j;
+ for (j = 0; j < numSrc; j++) {
+ if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+ const GLuint index = inst->SrcReg[j].Index;
+ if (inst->SrcReg[j].RelAddr)
+ return GL_FALSE;
+ update_interval(intBegin, intEnd, index, i);
+ if (loopStackDepth > 0) {
+ /* extend temp register's interval to end of loop */
+ GLuint loopEnd = loopStack[loopStackDepth - 1].End;
+ update_interval(intBegin, intEnd, index, loopEnd);
+ }
+ }
+ }
+ if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ const GLuint index = inst->DstReg.Index;
+ if (inst->DstReg.RelAddr)
+ return GL_FALSE;
+ update_interval(intBegin, intEnd, index, i);
+ if (loopStackDepth > 0) {
+ /* extend temp register's interval to end of loop */
+ GLuint loopEnd = loopStack[loopStackDepth - 1].End;
+ update_interval(intBegin, intEnd, index, loopEnd);
+ }
+ }
+ }
+ }
+
+ /* Build live intervals list from intermediate arrays */
+ liveIntervals->Num = 0;
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ if (intBegin[i] >= 0) {
+ struct interval inv;
+ inv.Reg = i;
+ inv.Start = intBegin[i];
+ inv.End = intEnd[i];
+ append_interval(liveIntervals, &inv);
+ }
+ }
+
+ /* Sort the list according to interval starts */
+ sort_interval_list_by_start(liveIntervals);
+
+ if (dbg) {
+ /* print interval info */
+ for (i = 0; i < liveIntervals->Num; i++) {
+ const struct interval *inv = liveIntervals->Intervals + i;
+ _mesa_printf("Reg[%d] live [%d, %d]:",
+ inv->Reg, inv->Start, inv->End);
+ if (1) {
+ int j;
+ for (j = 0; j < inv->Start; j++)
+ _mesa_printf(" ");
+ for (j = inv->Start; j <= inv->End; j++)
+ _mesa_printf("x");
+ }
+ _mesa_printf("\n");
+ }
+ }
+
+ return GL_TRUE;
+}
+
+
+static GLuint
+alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS])
+{
+ GLuint k;
+ for (k = 0; k < MAX_PROGRAM_TEMPS; k++) {
+ if (!usedRegs[k]) {
+ usedRegs[k] = GL_TRUE;
+ return k;
+ }
+ }
+ return MAX_PROGRAM_TEMPS;
+}
+
+
+/**
+ * This function implements "Linear Scan Register Allocation" to reduce
+ * the number of temporary registers used by the program.
+ *
+ * We compute the "live interval" for all temporary registers then
+ * examine the overlap of the intervals to allocate new registers.
+ * Basically, if two intervals do not overlap, they can use the same register.
+ */
+static void
+_mesa_reallocate_registers(struct gl_program *prog)
+{
+ struct interval_list liveIntervals;
+ GLint registerMap[MAX_PROGRAM_TEMPS];
+ GLboolean usedRegs[MAX_PROGRAM_TEMPS];
+ GLuint i;
+ GLuint maxTemp = 0;
+
+ if (dbg) {
+ _mesa_printf("Optimize: Begin live-interval register reallocation\n");
+ _mesa_print_program(prog);
+ }
+
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+ registerMap[i] = -1;
+ usedRegs[i] = GL_FALSE;
+ }
+
+ if (!find_live_intervals(prog, &liveIntervals)) {
+ if (dbg)
+ _mesa_printf("Aborting register reallocation\n");
+ return;
+ }
+
+ {
+ struct interval_list activeIntervals;
+ activeIntervals.Num = 0;
+
+ /* loop over live intervals, allocating a new register for each */
+ for (i = 0; i < liveIntervals.Num; i++) {
+ const struct interval *live = liveIntervals.Intervals + i;
+
+ if (dbg)
+ _mesa_printf("Consider register %u\n", live->Reg);
+
+ /* Expire old intervals. Intervals which have ended with respect
+ * to the live interval can have their remapped registers freed.
+ */
+ {
+ GLint j;
+ for (j = 0; j < activeIntervals.Num; j++) {
+ const struct interval *inv = activeIntervals.Intervals + j;
+ if (inv->End >= live->Start) {
+ /* Stop now. Since the activeInterval list is sorted
+ * we know we don't have to go further.
+ */
+ break;
+ }
+ else {
+ /* Interval 'inv' has expired */
+ const GLint regNew = registerMap[inv->Reg];
+ ASSERT(regNew >= 0);
+
+ if (dbg)
+ _mesa_printf(" expire interval for reg %u\n", inv->Reg);
+
+ /* remove interval j from active list */
+ remove_interval(&activeIntervals, inv);
+ j--; /* counter-act j++ in for-loop above */
+
+ /* return register regNew to the free pool */
+ if (dbg)
+ _mesa_printf(" free reg %d\n", regNew);
+ ASSERT(usedRegs[regNew] == GL_TRUE);
+ usedRegs[regNew] = GL_FALSE;
+ }
+ }
+ }
+
+ /* find a free register for this live interval */
+ {
+ const GLuint k = alloc_register(usedRegs);
+ if (k == MAX_PROGRAM_TEMPS) {
+ /* out of registers, give up */
+ return;
+ }
+ registerMap[live->Reg] = k;
+ maxTemp = MAX2(maxTemp, k);
+ if (dbg)
+ _mesa_printf(" remap register %d -> %d\n", live->Reg, k);
+ }
+
+ /* Insert this live interval into the active list which is sorted
+ * by increasing end points.
+ */
+ insert_interval_by_end(&activeIntervals, live);
+ }
+ }
+
+ if (maxTemp + 1 < liveIntervals.Num) {
+ /* OK, we've reduced the number of registers needed.
+ * Scan the program and replace all the old temporary register
+ * indexes with the new indexes.
+ */
+ replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
+
+ prog->NumTemporaries = maxTemp + 1;
+ }
+
+ if (dbg) {
+ _mesa_printf("Optimize: End live-interval register reallocation\n");
+ _mesa_printf("Num temp regs before: %u after: %u\n",
+ liveIntervals.Num, maxTemp + 1);
+ _mesa_print_program(prog);
+ }
+}
+
+
+
+
/**
* Apply optimizations to the given program to eliminate unnecessary
* instructions, temp regs, etc.
@@ -424,4 +808,6 @@ _mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
if (1)
_mesa_consolidate_registers(program);
+ else /*NEW*/
+ _mesa_reallocate_registers(program);
}
diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c
index f51d9e2651..aeb7cf6de2 100644
--- a/src/mesa/shader/prog_statevars.c
+++ b/src/mesa/shader/prog_statevars.c
@@ -506,6 +506,13 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
}
}
return;
+ case STATE_FB_SIZE:
+ value[0] = (GLfloat) (ctx->DrawBuffer->Width - 1);
+ value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1);
+ value[2] = 0.0F;
+ value[3] = 0.0F;
+ return;
+
case STATE_ROT_MATRIX_0:
{
const int unit = (int) state[2];
@@ -628,6 +635,9 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH])
case STATE_PCM_BIAS:
return _NEW_PIXEL;
+ case STATE_FB_SIZE:
+ return _NEW_BUFFERS;
+
default:
/* unknown state indexes are silently ignored and
* no flag set, since it is handled by the driver.
@@ -828,6 +838,9 @@ append_token(char *dst, gl_state_index k)
case STATE_SHADOW_AMBIENT:
append(dst, "CompareFailValue");
break;
+ case STATE_FB_SIZE:
+ append(dst, "FbSize");
+ break;
case STATE_ROT_MATRIX_0:
append(dst, "rotMatrixRow0");
break;
diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h
index d563080db1..1180d9eaa4 100644
--- a/src/mesa/shader/prog_statevars.h
+++ b/src/mesa/shader/prog_statevars.h
@@ -117,6 +117,7 @@ typedef enum gl_state_index_ {
STATE_PCM_SCALE, /**< Post color matrix RGBA scale */
STATE_PCM_BIAS, /**< Post color matrix RGBA bias */
STATE_SHADOW_AMBIENT, /**< ARB_shadow_ambient fail value; token[2] is texture unit index */
+ STATE_FB_SIZE, /**< (width-1, height-1, 0, 0) */
STATE_ROT_MATRIX_0, /**< ATI_envmap_bumpmap, rot matrix row 0 */
STATE_ROT_MATRIX_1, /**< ATI_envmap_bumpmap, rot matrix row 1 */
STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */
diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c
index 8263aae334..a7cfc45e6f 100644
--- a/src/mesa/shader/slang/slang_codegen.c
+++ b/src/mesa/shader/slang/slang_codegen.c
@@ -1441,7 +1441,7 @@ _slang_gen_function_call(slang_assemble_ctx *A, slang_function *fun,
if (A->pragmas->Debug) {
char s[1000];
- snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name);
+ _mesa_snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name);
n->Comment = _slang_strdup(s);
}
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
index ea76487bcf..61687fbc3e 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -79,8 +79,6 @@ static void update_raster_state( struct st_context *st )
memset(raster, 0, sizeof(*raster));
- raster->origin_lower_left = 1; /* Always true for OpenGL */
-
/* _NEW_POLYGON, _NEW_BUFFERS
*/
{
diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c
index f395930ab4..31e124b329 100644
--- a/src/mesa/state_tracker/st_atom_stipple.c
+++ b/src/mesa/state_tracker/st_atom_stipple.c
@@ -39,24 +39,52 @@
#include "pipe/p_defines.h"
+/**
+ * OpenGL's polygon stipple is indexed with window coordinates in which
+ * the origin (0,0) is the lower-left corner of the window.
+ * With Gallium, the origin is the upper-left corner of the window.
+ * To convert GL's polygon stipple to what gallium expects we need to
+ * invert the pattern vertically and rotate the stipple rows according
+ * to the window height.
+ */
+static void
+invert_stipple(GLuint dest[32], const GLuint src[32], GLuint winHeight)
+{
+ GLuint i;
+
+ for (i = 0; i < 32; i++) {
+ dest[i] = src[(winHeight - 1 - i) & 0x1f];
+ }
+}
+
+
+
static void
update_stipple( struct st_context *st )
{
- const GLuint sz = sizeof(st->state.poly_stipple.stipple);
+ const GLuint sz = sizeof(st->state.poly_stipple);
assert(sz == sizeof(st->ctx->PolygonStipple));
- if (memcmp(&st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz)) {
+ if (memcmp(st->state.poly_stipple, st->ctx->PolygonStipple, sz)) {
/* state has changed */
- memcpy(st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz);
- st->pipe->set_polygon_stipple(st->pipe, &st->state.poly_stipple);
+ struct pipe_poly_stipple newStipple;
+
+ memcpy(st->state.poly_stipple, st->ctx->PolygonStipple, sz);
+
+ invert_stipple(newStipple.stipple, st->ctx->PolygonStipple,
+ st->ctx->DrawBuffer->Height);
+
+ st->pipe->set_polygon_stipple(st->pipe, &newStipple);
}
}
+/** Update the stipple when the pattern or window height changes */
const struct st_tracked_state st_update_polygon_stipple = {
"st_update_polygon_stipple", /* name */
{ /* dirty */
- (_NEW_POLYGONSTIPPLE), /* mesa */
+ (_NEW_POLYGONSTIPPLE |
+ _NEW_BUFFERS), /* mesa */
0, /* st */
},
update_stipple /* update */
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index edfa8854d8..311d812ccf 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -467,7 +467,7 @@ st_TexImage(GLcontext * ctx,
*/
if (stObj->pt) {
if (stObj->teximage_realloc ||
- level > stObj->pt->last_level ||
+ level > (GLint) stObj->pt->last_level ||
(stObj->pt->last_level == level &&
stObj->pt->target != PIPE_TEXTURE_CUBE &&
!st_texture_match_image(stObj->pt, &stImage->base,
@@ -803,7 +803,6 @@ st_TexSubimage(GLcontext * ctx,
PIPE_TRANSFER_WRITE,
xoffset, yoffset,
width, height);
- dstRowStride = stImage->transfer->stride;
}
if (!texImage->Data) {
@@ -812,6 +811,7 @@ st_TexSubimage(GLcontext * ctx,
}
src = (const GLubyte *) pixels;
+ dstRowStride = stImage->transfer->stride;
for (i = 0; i++ < depth;) {
if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index d7518ab689..ae8c2978bf 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -93,12 +93,13 @@ struct st_context
struct pipe_constant_buffer constants[2];
struct pipe_framebuffer_state framebuffer;
struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS];
- struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
struct pipe_viewport_state viewport;
GLuint num_samplers;
GLuint num_textures;
+
+ GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */
} state;
struct {
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index cbf3f334c0..ffa607dd87 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -219,8 +219,9 @@ compile_instruction(
const GLuint immediateMapping[],
GLboolean indirectAccess,
GLuint preamble_size,
- GLuint processor,
- GLboolean *insideSubroutine)
+ GLuint procType,
+ GLboolean *insideSubroutine,
+ GLint wposTemp)
{
GLuint i;
struct tgsi_full_dst_register *fulldst;
@@ -247,19 +248,29 @@ compile_instruction(
GLuint j;
fullsrc = &fullinst->FullSrcRegisters[i];
- fullsrc->SrcRegister.File = map_register_file(
- inst->SrcReg[i].File,
- inst->SrcReg[i].Index,
- immediateMapping,
- indirectAccess );
- fullsrc->SrcRegister.Index = map_register_file_index(
- fullsrc->SrcRegister.File,
- inst->SrcReg[i].Index,
- inputMapping,
- outputMapping,
- immediateMapping,
- indirectAccess );
+ if (procType == TGSI_PROCESSOR_FRAGMENT &&
+ inst->SrcReg[i].File == PROGRAM_INPUT &&
+ inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
+ /* special case of INPUT[WPOS] */
+ fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY;
+ fullsrc->SrcRegister.Index = wposTemp;
+ }
+ else {
+ /* any other src register */
+ fullsrc->SrcRegister.File = map_register_file(
+ inst->SrcReg[i].File,
+ inst->SrcReg[i].Index,
+ immediateMapping,
+ indirectAccess );
+ fullsrc->SrcRegister.Index = map_register_file_index(
+ fullsrc->SrcRegister.File,
+ inst->SrcReg[i].Index,
+ inputMapping,
+ outputMapping,
+ immediateMapping,
+ indirectAccess );
+ }
/* swizzle (ext swizzle also depends on negation) */
{
@@ -733,6 +744,111 @@ find_temporaries(const struct gl_program *program,
}
+/**
+ * Find an unused temporary in the tempsUsed array.
+ */
+static int
+find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
+{
+ int i;
+ for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ if (!tempsUsed[i]) {
+ tempsUsed[i] = GL_TRUE;
+ return i;
+ }
+ }
+ return -1;
+}
+
+
+/** helper for building simple TGSI instruction, one src register */
+static void
+build_tgsi_instruction1(struct tgsi_full_instruction *inst,
+ int opcode,
+ int dstFile, int dstIndex, int writemask,
+ int srcFile1, int srcIndex1)
+{
+ *inst = tgsi_default_full_instruction();
+
+ inst->Instruction.Opcode = opcode;
+
+ inst->Instruction.NumDstRegs = 1;
+ inst->FullDstRegisters[0].DstRegister.File = dstFile;
+ inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
+ inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
+
+ inst->Instruction.NumSrcRegs = 1;
+ inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
+ inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
+}
+
+
+/** helper for building simple TGSI instruction, two src registers */
+static void
+build_tgsi_instruction2(struct tgsi_full_instruction *inst,
+ int opcode,
+ int dstFile, int dstIndex, int writemask,
+ int srcFile1, int srcIndex1,
+ int srcFile2, int srcIndex2)
+{
+ *inst = tgsi_default_full_instruction();
+
+ inst->Instruction.Opcode = opcode;
+
+ inst->Instruction.NumDstRegs = 1;
+ inst->FullDstRegisters[0].DstRegister.File = dstFile;
+ inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
+ inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
+
+ inst->Instruction.NumSrcRegs = 2;
+ inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
+ inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
+ inst->FullSrcRegisters[1].SrcRegister.File = srcFile2;
+ inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2;
+}
+
+
+
+/**
+ * Emit the TGSI instructions for inverting the WPOS y coordinate.
+ */
+static int
+emit_inverted_wpos(struct tgsi_token *tokens,
+ int wpos_temp,
+ int winsize_const,
+ int wpos_input,
+ struct tgsi_header *header, int maxTokens)
+{
+ struct tgsi_full_instruction fullinst;
+ int ti = 0;
+
+ /* MOV wpos_temp.xzw, input[wpos]; */
+ build_tgsi_instruction1(&fullinst,
+ TGSI_OPCODE_MOV,
+ TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW,
+ TGSI_FILE_INPUT, 0);
+
+ ti += tgsi_build_full_instruction(&fullinst,
+ &tokens[ti],
+ header,
+ maxTokens - ti);
+
+ /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */
+ build_tgsi_instruction2(&fullinst,
+ TGSI_OPCODE_SUB,
+ TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y,
+ TGSI_FILE_CONSTANT, winsize_const,
+ TGSI_FILE_INPUT, wpos_input);
+
+ ti += tgsi_build_full_instruction(&fullinst,
+ &tokens[ti],
+ header,
+ maxTokens - ti);
+
+ return ti;
+}
+
+
/**
@@ -778,16 +894,34 @@ st_translate_mesa_program(
GLuint ti; /* token index */
struct tgsi_header *header;
struct tgsi_processor *processor;
- struct tgsi_full_instruction fullinst;
GLuint preamble_size = 0;
GLuint immediates[1000];
GLuint numImmediates = 0;
GLboolean insideSubroutine = GL_FALSE;
GLboolean indirectAccess = GL_FALSE;
+ GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
+ GLint wposTemp = -1, winHeightConst = -1;
assert(procType == TGSI_PROCESSOR_FRAGMENT ||
procType == TGSI_PROCESSOR_VERTEX);
+ find_temporaries(program, tempsUsed);
+
+ if (procType == TGSI_PROCESSOR_FRAGMENT) {
+ if (program->InputsRead & FRAG_BIT_WPOS) {
+ /* Fragment program uses fragment position input.
+ * Need to replace instances of INPUT[WPOS] with temp T
+ * where T = INPUT[WPOS] by y is inverted.
+ */
+ static const gl_state_index winSizeState[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
+ winHeightConst = _mesa_add_state_reference(program->Parameters,
+ winSizeState);
+ wposTemp = find_free_temporary(tempsUsed);
+ }
+ }
+
+
*(struct tgsi_version *) &tokens[0] = tgsi_build_version();
header = (struct tgsi_header *) &tokens[1];
@@ -884,11 +1018,9 @@ st_translate_mesa_program(
/* temporary decls */
{
- GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
GLboolean inside_range = GL_FALSE;
GLuint start_range = 0;
- find_temporaries(program, tempsUsed);
tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
if (tempsUsed[i] && !inside_range) {
@@ -1018,7 +1150,17 @@ st_translate_mesa_program(
}
}
+ /* invert WPOS fragment input */
+ if (wposTemp >= 0) {
+ ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst,
+ inputMapping[FRAG_ATTRIB_WPOS],
+ header, maxTokens - ti);
+ preamble_size = 2; /* two instructions added */
+ }
+
for (i = 0; i < program->NumInstructions; i++) {
+ struct tgsi_full_instruction fullinst;
+
compile_instruction(
&program->Instructions[i],
&fullinst,
@@ -1028,7 +1170,8 @@ st_translate_mesa_program(
indirectAccess,
preamble_size,
procType,
- &insideSubroutine );
+ &insideSubroutine,
+ wposTemp);
ti += tgsi_build_full_instruction(
&fullinst,